I'm trying to parse a Word document and reproduce the look of the original file. I'm using PHPWord.
My code:
require 'vendor/autoload.php';
use PhpOffice\PhpWord\IOFactory;
// Directory that holds the Word documents to convert.
$path = '/pathtofiles';

// List the directory without the "." and ".." pseudo-entries. They are
// filtered by name rather than by position, because scandir() sorts
// alphabetically and names starting with characters such as "!" or "-" sort
// before "."; dropping indexes 0 and 1 would then discard real files.
// array_diff() keeps the original scandir() keys.
$listing = is_dir($path) ? scandir($path) : false;
$files = $listing === false ? [] : array_diff($listing, ['.', '..']);

// Maps the value returned by Font::isBold() to a CSS fragment. Array keys are
// cast on lookup: null becomes '', false becomes 0 and true becomes 1, so all
// three need an entry to avoid an "undefined array key" warning.
$bold_style = ['' => '', 0 => '', 1 => ' font-weight:700; '];
/**
 * Converts a Word2007 (.docx) file into a rough HTML approximation.
 *
 * Every top-level element of every section is rendered and then followed by
 * a "<br>". Elements exposing getElements() (e.g. text runs) have their
 * children rendered one by one; other elements exposing getText() are
 * emitted as escaped plain text.
 *
 * @param string $document Path of the file handed to the Word2007 reader.
 *
 * @return string HTML fragment built from the document content.
 */
function readDoc($document)
{
    $reader = IOFactory::createReader('Word2007');
    $doc = $reader->load($document);

    $content = '';
    foreach ($doc->getSections() as $section) {
        foreach ($section->getElements() as $element) {
            // A top-level TextBreak ends up as two "<br>": this one plus the
            // per-element "<br>" appended at the end of the loop body.
            if ($element instanceof \PhpOffice\PhpWord\Element\TextBreak) {
                $content .= '<br>';
            }

            if (method_exists($element, 'getElements')) {
                $content .= readDocRenderChildren($element);
            } elseif (method_exists($element, 'getText')) {
                $text = $element->getText();
                if (is_object($text) && method_exists($text, 'getElements')) {
                    // getText() may hand back a container (e.g. a title built
                    // from a text run) instead of a string.
                    $content .= readDocRenderChildren($text);
                } else {
                    $content .= readDocEscape($text) . ' ';
                }
            }

            $content .= '<br>';
        }
    }

    return $content;
}

/**
 * Renders the direct children of a container element as HTML.
 *
 * @param object $container Element exposing getElements().
 *
 * @return string One styled <span> per text child, "<br>" per line break,
 *                and the raw getContent() value for anything else that has one.
 */
function readDocRenderChildren($container)
{
    $html = '';
    foreach ($container->getElements() as $child) {
        if ($child instanceof \PhpOffice\PhpWord\Element\TextBreak) {
            // Line break inside a paragraph; it has neither getText() nor
            // getContent() and would otherwise be dropped.
            $html .= '<br>';
        } elseif (method_exists($child, 'getText')) {
            $css = method_exists($child, 'getFontStyle')
                ? readDocFontCss($child->getFontStyle())
                : '';
            $open = $css === '' ? '<span>' : '<span style="' . $css . '">';
            $html .= $open . readDocEscape($child->getText()) . '</span> ';
        } elseif (method_exists($child, 'getContent')) {
            // NOTE(review): emitted verbatim as in the original code; confirm
            // that getContent() is meant to be inserted as markup.
            $html .= $child->getContent() . ' ';
        }
    }

    return $html;
}

/**
 * Builds the inline CSS for a font style.
 *
 * @param mixed $fontStyle Font style object, the name of a registered style,
 *                         or null.
 *
 * @return string CSS declarations, or '' when nothing usable is available.
 */
function readDocFontCss($fontStyle)
{
    // A run can reference a named style instead of carrying its own Font.
    if (is_string($fontStyle) && $fontStyle !== '') {
        $fontStyle = \PhpOffice\PhpWord\Style::getStyle($fontStyle);
    }
    if (!$fontStyle instanceof \PhpOffice\PhpWord\Style\Font) {
        return '';
    }

    $declarations = [];

    $size = $fontStyle->getSize();
    if (is_numeric($size)) {
        // A bare number is not a valid CSS length; PHPWord sizes are points.
        $declarations[] = 'font-size:' . $size . 'pt;';
    }

    // isBold() may be true, false or null; only true adds a declaration.
    if ($fontStyle->isBold() === true) {
        $declarations[] = 'font-weight:700;';
    }

    return implode(' ', $declarations);
}

/**
 * Escapes document text for safe insertion into HTML.
 *
 * @param mixed $value Text taken from the document; non-scalar values
 *                     (including null) are rendered as an empty string.
 *
 * @return string HTML-escaped text.
 */
function readDocEscape($value)
{
    $text = is_scalar($value) ? (string) $value : '';

    return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
Is there a better way to extract font styles, line breaks, etc. from the file?