I wrote PHP code that generates a signature image; the image itself is not the problem. The problem is that when I add <html></html> tags at the beginning of the file, nothing that the PHP code produces is shown. If I put HTML tags, or even a single space, in front of the opening PHP tag, the page is empty, but when I remove them everything works fine. I'm not an expert in PHP, so I don't know why.
<?php
// Signature image generator.
//
// Reads the player name from ?name=..., downloads that player's .ini profile
// from the game server over FTP and draws the values onto ./signs/1.png.
//
// On success the response body is raw PNG data. Nothing may be written to the
// output before header()/imagepng() -- not even whitespace or <html> tags in
// front of the opening PHP tag -- otherwise the header cannot be sent and the
// image bytes are corrupted. To show the signature inside an HTML page, put
// the HTML in a separate file and reference this script from an <img> tag.
include_once("functions.php");

$sign = !isset($_GET['s']) ? 1 : $_GET['s'];
// "None" marks a request that did not supply a name.
$name = !isset($_GET['name']) ? "None" : $_GET['name'];
$uname = $name;

// Connection & Website Settings
$ftpad = "";
$ftpuser = "";
$ftppass = "";
$comm = "SAMP";
$weburl = "www.incoming.com";
$usersdir = "/scriptfiles/Users";

// The name becomes part of a remote file path, so reject path separators
// (directory traversal) and control characters (FTP command injection).
$nameIsSafe = is_string($name) && $name !== "" && !preg_match('~[/\\\\\x00-\x1F]~', $name);

$userFound = false;
$str = array();
if ($nameIsSafe && $name != "None")
{
    $ftpcon = ftp_connect($ftpad,4112) or die("Error|I can't connect to the database: $ftpad, contact web master");
    $login = ftp_login($ftpcon,$ftpuser,$ftppass);
    ftp_pasv($ftpcon,true);
    $remoteFile = "$usersdir/$name.ini";
    if (ftp_size($ftpcon,$remoteFile) != -1)
    {
        // Download the profile once into a temp file and parse it from there.
        $tmpFile = "tempsign_".hash('sha256',"$name").".tmp";
        $fhandle = fopen($tmpFile,"w+");
        if ($fhandle !== false)
        {
            $downloaded = ftp_fget($ftpcon,$fhandle,$remoteFile,FTP_ASCII);
            // Close before parsing so the parser sees the complete file.
            fclose($fhandle);
            if ($downloaded)
            {
                // parse_ini_file2() comes from functions.php; presumably it
                // returns an array keyed by setting name -- confirm there.
                $str = parse_ini_file2($tmpFile);
                $userFound = true;
            }
        }
    }
    ftp_close($ftpcon);
}

if (!$userFound)
{
?>
<tr>
<center><td align="center" valign="top"><img src="logo.png" width="400" height="155" alt="logo" /></td></center>
</tr>
<style type="text/css">
body{
background-color:#000;
background-image:url(backg.png);
}
</style>
<?php
    echo "<center><br/><br/><br/><br/><font color='#FF3333'>Error | That signature doesn't exist! Possible reasons:<br/><br/></font></center>";
    echo "<center><font color='#0FB9FC'>1. You didn't enter your username in field for that<br/></font></center>";
    echo "<center><font color='#0FB9FC'>2. You entered wrong username(Example Your_Name)<br/></font></center>";
    echo "<center><font color='#0FB9FC'>3. Username that you entered isn't in our databse<br/></font></center>";
    return 1;
}

// Base image plus the player's skin picture (0.jpg is the fallback skin).
$skin = $str['Skin'];
$rImg = ImageCreateFromPNG("./signs/1.png");
if (file_exists("./signs/skins/$skin.jpg"))
{
    $skinImg = ImageCreateFromjpeg("./signs/skins/$skin.jpg");
}
else
{
    $skinImg = ImageCreateFromjpeg("./signs/skins/0.jpg");
}

$cor_green = imagecolorallocate($rImg,69,139,116);
$cor_red = imagecolorallocate($rImg,220,20,65);
$cor_wh = imagecolorallocate($rImg,255,255,255);
$cor_n = imagecolorallocate($rImg,246,74,14);

imagettftext($rImg,12,0,11,39,$cor_n,"font.TTF",urldecode($name));
imagettftext($rImg,12,0,11,79,$cor_n,"font.TTF",urldecode($str['Level']));
imagettftext($rImg,12,0,173,120,$cor_n,"font.TTF",urldecode($str['Points']));
imagettftext($rImg,12,0,173,80,$cor_n,"font.TTF",urldecode($str['Hours']));
imagecopymerge($rImg, $skinImg, 308,6,0,0,80,100,100);

if ($str['Sex'] == 0) { imagettftext($rImg,12,0,10,122,$cor_n,"font.TTF","Male"); }
else if ($str['Sex'] == 1) { imagettftext($rImg,12,0,10,122,$cor_n,"font.TTF","Female"); }

// Staff badges share one position; "Admin" is drawn last, on top of "GameSupport".
if ($str['GM'] > 0) { imagettftext($rImg,13,0,5,161,$cor_green,"font.TTF","GameSupport"); }
if ($str['Admin'] > 0) { imagettftext($rImg,13,0,5,161,$cor_red,"font.TTF","Admin"); }

$leaderFactions = array(
    1 => "LSPD",
    2 => "FBI",
    3 => "CNN",
    4 => "La Cocaina",
    5 => "La Cosa Nostra",
    6 => "Terror Squad Crew",
    7 => "GSF",
    8 => "Yakuza",
    9 => "Russian Mafia",
    10 => "Underground Racers",
    11 => "Hitman Agency",
);
// The member list spells faction 4 differently; that label is kept as it was.
$memberFactions = array(4 => "La Cocaine") + $leaderFactions;

if ($str['Leader'] > 0)
{
    foreach ($leaderFactions as $factionId => $factionLabel)
    {
        if ($str['Leader'] == $factionId)
        {
            imagettftext($rImg,13,0,171,40,$cor_n,"font.TTF",$factionLabel);
            break;
        }
    }
}
else if ($str['Member'] > 0)
{
    foreach ($memberFactions as $factionId => $factionLabel)
    {
        if ($str['Member'] == $factionId)
        {
            // LSPD members are the only entry drawn in white.
            $factionColor = ($factionId == 1) ? $cor_wh : $cor_n;
            imagettftext($rImg,13,0,171,40,$factionColor,"font.TTF",$factionLabel);
            break;
        }
    }
}
else
{
    imagettftext($rImg,13,0,171,40,$cor_n,"font.TTF","Civil");
}

// Send exactly one image: the skin was already merged into $rImg above, so
// writing $skinImg as well would append a second file to the PNG stream.
header('Content-type: image/png');
imagepng($rImg);
?>
</style>
Please check the code below:
<?php
// Signature image generator (with error reporting switched on for debugging).
//
// Reads the player name from ?name=..., downloads that player's .ini profile
// from the game server over FTP and draws the values onto ./signs/1.png.
//
// On success the response body is raw PNG data. Nothing may be written to the
// output before header()/imagepng() -- not even whitespace or <html> tags in
// front of the opening PHP tag -- otherwise the header cannot be sent and the
// image bytes are corrupted. To show the signature inside an HTML page, put
// the HTML in a separate file and reference this script from an <img> tag.

//Error reporting section
// While display_errors is on, any notice or warning is printed into the
// response and breaks the PNG; turn it off again once the script works.
ini_set('display_errors', 1);
ini_set('display_startup_errors', 1);
error_reporting(E_ALL);
include_once("functions.php");

$sign = !isset($_GET['s']) ? 1 : $_GET['s'];
// "None" marks a request that did not supply a name.
$name = !isset($_GET['name']) ? "None" : $_GET['name'];
$uname = $name;

// Connection & Website Settings
$ftpad = "";
$ftpuser = "";
$ftppass = "";
$comm = "SAMP";
$weburl = "www.incoming.com";
$usersdir = "/scriptfiles/Users";

// The name becomes part of a remote file path, so reject path separators
// (directory traversal) and control characters (FTP command injection).
$nameIsSafe = is_string($name) && $name !== "" && !preg_match('~[/\\\\\x00-\x1F]~', $name);

$userFound = false;
$str = array();
if ($nameIsSafe && $name != "None")
{
    $ftpcon = ftp_connect($ftpad,4112) or die("Error|I can't connect to the database: $ftpad, contact web master");
    $login = ftp_login($ftpcon,$ftpuser,$ftppass);
    ftp_pasv($ftpcon,true);
    $remoteFile = "$usersdir/$name.ini";
    if (ftp_size($ftpcon,$remoteFile) != -1)
    {
        // Download the profile once into a temp file and parse it from there.
        $tmpFile = "tempsign_".hash('sha256',"$name").".tmp";
        $fhandle = fopen($tmpFile,"w+");
        if ($fhandle !== false)
        {
            $downloaded = ftp_fget($ftpcon,$fhandle,$remoteFile,FTP_ASCII);
            // Close before parsing so the parser sees the complete file.
            fclose($fhandle);
            if ($downloaded)
            {
                // parse_ini_file2() comes from functions.php; presumably it
                // returns an array keyed by setting name -- confirm there.
                $str = parse_ini_file2($tmpFile);
                $userFound = true;
            }
        }
    }
    ftp_close($ftpcon);
}

if (!$userFound)
{
?><tr>
<center><td align="center" valign="top"><img src="logo.png" width="400" height="155" alt="logo" /></td></center>
</tr>
<style type="text/css">
body{
background-color:#000;
background-image:url(backg.png);
}
</style><?php
    echo "<center><br/><br/><br/><br/><font color='#FF3333'>Error | That signature doesn't exist! Possible reasons:<br/><br/></font></center>";
    echo "<center><font color='#0FB9FC'>1. You didn't enter your username in field for that<br/></font></center>";
    echo "<center><font color='#0FB9FC'>2. You entered wrong username(Example Your_Name)<br/></font></center>";
    echo "<center><font color='#0FB9FC'>3. Username that you entered isn't in our databse<br/></font></center>";
    return 1;
}

// Base image plus the player's skin picture (0.jpg is the fallback skin).
$skin = $str['Skin'];
$rImg = ImageCreateFromPNG("./signs/1.png");
if (file_exists("./signs/skins/$skin.jpg"))
{
    $skinImg = ImageCreateFromjpeg("./signs/skins/$skin.jpg");
}
else
{
    $skinImg = ImageCreateFromjpeg("./signs/skins/0.jpg");
}

$cor_green = imagecolorallocate($rImg,69,139,116);
$cor_red = imagecolorallocate($rImg,220,20,65);
$cor_wh = imagecolorallocate($rImg,255,255,255);
$cor_n = imagecolorallocate($rImg,246,74,14);

imagettftext($rImg,12,0,11,39,$cor_n,"font.TTF",urldecode($name));
imagettftext($rImg,12,0,11,79,$cor_n,"font.TTF",urldecode($str['Level']));
imagettftext($rImg,12,0,173,120,$cor_n,"font.TTF",urldecode($str['Points']));
imagettftext($rImg,12,0,173,80,$cor_n,"font.TTF",urldecode($str['Hours']));
imagecopymerge($rImg, $skinImg, 308,6,0,0,80,100,100);

if ($str['Sex'] == 0) { imagettftext($rImg,12,0,10,122,$cor_n,"font.TTF","Male"); }
else if ($str['Sex'] == 1) { imagettftext($rImg,12,0,10,122,$cor_n,"font.TTF","Female"); }

// Staff badges share one position; "Admin" is drawn last, on top of "GameSupport".
if ($str['GM'] > 0) { imagettftext($rImg,13,0,5,161,$cor_green,"font.TTF","GameSupport"); }
if ($str['Admin'] > 0) { imagettftext($rImg,13,0,5,161,$cor_red,"font.TTF","Admin"); }

$leaderFactions = array(
    1 => "LSPD",
    2 => "FBI",
    3 => "CNN",
    4 => "La Cocaina",
    5 => "La Cosa Nostra",
    6 => "Terror Squad Crew",
    7 => "GSF",
    8 => "Yakuza",
    9 => "Russian Mafia",
    10 => "Underground Racers",
    11 => "Hitman Agency",
);
// The member list spells faction 4 differently; that label is kept as it was.
$memberFactions = array(4 => "La Cocaine") + $leaderFactions;

if ($str['Leader'] > 0)
{
    foreach ($leaderFactions as $factionId => $factionLabel)
    {
        if ($str['Leader'] == $factionId)
        {
            imagettftext($rImg,13,0,171,40,$cor_n,"font.TTF",$factionLabel);
            break;
        }
    }
}
else if ($str['Member'] > 0)
{
    foreach ($memberFactions as $factionId => $factionLabel)
    {
        if ($str['Member'] == $factionId)
        {
            // LSPD members are the only entry drawn in white.
            $factionColor = ($factionId == 1) ? $cor_wh : $cor_n;
            imagettftext($rImg,13,0,171,40,$factionColor,"font.TTF",$factionLabel);
            break;
        }
    }
}
else
{
    imagettftext($rImg,13,0,171,40,$cor_n,"font.TTF","Civil");
}

// Send exactly one image: the skin was already merged into $rImg above, so
// writing $skinImg as well would append a second file to the PNG stream.
header('Content-type: image/png');
imagepng($rImg);
?>
I want to be able to upload an MS Word document and export it as a page on my site.
Is there any way to accomplish this?
//FUNCTION :: read a docx file and return the string
/**
 * Extracts the text of a .docx file.
 *
 * Opens the file as a ZIP archive, reads word/document.xml and returns its
 * content with every XML tag removed. No separator is inserted between
 * paragraphs.
 *
 * @param string $filePath Path to the .docx file.
 * @return string The document text, or "" when the file cannot be opened,
 *                has no word/document.xml, or that part is not valid XML.
 */
function readDocx($filePath) {
    $zip = new ZipArchive;
    if (true !== $zip->open($filePath)) {
        return "";
    }
    // getFromName() returns false when the entry is missing.
    $data = $zip->getFromName('word/document.xml');
    $zip->close();
    if ($data === false || $data === '') {
        return "";
    }
    // loadXML() is an instance method; calling it statically fails on PHP 8.
    // LIBXML_NOENT is deliberately not passed: it expands entities, which
    // lets an uploaded document read local files (XXE).
    $xml = new DOMDocument();
    if (!$xml->loadXML($data, LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET)) {
        return "";
    }
    return strip_tags($xml->saveXML());
}
ZipArchive and DOMDocument are both bundled with PHP, so you don't need to install, include, or require any additional libraries.
One may use PHPDocX.
It has support for practically all HTML CSS styles. Moreover you may use templates to add extra formatting to your HTML via the replaceTemplateVariableByHTML.
The HTML methods of PHPDocX also allow for the direct use of Word styles. You may use something like this:
// Embed the HTML in $myHTML; the 'tableStyle' option names the Word style to apply to its tables.
$docx->embedHTML($myHTML, array('tableStyle' => 'MediumGrid3-accent5PHPDOCX'));
If you want that all your tables use the MediumGrid3-accent5 Word style. The embedHTML method as well as its version for templates (replaceTemplateVariableByHTML) preserve inheritance, meaning by that that you may use a predefined Word style and override with CSS any of its properties.
You may also extract selected parts of your HTML using 'JQuery type' selectors.
You can convert Word docx documents to HTML using the Print2flash library. Here is a PHP excerpt from my client's site which converts a document to HTML:
// const.php comes from the Print2flash SDK; presumably it defines the HTML5 constant used below -- confirm.
include("const.php");
// Create the Print2flash server object through COM.
$p2fServ = new COM("Print2Flash4.Server2");
// Select HTML5 as the output document type on the default profile.
$p2fServ->DefaultProfile->DocumentType=HTML5;
// Convert the Word file at $wordfile and write the result to $htmlFile.
$p2fServ->ConvertFile($wordfile,$htmlFile);
It converts a document which path is specified in $wordfile variable to a html page file specified by $htmlFile variable. All formatting, hyperlinks and charts are retained. You can get the required const.php file altogether with a fuller sample from Print2flash SDK.
This is a workaround based on David Lin's answer above.
Removing the "w:" prefix from the tags in a docx's XML leaves behind HTML-like tags.
/**
 * Returns the XML of a .docx file's main part with the "w:" prefix removed
 * from its tags and attributes, which leaves HTML-like tag names.
 *
 * Only markup is rewritten; text such as "Review: done" is left untouched.
 *
 * @param string $filePath Path to the .docx file.
 * @return string The rewritten XML, or "" when the file cannot be opened,
 *                has no word/document.xml, or that part is not valid XML.
 */
function readDocx($filePath) {
    $zip = new ZipArchive;
    if (true !== $zip->open($filePath)) {
        return "";
    }
    // getFromName() returns false when the entry is missing.
    $data = $zip->getFromName('word/document.xml');
    $zip->close();
    if ($data === false || $data === '') {
        return "";
    }
    // LIBXML_NOENT is deliberately not passed: it expands entities, which
    // lets an uploaded document read local files (XXE).
    $xml = new DOMDocument("1.0", "utf-8");
    if (!$xml->loadXML($data, LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_PARSEHUGE | LIBXML_NONET)) {
        return "";
    }
    $xml->encoding = "utf-8";
    $output = $xml->saveXML();
    // Strip the prefix inside tags only: "w:" must directly follow "<", "/"
    // or whitespace, i.e. start an element or attribute name.
    return preg_replace_callback(
        '/<[^>]+>/',
        function ($tag) {
            return preg_replace('/(?<=[<\/\s])w:/', '', $tag[0]);
        },
        $output
    );
}
OK, I'm very late, but I thought I'd post this to save you all some time.
This is some PHP code I have put together to read not just the text from a docx but the images too. It does not currently support floating images or text, but what I have done so far is a big step forward from what has already been posted here. Note that you need to change https://example.co.uk to YOUR domain name.
<?php
/**
 * Pairs an image entry inside the .docx archive with the location of its
 * extracted copy.
 */
class Docx_ws_imglnk
{
    /** @var string Name of the image entry inside the archive. */
    public $originalpath = '';

    /** @var string Location of the extracted copy of the image. */
    public $extractedpath = '';
}
/**
 * One relationship entry: an id together with the target it points to.
 */
class Docx_ws_rel
{
    /** @var string Value of the entry's "Id" attribute. */
    public $Id = '';

    /** @var string Value of the entry's "Target" attribute. */
    public $Target = '';
}
/**
 * A style definition: its id, its type and the colour used for it.
 */
class Docx_ws_def
{
    /** @var string Style identifier. */
    public $styleId = '';

    /** @var string Style type. */
    public $type = '';

    /** @var string Hex colour without the leading "#"; defaults to black. */
    public $color = '000000';
}
/**
 * One paragraph: the items collected for it and its text.
 */
class Docx_p_def
{
    /** @var array Items describing the paragraph. */
    public $data = array();

    /** @var string Text of the paragraph. */
    public $text = "";
}
/**
 * A single named item attached to a paragraph.
 */
class Docx_p_item
{
    /** @var string Item name. */
    public $name = "";

    /** @var string Item value. */
    public $value = "";

    /** @var string Inline style text belonging to the item. */
    public $innerstyle = "";

    /** @var string Item type; "text" unless set otherwise. */
    public $type = "text";
}
/**
 * Loads a .docx archive and renders its paragraphs and inline images as HTML.
 *
 * Usage: call setFile(), check get_errors(), then processDocument() followed
 * by to_html(). Images found in the archive are written to the $saveimgpath
 * directory next to this file.
 */
class Docx_reader {
    /** @var string|false XML of word/document.xml, or false when nothing is loaded. */
    private $fileData = false;
    /** @var string[] Messages describing why loading failed. */
    private $errors = array();
    /** @var Docx_ws_rel[] Entries of word/_rels/document.xml.rels. */
    public $rels = array();
    /** @var Docx_ws_imglnk[] Images extracted from the archive. */
    public $imglnks = array();
    /** @var Docx_ws_def[] Styles declared in word/styles.xml. */
    public $styles = array();
    /** @var DOMDocument|null Parsed word/document.xml. */
    public $document = null;
    /** @var Docx_p_def[] Paragraphs collected by processDocument(). */
    public $paragraphs = array();
    /** @var string Path of the loaded file. */
    public $path = '';
    /** @var string Directory (relative to this file) that receives extracted images. */
    private $saveimgpath = 'docimages';

    public function __construct() {
    }

    /**
     * Opens the archive and reads images, relationships, styles and the
     * main document part.
     *
     * @param string $file Path to the .docx file.
     * @return string|false XML of word/document.xml, or false on failure
     *                      (the reason is appended to the error list).
     */
    private function load($file) {
        if (!file_exists($file)) {
            $this->errors[] = 'File does not exist.';
            return false;
        }
        $zip = new ZipArchive();
        $openedZip = $zip->open($file);
        if ($openedZip !== true) {
            $this->errors[] = $this->describeZipError($openedZip);
            return false;
        }
        $this->path = $file;
        $this->extractImages($zip);
        $this->loadRelationships($zip);
        $this->loadStyles($zip);
        $documentXml = $this->loadDocument($zip);
        $zip->close();
        return $documentXml;
    }

    /** Maps a ZipArchive::open() error code to a message. */
    private function describeZipError($code) {
        $messages = array(
            ZipArchive::ER_EXISTS => 'File exists.',
            ZipArchive::ER_INCONS => 'Inconsistent zip file.',
            ZipArchive::ER_MEMORY => 'Malloc failure.',
            ZipArchive::ER_NOENT => 'No such file.',
            ZipArchive::ER_NOZIP => 'File is not a zip archive.',
            ZipArchive::ER_OPEN => 'Could not open file.',
            ZipArchive::ER_READ => 'Read error.',
            ZipArchive::ER_SEEK => 'Seek error.',
        );
        // Always report something, so callers never proceed as if loading worked.
        return isset($messages[$code]) ? $messages[$code] : 'Could not open zip archive (code ' . $code . ').';
    }

    /** Saves every image in the archive to the image directory and records it. */
    private function extractImages($zip) {
        $targetDir = dirname(__FILE__) . '/' . $this->saveimgpath;
        if (!is_dir($targetDir) && !mkdir($targetDir, 0755, true)) {
            // Best effort: the document is still rendered, just without images.
            return;
        }
        for ($i = 0; $i < $zip->numFiles; $i++) {
            $entry = $zip->statIndex($i);
            if ($entry === false || !preg_match('/[^\s]+\.(jpg|jpeg|png|gif|bmp)$/i', $entry['name'])) {
                continue;
            }
            // basename() keeps archive entries from writing outside $targetDir.
            $localPath = $targetDir . '/' . basename($entry['name']);
            if (file_put_contents($localPath, $zip->getFromIndex($i)) === false) {
                continue;
            }
            $imglnk = new Docx_ws_imglnk;
            $imglnk->originalpath = $entry['name'];
            // Turn the server path into a public URL: "/var/www/x" becomes
            // "/https://example.co.uk/x", then the leading "/" is dropped.
            $imglnk->extractedpath = substr(str_replace('var/www/', 'https://example.co.uk/', $localPath), 1);
            array_push($this->imglnks, $imglnk);
        }
    }

    /** Parses an XML string; returns null when it is missing or malformed. */
    private function parseXml($xml) {
        if ($xml === false || $xml === '') {
            return null;
        }
        $doc = new DOMDocument();
        if (!$doc->loadXML($xml) || $doc->documentElement === null) {
            return null;
        }
        return $doc;
    }

    /** Returns the attribute with the given local name (any prefix), or ''. */
    private function attr($node, $localName) {
        if ($node === null || !$node->hasAttributes()) {
            return '';
        }
        foreach ($node->attributes as $attribute) {
            if ($attribute->localName === $localName) {
                return (string) $attribute->nodeValue;
            }
        }
        return '';
    }

    /** Reads the Id/Target pairs from word/_rels/document.xml.rels. */
    private function loadRelationships($zip) {
        $doc = $this->parseXml($zip->getFromName('word/_rels/document.xml.rels'));
        if ($doc === null) {
            return;
        }
        foreach ($doc->documentElement->childNodes as $childnode) {
            if (!$childnode->hasAttributes()) {
                continue;
            }
            $rel = new Docx_ws_rel;
            $rel->Id = $this->attr($childnode, 'Id');
            $rel->Target = $this->attr($childnode, 'Target');
            array_push($this->rels, $rel);
        }
    }

    /** Reads the id and type of every w:style in word/styles.xml. */
    private function loadStyles($zip) {
        $doc = $this->parseXml($zip->getFromName('word/styles.xml'));
        if ($doc === null) {
            return;
        }
        foreach ($doc->documentElement->childNodes as $childnode) {
            if ($childnode->nodeName !== 'w:style') {
                continue;
            }
            // One fresh object per w:style element, stored at most once.
            $ws_def = new Docx_ws_def;
            $ws_def->styleId = $this->attr($childnode, 'styleId');
            $ws_def->type = $this->attr($childnode, 'type');
            if ($ws_def->styleId !== '' && $ws_def->type !== '') {
                array_push($this->styles, $ws_def);
            }
        }
    }

    /** Parses word/document.xml into $document and returns its XML, or false. */
    private function loadDocument($zip) {
        $xml = $zip->getFromName('word/document.xml');
        $doc = $this->parseXml($xml);
        if ($doc === null) {
            $this->errors[] = 'Could not read word/document.xml.';
            return false;
        }
        $this->document = $doc;
        return $xml;
    }

    /**
     * Loads the given .docx file. Check get_errors() afterwards.
     *
     * @param string $path Path to the .docx file.
     */
    public function setFile($path) {
        $this->fileData = $this->load($path);
    }

    /**
     * @return string|false Document text with all tags removed, or false
     *                      when no document is loaded.
     */
    public function to_plain_text() {
        if ($this->fileData) {
            return strip_tags($this->fileData);
        } else {
            return false;
        }
    }

    /**
     * Walks the body of the loaded document and fills $paragraphs with one
     * Docx_p_def per w:p element. Does nothing when no document is loaded.
     */
    public function processDocument() {
        $this->paragraphs = array();
        if ($this->document === null || $this->document->documentElement === null) {
            return;
        }
        foreach ($this->document->documentElement->childNodes as $childnode) {
            if ($childnode->nodeName !== 'w:body') {
                continue;
            }
            foreach ($childnode->childNodes as $subchildnode) {
                if ($subchildnode->nodeName === 'w:p') {
                    array_push($this->paragraphs, $this->readParagraph($subchildnode));
                }
            }
        }
    }

    /** Builds the paragraph definition for one w:p element. */
    private function readParagraph($paragraphNode) {
        $pdef = new Docx_p_def;
        foreach ($paragraphNode->childNodes as $pchildnode) {
            // Compare the node *name*; passing the node itself to strcmp()
            // is a TypeError on PHP 8.
            if ($pchildnode->nodeName === 'w:pPr') {
                $this->readParagraphProperties($pchildnode, $pdef);
            } elseif ($pchildnode->nodeName === 'w:r') {
                $this->readRun($pchildnode, $pdef);
            }
        }
        return $pdef;
    }

    /** Appends a new item to the paragraph definition. */
    private function addItem($pdef, $name, $value, $innerstyle = '', $type = 'text') {
        $pitem = new Docx_p_item;
        $pitem->name = $name;
        $pitem->value = $value;
        $pitem->innerstyle = $innerstyle;
        $pitem->type = $type;
        array_push($pdef->data, $pitem);
    }

    /** Reads style id, alignment and spacing from a w:pPr element. */
    private function readParagraphProperties($pPr, $pdef) {
        foreach ($pPr->childNodes as $prchildnode) {
            switch ($prchildnode->nodeName) {
                case 'w:pStyle':
                    $this->addItem($pdef, 'styleId', $this->attr($prchildnode, 'val'));
                    break;
                case 'w:jc':
                    $align = $this->attr($prchildnode, 'val');
                    $style = '';
                    if (in_array($align, array('left', 'center', 'right'), true)) {
                        $style = "text-align:" . $align . ";";
                    } elseif ($align === 'both') {
                        $style = "word-spacing:" . 10 . "px;";
                    }
                    $this->addItem($pdef, 'align', $align, $style);
                    break;
                case 'w:spacing':
                    // Missing attributes count as 0.
                    // NOTE(review): values are emitted as px unchanged -- confirm the unit.
                    $before = (int) $this->attr($prchildnode, 'before');
                    $after = (int) $this->attr($prchildnode, 'after');
                    $this->addItem(
                        $pdef,
                        'paragraphSpacing',
                        '',
                        "padding-top:" . $before . "px;" . "padding-bottom:" . $after . "px;"
                    );
                    break;
            }
        }
    }

    /** Reads text, formatting and drawings from a w:r element. */
    private function readRun($run, $pdef) {
        foreach ($run->childNodes as $rchildnode) {
            switch ($rchildnode->nodeName) {
                case 'w:t':
                    $pdef->text .= $rchildnode->nodeValue;
                    if (count($pdef->data) == 0) {
                        $this->addItem($pdef, 'styleId', '');
                    }
                    break;
                case 'w:rPr':
                    $this->readRunProperties($rchildnode, $pdef);
                    break;
                case 'w:drawing':
                    // Drawings are children of runs, not of paragraph properties.
                    $this->readDrawing($rchildnode, $pdef);
                    break;
            }
        }
    }

    /** True unless the toggle element carries an explicit "off" value. */
    private function isToggleOn($node) {
        return !in_array($this->attr($node, 'val'), array('0', 'false', 'off', 'none'), true);
    }

    /** Reads bold, italic, underline and size from a w:rPr element. */
    private function readRunProperties($rPr, $pdef) {
        foreach ($rPr->childNodes as $rPrchildnode) {
            switch ($rPrchildnode->nodeName) {
                case 'w:b':
                    if ($this->isToggleOn($rPrchildnode)) {
                        $this->addItem($pdef, 'textBold', '', "font-weight: bold;");
                    }
                    break;
                case 'w:i':
                    if ($this->isToggleOn($rPrchildnode)) {
                        $this->addItem($pdef, 'textItalic', '', "font-style: italic;");
                    }
                    break;
                case 'w:u':
                    if ($this->isToggleOn($rPrchildnode)) {
                        $this->addItem($pdef, 'textUnderline', '', "text-decoration: underline;");
                    }
                    break;
                case 'w:sz':
                    $sz = $this->attr($rPrchildnode, 'val');
                    $this->addItem($pdef, 'textSize', $sz === '' ? 0 : $sz);
                    break;
            }
        }
    }

    /** Reads the size and the embedded image id from a w:drawing element. */
    private function readDrawing($drawing, $pdef) {
        $style = '';
        $extent = $drawing->getElementsByTagName('extent')->item(0);
        if ($extent !== null) {
            // 9525 EMU per pixel (914400 EMU per inch at 96 px per inch).
            $style .= "width:" . ((int) $this->attr($extent, 'cx') / 9525) . "px;";
            $style .= "height:" . ((int) $this->attr($extent, 'cy') / 9525) . "px;";
        }
        $blip = $drawing->getElementsByTagName('blip')->item(0);
        $embed = $blip !== null ? $this->attr($blip, 'embed') : '';
        $this->addItem($pdef, 'drawing', $embed, $style, 'graphic');
    }

    /** Returns the URL of the extracted image for a relationship id, or ''. */
    private function findImageUrl($embedid) {
        if ($embedid === '') {
            return '';
        }
        foreach ($this->rels as $rel) {
            if ($rel->Id !== $embedid) {
                continue;
            }
            foreach ($this->imglnks as $imgpathdef) {
                // Extracted files keep only the entry's base name, so match on that.
                if (basename($imgpathdef->originalpath) === basename($rel->Target)) {
                    return $imgpathdef->extractedpath;
                }
            }
        }
        return '';
    }

    /**
     * Renders the paragraphs collected by processDocument().
     *
     * Each paragraph becomes one <p>; formatting applies to the paragraph as
     * a whole. A paragraph containing a drawing is rendered as an <img>.
     *
     * @return string HTML fragment.
     */
    public function to_html()
    {
        $html = '';
        foreach ($this->paragraphs as $para)
        {
            $styleselect = null;
            $type = 'text';
            $sz = 0;
            $extent = '';
            $embedid = '';
            $css = '';
            $decorations = array();
            foreach ($para->data as $node)
            {
                switch ($node->name) {
                    case 'styleId':
                        foreach ($this->styles as $style) {
                            if ($node->value === $style->styleId) {
                                $styleselect = $style;
                            }
                        }
                        break;
                    case 'align':
                        $css .= $node->innerstyle;
                        break;
                    case 'drawing':
                        $type = $node->type;
                        $extent = $node->innerstyle;
                        $embedid = $node->value;
                        break;
                    case 'textSize':
                        $sz = (float) $node->value;
                        break;
                    case 'textBold':
                    case 'textItalic':
                    case 'textUnderline':
                        // Keyed by name so repeated runs add each rule once.
                        $decorations[$node->name] = $node->innerstyle;
                        break;
                }
            }
            if ($styleselect !== null && $styleselect->color !== '')
            {
                $css .= "color:#" . $styleselect->color . ";";
            }
            if ($sz > 0)
            {
                // w:sz is expressed in half-points.
                $css .= 'font-size:' . ($sz / 2) . 'pt;';
            }
            $css .= implode('', $decorations);

            if ($type === 'text')
            {
                // Escape the text: it comes from an uploaded document.
                $html .= "<p style='" . $css . "'>"
                    . htmlspecialchars($para->text, ENT_QUOTES, 'UTF-8')
                    . "</p>";
            }
            elseif ($type === 'graphic')
            {
                $imglnk = htmlspecialchars($this->findImageUrl($embedid), ENT_QUOTES, 'UTF-8');
                $html .= "<p style='" . $css . "'>"
                    . "<img style='" . $extent . "' alt='image coming soon' src ='" . $imglnk . "'/>"
                    . "</p>";
            }
        }
        return $html;
    }

    /** @return string[] Errors recorded while loading; empty on success. */
    public function get_errors() {
        return $this->errors;
    }

    private function getStyles() {
    }
}
/**
 * Converts the .docx file at $path to HTML and prints it.
 *
 * Nothing is printed when loading the file produced errors.
 *
 * @param string $path Path to the .docx file.
 * @return string Always "".
 */
function getDocX($path)
{
    $reader = new Docx_reader();
    $reader->setFile($path);
    if ($reader->get_errors()) {
        return "";
    }
    $reader->processDocument();
    echo $reader->to_html();
    return "";
}
?>
I want to be able to upload an MS Word document and export it as a page on my site.
Is there any way to accomplish this?
//FUNCTION :: read a docx file and return the string
/**
 * Extracts the text of a .docx file.
 *
 * Opens the file as a ZIP archive, reads word/document.xml and returns its
 * content with every XML tag removed. No separator is inserted between
 * paragraphs.
 *
 * @param string $filePath Path to the .docx file.
 * @return string The document text, or "" when the file cannot be opened,
 *                has no word/document.xml, or that part is not valid XML.
 */
function readDocx($filePath) {
    $zip = new ZipArchive;
    if (true !== $zip->open($filePath)) {
        return "";
    }
    // getFromName() returns false when the entry is missing.
    $data = $zip->getFromName('word/document.xml');
    $zip->close();
    if ($data === false || $data === '') {
        return "";
    }
    // loadXML() is an instance method; calling it statically fails on PHP 8.
    // LIBXML_NOENT is deliberately not passed: it expands entities, which
    // lets an uploaded document read local files (XXE).
    $xml = new DOMDocument();
    if (!$xml->loadXML($data, LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET)) {
        return "";
    }
    return strip_tags($xml->saveXML());
}
ZipArchive and DOMDocument are both bundled with PHP, so you don't need to install, include, or require any additional libraries.
One may use PHPDocX.
It has support for practically all HTML CSS styles. Moreover you may use templates to add extra formatting to your HTML via the replaceTemplateVariableByHTML.
The HTML methods of PHPDocX also allow for the direct use of Word styles. You may use something like this:
// Embed the HTML in $myHTML; the 'tableStyle' option names the Word style to apply to its tables.
$docx->embedHTML($myHTML, array('tableStyle' => 'MediumGrid3-accent5PHPDOCX'));
If you want that all your tables use the MediumGrid3-accent5 Word style. The embedHTML method as well as its version for templates (replaceTemplateVariableByHTML) preserve inheritance, meaning by that that you may use a predefined Word style and override with CSS any of its properties.
You may also extract selected parts of your HTML using 'JQuery type' selectors.
You can convert Word docx documents to html using Print2flash library. Here is an PHP excerpt from my client's site which converts a document to html:
// const.php comes from the Print2flash SDK; presumably it defines the HTML5 constant used below -- confirm.
include("const.php");
// Create the Print2flash server object through COM.
$p2fServ = new COM("Print2Flash4.Server2");
// Select HTML5 as the output document type on the default profile.
$p2fServ->DefaultProfile->DocumentType=HTML5;
// Convert the Word file at $wordfile and write the result to $htmlFile.
$p2fServ->ConvertFile($wordfile,$htmlFile);
It converts a document which path is specified in $wordfile variable to a html page file specified by $htmlFile variable. All formatting, hyperlinks and charts are retained. You can get the required const.php file altogether with a fuller sample from Print2flash SDK.
This is a workaround based on David Lin's answer above.
Removing the "w:" prefix from the tags in a docx's XML leaves behind HTML-like tags.
/**
 * Returns the XML of a .docx file's main part with the "w:" prefix removed
 * from its tags and attributes, which leaves HTML-like tag names.
 *
 * Only markup is rewritten; text such as "Review: done" is left untouched.
 *
 * @param string $filePath Path to the .docx file.
 * @return string The rewritten XML, or "" when the file cannot be opened,
 *                has no word/document.xml, or that part is not valid XML.
 */
function readDocx($filePath) {
    $zip = new ZipArchive;
    if (true !== $zip->open($filePath)) {
        return "";
    }
    // getFromName() returns false when the entry is missing.
    $data = $zip->getFromName('word/document.xml');
    $zip->close();
    if ($data === false || $data === '') {
        return "";
    }
    // LIBXML_NOENT is deliberately not passed: it expands entities, which
    // lets an uploaded document read local files (XXE).
    $xml = new DOMDocument("1.0", "utf-8");
    if (!$xml->loadXML($data, LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_PARSEHUGE | LIBXML_NONET)) {
        return "";
    }
    $xml->encoding = "utf-8";
    $output = $xml->saveXML();
    // Strip the prefix inside tags only: "w:" must directly follow "<", "/"
    // or whitespace, i.e. start an element or attribute name.
    return preg_replace_callback(
        '/<[^>]+>/',
        function ($tag) {
            return preg_replace('/(?<=[<\/\s])w:/', '', $tag[0]);
        },
        $output
    );
}
OK, I'm very late, but I thought I'd post this to save you all some time.
This is some PHP code I have put together to read not just the text from a docx but the images too. It does not currently support floating images or text, but what I have done so far is a big step forward from what has already been posted here. Note that you need to change https://example.co.uk to YOUR domain name.
<?php
/**
 * Pairs an image entry inside the .docx archive with the location of its
 * extracted copy.
 */
class Docx_ws_imglnk
{
    /** @var string Name of the image entry inside the archive. */
    public $originalpath = '';

    /** @var string Location of the extracted copy of the image. */
    public $extractedpath = '';
}
/**
 * One relationship entry: an id together with the target it points to.
 */
class Docx_ws_rel
{
    /** @var string Value of the entry's "Id" attribute. */
    public $Id = '';

    /** @var string Value of the entry's "Target" attribute. */
    public $Target = '';
}
/**
 * A style definition: its id, its type and the colour used for it.
 */
class Docx_ws_def
{
    /** @var string Style identifier. */
    public $styleId = '';

    /** @var string Style type. */
    public $type = '';

    /** @var string Hex colour without the leading "#"; defaults to black. */
    public $color = '000000';
}
/**
 * One paragraph: the items collected for it and its text.
 */
class Docx_p_def
{
    /** @var array Items describing the paragraph. */
    public $data = array();

    /** @var string Text of the paragraph. */
    public $text = "";
}
/**
 * A single named item attached to a paragraph.
 */
class Docx_p_item
{
    /** @var string Item name. */
    public $name = "";

    /** @var string Item value. */
    public $value = "";

    /** @var string Inline style text belonging to the item. */
    public $innerstyle = "";

    /** @var string Item type; "text" unless set otherwise. */
    public $type = "text";
}
/**
 * Reads a .docx archive and renders a simplified HTML version of it.
 *
 * Typical use: setFile($path), check get_errors(), then processDocument()
 * followed by to_html().
 *
 * While loading, embedded images are written to the $saveimgpath directory
 * next to this script, and the relationship, style and main document parts
 * are parsed into the public collections below.
 */
class Docx_reader {
    // XML of word/document.xml after a successful load, false otherwise.
    private $fileData = false;
    // Human-readable messages collected while loading.
    private $errors = array();
    // Docx_ws_rel entries read from word/_rels/document.xml.rels.
    public $rels = array();
    // Docx_ws_imglnk entries, one per image extracted from the archive.
    public $imglnks = array();
    // Docx_ws_def entries read from word/styles.xml.
    public $styles = array();
    // DOMDocument for word/document.xml, or null when not loaded.
    public $document = null;
    // Docx_p_def entries produced by processDocument().
    public $paragraphs = array();
    // Path of the archive that was opened last.
    public $path = '';
    // Directory (relative to this script) that receives extracted images.
    private $saveimgpath = 'docimages';

    public function __construct() {
    }

    /**
     * Opens the archive and populates images, relationships, styles and the
     * main document.
     *
     * @param string $file Path of the .docx file.
     * @return string|false XML of word/document.xml, or false when the
     *                      archive or its main document could not be read
     *                      (a message is then added to the error list).
     */
    private function load($file) {
        if (!file_exists($file)) {
            $this->errors[] = 'File does not exist.';
            return false;
        }

        $zip = new ZipArchive();
        $openedZip = $zip->open($file);
        if ($openedZip !== true) {
            $this->errors[] = $this->describeZipError($openedZip);
            return false;
        }

        $this->path = $file;
        $this->extractImages($zip);
        $this->loadRelationships($zip);
        $this->loadStyles($zip);

        $this->document = $this->loadXmlEntry($zip, 'word/document.xml');
        $zip->close();

        if ($this->document === null) {
            // Without the main part there is nothing to render; record it so
            // callers that check get_errors() do not go on to process null.
            $this->errors[] = 'Could not read word/document.xml.';
            return false;
        }

        return $this->document->saveXML();
    }

    /**
     * Maps a ZipArchive::open() failure code to a message.
     *
     * @param int|false $code Value returned by ZipArchive::open().
     * @return string
     */
    private function describeZipError($code) {
        $messages = array(
            ZipArchive::ER_EXISTS => 'File exists.',
            ZipArchive::ER_INCONS => 'Inconsistent zip file.',
            ZipArchive::ER_MEMORY => 'Malloc failure.',
            ZipArchive::ER_NOENT  => 'No such file.',
            ZipArchive::ER_NOZIP  => 'File is not a zip archive.',
            ZipArchive::ER_OPEN   => 'Could not open file.',
            ZipArchive::ER_READ   => 'Read error.',
            ZipArchive::ER_SEEK   => 'Seek error.',
        );
        $code = (int) $code;

        // Any other code still has to produce an error, otherwise callers
        // would treat the failed open as a success.
        return isset($messages[$code])
            ? $messages[$code]
            : 'Could not open zip archive (error code ' . $code . ').';
    }

    /**
     * Writes every image found in the archive to the image directory and
     * registers it in $this->imglnks.
     *
     * Extraction is best-effort: an image that cannot be written is skipped
     * and simply has no link later on; it does not abort the load.
     *
     * @param ZipArchive $zip Opened archive.
     */
    private function extractImages($zip) {
        $targetDir = dirname(__FILE__) . '/' . $this->saveimgpath;
        if (!is_dir($targetDir) && !mkdir($targetDir, 0755, true) && !is_dir($targetDir)) {
            return;
        }

        for ($i = 0; $i < $zip->numFiles; $i++) {
            $entry = $zip->statIndex($i);
            if ($entry === false || !preg_match('/[^\s]+\.(jpg|jpeg|png|gif|bmp)$/i', $entry['name'])) {
                continue;
            }

            $contents = $zip->getFromIndex($i);
            // basename() keeps only the file name, so an entry name cannot
            // point outside the image directory.
            $savedPath = $targetDir . '/' . basename($entry['name']);
            if ($contents === false || file_put_contents($savedPath, $contents) === false) {
                continue;
            }

            $imglnk = new Docx_ws_imglnk;
            $imglnk->originalpath = $entry['name'];
            // Turn the filesystem path into a URL: the 'var/www/' segment is
            // replaced by the site root and the leading character (the '/'
            // of an absolute path) is dropped.
            // NOTE(review): host and web root are hard-coded - confirm they
            // match the deployment.
            $publicPath = str_replace('var/www/', 'https://example.co.uk/', $savedPath);
            $imglnk->extractedpath = substr($publicPath, 1);
            array_push($this->imglnks, $imglnk);
        }
    }

    /**
     * Parses one XML part of the archive.
     *
     * @param ZipArchive $zip       Opened archive.
     * @param string     $entryName Name of the part inside the archive.
     * @return DOMDocument|null Parsed part, or null when the part is absent,
     *                          empty or not well-formed (the latter two add
     *                          an error message).
     */
    private function loadXmlEntry($zip, $entryName) {
        $index = $zip->locateName($entryName);
        if ($index === false) {
            return null;
        }

        $xml = $zip->getFromIndex($index);
        if ($xml === false || $xml === '') {
            $this->errors[] = 'Could not read ' . $entryName . '.';
            return null;
        }

        $dom = new DOMDocument();
        // Collect parser errors internally instead of emitting warnings.
        $previous = libxml_use_internal_errors(true);
        $loaded = $dom->loadXML($xml);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if (!$loaded || $dom->documentElement === null) {
            $this->errors[] = 'Could not parse ' . $entryName . '.';
            return null;
        }

        return $dom;
    }

    /**
     * Reads the Id/Target pairs of word/_rels/document.xml.rels into
     * $this->rels.
     *
     * @param ZipArchive $zip Opened archive.
     */
    private function loadRelationships($zip) {
        $doc = $this->loadXmlEntry($zip, 'word/_rels/document.xml.rels');
        if ($doc === null) {
            return;
        }

        foreach ($doc->documentElement->childNodes as $childnode) {
            if (!$childnode->hasAttributes()) {
                continue;
            }
            $rel = new Docx_ws_rel;
            foreach ($childnode->attributes as $attrNode) {
                if ($attrNode->nodeName === 'Id') {
                    $rel->Id = $attrNode->nodeValue;
                } elseif ($attrNode->nodeName === 'Target') {
                    $rel->Target = $attrNode->nodeValue;
                }
            }
            array_push($this->rels, $rel);
        }
    }

    /**
     * Reads the id and type of every w:style in word/styles.xml into
     * $this->styles.
     *
     * @param ZipArchive $zip Opened archive.
     */
    private function loadStyles($zip) {
        $doc = $this->loadXmlEntry($zip, 'word/styles.xml');
        if ($doc === null) {
            return;
        }

        foreach ($doc->documentElement->childNodes as $childnode) {
            if ($childnode->nodeName !== 'w:style') {
                continue;
            }
            $ws_def = new Docx_ws_def;
            foreach ($childnode->attributes as $item) {
                if ($item->nodeName === 'w:styleId') {
                    $ws_def->styleId = $item->nodeValue;
                } elseif ($item->nodeName === 'w:type') {
                    $ws_def->type = $item->nodeValue;
                }
            }
            // Register the style only here, once per w:style element, and
            // only when both attributes were present.
            if ((string) $ws_def->styleId !== '' && (string) $ws_def->type !== '') {
                array_push($this->styles, $ws_def);
            }
        }
    }

    /**
     * Loads a .docx file, replacing whatever a previous call loaded.
     *
     * @param string $path Path of the .docx file.
     */
    public function setFile($path) {
        // Start from a clean state so repeated calls do not accumulate data.
        $this->errors = array();
        $this->rels = array();
        $this->imglnks = array();
        $this->styles = array();
        $this->paragraphs = array();
        $this->document = null;
        $this->path = '';
        $this->fileData = $this->load($path);
    }

    /**
     * Returns the main document with all markup stripped.
     *
     * @return string|false Text content, or false when no document is loaded.
     */
    public function to_plain_text() {
        if ($this->fileData) {
            return strip_tags($this->fileData);
        } else {
            return false;
        }
    }

    /**
     * Walks w:body of the loaded document and appends one Docx_p_def per
     * w:p element to $this->paragraphs. Does nothing when no document is
     * loaded.
     */
    public function processDocument() {
        if (!($this->document instanceof DOMDocument) || $this->document->documentElement === null) {
            return;
        }

        foreach ($this->document->documentElement->childNodes as $childnode) {
            if ($childnode->nodeName !== 'w:body') {
                continue;
            }
            foreach ($childnode->childNodes as $subchildnode) {
                if ($subchildnode->nodeName === 'w:p') {
                    array_push($this->paragraphs, $this->parseParagraph($subchildnode));
                }
            }
        }
    }

    /**
     * Builds the paragraph definition for one w:p element.
     *
     * @param DOMNode $paragraphNode The w:p element.
     * @return Docx_p_def
     */
    private function parseParagraph($paragraphNode) {
        $pdef = new Docx_p_def;
        foreach ($paragraphNode->childNodes as $pchildnode) {
            if ($pchildnode->nodeName === 'w:pPr') {
                $this->parseParagraphProperties($pchildnode, $pdef);
            } elseif ($pchildnode->nodeName === 'w:r') {
                $this->parseRun($pchildnode, $pdef);
            }
        }
        return $pdef;
    }

    /**
     * Returns the value of an attribute, or '' when the node does not have it.
     *
     * @param DOMNode $node Element to inspect.
     * @param string  $name Attribute name without namespace prefix.
     * @return string
     */
    private function attributeValue($node, $name) {
        if ($node->attributes === null) {
            return '';
        }
        $attr = $node->attributes->getNamedItem($name);
        if ($attr === null) {
            // Fall back to a local-name comparison for prefixed attributes.
            foreach ($node->attributes as $candidate) {
                if ($candidate->localName === $name) {
                    $attr = $candidate;
                    break;
                }
            }
        }
        return $attr === null ? '' : $attr->nodeValue;
    }

    /**
     * Collects style id, alignment, drawing and spacing items from a w:pPr
     * element into $pdef->data.
     *
     * @param DOMNode    $propertiesNode The w:pPr element.
     * @param Docx_p_def $pdef           Paragraph being built.
     */
    private function parseParagraphProperties($propertiesNode, $pdef) {
        foreach ($propertiesNode->childNodes as $prchildnode) {
            $name = $prchildnode->nodeName;

            if ($name === 'w:pStyle') {
                $pitem = new Docx_p_item;
                $pitem->name = 'styleId';
                $pitem->value = $this->attributeValue($prchildnode, 'val');
                array_push($pdef->data, $pitem);
            } elseif ($name === 'w:jc') {
                $pitem = new Docx_p_item;
                $pitem->name = 'align';
                $pitem->value = $this->attributeValue($prchildnode, 'val');
                if (in_array($pitem->value, array('left', 'center', 'right'), true)) {
                    $pitem->innerstyle .= "text-align:" . $pitem->value . ";";
                } elseif ($pitem->value === 'both') {
                    $pitem->innerstyle .= "word-spacing:" . 10 . "px;";
                }
                array_push($pdef->data, $pitem);
            } elseif ($name === 'w:drawing') {
                array_push($pdef->data, $this->buildDrawingItem($prchildnode));
            } elseif ($name === 'w:spacing') {
                $pitem = new Docx_p_item;
                $pitem->name = 'paragraphSpacing';
                // NOTE(review): WordprocessingML expresses before/after in
                // twentieths of a point, yet the raw number is labelled px
                // here; to_html() does not emit this item - confirm intent.
                $bval = $this->attributeValue($prchildnode, 'before');
                if ($bval === '') {
                    $bval = 0;
                }
                $pitem->innerstyle .= "padding-top:" . $bval . "px;";
                $aval = $this->attributeValue($prchildnode, 'after');
                if ($aval === '') {
                    $aval = 0;
                }
                $pitem->innerstyle .= "padding-bottom:" . $aval . "px;";
                array_push($pdef->data, $pitem);
            }
        }
    }

    /**
     * Builds the 'drawing' item for a w:drawing element: its size as CSS and
     * the relationship id of the embedded picture as value.
     *
     * @param DOMElement $drawingNode The w:drawing element.
     * @return Docx_p_item
     */
    private function buildDrawingItem($drawingNode) {
        $pitem = new Docx_p_item;
        $pitem->name = 'drawing';
        $pitem->value = '';
        $pitem->type = 'graphic';

        $extent = $drawingNode->getElementsByTagName('extent')->item(0);
        if ($extent !== null) {
            // Extents are in EMU; 9525 EMU correspond to one pixel at 96 dpi.
            $pcx = (int) $this->attributeValue($extent, 'cx') / 9525;
            $pcy = (int) $this->attributeValue($extent, 'cy') / 9525;
            $pitem->innerstyle .= "width:" . $pcx . "px;";
            $pitem->innerstyle .= "height:" . $pcy . "px;";
        }

        $blip = $drawingNode->getElementsByTagName('blip')->item(0);
        if ($blip !== null) {
            $pitem->value = $this->attributeValue($blip, 'embed');
        }

        return $pitem;
    }

    /**
     * Collects text, drawings and character formatting from a w:r element.
     *
     * @param DOMNode    $runNode The w:r element.
     * @param Docx_p_def $pdef    Paragraph being built.
     */
    private function parseRun($runNode, $pdef) {
        foreach ($runNode->childNodes as $rchildnode) {
            $name = $rchildnode->nodeName;

            if ($name === 'w:t') {
                $pdef->text .= $rchildnode->nodeValue;
                // Make sure a paragraph with text has at least one item.
                if (count($pdef->data) == 0) {
                    $pitem = new Docx_p_item;
                    $pitem->name = 'styleId';
                    $pitem->value = '';
                    array_push($pdef->data, $pitem);
                }
            } elseif ($name === 'w:drawing') {
                // Pictures are stored inside runs, so they are picked up here.
                array_push($pdef->data, $this->buildDrawingItem($rchildnode));
            } elseif ($name === 'w:rPr') {
                $this->parseRunProperties($rchildnode, $pdef);
            }
        }
    }

    /**
     * Collects bold, italic, underline and size items from a w:rPr element.
     *
     * @param DOMNode    $propertiesNode The w:rPr element.
     * @param Docx_p_def $pdef           Paragraph being built.
     */
    private function parseRunProperties($propertiesNode, $pdef) {
        $decorations = array(
            'w:b' => array('textBold', 'font-weight: bold;'),
            'w:i' => array('textItalic', 'font-style: italic;'),
            'w:u' => array('textUnderline', 'text-decoration: underline;'),
        );

        foreach ($propertiesNode->childNodes as $rPrchildnode) {
            $name = $rPrchildnode->nodeName;

            if (isset($decorations[$name])) {
                $pitem = new Docx_p_item;
                $pitem->name = $decorations[$name][0];
                $pitem->value = '';
                $pitem->innerstyle .= $decorations[$name][1];
                array_push($pdef->data, $pitem);
            } elseif ($name === 'w:sz') {
                $pitem = new Docx_p_item;
                $pitem->name = 'textSize';
                $sz = $this->attributeValue($rPrchildnode, 'val');
                if ($sz == '') {
                    $sz = 0;
                }
                $pitem->value = $sz;
                array_push($pdef->data, $pitem);
            }
        }
    }

    /**
     * Finds the public link of the image referenced by a relationship id.
     *
     * The relationship target and the archive entry are matched by file
     * name, because targets are relative to the word/ folder while entry
     * names are relative to the archive root.
     *
     * @param string $embedid Relationship id taken from the drawing.
     * @return string Link of the extracted image, or '' when none matches.
     */
    private function resolveImageLink($embedid) {
        $imglnk = '';
        if ((string) $embedid === '') {
            return $imglnk;
        }

        foreach ($this->rels as $rel) {
            if (strcmp($rel->Id, $embedid) != 0) {
                continue;
            }
            $targetName = basename($rel->Target);
            foreach ($this->imglnks as $imgpathdef) {
                if (basename($imgpathdef->originalpath) === $targetName) {
                    $imglnk = $imgpathdef->extractedpath;
                }
            }
        }

        return $imglnk;
    }

    /**
     * Renders the processed paragraphs as HTML.
     *
     * Every paragraph becomes one <p> element. Graphic paragraphs contain an
     * <img> followed by any paragraph text; text paragraphs contain the text
     * only. All text and attribute values are HTML-escaped.
     *
     * @return string
     */
    public function to_html()
    {
        $html = '';
        foreach ($this->paragraphs as $para) {
            $styleselect = null;
            $type = 'text';
            $sz = 0;
            $extent = '';
            $embedid = '';
            $pinnerstylesid = '';
            $decorations = array('textBold' => '', 'textItalic' => '', 'textUnderline' => '');

            foreach ($para->data as $node) {
                $name = (string) $node->name;
                if ($name === 'styleId') {
                    $type = $node->type;
                    $pinnerstylesid = $node->innerstyle;
                    foreach ($this->styles as $style) {
                        if (strcmp($node->value, $style->styleId) == 0) {
                            $styleselect = $style;
                        }
                    }
                } elseif ($name === 'align') {
                    // innerstyle is already terminated with ';'.
                    $pinnerstylesid .= $node->innerstyle;
                } elseif ($name === 'drawing') {
                    $type = $node->type;
                    $extent = $node->innerstyle;
                    $embedid = $node->value;
                } elseif ($name === 'textSize') {
                    $sz = $node->value;
                } elseif (isset($decorations[$name])) {
                    $decorations[$name] = $node->innerstyle;
                }
            }

            $isGraphic = strcmp($type, 'graphic') == 0;
            if (!$isGraphic && strcmp($type, 'text') != 0) {
                continue;
            }

            if ($styleselect !== null && isset($styleselect->color) && strcmp($styleselect->color, '') != 0) {
                $pinnerstylesid .= "color:#" . $styleselect->color . ";";
            }

            // w:sz is expressed in half-points.
            $halfPoints = (float) $sz;
            if ($halfPoints > 0) {
                $pinnerstylesid .= 'font-size:' . ($halfPoints / 2) . 'pt;';
            }

            $style = $pinnerstylesid
                . $decorations['textBold']
                . $decorations['textItalic']
                . $decorations['textUnderline'];

            $span = "<p style='" . htmlspecialchars($style, ENT_QUOTES, 'UTF-8') . "'>";
            if ($isGraphic) {
                $imglnk = $this->resolveImageLink($embedid);
                $span .= "<img style='" . htmlspecialchars($extent, ENT_QUOTES, 'UTF-8')
                    . "' alt='image coming soon' src ='"
                    . htmlspecialchars($imglnk, ENT_QUOTES, 'UTF-8') . "'/>";
            }
            // The text comes from the document, so it must never be emitted raw.
            $span .= htmlspecialchars((string) $para->text, ENT_QUOTES, 'UTF-8');
            $span .= "</p>";
            $html .= $span;
        }
        return $html;
    }

    /**
     * Returns the messages collected by the last setFile() call.
     *
     * @return string[] Empty when loading succeeded.
     */
    public function get_errors() {
        return $this->errors;
    }

    private function getStyles() {
    }
}
/**
 * Loads the .docx file at $path and prints its HTML rendering.
 *
 * Nothing is printed when loading reported errors.
 *
 * @param string $path Path of the .docx file.
 * @return string Always an empty string; the HTML is echoed, not returned.
 */
function getDocX($path)
{
    $reader = new Docx_reader();
    $reader->setFile($path);

    // Bail out early when the reader collected any error while loading.
    if ($reader->get_errors()) {
        return "";
    }

    $reader->processDocument();
    echo $reader->to_html();

    return "";
}
?>