DOM document character error - php

The character encoding of the HTML document was not declared. The document will render with garbled text in some browser configurations if the document contains characters from outside the US-ASCII range. The character encoding of the page must be declared in the document or in the transfer protocol.
DOM creation fails with the error above, and the page comes back blank when I create new elements for the meta and data nodes like this:
<?php
// NOTE(review): daftarkanJs() presumably registers the script with the module — confirm.
$this->module->daftarkanJs('underscore-min.js');

// Fetch and decode the JSON served by the api/index endpoint for the requested id.
$form = CJSON::decode(
    file_get_contents(
        sprintf('%s/index.php/odk/api/index/id/%s', Yii::app()->getBaseUrl(true), $_GET['id'])
    )
);

// Same for the api/input endpoint; its 'result' entry is what loop() receives.
$input = CJSON::decode(
    file_get_contents(
        sprintf('%s/index.php/odk/api/input/id/%s', Yii::app()->getBaseUrl(true), $_GET['id'])
    )
);
/**
 * Report whether an input has children according to the api/child endpoint.
 *
 * When children exist, the function also calls itself once per child; the
 * results of those nested calls are not used.
 *
 * @param mixed $id       Id placed after "/id/" in the endpoint URL.
 * @param mixed $input_id Id placed after "/parentId/" in the endpoint URL.
 * @return bool true when the decoded response has a truthy 'result', false otherwise.
 */
function haveChild($id, $input_id)
{
    $endpoint = Yii::app()->getBaseUrl(true)
        . '/index.php/odk/api/child/id/' . $id
        . '/parentId/' . $input_id;
    $response = CJSON::decode(file_get_contents($endpoint));

    // No children reported for this input.
    if (!$response['result']) {
        return false;
    }

    // Children exist: visit each of them in turn.
    foreach ($response['result'] as $childInput) {
        haveChild($id, $childInput['input_id']);
    }

    return true;
}
/**
 * Build the XML form skeleton and print it, HTML-escaped, inside a <pre> element.
 *
 * For every input that has no parent and for which haveChild() reports no
 * children, a data/meta pair is appended to the instance and a bind whose
 * nodeset is "/data/<name with spaces replaced by underscores>" is appended
 * to the model.
 *
 * @param array $inputResult Rows read through the keys 'parent_id', 'input_id' and 'name'.
 * @param mixed $id          Passed through unchanged to haveChild().
 * @return void
 */
function loop($inputResult, $id)
{
    $xmlnsUri = 'http://www.w3.org/2000/xmlns/';

    $dom = new DOMDocument('1.0', 'utf-8');
    $dom->preserveWhiteSpace = false;
    $dom->formatOutput = true;

    // Root element plus the namespace declarations carried on it.
    $html = $dom->createElementNS('http://www.w3.org/2002/xforms', 'h:html');
    $html->setAttributeNS($xmlnsUri, 'xmlns:h', 'http://www.w3.org/1999/xhtml');
    $html->setAttributeNS($xmlnsUri, 'xmlns:ev', 'http://www.w3.org/2001/xml-events');
    $html->setAttributeNS($xmlnsUri, 'xmlns:xsd', 'http://www.w3.org/2001/XMLSchema');
    $html->setAttributeNS($xmlnsUri, 'xmlns:jr', 'http://openrosa.org/javarosa');
    $html = $dom->appendChild($html);

    // NOTE(review): the prefixed names below are created without a namespace URI;
    // createElementNS() would make the binding explicit — confirm the intended namespaces.
    $head = $html->appendChild($dom->createElement('h:h'));
    $head->appendChild($dom->createElement('h:t', 'xxxxxxx'));
    $model = $head->appendChild($dom->createElement('m'));
    $instance = $model->appendChild($dom->createElement('instance'));

    $data = $instance->appendChild($dom->createElement('data'));
    $meta = $data->appendChild($dom->createElement('meta'));
    // Element name now matches the nodeset of the bind created right below.
    $meta->appendChild($dom->createElement('instanceID'));

    $bind = $dom->createElement('bind');
    $bind->setAttribute('nodeset', '/data/meta/instanceID');
    $model->appendChild($bind);

    // The loop variable is $field, not $data: the original reused $data for the new
    // DOMElement and then read $data['name'] from that object, which is a fatal error.
    foreach ($inputResult as $field) {
        // Skip child inputs, and skip inputs that have children of their own.
        if ($field['parent_id'] || haveChild($id, $field['input_id'])) {
            continue;
        }

        $fieldData = $instance->appendChild($dom->createElement('data'));
        $fieldData->appendChild($dom->createElement('meta'));

        $fieldBind = $dom->createElement('bind');
        $fieldBind->setAttribute('nodeset', '/data/' . str_replace(' ', '_', $field['name']));
        $model->appendChild($fieldBind);
    }

    $html->appendChild($dom->createElement('h:b'));

    printf("<pre>%s</pre>", htmlentities($dom->saveXML()));
}
loop($input['result'], $_GET['id']);
?>

The error occurs in these lines:
$data = $dom->createElement('data');
$data = $instance->appendChild($data);
$meta = $dom->createElement('meta');
$meta = $data->appendChild($meta);

You don't output the XML, but HTML with escaped XML.
<pre>some escaped xml</pre>
This output matches the error message if it is treated as XML. There is no XML declaration with an encoding.
Stripped down to the DOM methods, your source outputs an XML document: https://eval.in/private/1507ef8a4065d0.
However, I suggest to use createElementNS() for ALL namespaced nodes. Calls like $dom->createElement('h:h'); are ambiguous.
// Prefix => namespace URI map used for every namespaced node below.
$xmlns = ['h' => 'http://www.w3.org/1999/xhtml'];

$dom = new DOMDocument();

// Create each element in its namespace first, then attach it to its parent.
$html = $dom->createElementNS($xmlns['h'], 'h:html');
$dom->appendChild($html);

$head = $dom->createElementNS($xmlns['h'], 'h:head');
$html->appendChild($head);

echo $dom->saveXml();
Output:
<?xml version="1.0"?>
<h:html xmlns:h="http://www.w3.org/1999/xhtml"><h:head/></h:html>

Related

Extract http-equiv content with php

I'm trying to extract all meta http-equiv properties from url.
Here is the code
/**
 * Collect the http-equiv meta tags of a document.
 *
 * @param string $url Anything file_get_contents() can read (URL or file path).
 * @return array Map of http-equiv attribute value => content attribute value.
 *               Empty when the source cannot be read, is blank, or has no such tags.
 *               If the same http-equiv value occurs twice, the later tag wins.
 */
function fetch_http_equiv($url)
{
    $data = file_get_contents($url);

    // Nothing to parse: a failed read or blank body; loadHTML() also rejects empty input.
    if ($data === false || trim($data) === '') {
        return array();
    }

    $dom = new DOMDocument();

    // Real-world HTML is rarely valid; collect libxml's complaints instead of emitting warnings.
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($data);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $xpath = new DOMXPath($dom);

    $http_equiv = array();
    // Select every <meta> element that carries an http-equiv attribute.
    foreach ($xpath->query('//meta[@http-equiv]') as $meta) {
        $property = $meta->getAttribute('http-equiv');
        $content = $meta->getAttribute('content');
        $http_equiv[$property] = $content;
    }

    return $http_equiv;
}
// Gather the page's http-equiv meta values.
$http_equiv = fetch_http_equiv($link);

// Keep the Content-Language value only when it is present and non-empty.
if (!empty($http_equiv['Content-Language'])) {
    $meta_content_language = $http_equiv['Content-Language'];
}
For the love of God, in my mind it should work. What did I miss?
edit:
I found a problem; I did changed
$property = $meta->getAttribute('http_equiv');
to
$property = $meta->getAttribute('http-equiv');
case solved.
I found a problem; I did changed
$property = $meta->getAttribute('http_equiv');
to
$property = $meta->getAttribute('http-equiv');
case solved.
Code works now.

php DOMDocument createTextNode from blob field(text) not showing data

I am trying to create XML using DOMDocument from database table. All field types are showing in XML node except BLOB Type.
Below what I did:
// Export every row of a Firebird query as a <RECORD> element under <FA_ARTIKEL>
// and send the document as text/xml.

// Collect the name and type of each column in the result set.
$rs = ibase_query("SELECT * FROM mytable");
$coln = ibase_num_fields($rs);
$fieldnames = array();
for ($i = 0; $i < $coln; $i++) {
    $col_info = ibase_field_info($rs, $i);
    $fieldnames[] = array('name' => $col_info['name'], 'type' => $col_info['type']);
}

$doc = new DOMDocument('1.0');
// NOTE(review): $dbh and $stmt are not defined in this snippet — presumably set up earlier; confirm.
$sth = ibase_query($dbh, $stmt);
$doc->formatOutput = true;
$root = $doc->createElement('FA_ARTIKEL');
$root = $doc->appendChild($root);

// Column types whose value is always written as-is; any other type falls back to '0' when falsy.
$verbatimTypes = array('VARCHAR', 'BLOB');

while ($row = ibase_fetch_object($sth, IBASE_TEXT)) {
    $title = $doc->createElement('RECORD');
    $title = $root->appendChild($title);
    // Kept from the original. NOTE(review): an empty text node may influence
    // how formatOutput indents this element — confirm before removing.
    $text = $doc->createTextNode('');
    $text = $title->appendChild($text);

    foreach ($fieldnames as $value) {
        // The braces are required: since PHP 7, $row->$value['name'] is parsed as
        // ($row->$value)['name'] and no longer reads the property named by $value['name'].
        $fieldValue = $row->{$value['name']};

        if (!in_array($value['type'], $verbatimTypes, true) && !$fieldValue) {
            $fieldValue = '0';
        }

        $rtitle = $doc->createElement($value['name']);
        $rtitle = $title->appendChild($rtitle);
        // createTextNode() escapes markup characters, so the value cannot break the XML.
        $rtext = $doc->createTextNode($fieldValue);
        $rtext = $rtitle->appendChild($rtext);
    }
}

header('Content-type: text/xml');
echo $doc->saveXML() . "\n";
ibase_free_result($sth);
ibase_close($dbh);
I tried with SimpleXMLElement also but it also failed. What I am missing?
My Database is Firebird and I set BLOB fields as
BLOB SUB_TYPE 1 SEGMENT SIZE 16384
PHPs DOMDocument expects UTF-8 strings. It is possible that the blob contains control characters/invalid unicode sequences. Try to put the data that breaks the XML into a variable and reduce your problem to the absolute minimum.
$blobData = $record['blobField'];
$document = new DOMDocument();
$document
->appendChild($document->createElement('foo'))
->appendChild($document->createTextNode($blobData));
echo $document->saveXml();
This way you can see if the blob data is really the problem or merely a symptom.
If the BLOB contains binary data, you will need to convert it into a text format. Atom feeds, for example, urlencode binary data that they want to embed. In this case you will need to decode the value in the reading program.

How to save retrived URLs in a xml file using php?

I can store normal string. But if I tried to store GET method url it can not store.
/**
 * Append an <id> and a <url> element to the <root> of autoGen/autoGen.xml and save the file.
 *
 * Values are inserted as text nodes, so characters such as "&" in a query
 * string are escaped on save instead of being parsed as entity references.
 *
 * @param mixed $id    Text for the new <id> element.
 * @param mixed $val   Text for the new <url> element (may contain "&").
 * @param mixed $addre Text for the <Address> element.
 * @return void
 */
function updateX_xml($id, $val, $addre)
{
    $file = 'autoGen/autoGen.xml';

    $xml = new DOMDocument();
    $xml->load($file);

    $node = $xml->getElementsByTagName('root')->item(0);
    if ($node === null) {
        // Without a <root> there is nothing to append to; report it instead of failing on null.
        trigger_error('updateX_xml(): no <root> element found in ' . $file, E_USER_WARNING);
        return;
    }

    $xml_id = $xml->createElement('id');
    $xml_id->appendChild($xml->createTextNode((string) $id));

    $xml_url = $xml->createElement('url');
    $xml_url->appendChild($xml->createTextNode((string) $val));

    // NOTE(review): as in the original, <Address> is built but never attached to
    // the document — confirm whether it should be appended to $node as well.
    $xml_addres = $xml->createElement('Address');
    $xml_addres->setAttribute('type', 'xs:string');
    $xml_addres->appendChild($xml->createTextNode((string) $addre));

    $node->appendChild($xml_id);
    $node->appendChild($xml_url);

    $xml->formatOutput = true;
    $xml->save($file);
}
if i call this function like this updateX_xml(1,'getdata?event_id=1 &lan=en',"addaress"); it is not working.
This will generate this warning. Warning: updateX_xml(): unterminated entity reference lan=en in C:\xampp\htdocs\test_file_read\gen_url.php on line 25
If you are inserting something into XML/HTML you should always use the htmlspecialchars function. this will escape your strings into correct XML syntax.
So:
/**
 * Append an <id> and a <url> element to the <root> of autoGen/autoGen.xml and save the file.
 *
 * The URL is passed through htmlspecialchars() before it is assigned, so an
 * ampersand in a query string is not read as the start of an entity reference.
 *
 * @param mixed $id    Value assigned to the new <id> element.
 * @param mixed $val   Value for the new <url> element; escaped before assignment.
 * @param mixed $addre Value assigned to the <Address> element.
 * @return void
 */
function updateX_xml($id, $val, $addre)
{
    $document = new DOMDocument();
    $document->load('autoGen/autoGen.xml');
    $root = $document->getElementsByTagName('root')->item(0);

    // <id>
    $idElement = $document->createElement("id");
    $idElement->nodeValue = $id;

    // <url>, with markup characters escaped
    $urlElement = $document->createElement("url");
    $urlElement->nodeValue = htmlspecialchars($val);

    // <Address type="xs:string">; built here but, as before, not attached to the root.
    $addressElement = $document->createElement("Address");
    $typeAttribute = $document->createAttribute('type');
    $typeAttribute->value = 'xs:string';
    $addressElement->appendChild($typeAttribute);
    $addressElement->nodeValue = $addre;

    $root->appendChild($idElement);
    $root->appendChild($urlElement);

    $document->formatOutput = true;
    $document->save("autoGen/autoGen.xml");
}
You have to escape HTML entity character:
$val= htmlentities($str, ENT_XML1);
$xml_url->nodeValue=$val;
Try without space between parameters:
updateX_xml(1,'getdata?event_id=1&lan=en',"addaress");
Also, as others mentioned, you need to escape the "&", as it's a special character in xml using htmlspecialchars() :
$xml_url->nodeValue = htmlspecialchars($val);

The characters in my HTML saved from DOMdocument become escaped

I have an irritating problem using PHP's DOMdocument. I have loaded HTML, and changed some of the element's attributes. I want to save the changed HTML, and output it.
The strange thing is, when I use ->saveHTML() or ->saveXML() my closing tags' slashes become escaped. I could remove the escaping with regex, but I would like to know if there is any cleaner way...
// Parse the timetable HTML with libxml errors collected internally, then discard them.
libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($roosterHTML);
$dom->preserveWhiteSpace = false;
libxml_clear_errors();
libxml_use_internal_errors(false);

$tables = $dom->getElementsByTagName('table');
$cols = $tables->item(0)->getElementsByTagName('td');

// Header text comes from cells 3, 2 and 1 of the first table, with
// newlines, carriage returns and spaces stripped out.
$stripPattern = "/(\\n|\\r| )/";
$name = preg_replace($stripPattern, "", $cols->item(3)->nodeValue);
$sirname = preg_replace($stripPattern, "", $cols->item(2)->nodeValue);
$class = preg_replace($stripPattern, "", $cols->item(1)->nodeValue);
$header = "Rooster van $name $sirname ($class)";

// Second table: colour every cell except those in the first row and the first column.
$rooster = $tables->item(1);
$rowNumber = 0;
foreach ($rooster->getElementsByTagName('tr') as $row) {
    if ($rowNumber++ === 0) {
        continue;
    }
    $colNumber = 0;
    foreach ($row->getElementsByTagName('td') as $col) {
        if ($colNumber++ === 0) {
            continue;
        }
        $text = $col->nodeValue;
        $col->setAttribute('style', 'background-color:#FF0');
    }
}

// Serialise only the modified table.
$rooster = $dom->saveXML($rooster);
Testing (just click submit, to send a POST value):
http://bit.ly/ymK3DA
No, the escaped is caused by the json
which mean this page is not output HTML but json-alike plain text

php DOMDocument->getElementById->nodeValue sripping html

I am using php's DOMDocument->getElementById->nodeValue to set a particular DOM element's HTML. The problem is that the string is converted to HTML entities:
eg:
nodeValue = html_entity_decode('&lt;b&gt;test&lt;/b&gt;'); should output 'test' in bold, but instead it outputs the literal text '<b>test</b>'
Any ideas why? This happens even if i don't use the html_entity_decode function
Here is my updated script...which is NOW working:
// Construct a DOM object for updating the affected node
$html = new DOMDocument("1.0", "utf-8");
if (!$html) return FALSE;

// Load the HTML file in question
$loaded = $html->loadHTMLFile($data['page_path']);
if (!$loaded) {
    print 'Failed to load file';
    return FALSE;
}

// Establish the node being updated within the file:
// each listed id is swapped for a fresh <div> holding the replacement content.
foreach ($data['divids'] as $divid) {
    $element = $html->getElementById($divid);
    if (is_null($element)) {
        print 'Failed to get existing element';
        return FALSE;
    }

    $newelement = $html->createElement('div');
    if (is_null($newelement)) {
        print 'Failed to create new element';
        return FALSE;
    }
    $newelement->setAttribute('id', $divid);
    $newelement->setAttribute('class', 'reusable-block');

    // Perform the replacement
    $newelement->nodeValue = $replacement;
    $parent = $element->parentNode;
    $parent->replaceChild($newelement, $element);

    // Save the file back to its location (done after every replacement)
    $saved = $html->saveHTMLFile($data['page_path']);
    if (!$saved) {
        print 'Failed to save file';
        return FALSE;
    }
}

// Replace HTML entities left over: the replacement was stored as text, so its
// angle brackets were saved as &lt; / &gt; and are turned back into markup here.
// NOTE(review): this also converts any other &lt; / &gt; in the file — confirm that is acceptable.
$content = files::readFile($data['page_path']);
$content = str_replace('&lt;', '<', $content);
$content = str_replace('&gt;', '>', $content);

// NOTE(review): $handle is not opened anywhere in this snippet — presumably a
// writable handle for $data['page_path'] created elsewhere; confirm.
// Error suppression restored so a failed write is reported only through the message below.
if (!@fwrite($handle, $content)) {
    print 'Failed to replace entities';
    return FALSE;
}
This is proper behavior - your tag is being converted to a string, and strings in XML can't contain angle brackets (only tags can). Try converting the HTML into a DOMNode and appending it instead:
$node = $mydoc->createElement("b");
$node->nodeValue = "test";
$mydoc->getElementById("whatever")->appendChild($node);
Update with working example:
// Sample document: a <body> holding a single <b>, both with id attributes.
$html = '<html>
<body id="myBody">
<b id="myBTag">my old element</b>
</body>
</html>';

$mydoc = new DOMDocument("1.0", "utf-8");
$mydoc->loadXML($html);

// Flag the "id" attribute of every descendant element as an ID;
// this is needed for getElementById() to work on this document.
$all_tags = $mydoc->documentElement->getElementsByTagName("*");
for ($i = 0; $i < $all_tags->length; $i++) {
    $all_tags->item($i)->setIdAttribute("id", true);
}

// Look up the container and the element to be replaced.
$result = $mydoc->getElementById("myBody");
$current_b_tag = $mydoc->getElementById("myBTag");

// Build the replacement <b> with its text content.
$new_b_tag = $mydoc->createElement("b");
$new_b_tag->appendChild($mydoc->createTextNode("my new element"));

$result->replaceChild($new_b_tag, $current_b_tag);

echo $mydoc->saveXML($mydoc->documentElement);

Categories