$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $requestXML);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
This is working pretty straight forward with a test xml string.
$requestXML = "<?xml version='1.0' encoding='utf-8'?><request>
<id>12345</id>
<email>eoin#dolepaddy.com</email><request>";
(https://magp.ie/2010/04/12/post-xml-with-https-authentication-using-php-curl/)
I am combining this with this:
function xml_encode($mixed, $domElement=null, $DOMDocument=null) {
if (is_null($DOMDocument)) {
$DOMDocument = new DOMDocument('1.0', 'utf-8');
$DOMDocument->formatOutput = true;
xml_encode($mixed, $DOMDocument, $DOMDocument);
echo $DOMDocument->saveXML();
}
else {
if (is_array($mixed)) {
foreach ($mixed as $index => $mixedElement) {
if (is_int($index)) {
if ($index === 0) {
$node = $domElement;
}
else {
$node = $DOMDocument->createElement($domElement->tagName);
$domElement->parentNode->appendChild($node);
}
}
else {
$plural = $DOMDocument->createElement($index);
$domElement->appendChild($plural);
$node = $plural;
if (rtrim($index,'s')!==$index && count($mixedElement)>1) {
$singular = $DOMDocument->createElement(rtrim($index, 's'));
$plural->appendChild($singular);
$node = $singular;
}
}
xml_encode($mixedElement, $node, $DOMDocument);
}
}
else {
$domElement->appendChild($DOMDocument->createTextNode($mixed));
}
}
}
to generate the xml from an array (saving it to $requestXML)
(https://www.darklaunch.com/2009/05/23/php-xml-encode-using-domdocument-convert-array-to-xml-json-encode)
It works with the test xml, but unfortunately not with generated XML. I concat the $requestXML like this:
$requestXML = "&data=";$requestXML .= xml_encode($data); // if the test xml comes here, everything works well
I guess the generated xml is not really a string, so that's maybe it is not send with the POSTFIELDS?
I am using two JSON feed sources and PHP to display a real estate property slideshow with agents on a website. The code was working prior to the feed provider making changes to where they store property and agent images. I have made the necessary adjustments for the images, but the feed data is not working now. I have contacted the feed providers about the issue, but they say the problem is on my end. No changes beyond the image URLs were made, so I am unsure where the issue may be. I am new to JSON, so I might be missing something. I have included the full script below. Here are the two JSON feed URLs: http://century21.ca/FeaturedDataHandler.c?DataType=4&EntityType=2&EntityID=2119 and http://century21.ca/FeaturedDataHandler.c?DataType=3&AgentID=27830&RotationType=1. The first URL grabs all of the agents and the second grabs a single agent's properties. The AgentID value is sourced from the JSON feed URL dynamically.
class Core
{
private $base_url;
private $property_image_url;
private $agent_id;
private $request_agent_properties_url;
private $request_all_agents_url;
private function formatJSON($json)
{
$from = array('Props:', 'Success:', 'Address:', ',Price:', 'PicTicks:', ',Image:', 'Link:', 'MissingImage:', 'ShowingCount:', 'ShowcaseHD:', 'ListingStatusCode:', 'Bedrooms:', 'Bathrooms:', 'IsSold:', 'ShowSoldPrice:', 'SqFootage:', 'YearBuilt:', 'Style:', 'PriceTypeDesc:');
$to = array('"Props":', '"Success":', '"Address":', ',"Price":', '"PicTicks":', ',"Image":', '"Link":', '"MissingImage":', '"ShowingCount":', '"ShowcaseHD":', '"ListingStatusCode":', '"Bedrooms":', '"Bathrooms":', '"IsSold":', '"ShowSoldPrice":', '"SqFootage":', '"YearBuilt":', '"Style":', '"PriceTypeDesc":' );
return str_ireplace($from, $to, $json); //returns the clean JSON
}
function __construct($agent=false)
{
$this->base_url = 'http://www.century21.ca';
$this->property_image_url = 'http://images.century21.ca';
$this->agent_id = ($agent ? $agent : false);
$this->request_all_agents_url =
$this->base_url.'/FeaturedDataHandler.c?DataType=4&EntityType=3&EntityID=3454';
$this->request_agent_properties_url =
$this->base_url.'/FeaturedDataHandler.c?DataType=3'.'&AgentID='.$this->agent_id.'&RotationType=1';
}
/**
* getSlides()
*/
function getSlides()
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $this->request_all_agents_url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, 0);
$response = curl_exec($ch);
curl_close($ch);
if (empty($response))
return false;
else
$agents = $this->decode_json_string($response);
// Loop Agents And Look For Requested ID
foreach ($agents as $agent)
{
if (($this->agent_id != false) && (isset($agent['WTLUserID'])) && ($agent['WTLUserID'] != $this->agent_id))
{
continue; // You have specified a
}
$properties = $this->getProperties($agent['WTLUserID']);
$this->print_property_details($properties, $agent);
}
}
/**
* getProperties()
*/
function getProperties($agent_id)
{
$url = $this->base_url.'/FeaturedDataHandler.c?DataType=3'.'&AgentID='.$agent_id.'&RotationType=1';
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, 0);
$response = curl_exec($ch);
curl_close($ch);
$json = json_decode($response);
if (empty($response))
die('No response 2'); //return false;
else
$json = $this->formatJSON($this->decode_json_string($response));
var_dump($json);
die();
// return $json;
}
/**
* print_property_details()
*/
function print_property_details($properties, $agent, $html='')
{
$BASE_URL = $this->base_url;
$PROPERTY_IMAGE_URL = $this->property_image_url;
foreach ($properties as $property)
{
$img = $property['Image'];
// $img = ($property['Image'] ? $property['Image'] : "some url to a dummy image here")
if($property['ListingStatusCode'] != 'SOLD'){
$address = $property['Address'];
$shortaddr = substr($address, 0, -12);
$html .= "<div class='listings'>";
$html .= "<div class='property-image'>";
$html .= "<img src='". $PROPERTY_IMAGE_URL ."' width='449' height='337' alt='' />";
$html .= "</div>";
$html .= "<div class='property-info'>";
$html .= "<span class='property-price'>". $property['Price'] ."</span>";
$html .= "<span class='property-street'>". $shortaddr ."</span>";
$html .= "</div>";
$html .= "<div class='agency'>";
$html .= "<div class='agent'>";
$html .= "<img src='". $agent['PhotoUrl']. "' class='agent-image' width='320' height='240' />";
$html .= "<span class='agent-name'><b>Agent:</b>". $agent['DisplayName'] ."</span>";
$html .= "</div>";
$html .= "</div>";
$html .= "</div>";
}
}
echo $html;
}
function decode_json_string($json)
{
// Strip out junk
$strip = array("{\"Agents\": [","{Props: ",",Success:true}",",\"Success\":true","\r","\n","[{","}]");
$json = str_replace($strip,"",$json);
// Instantiate array
$json_array = array();
foreach (explode("},{",$json) as $row)
{
/// Remove commas and colons between quotes
if (preg_match_all('/"([^\\"]+)"/', $row, $match)) {
foreach ($match as $m)
{
$row = str_replace($m,str_replace(",","|comma|",$m),$row);
$row = str_replace($m,str_replace(":","|colon|",$m),$row);
}
}
// Instantiate / clear array
$array = array();
foreach (explode(',',$row) as $pair)
{
$var = explode(":",$pair);
// Add commas and colons back
$val = str_replace("|colon|",":",$var[1]);
$val = str_replace("|comma|",",",$val);
$val = trim($val,'"');
$val = trim($val);
$key = trim($var[0]);
$key = trim($key,'{');
$key = trim($key,'}');
$array[$key] = $val;
}
// Add to array
$json_array[] = $array;
}
return $json_array;
}
}
Try this code to fix the JSON:
$url = 'http://century21.ca/FeaturedDataHandler.c?DataType=3&AgentID=27830&RotationType=1';
$invalid_json = file_get_contents($url);
$json = preg_replace("/([{,])([a-zA-Z][^: ]+):/", "$1\"$2\":", $invalid_json);
var_dump($json);
All your keys need to be double-quoted
JSON on the second URL is not a valid JSON, that's why you're not getting the reults, as PHP unable to decode that feed.
I tried to process it, and get this error
Error: Parse error on line 1:
{Props: [{Address:"28
-^
Expecting 'STRING', '}'
Feed image for first URL
and here is view of 2nd URL's feed
as per error for second feed, all the keys should be wrapped within " as these are strings rather than CONSTANTS.
e.g.
Props should be "Props" and all other too.
EDIT
You need to update your functionand add this one(formatJSON($json)) to your class
// Update this function, just need to update last line of function
function getProperties($agent_id)
{
$url = $this->base_url.'/FeaturedDataHandler.c?DataType=3'.'&AgentID='.$agent_id.'&RotationType=1';
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, 0);
$response = curl_exec($ch);
curl_close($ch);
$json = json_decode($response);
if (empty($response))
die('No response 2'); //return false;
else
return $this->formatJSON($this->decode_json_string($response)); //this one only need to be updated.
}
//add this function to class. This will format json
private function formatJSON($json){
$from= array('Props:', 'Success:', 'Address:', ',Price:', 'PicTicks:', ',Image:', 'Link:', 'MissingImage:', 'ShowingCount:', 'ShowcaseHD:', 'ListingStatusCode:', 'Bedrooms:', 'Bathrooms:', 'IsSold:', 'ShowSoldPrice:', 'SqFootage:', 'YearBuilt:', 'Style:', 'PriceTypeDesc:');
$to = array('"Props":', '"Success":', '"Address":', ',"Price":', '"PicTicks":', ',"Image":', '"Link":', '"MissingImage":', '"ShowingCount":', '"ShowcaseHD":', '"ListingStatusCode":', '"Bedrooms":', '"Bathrooms":', '"IsSold":', '"ShowSoldPrice":', '"SqFootage":', '"YearBuilt":', '"Style":', '"PriceTypeDesc":' );
return str_ireplace($from, $to, $json); //returns the clean JSON
}
EDIT
I've tested that function, and it's working fine, may be there is something wrong with your function decode_json_string($json)
I've taken unclean json from second URL, and cleaning it here, and putting that cleaned json in json editor to check either it's working or not HERE
I'm not sure I am going about this the right way but I am trying to echo out individual elements of data from an array, but not succeeding, I only need to grab around 10 variables for average fuel consumption from an XML File here: https://www.fueleconomy.gov/ws/rest/ympg/shared/vehicles?make=honda&model=civic
I only need make, model, year avgMpg which is a child of youMpgVehicle etc so I can place them within a table in the same was as you can echo out SQL data within PHP.
function download_page($path){
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL,$path);
curl_setopt($ch, CURLOPT_FAILONERROR,1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1);
curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
//curl_setopt($ch, CURLOPT_SSLVERSION,3);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
//curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
$retValue = curl_exec($ch);
curl_close($ch);
return $retValue;
}
$sXML = download_page('https://www.fueleconomy.gov/ws/rest/ympg/shared/vehicles?make=honda&model=civic');
$oXML = new SimpleXMLElement($sXML);
$dom = new DomDocument();
$dom->loadXml($sXML);
$dataElements = $dom->getElementsByTagName('vehicle');
$array = array();
foreach ($dataElements as $element) {
$subarray = array();
foreach ($element->childNodes as $node) {
if (!$node instanceof DomElement) {
continue;
}
$key = $node->tagName;
$value = $node->textContent;
$subarray[$key] = $value;
}
$array[] = $subarray;
// var_dump($array); // returns the array as expected
var_dump($array[0]["barrels08"]); //how can I get this and other variables?
}
The structure is like this: (Or you can click on the hyperlink above)
-<vehicles>
-<vehicle>
<atvType/>
<barrels08>10.283832</barrels08>
<barrelsA08>0.0</barrelsA08>
<charge120>0.0</charge120>
<charge240>0.0</charge240>
<city08>28</city08>
<city08U>28.0743</city08U>
<cityA08>0</cityA08>
<cityA08U>0.0</cityA08U>
<cityCD>0.0</cityCD>
<cityE>0.0</cityE>
<cityUF>0.0</cityUF>
<co2>279</co2>
<co2A>-1</co2A>
<co2TailpipeAGpm>0.0</co2TailpipeAGpm>
<co2TailpipeGpm>279.0</co2TailpipeGpm>
<comb08>32</comb08>
<comb08U>31.9768</comb08U>
<combA08>0</combA08>
<combA08U>0.0</combA08U>
<combE>0.0</combE>
<combinedCD>0.0</combinedCD>
<combinedUF>0.0</combinedUF>
<cylinders>4</cylinders>
<displ>1.8</displ>
<drive>Front-Wheel Drive</drive>
<engId>18</engId>
<eng_dscr/>
<evMotor/>
<feScore>8</feScore>
<fuelCost08>1550</fuelCost08>
<fuelCostA08>0</fuelCostA08>
<fuelType>Regular</fuelType>
<fuelType1/>
<fuelType2/>
<ghgScore>8</ghgScore>
<ghgScoreA>-1</ghgScoreA>
<guzzler/>
<highway08>39</highway08>
<highway08U>38.5216</highway08U>
<highwayA08>0</highwayA08>
<highwayA08U>0.0</highwayA08U>
<highwayCD>0.0</highwayCD>
<highwayE>0.0</highwayE>
<highwayUF>0.0</highwayUF>
<hlv>0</hlv>
<hpv>0</hpv>
<id>33504</id>
<lv2>12</lv2>
<lv4>12</lv4>
<make>Honda</make>
<mfrCode>HNX</mfrCode>
<model>Civic</model>
<mpgData>Y</mpgData>
<phevBlended>false</phevBlended>
<pv2>83</pv2>
<pv4>95</pv4>
<rangeA/>
<rangeCityA>0.0</rangeCityA>
<rangeHwyA>0.0</rangeHwyA>
<trans_dscr/>
<trany>Automatic 5-spd</trany>
<UCity>36.4794</UCity>
<UCityA>0.0</UCityA>
<UHighway>55.5375</UHighway>
<UHighwayA>0.0</UHighwayA>
<VClass>Compact Cars</VClass>
<year>2013</year>
<youSaveSpend>3000</youSaveSpend>
-
33.612226599
45
55
47
28
16
33504
You don't actually need to put everything into an array if you just want to display the data. SimpleXML makes it very simple to handle XML data. If I may suggest a maybe less complex solution:
<?php
function getFuelDataAsXml($make, $model)
{
// In most cases CURL is overkill, unless you need something more complex
$data = file_get_contents("https://www.fueleconomy.gov/ws/rest/ympg/shared/vehicles?make={$make}&model={$model}");
// If we got some data, return it as XML, otherwise return null
return $data ? simplexml_load_string($data) : null;
}
// get the data for a specific make and model
$data = getFuelDataAsXml('honda', 'civic');
// iterate over all vehicle-nodes
foreach($data->vehicle as $vehicleData)
{
echo $vehicleData->barrels08 . '<br />';
echo $vehicleData->yourMpgVehicle->avgMpg . '<br />';
echo '<hr />';
}
To fetch data from an DOM use Xpath:
$url = "https://www.fueleconomy.gov/ws/rest/ympg/shared/vehicles?make=honda&model=civic";
$dom = new DomDocument();
$dom->load($url);
$xpath = new DOMXpath($dom);
foreach ($$xpath->evaluate('/*/vehicle') as $vehicle) {
var_dump(
array(
$xpath->evaluate('string(fuelType)', $vehicle),
$xpath->evaluate('number(fuelCost08)', $vehicle),
$xpath->evaluate('number(barrels08)', $vehicle)
)
);
}
Most Xpath expressions return an a list of nodes that can be iterated using foreach. Using number() or string() will cast the value or content of the first node into a float or string. If the list was empty you will get an empty value.
This question already has answers here:
How do you parse and process HTML/XML in PHP?
(31 answers)
Closed 9 years ago.
Just wondering how this would be done. Let's say there's a simple HTML table on an external website, and you have a database with the same structure as that HTML table. I understand that you can use file_get_contents to grab that entire web page.
From there, I would assume that you would remove everything from your file_get_contents except for the stuff between the <table></table> tags, thus isolating the table containing the data you wish to write.
What is the next step? Assuming your database table structure matches the structure of the HTML table, what would be the easiest way to write the table data into your database?
Perhaps this will be of interest(hope so lol), a super simple class to parse html.
Using only DOMDocument and cURL
<?php
$scraper = new DOMScraper();
//example couldent think of a site with an example table
$scraper->setSite('http://cherone.co.uk/forum')->setSource();
//all tables on page
echo '<table>'.$scraper->getInnerHTML('table').'</table>';
//get only tables with id="some_table_id" or any attribute match eg class="somthing"
echo '<table>'.$scraper->getInnerHTML('table','id=some_table_id').'</table>';
//get all tables contents but return only nodeValue/text
echo '<table>'.$scraper->getInnerHTML('table','id=some_table_id',true).'</table>';
/**
* Generic DOM scapper using DOMDocument and cURL
*/
Class DOMScraper extends DOMDocument{
public $site;
private $source;
private $dom;
function __construct(){
libxml_use_internal_errors(true);
$this->preserveWhiteSpace = false;
$this->strictErrorChecking = false;
}
function setSite($site){
$this->site = $site;
return $this;
}
function setSource(){
if(empty($this->site))return 'Error: Missing $this->site, use setSite() first';
$this->source = $this->get_data($this->site);
return $this;
}
function getInnerHTML($tag, $id=null, $nodeValue = false){
if(empty($this->site))return 'Error: Missing $this->source, use setSource() first';
$this->loadHTML($this->source);
$tmp = $this->getElementsByTagName($tag);
$ret = null;
foreach ($tmp as $v){
if($id !== null){
$attr = explode('=',$id);
if($v->getAttribute($attr[0])==$attr[1]){
if($nodeValue == true){
$ret .= trim($v->nodeValue);
}else{
$ret .= $this->innerHTML($v);
}
}
}else{
if($nodeValue == true){
$ret .= trim($v->nodeValue);
}else{
$ret .= $this->innerHTML($v);
}
}
}
return $ret;
}
function innerHTML($dom){
$ret = "";
$nodes = $dom->childNodes;
foreach($nodes as $v){
$tmp = new DOMDocument();
$tmp->appendChild($tmp->importNode($v, true));
$ret .= trim($tmp->saveHTML());
}
return $ret;
}
function get_data($url){
if(function_exists('curl_init')){
$ch = curl_init();
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}else{
return file_get_contents($url);
}
}
}
?>
You can use PHP Simple HTML DOM Parser for example
This question already has an answer here:
Closed 10 years ago.
Possible Duplicate:
Screen scapingin in php using file_get_contents
Can anyone help me.. I am trying to scrape Hotel reviews from LateRooms.com dont tell me its a bad idea because I already have permission as an affiliate
My code:
<?php
header('content-type: text/plain');
$contents = file_get_contents('http://www.laterooms.com/en/hotel-reviews/238902_the-westfield-bb-sandown.aspx');
$contents = preg_replace('/\s(1,)/', ' ', $contents);
print $contents . "\n";
$records = preg_split('/<div id="review/', $contents);
for ($ix = 1; $ix < count($records); $ix++) {
$tmp = $records[$ix];
preg_match('/id="review"/', $tmp, $match_reviews);
print_r($match_reviews);
exit();
}
?>
This works really well the only problem is that It pulls in the whole page of code and doesnt match the div id 'review'
Thanks in advance
function file_get_contents_curl($url){
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
function DOMinnerHTML($element){
$innerHTML = "";
$children = $element->childNodes;
foreach ($children as $child)
{
$tmp_dom = new DOMDocument();
$tmp_dom->appendChild($tmp_dom->importNode($child, true));
$innerHTML.=trim($tmp_dom->saveHTML());
}
return $innerHTML;
}
$url = 'http://www.laterooms.com/en/hotel-reviews/238902_the-westfield-bb-sandown.aspx';
$html = file_get_contents_curl($url);
//parsing begins here:
$doc = new DOMDocument();
#$doc->loadHTML($html);
$div_elements = $doc->getElementsByTagName('div');
if ($div_elements->length <> 0){
foreach ($div_elements as $div_element) {
if ($div_element->getAttribute('class') == 'review newReview'){
$reviews[] = DOMinnerHTML($div_element);
}
}
}
print_r($reviews);
Try this, it will return all reviews. You can refine the content as per your requirement.