I want to grab the first 10 results from any URL that I pass to the function as a parameter, and I want to build a data scraper for some sites.
I am getting a syntax error when I print the result on screen.
The syntax error is on this line, and I don't know why it gives me a syntax error. Kindly help me:
print_r( $dse-&gt;crawl()-&gt;parse() );
<?php
class CURL_CRAWLER {

    public $url;
    public $request_type;
    public $data;
    public $post_params;

    function __construct($url = '', $request_type = 'GET')
    {
        $this->url          = $url;
        $this->request_type = $request_type;
        $this->data         = '';
        $this->post_params  = array();
    }

    /** crawl a document **/
    function crawl()
    {
        $curl = curl_init( $this->url );
        curl_setopt($curl, CURLOPT_HEADER, false);
        curl_setopt($curl, CURLOPT_TIMEOUT, 60);
        curl_setopt($curl, CURLOPT_USERAGENT, 'cURL PHP');
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
        $this->data = curl_exec($curl);
        curl_close($curl);
        return $this; // make it a chainable method
    }

    /** Parse result data **/
    function parse()
    {
        $result = array();
        $count  = 0;

        $dom = new DOMDocument;
        $dom->preserveWhiteSpace = false;
        $dom->loadHTML($this->data);
        $xpath = new DOMXPath($dom);
        $news  = $xpath->query('//td[@bgcolor="#DDDDDD"]/table/tr[position()=2]/td[position()=2]');

        foreach ($news as $n) {
            $result[] = $n->nodeValue;
            $count++;
            if ($count > 9)
                break; // we just need 10 results. Index starts from 0
        }
        return $result;
    }
}

error_reporting(0);

$dse = new CURL_CRAWLER('http://www.dsebd.org/display_news.php');
echo "<pre>";
print_r( $dse-&gt;crawl()-&gt;parse() );
echo "</pre>";
?>
Your syntax error is that you should use the explicit "greater than" sign instead of the HTML entity &gt; - the server doesn't need those; it is not a browser that would render them correctly. Just change:
print_r( $dse-&gt;crawl()-&gt;parse() );
              ^^^^        ^^^^
to:
print_r( $dse->crawl()->parse() );
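As a side note, the manual counter and break in parse() can also be avoided by reading at most the first ten nodes of the DOMNodeList directly - a minimal alternative sketch, assuming the same XPath query and markup as the question:

/** Alternative parse(): take at most the first 10 matching nodes. */
function parse()
{
    $result = array();

    $dom = new DOMDocument;
    $dom->preserveWhiteSpace = false;
    @$dom->loadHTML($this->data); // suppress warnings from messy real-world HTML

    $xpath = new DOMXPath($dom);
    $news  = $xpath->query('//td[@bgcolor="#DDDDDD"]/table/tr[position()=2]/td[position()=2]');

    $limit = min(10, $news->length);
    for ($i = 0; $i < $limit; $i++) {
        $result[] = $news->item($i)->nodeValue;
    }

    return $result;
}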
Related
I'm trying to grab the contents from a URL (which returns JSON) that changes on each iteration of my loop. The problem with my approach is that it is very slow: about 120 iterations take over 40 seconds.
Here is my code:
$GetFriendListUrl = "http://api.steampowered.com/ISteamUser/GetFriendList/v0001/?key=mykey&steamid=".$other_steamid."&relationship=friend";
$GET_GetFriendListUrl = file_get_contents($GetFriendListUrl);
$raw_ids = json_decode($GET_GetFriendListUrl, TRUE);
$count = count($raw_ids['friendslist']['friends']);

$ci = curl_init();
curl_setopt($ci, CURLOPT_RETURNTRANSFER, true);

for ($x = 0; $x <= $count; $x++) {
    $friendslist = $raw_ids['friendslist']['friends'][$x]['steamid'];

    curl_setopt($ci, CURLOPT_URL, "https://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=mykey&steamids=".$friendslist);
    $cont = curl_exec($ci);
    $contFull = json_decode($cont, true);

    $steamname = $contFull['response']['players'][0]['personaname'];
    $steamprofileurl = $contFull['response']['players'][0]['profileurl'];
    $friendimage = $contFull['response']['players'][0]['avatar'];
    $friendimageData = base64_encode(file_get_contents($friendimage));

    echo '<img class="other_friendsteamimage" src="data:image/jpeg;base64,'.$friendimageData.'">';
    echo "<a class='other_friendlabel' href='$steamprofileurl'>$steamname</a>";
    echo "<br>";
}
curl_close($ci);
I cannot be sure of the format of the data returned by the API, and I have no means of testing the following, but in line with the comment I made and based upon the documentation, sending fewer requests - with each request handling up to 100 steamIDs - should save a considerable amount of time.
/* get the initial data */
$url = "http://api.steampowered.com/ISteamUser/GetFriendList/v0001/?key=mykey&steamid=".$other_steamid."&relationship=friend";
$data = file_get_contents( $url );
$json = json_decode( $data );

$ids = array();

/* just grab the IDs and add to array - correct format to access records??? */
foreach( $json->friendslist->friends as $obj ){
    $ids[] = $obj->steamid;
}

/* split the IDs into chunks of 100 */
$chunks = array_chunk( $ids, 100 );

/* send a request per chunk of 100 */
foreach( $chunks as $chunk ){
    $url = sprintf('https://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=mykey&steamids=%s', implode(',', $chunk));

    $curl = curl_init( $url );
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    $res = curl_exec( $curl );

    if( $res ){
        $data = json_decode($res, true);
        /* do stuff .... */
    }
    curl_close($curl);
}
echo 'Finito';
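For completeness, the /* do stuff .... */ placeholder could render the same fields the original loop used (personaname, profileurl, avatar) - a rough sketch, assuming GetPlayerSummaries returns the structure shown in the question's code:

/* Sketch of the "do stuff" section: one chunked response now contains
   up to 100 players, so loop over all of them. Field names are taken
   from the question's code. */
if (isset($data['response']['players'])) {
    foreach ($data['response']['players'] as $player) {
        $steamname       = $player['personaname'];
        $steamprofileurl = $player['profileurl'];
        $friendimage     = $player['avatar'];

        // Linking to the avatar URL directly avoids the extra
        // file_get_contents() + base64_encode() per friend.
        echo '<img class="other_friendsteamimage" src="'.$friendimage.'">';
        echo "<a class='other_friendlabel' href='$steamprofileurl'>$steamname</a>";
        echo "<br>";
    }
}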
I've been trying to run cURL in a foreach loop to extract information from the cryptocompare.com API. As soon as I call the following function, my code just stops working and there is no output:
$fullArray[$symbol]['Price'] = getThePrice($fullArray[$symbol]['Symbol']);
What am I doing wrong? I have pasted the code below.
include 'helper.php';

$fullArray = array();

//Get List of All Coins and store symbol and ID
$url = "https://min-api.cryptocompare.com/data/all/coinlist";
$jsonArray = getConnection($url);

foreach ($jsonArray['Data'] as $value) {
    $symbol = $value['Symbol'];
    $fullArray[$symbol]['Symbol'] = $value['Symbol'];
    $fullArray[$symbol]['Id'] = $value['Id'];

    //call getThePrice function to get Price of ticker
    $fullArray[$symbol]['Price'] = getThePrice($fullArray[$symbol]['Symbol']);
}

function getThePrice($input)
{
    //Get current price of each coin and store in full array
    $url = "https://www.cryptocompare.com/api/data/coinsnapshot/?fsym=".$input."&tsym=USD";
    $jsonNewArray = getConnection($url);

    if (array_key_exists('PRICE', $jsonNewArray['Data']['AggregatedData'])) {
        $returnVariable = $jsonNewArray['Data']['AggregatedData']['PRICE'];
        echo "The price of : ".$input." is ".$returnVariable;
    } else {
        $returnVariable = "NA";
        echo "This price is not available";
    }
    return $returnVariable;
}
The code in helper.php:
function getConnection($inputHelp)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $inputHelp);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    //curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 4);
    $json = curl_exec($ch);
    if (!$json) {
        echo curl_error($ch);
    }
    curl_close($ch);

    $jsonArray = json_decode($json, true);
    return $jsonArray;
}
Appreciate any help. Thanks in advance.
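One way to narrow this down (a debugging sketch only, not a confirmed fix): turn error reporting on and try the call on just a handful of symbols first, since the coinlist endpoint returns thousands of coins and looping over all of them can easily hit the execution-time limit or the API's rate limit. The sketch assumes getConnection() from helper.php and getThePrice() from the question are available in the same file:

// Debugging sketch: surface fatal errors and shrink the loop while testing.
error_reporting(E_ALL);
ini_set('display_errors', 1);
set_time_limit(0);

include 'helper.php';

$url = "https://min-api.cryptocompare.com/data/all/coinlist";
$jsonArray = getConnection($url);

// Only test the first five coins; remove the slice once this works.
$sample = array_slice($jsonArray['Data'], 0, 5, true);

foreach ($sample as $value) {
    $symbol = $value['Symbol'];
    echo $symbol." => ".getThePrice($symbol)."<br>";
}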
I have 5,550 records of different routes, and I need to loop over each record and fetch the API data for it.
So I made a function with Guzzle in Laravel:
public function getDirectionDistance($origins, $distinations)
{
    $client = new Client();
    $res = $client->get("https://maps.googleapis.com/maps/api/distancematrix/json?origins=$origins&destinations=$distinations&key=ччч")->getBody()->getContents();
    $obj = json_decode($res, true);

    $distance = $obj['rows'][0]['elements'][0]['distance']['text'];
    $clean = $string = str_replace(' km', '', $distance);

    return $clean;
}
I used it in a store method:
public function store()
{
    $route = $this->route->with('from', 'to')->get();
    $maps = new Maps();

    foreach ($route as $item) {
        $direction = new Direction();

        $from = $item->from->name;
        $to = $item->to->name;

        $direction->route_id = $item->id;
        $direction->distance = $maps->getMapsApi("$from,israel", "$to,israel");
        $direction->save();

        sleep(3);
    }
}
But when I do it, I get one distance repeated for 200 routes, and only after 200 rows do I get the next distance for the next route. How can I stop and wait for the API call to complete, save the result, and only then start the next row? I need the data to build a machine-learning price calculator.
This worked for me:
I created a function that makes a call to Google with cURL:
public function calculateDistance($origins, $destination)
{
    $staticDistanceModel = new StaticDistance();

    $url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins=" . $origins . "&destinations=" . $destination . "&mode=driving&language=it-IT&key=xyz";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_PROXYPORT, 3128);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    $response = curl_exec($ch);
    curl_close($ch);

    $response_a = json_decode($response, true);

    if (isset($response_a['rows'][0]['elements'][0]['distance']['value'])) {
        $m = $response_a['rows'][0]['elements'][0]['distance']['value'];
    } else {
        $m = 0;
    }

    $staticDistanceModel->insertStatic($origins, $destination, $m);
}
The function in my model is something like this:
public function insertStatic($origins, $destination, $m)
{
    $arrayInsert = array('origins' => $origins, 'destination' => $destination, 'distance' => $m);
    self::create($arrayInsert);
}
And in my controller I have a foreach like this:
foreach ($array as $object) {
    $calculator = $this->calculateDistance($object->origins, $object->destination);
}
But be careful, because Google limits requests, and the time for 5,500 records may be long, so you can chunk the array.
Hope this can help you.
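Building on that suggestion, here is a minimal sketch of chunking the routes in the store() method from the question, calling the getDirectionDistance() method defined earlier (the batch size of 100 and the 5-second pause are arbitrary assumptions):

// Sketch: process the routes in batches and pause between batches,
// instead of sleeping after every single request.
$route = $this->route->with('from', 'to')->get();
$maps = new Maps();

foreach ($route->chunk(100) as $batch) {  // Laravel Collection::chunk()
    foreach ($batch as $item) {
        $direction = new Direction();
        $direction->route_id = $item->id;
        $direction->distance = $maps->getDirectionDistance("{$item->from->name},israel", "{$item->to->name},israel");
        $direction->save();
    }
    sleep(5); // arbitrary pause to stay under the request limit
}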
I'm sending a SOAP request using cURL in PHP, but the response is returned as a string when I var_dump it, and I can't parse it into XML.
$soapUrl = "https://hosting/Wsdl"; // asmx URL of WSDL
$xml_post_string = '<?xml version="1.0" encoding="utf-8"?>
<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
    <soapenv:Header/>
    <soapenv:Body>
        <MY DATA>
    </soapenv:Body>
</soapenv:Envelope>';
$headers = array(
    "Host: hosting",
    "Connection: Keep-Alive",
    "Accept: text/xml",
    "Cache-Control: no-cache",
    "Pragma: no-cache",
    "Content-type: text/xml;charset=\"utf-8\"",
    "SOAPAction: http://hosting.com/SOAPAction",
    "Content-length: ".strlen($xml_post_string),
);
$url = $soapUrl;
$ch = curl_init();
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $xml_post_string); // the SOAP request
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
// converting
$response = curl_exec($ch);
curl_close($ch);
// converting
var_dump($response);
$response1 = str_replace("<soap:Body>","",$response);
$response2 = str_replace("</soap:Body>","",$response1);
// converting to XML
$parser = simplexml_load_string($response);
echo '<pre>';
var_dump($parser);
echo '</pre>';
The response is returned as
string(9965) "RESPONSE DATA"
and the SimpleXML object is dumped as
object(SimpleXMLElement)#164 (0) { }
Please, any help? Many thanks in advance.
I use the following XMLParser:
<?php
class XMLParser {
    // raw xml
    private $rawXML;
    // xml parser
    private $parser = null;
    // array returned by the xml parser
    private $valueArray = array();
    private $keyArray = array();
    // arrays for dealing with duplicate keys
    private $duplicateKeys = array();
    // return data
    private $output = array();
    private $status;

    public function XMLParser($xml){
        $this->rawXML = $xml;
        $this->parser = xml_parser_create();
        return $this->parse();
    }

    private function parse(){
        $parser = $this->parser;
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0); // Don't mess with my cAsE sEtTings
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);   // Don't bother with empty info
        if(!xml_parse_into_struct($parser, $this->rawXML, $this->valueArray, $this->keyArray)){
            $this->status = 'error: '.xml_error_string(xml_get_error_code($parser)).' at line '.xml_get_current_line_number($parser);
            return false;
        }
        xml_parser_free($parser);
        $this->findDuplicateKeys();

        // tmp array used for stacking
        $stack = array();
        $increment = 0;

        foreach($this->valueArray as $val) {
            if($val['type'] == "open") {
                // if array key is duplicate then send in increment
                if(array_key_exists($val['tag'], $this->duplicateKeys)){
                    array_push($stack, $this->duplicateKeys[$val['tag']]);
                    $this->duplicateKeys[$val['tag']]++;
                }
                else{
                    // else send in tag
                    array_push($stack, $val['tag']);
                }
            } elseif($val['type'] == "close") {
                array_pop($stack);
                // reset the increment if the tag does not exist in the stack
                if(array_key_exists($val['tag'], $stack)){
                    $this->duplicateKeys[$val['tag']] = 0;
                }
            } elseif($val['type'] == "complete") {
                // if array key is duplicate then send in increment
                if(array_key_exists($val['tag'], $this->duplicateKeys)){
                    array_push($stack, $this->duplicateKeys[$val['tag']]);
                    $this->duplicateKeys[$val['tag']]++;
                }
                else{
                    // else send in tag
                    array_push($stack, $val['tag']);
                }
                $this->setArrayValue($this->output, $stack, $val['value']);
                array_pop($stack);
            }
            $increment++;
        }

        $this->status = 'success: xml was parsed';
        return true;
    }

    private function findDuplicateKeys(){
        for($i = 0; $i < count($this->valueArray); $i++) {
            // duplicate keys are when two complete tags are side by side
            if($this->valueArray[$i]['type'] == "complete"){
                if( $i+1 < count($this->valueArray) ){
                    if($this->valueArray[$i+1]['tag'] == $this->valueArray[$i]['tag'] && $this->valueArray[$i+1]['type'] == "complete"){
                        $this->duplicateKeys[$this->valueArray[$i]['tag']] = 0;
                    }
                }
            }
            // also when a close tag is before an open tag and the tags are the same
            if($this->valueArray[$i]['type'] == "close"){
                if( $i+1 < count($this->valueArray) ){
                    if( $this->valueArray[$i+1]['type'] == "open" && $this->valueArray[$i+1]['tag'] == $this->valueArray[$i]['tag'])
                        $this->duplicateKeys[$this->valueArray[$i]['tag']] = 0;
                }
            }
        }
    }

    private function setArrayValue(&$array, $stack, $value){
        if ($stack) {
            $key = array_shift($stack);
            $this->setArrayValue($array[$key], $stack, $value);
            return $array;
        } else {
            $array = $value;
        }
    }

    public function getOutput(){
        return $this->output;
    }

    public function getStatus(){
        return $this->status;
    }
}
?>
Usage:
$p = new XMLParser($xml);
$p->getOutput();
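If you would rather stay with SimpleXML than use the parser class above, the empty-looking SimpleXMLElement is usually a namespace issue: the soap:-prefixed elements have to be addressed through their namespace instead of being stripped out with str_replace(). A minimal sketch, assuming the response uses the same SOAP 1.1 envelope namespace as the request:

// Sketch: read a namespaced SOAP response with SimpleXML.
$xml = simplexml_load_string($response);

// Register the envelope namespace and grab everything inside <soap:Body>.
$xml->registerXPathNamespace('soap', 'http://schemas.xmlsoap.org/soap/envelope/');
$bodyChildren = $xml->xpath('//soap:Body/*');

foreach ($bodyChildren as $node) {
    // Convert each body child to an array for easy inspection.
    print_r(json_decode(json_encode($node), true));
}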
Currently, when I execute this function with, say, 60 URLs, I get an HTTP 504 error. Is there any way to multithread this so that I no longer get a 504 error and can iterate through the entire list of URLs?
<?php

namespace App\Http\Controllers;

use Request;
use App\Http\Controllers\Controller;

class MainController extends Controller
{
    public function parse()
    {
        $input = Request::all();
        $csv = $input['laraCsv'];

        $new_csv = trim(preg_replace('/\s\s+/', ',', $csv));

        $headerInfo = [];
        //$titles = [];
        $csvArray = str_getcsv($new_csv, ",");
        $csvLength = count($csvArray);
        $i = 0;

        while ($i < $csvLength) {
            if (strpos($csvArray[$i], '.pdf') !== false) {
                print_r($csvArray[$i]);
            } else {
                array_push($headerInfo, get_headers($csvArray[$i], 1));
            }
            //sleep(3);
            //echo file_get_contents($csvArray[$i]);
            $i++;
        }

        return view('csvViewer')->with('data', $headerInfo)->with('urls', $csvArray);
    }
}
I've used DigitalOcean in the past, but I'm not sure what error code they return when you run out of execution time (also, set_time_limit(0); should already be in your code).
See if this works:
<?php
function getHeaders($data) {
    $curly = array();
    $result = array();

    $mh = curl_multi_init();

    foreach ($data as $id => $url) {
        $curly[$id] = curl_init();

        curl_setopt($curly[$id], CURLOPT_URL, $url);
        curl_setopt($curly[$id], CURLOPT_HEADER, true);
        curl_setopt($curly[$id], CURLOPT_NOBODY, true);
        curl_setopt($curly[$id], CURLOPT_RETURNTRANSFER, true);

        curl_multi_add_handle($mh, $curly[$id]);
    }

    $running = null;
    do {
        curl_multi_exec($mh, $running);
    } while ($running > 0);

    foreach ($curly as $id => $c) {
        $result[$id] = array_filter(explode("\n", curl_multi_getcontent($c)));
        curl_multi_remove_handle($mh, $c);
    }

    curl_multi_close($mh);

    return $result;
}

$urls = array(
    'http://google.com',
    'http://yahoo.com',
    'http://doesnotexistwillitplease.com'
);

$r = getHeaders($urls);

echo '<pre>';
print_r($r);
So once you've gotten all your URLs into an array, run it like getHeaders($urls);.
If it doesn't work, try it with only 3 or 4 URLs first. Also add set_time_limit(0); at the top, as mentioned before.
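Applied to the controller from the question, the multi-handle function would replace the per-URL get_headers() calls - a rough sketch of the body of the parse() method that keeps the question's .pdf filter (everything else is assumed):

// Sketch: collect the non-PDF URLs from the CSV and fetch all headers in one pass.
set_time_limit(0);

$urls = array();
foreach ($csvArray as $value) {
    if (strpos($value, '.pdf') === false) {
        $urls[] = $value;
    }
}

$headerInfo = getHeaders($urls); // one parallel pass instead of 60 serial requests

return view('csvViewer')->with('data', $headerInfo)->with('urls', $csvArray);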
Are you sure it is because of your code? It could also be the server configuration. About HTTP 504:
This problem is entirely due to slow IP communication between back-end computers, possibly including the Web server. Only the people who set up the network at the site which hosts the Web server can fix this problem.