I have a problem with this function:
/**
 * Fetches a list of URLs concurrently through one curl_multi handle.
 *
 * Every request is sent with the same HTTP headers, does not follow
 * redirects and returns only the response body (no response headers).
 *
 * @param array $nodes   URLs to request.
 * @param array $headers Header lines passed to CURLOPT_HTTPHEADER for every request.
 *
 * @return array Response bodies as returned by curl_multi_getcontent(),
 *               in the same order as $nodes.
 */
function multi_activity($nodes,$headers){
    $master = curl_multi_init();
    $handles = array();

    // foreach instead of an index loop so the input does not need 0..n-1 keys.
    foreach ($nodes as $url) {
        $handle = curl_init($url);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($handle, CURLOPT_HEADER, false);
        curl_setopt($handle, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($handle, CURLOPT_FOLLOWLOCATION, false);
        // NOTE(review): TLS peer verification is switched off here; confirm this is intended.
        curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, false);
        curl_multi_add_handle($master, $handle);
        $handles[] = $handle;
    }

    $running = 0;
    do {
        // Older libcurl versions ask to be called again immediately.
        do {
            $status = curl_multi_exec($master, $running);
        } while ($status === CURLM_CALL_MULTI_PERFORM);

        // Block until a transfer has activity instead of spinning the CPU.
        // curl_multi_select() may return -1 when there is nothing to wait on
        // yet; pause briefly in that case so the loop still does not spin.
        if ($running > 0 && $status === CURLM_OK && curl_multi_select($master, 1.0) === -1) {
            usleep(1000);
        }
    } while ($running > 0 && $status === CURLM_OK);

    $results = array();
    foreach ($handles as $handle) {
        $results[] = curl_multi_getcontent($handle);
        curl_multi_remove_handle($master, $handle);
    }
    curl_multi_close($master);

    return $results;
}
Actually, I'm calling this function 300 times, each time with a $nodes array containing 30 different URLs. I don't really understand how cURL works, but I have to wait about 10 minutes for the job to finish and print my JSON. Is there a way to speed it up using multi-cURL or another asynchronous PHP tool/API? Thanks in advance.
Related
I'm trying to run multiple cURL requests against the Google Analytics API and wanted to see if there is a more efficient way of running the requests than having to manually build them out like below. I will eventually need to build about 10-15 requests, so I'm looking for something more reusable.
<?php
// Three hand-built requests against the Google Analytics data API.
// Each handle receives its URL at creation time; CURLOPT_RETURNTRANSFER is
// not set, so every curl_exec() call writes the response body to the output.
$ch1 = curl_init("https://www.googleapis.com/analytics/v3/data/parameters_go_here");
$ch2 = curl_init("https://www.googleapis.com/analytics/v3/data/parameters_go_here");
$ch3 = curl_init("https://www.googleapis.com/analytics/v3/data/parameters_go_here");

// The requests run one after another, in this order.
curl_exec($ch1);
curl_exec($ch2);
curl_exec($ch3);
?>
You can use multi-cURL:
// Fetch every URL in $urls concurrently and print the response bodies.
$urls = array($url1, $url2, $url3);
$curl_arr = array();
$inits = curl_multi_init();
foreach ($urls as $i => $url) {
    $curl_arr[$i] = curl_init($url);
    // Return the body from curl_multi_getcontent() instead of echoing it.
    curl_setopt($curl_arr[$i], CURLOPT_RETURNTRANSFER, true);
    curl_multi_add_handle($inits, $curl_arr[$i]);
}

$running = 0;
do {
    // Older libcurl versions ask to be called again immediately.
    do {
        $status = curl_multi_exec($inits, $running);
    } while ($status === CURLM_CALL_MULTI_PERFORM);

    // Wait for activity on a transfer rather than busy-looping; fall back to
    // a short pause when curl_multi_select() reports -1.
    if ($running > 0 && $status === CURLM_OK && curl_multi_select($inits, 1.0) === -1) {
        usleep(1000);
    }
} while ($running > 0 && $status === CURLM_OK);

// Start from an empty array so print_r() never sees an undefined variable
// and stale entries from earlier code cannot leak into the output.
$results = array();
foreach ($curl_arr as $i => $handle) {
    $results[$i] = curl_multi_getcontent($handle);
    curl_multi_remove_handle($inits, $handle);
}
curl_multi_close($inits);
print_r($results);
Or create a function:
/**
 * Requests $url with a fresh cURL handle.
 *
 * CURLOPT_RETURNTRANSFER is not set, so curl_exec() writes the response body
 * straight to the output; the function itself returns nothing.
 *
 * @param string $url Address to request.
 */
function doCurl($url){
    $ch = curl_init();
    // The URL must be set on $ch, the handle that is executed below
    // (setting it on any other variable leaves $ch without a URL).
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_exec($ch);
}
Considering you're passing in different parameters each time, you do need to make separate calls to the API. Having said that, you may benefit from utilising a function() where you structure the call, and pass the parameter in as a variable:
/**
 * Requests one Google Analytics data endpoint and decodes the JSON reply.
 *
 * @param string $param Text appended to the API base URL.
 *
 * @return mixed Whatever json_decode() yields for the response body.
 */
function getData($param = "") {
    $request = curl_init("https://www.googleapis.com/analytics/v3/data/" . $param);

    // Return the body as a string; TLS peer verification is disabled.
    curl_setopt($request, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($request, CURLOPT_SSL_VERIFYPEER, false);

    $body = curl_exec($request);
    curl_close($request);

    return json_decode($body);
}
This will 'prettify' your code, allowing you to simply pass through a one word call. The following shows how you can access properties of the returned information:
getData('stats')->item; /* Requests https://www.googleapis.com/analytics/v3/data/stats and reads the "item" property of the decoded result */
getData('info')->item; /* Requests https://www.googleapis.com/analytics/v3/data/info and reads the "item" property of the decoded result */
Note that this will still result in the same amount of data being requested from the API, though provides a much cleaner way in which to call the API each time you need to.
Hope this helps! :)
Using multi exec curl in my application
// Issue $requestCount requests concurrently and collect the decoded JSON bodies.
// One constant drives both loops so they cannot drift apart.
$requestCount = 1000;
$ch = array();
$mh = curl_multi_init();
// Strict "<" so exactly $requestCount handles are created, matching the read loop below.
for ($i = 0; $i < $requestCount; $i++) {
    $ch[$i] = curl_init();
    curl_setopt($ch[$i], CURLOPT_URL, '');
    curl_setopt($ch[$i], CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch[$i], CURLOPT_POST, false);
    curl_multi_add_handle($mh, $ch[$i]);
}

$running = 0;
do {
    // Older libcurl versions ask to be called again immediately.
    do {
        $status = curl_multi_exec($mh, $running);
    } while ($status === CURLM_CALL_MULTI_PERFORM);

    // Wait for activity on a transfer instead of spinning the CPU; pause
    // briefly when curl_multi_select() reports -1.
    if ($running > 0 && $status === CURLM_OK && curl_multi_select($mh, 1.0) === -1) {
        usleep(1000);
    }
} while ($running > 0 && $status === CURLM_OK);

$output = array();
for ($j = 0; $j < $requestCount; $j++) {
    // Cast guards json_decode() against a null body from a failed transfer.
    $resp = json_decode((string) curl_multi_getcontent($ch[$j]), true);
    $output[] = $resp;
    // Detach and drop each handle as soon as it is decoded so PHP can release
    // the raw response instead of holding it next to the decoded copy.
    curl_multi_remove_handle($mh, $ch[$j]);
    unset($ch[$j]);
}
curl_multi_close($mh);
// NOTE(review): all responses are still buffered at the same time before
// decoding starts; if memory remains a problem, run the requests in smaller batches.
return $output;
This obviously performs 1000 requests. The response for each request is quite big, and this gives me an out-of-memory error.
Is there a way to solve this?
This is the code I am currently using
/**
 * Downloads $url with cURL and returns what curl_exec() produced.
 *
 * Response headers are excluded, redirects are followed and the body is
 * returned instead of being printed.
 *
 * @param string $url Address to fetch.
 *
 * @return mixed Return value of curl_exec().
 */
function curl_get_contents($url) {
    $request = curl_init();

    // Applied one by one, in this order.
    $options = array(
        CURLOPT_HEADER => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_URL => $url,
    );
    foreach ($options as $option => $value) {
        curl_setopt($request, $option, $value);
    }

    return curl_exec($request);
}
/**
 * Asks the Facebook Graph endpoint to scrape $filename and reads the title
 * from the decoded JSON reply.
 *
 * The title is only stored in a local variable; nothing is returned.
 *
 * @param mixed $filename    Value concatenated as-is into the "id" query parameter.
 * @param mixed $other       Not used inside this function.
 * @param mixed $programming Not used inside this function.
 */
function meta_scrap($filename, $other, $programming) {
    $response = json_decode(
        curl_get_contents('https://graph.facebook.com/?id=' . $filename . '&scrape=true&method=post')
    );
    $ogtitle = $response->title;
}
I call meta_scrap($filename); 8 times on a single webpage. This makes the page load really slowly. Is there something that I can do about it? I read about curl_multi_init() and tried to use it like this:
/**
 * Fetches several pages concurrently and returns each JSON response decoded.
 *
 * @param array $pages URLs to request.
 *
 * @return array One entry per page, in the order of $pages: the response
 *               decoded with json_decode(..., true), or null when the body
 *               is empty or not valid JSON.
 */
function curl_get_contents($pages) {
    $mh = curl_multi_init();
    $ch = array();

    foreach ($pages as $page) {
        // Options go on the individual handle, not on the $ch array.
        $handle = curl_init();
        curl_setopt($handle, CURLOPT_HEADER, 0);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($handle, CURLOPT_URL, $page);
        curl_setopt($handle, CURLOPT_FOLLOWLOCATION, 1);
        curl_multi_add_handle($mh, $handle);
        $ch[] = $handle;
    }

    $running = 0;
    do {
        // Older libcurl versions ask to be called again immediately.
        do {
            $status = curl_multi_exec($mh, $running);
        } while ($status === CURLM_CALL_MULTI_PERFORM);

        // Wait for activity on a transfer instead of spinning the CPU; pause
        // briefly when curl_multi_select() reports -1.
        if ($running > 0 && $status === CURLM_OK && curl_multi_select($mh, 1.0) === -1) {
            usleep(1000);
        }
    } while ($running > 0 && $status === CURLM_OK);

    $responses = array();
    foreach ($ch as $handle) {
        // Cast guards json_decode() against a null body from a failed transfer.
        $responses[] = json_decode((string) curl_multi_getcontent($handle), true);
        curl_multi_remove_handle($mh, $handle);
    }
    curl_multi_close($mh);

    return $responses;
}
With this I get no output? Could anyone help me modify my code so that it gives correct output?
You seem to have started working with multiple requests then cut and pasted a single curl request handler here:
// Quoted from the question's function. There $ch holds the array of handles,
// so this leftover single-request call has no individual handle to execute.
$data = curl_exec($ch);
// $i is the loop counter after the loop has finished (equal to count($pages)),
// so $ch[$i] is not one of the handles that were created.
$results = reset(json_decode(curl_multi_getcontent($ch[$i]), true));
$resultCount = count($results);
....but I can't imagine where you would have found a piece of code which uses reset() like this.
You try to put the response into $results, yet you then throw this array away and return something completely different.
Try this....
...
// Drive all transfers on $mh to completion, then decode each response.
$running = count($pages);
do {
    // Older libcurl versions ask to be called again immediately.
    do {
        $status = curl_multi_exec($mh, $running);
    } while ($status === CURLM_CALL_MULTI_PERFORM);

    // Wait until a transfer has activity rather than polling on a fixed
    // interval; keep a short pause as fallback when curl_multi_select()
    // reports -1.
    if ($running > 0 && $status === CURLM_OK && curl_multi_select($mh, 1.0) === -1) {
        usleep(5000);
    }
} while ($running > 0 && $status === CURLM_OK);

$responses = array();
$pageCount = count($pages);
for ($i = 0; $i < $pageCount; $i++) {
    // Cast guards json_decode() against a null body from a failed transfer.
    $responses[$i] = json_decode((string) curl_multi_getcontent($ch[$i]), true);
}
return $responses;
If you will be reusing the function then you should also remove and close each curl handle then close the multi-handle before returning.
See also my recent blog post about curl_multi_exec().
<?php
// Diagnostic script: performs one request and prints the response followed
// by everything curl_getinfo() reports about the transfer.
ini_set('display_errors',1);

$url = 'www.google.com.my';
$header = true;          // include response headers in the output
$returntransfer = true;  // return the response instead of printing it
$connecttimeout = 3;     // seconds allowed for connecting
$timeout = 60;           // seconds allowed for the whole request

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, $header);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, $returntransfer);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $connecttimeout);
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

$execute = curl_exec($ch);
$info = curl_getinfo($ch);

header('Content-Type: text/plain');
if ($execute === false) {
    // Report the failure instead of silently printing an empty response.
    echo 'cURL error ', curl_errno($ch), ': ', curl_error($ch);
} else {
    echo $execute;
}

echo "\n\ncurl_getinfo() said:\n", str_repeat('-', 31 + strlen($url)), "\n";
foreach ($info as $label => $value)
{
    // Some entries (e.g. "certinfo") are arrays; encode them so that printf()
    // does not trigger an array-to-string conversion.
    if (is_array($value)) {
        $value = json_encode($value);
    }
    printf("%-30s %s\n", $label, $value);
}
echo str_repeat('-', 31 + strlen($url));
?>
Here are my questions:
(1) I want to test the web services using PHP curl. Am I missing something in the above code?
(2) If I have 2 URLs, should I use curl_setopt or curl_multi_init?
I really hope that someone will answer my questions.
These are simultaneous requests:
$ch_1 = curl_init('http://url.one.com/');
$ch_2 = curl_init('http://url.two.com/');
curl_setopt($ch_1, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch_2, CURLOPT_RETURNTRANSFER, true);
// build the multi-curl handle, adding both $ch
$mh = curl_multi_init();
curl_multi_add_handle($mh, $ch_1);
curl_multi_add_handle($mh, $ch_2);
// execute all queries simultaneously, and continue when all are complete
$running = null;
do {
    // older libcurl versions ask to be called again immediately
    do {
        $status = curl_multi_exec($mh, $running);
    } while ($status === CURLM_CALL_MULTI_PERFORM);
    // wait for activity on a transfer instead of spinning the CPU; pause
    // briefly when curl_multi_select() reports -1
    if ($running && $status === CURLM_OK && curl_multi_select($mh, 1.0) === -1) {
        usleep(1000);
    }
} while ($running && $status === CURLM_OK);
// all of our requests are done, we can now access the results
$response_1 = curl_multi_getcontent($ch_1);
$response_2 = curl_multi_getcontent($ch_2);
// detach the finished handles and release the multi handle
curl_multi_remove_handle($mh, $ch_1);
curl_multi_remove_handle($mh, $ch_2);
curl_multi_close($mh);
echo "$response_1 $response_2"; // same output as first example
here's my code:
<?php
// Command-line crawler: reads one URL per line from the file named in the
// first argument, fetches all of them concurrently and reports start/end time.
error_reporting(-1);
ini_set('max_execution_time', 0);
date_default_timezone_set('UTC');
$starttime = date("H:i:s");

// Check the argument before reading it, so a missing argument produces the
// usage message rather than an undefined-index warning.
if(!isset($argv[1])) { echo "[-] Try again...\n"; die(); }
$targetsfile = $argv[1];

$nodes = file($targetsfile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($nodes === false) {
    // file() returns false when the list cannot be read; stop instead of counting it.
    echo "[-] Unable to read $targetsfile\n";
    die();
}
$node_count = count($nodes);

// The user agent is the same for every request, so build it once.
$agent = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; pt-pt) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27";

$curl_arr = array();
$master = curl_multi_init();
for($i = 0; $i < $node_count; $i++)
{
    $url = $nodes[$i];
    // curl_init($url) already sets CURLOPT_URL.
    $curl_arr[$i] = curl_init($url);
    curl_setopt($curl_arr[$i], CURLOPT_USERAGENT, $agent);
    curl_setopt($curl_arr[$i], CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl_arr[$i], CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl_arr[$i], CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl_arr[$i], CURLOPT_MAXREDIRS, 2);
    curl_setopt($curl_arr[$i], CURLOPT_VERBOSE, false);
    curl_setopt($curl_arr[$i], CURLOPT_TIMEOUT, 3);
    curl_multi_add_handle($master, $curl_arr[$i]);
}

$running = 0;
do
{
    // Older libcurl versions ask to be called again immediately.
    do {
        $status = curl_multi_exec($master, $running);
    } while ($status === CURLM_CALL_MULTI_PERFORM);

    // Wait for activity on a transfer; keep the short pause as fallback when
    // curl_multi_select() reports -1.
    if ($running > 0 && $status === CURLM_OK && curl_multi_select($master, 1.0) === -1) {
        usleep(5000);
    }
} while($running > 0 && $status === CURLM_OK);

for($i = 0; $i < $node_count; $i++)
{
    $results = curl_multi_getcontent ( $curl_arr[$i] );
    #HERE#
}
echo "DONE!\n";
echo "START TIME: $starttime\n";
$endtime = date("H:i:s");
echo "END TIME: $endtime\n";
?>
I am trying to crawl multiple sites with the multi-cURL functions. To continue building my script, I need to know, for each piece of content that was fetched, which URL it came from. I need that value at the place I marked in the code:
see #HERE# line
There I need the exact value of $url for each $curl_arr[$i], but if I try to echo $curl_arr[$i] at that point, it prints values like this:
Resource id #27Resource id #27Resource id #28Resource id #28DONE!
Is there any way to get the exact value of the $url variable inside my for loop? Or should I think of another way to write the entire code?
I guess this would be possible with curl_getinfo:
// CURLINFO_EFFECTIVE_URL reports the last effective URL of the transfer.
// NOTE(review): the question's script enables CURLOPT_FOLLOWLOCATION, so after
// a redirect this can differ from the URL originally read from the targets
// file — confirm which of the two is wanted.
$url = curl_getinfo($curl_arr[$i], CURLINFO_EFFECTIVE_URL);