PHP curl multi memory issues

I'm using curl multi exec in my application:
$ch = array();
$mh = curl_multi_init();
for ($i = 0; $i < 1000; $i++) {
    $ch[$i] = curl_init();
    curl_setopt($ch[$i], CURLOPT_URL, ''); // URL omitted in the original
    curl_setopt($ch[$i], CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch[$i], CURLOPT_POST, false);
    curl_multi_add_handle($mh, $ch[$i]);
}
$running = 0;
do {
    curl_multi_exec($mh, $running);
} while ($running > 0);
$output = array();
for ($j = 0; $j < 1000; $j++) {
    $results = curl_multi_getcontent($ch[$j]);
    $resp = json_decode($results, true);
    array_push($output, $resp);
}
return $output;
This obviously performs 1000 requests. The response per request is quite big, and this gives me an out-of-memory issue.
Is there a way to solve this?
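No answer was recorded for this question, but a common way to keep memory bounded is to process the URLs in smaller batches and free each handle's buffered response as soon as it has been decoded. A minimal sketch, assuming the URLs live in an array (the function name fetch_in_batches and the batch size of 50 are mine, not from the thread):
function fetch_in_batches(array $urls, $batchSize = 50) {
    $output = array();
    foreach (array_chunk($urls, $batchSize) as $batch) {
        $mh = curl_multi_init();
        $handles = array();
        foreach ($batch as $url) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_multi_add_handle($mh, $ch);
            $handles[] = $ch;
        }
        $running = 0;
        do {
            curl_multi_exec($mh, $running);
            if ($running) {
                curl_multi_select($mh); // wait for activity instead of spinning
            }
        } while ($running > 0);
        foreach ($handles as $ch) {
            $resp = json_decode(curl_multi_getcontent($ch), true);
            // keep only what you actually need from $resp here, so the
            // full bodies never accumulate in memory all at once
            $output[] = $resp;
            curl_multi_remove_handle($mh, $ch);
            curl_close($ch); // frees the buffered response body
        }
        curl_multi_close($mh);
    }
    return $output;
}
With at most 50 responses buffered at any moment, peak memory no longer scales with the full 1000 requests.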

Related

How to get the `Content-Length` of a response with PHP cURL

I'm trying to write a simple parser in PHP that gives me only the content length of an HTML page. For now I have this code:
$urls = array(
    'http://Link1.com/',
    'http://Link2.com'
);
$mh = curl_multi_init();
$connectionArray = array();
foreach ($urls as $key => $url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_multi_add_handle($mh, $ch);
    $connectionArray[$key] = $ch;
}
$running = null;
do {
    curl_multi_exec($mh, $running);
} while ($running > 0);
foreach ($connectionArray as $key => $ch) {
    $content = curl_multi_getcontent($ch);
    echo $content . "<br>";
    curl_multi_remove_handle($mh, $ch);
}
curl_multi_close($mh);
How can I get the Content-Length from $content?
You can use curl_getinfo($ch, CURLINFO_CONTENT_LENGTH_DOWNLOAD), which returns:
Content length of download, read from Content-Length: field
In this particular case, -1 seems to be a valid response:
Since 7.19.4, this returns -1 if the size isn't known.
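Applied to the final loop of the code above, that could look like the following sketch; the strlen() fallback for the -1 case is my addition, not part of the quoted docs:
foreach ($connectionArray as $key => $ch) {
    $content = curl_multi_getcontent($ch);
    $length = curl_getinfo($ch, CURLINFO_CONTENT_LENGTH_DOWNLOAD);
    if ($length == -1) {
        // no usable Content-Length header; fall back to the
        // number of bytes actually received
        $length = strlen($content);
    }
    echo $urls[$key] . ': ' . $length . ' bytes<br>';
    curl_multi_remove_handle($mh, $ch);
}
curl_multi_close($mh);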

Curl_Multi Infinite Loop (10 min)

I have a problem with this function:
function multi_activity($nodes, $headers) {
    $node_count = count($nodes);
    $results = array();
    $curl_arr = array();
    $master = curl_multi_init();
    for ($i = 0; $i < $node_count; $i++) {
        $url = $nodes[$i];
        $curl_arr[$i] = curl_init($url);
        curl_setopt($curl_arr[$i], CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl_arr[$i], CURLOPT_HEADER, false);
        curl_setopt($curl_arr[$i], CURLOPT_HTTPHEADER, $headers);
        curl_setopt($curl_arr[$i], CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($curl_arr[$i], CURLOPT_SSL_VERIFYPEER, false);
        curl_multi_add_handle($master, $curl_arr[$i]);
    }
    do {
        curl_multi_exec($master, $running);
    } while ($running > 0);
    for ($i = 0; $i < $node_count; $i++) {
        $results[] = curl_multi_getcontent($curl_arr[$i]);
        curl_multi_remove_handle($master, $curl_arr[$i]);
    }
    curl_multi_close($master);
    return $results;
}
I'm calling this function 300 times, each time with $nodes containing 30 different URLs. I don't fully understand how cURL works, but I have to wait 10 minutes for the job to finish and print my JSON. Is there a way to improve this with multi_curl or another asynchronous PHP tool/API? Thanks in advance.
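No answer was posted, but one likely contributor to the slowness is the do/while loop: without curl_multi_select() it busy-waits, burning CPU for the entire transfer. A sketch of a gentler loop that could replace the one above (the 1.0-second timeout is arbitrary):
do {
    $status = curl_multi_exec($master, $running);
    if ($running) {
        // sleep until at least one transfer has activity,
        // instead of re-polling in a tight loop
        curl_multi_select($master, 1.0);
    }
} while ($running > 0 && $status == CURLM_OK);
Beyond that, merging the 300 separate calls into fewer, larger batches would let more of the roughly 9000 requests run concurrently.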

How to open multiple URLs with cURL without delay

I was wondering if it's possible to open multiple URLs with cURL, or maybe something else.
This is what I have tried so far:
$urls = array(
    "http://google.com",
    "http://youtube.com",
);
foreach ($urls as $url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0);
    curl_setopt($ch, CURLOPT_TIMEOUT_MS, 200);
    curl_exec($ch);
    curl_close($ch);
}
The 200 ms timeout is there to let the site open fully.
Maybe you know of any alternatives.
Is it possible to open multiple URLs in PHP at the same time? Not client-side, but server-side.
Your solution would be simultaneous cURL HTTP requests.
For faster implementation, you can use this function (thanks to phpied):
function multiRequest($data, $options = array()) {
    // array of curl handles
    $curly = array();
    // data to be returned
    $result = array();
    // multi handle
    $mh = curl_multi_init();
    // loop through $data and create curl handles,
    // then add them to the multi-handle
    foreach ($data as $id => $d) {
        $curly[$id] = curl_init();
        $url = (is_array($d) && !empty($d['url'])) ? $d['url'] : $d;
        curl_setopt($curly[$id], CURLOPT_URL, $url);
        curl_setopt($curly[$id], CURLOPT_HEADER, 0);
        curl_setopt($curly[$id], CURLOPT_RETURNTRANSFER, 1);
        // post?
        if (is_array($d) && !empty($d['post'])) {
            curl_setopt($curly[$id], CURLOPT_POST, 1);
            curl_setopt($curly[$id], CURLOPT_POSTFIELDS, $d['post']);
        }
        // extra options?
        if (!empty($options)) {
            curl_setopt_array($curly[$id], $options);
        }
        curl_multi_add_handle($mh, $curly[$id]);
    }
    // execute the handles
    $running = null;
    do {
        curl_multi_exec($mh, $running);
    } while ($running > 0);
    // get content and remove handles
    foreach ($curly as $id => $c) {
        $result[$id] = curl_multi_getcontent($c);
        curl_multi_remove_handle($mh, $c);
    }
    // all done
    curl_multi_close($mh);
    return $result;
}
And use it like this:
$data = array(
    'http://search.yahooapis.com/VideoSearchService/V1/videoSearch?appid=YahooDemo&query=Pearl+Jam&output=json',
    'http://search.yahooapis.com/ImageSearchService/V1/imageSearch?appid=YahooDemo&query=Pearl+Jam&output=json',
    'http://search.yahooapis.com/AudioSearchService/V1/artistSearch?appid=YahooDemo&artist=Pearl+Jam&output=json'
);
$r = multiRequest($data);
echo '<pre>';
print_r($r);
Hope it helps.

How to combine multiple cURL requests in one?

This is the code I am currently using:
function curl_get_contents($url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    $data = curl_exec($ch);
    return $data;
}

function meta_scrap($filename, $other, $programming) {
    $link = 'https://graph.facebook.com/?id=' . $filename . '&scrape=true&method=post';
    $output = curl_get_contents($link);
    $output = json_decode($output);
    $ogtitle = $output->title;
}
I call meta_scrap($filename); 8 times on a single webpage, which makes the page load really slow. Is there something I can do about it? I read about curl_multi_init() and tried to use it like this:
function curl_get_contents($pages) {
    $ch = curl_init();
    $ch = array();
    $mh = curl_multi_init();
    for ($i = 0; $i < count($pages); $i++) {
        $page = $pages[$i];
        $ch[$i] = curl_init();
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_URL, $page);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_multi_add_handle($mh, $ch[$i]);
    }
    $running = 0;
    do {
        curl_multi_exec($mh, $running);
    } while ($running > 0);
    $data = curl_exec($ch);
    $results = reset(json_decode(curl_multi_getcontent($ch[$i]), true));
    $resultCount = count($results);
    curl_close($ch);
    return $data;
}
With this I get no output. Could anyone help me modify my code so that it gives the correct output?
You seem to have started working with multiple requests and then cut and pasted a single-request handler here:
$data = curl_exec($ch);
$results = reset(json_decode(curl_multi_getcontent($ch[$i]), true));
$resultCount = count($results);
...but I can't imagine where you would have found a piece of code that uses reset() like this.
You try to put the response into $results, yet you then throw this array away and return something completely different.
Try this....
...
$running = count($pages);
do {
    curl_multi_exec($mh, $running);
    usleep(5000);
} while ($running > 0);

$responses = array();
for ($i = 0; $i < count($pages); $i++) {
    $responses[$i] = json_decode(curl_multi_getcontent($ch[$i]), true);
}
return $responses;
If you will be reusing the function then you should also remove and close each curl handle then close the multi-handle before returning.
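That teardown might look like this (a sketch, assuming the handles are still in $ch and the multi-handle in $mh):
foreach ($ch as $handle) {
    curl_multi_remove_handle($mh, $handle);
    curl_close($handle);
}
curl_multi_close($mh);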
See also my recent blog post about curl_multi_exec().

PHP curl questions

<?php
ini_set('display_errors', 1);

$url = 'www.google.com.my';
$header = true;
$returntransfer = true;
$connecttimeout = 3;
$timeout = 60;

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, $header);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, $returntransfer);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $connecttimeout);
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
$execute = curl_exec($ch);
$info = curl_getinfo($ch);

header('Content-Type: text/plain');
echo $execute;
echo "\n\ncurl_getinfo() said:\n", str_repeat('-', 31 + strlen($url)), "\n";
foreach ($info as $label => $value) {
    printf("%-30s %s\n", $label, $value);
}
echo str_repeat('-', 31 + strlen($url));
?>
Here are my questions:
(1) I want to test web services using PHP cURL. Am I missing anything in the above code?
(2) If I have two URLs, should I use curl_setopt or curl_multi_init?
I really hope that someone will answer my questions.
What you want is simultaneous requests:
$ch_1 = curl_init('http://url.one.com/');
$ch_2 = curl_init('http://url.two.com/');
curl_setopt($ch_1, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch_2, CURLOPT_RETURNTRANSFER, true);

// build the multi-curl handle, adding both $ch
$mh = curl_multi_init();
curl_multi_add_handle($mh, $ch_1);
curl_multi_add_handle($mh, $ch_2);

// execute all queries simultaneously, and continue when all are complete
$running = null;
do {
    curl_multi_exec($mh, $running);
} while ($running);

// all of our requests are done, we can now access the results
$response_1 = curl_multi_getcontent($ch_1);
$response_2 = curl_multi_getcontent($ch_2);
echo "$response_1 $response_2";
