This question already has answers here:
Closed 10 years ago.
Possible Duplicate:
Php - Debugging Curl
I am using curl to fetch the user data from youtube API
The code is
$url_userInfo = 'https://gdata.youtube.com/feeds/api/users/default?access_token=ya29.AHES6ZS7GMdZf91LbMtoOdhFSFOpTuHHT-t7pSggAp-tS0A;
print_r($url_userInfo);
$ch = curl_init($url_userInfo);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, '3');
$content = curl_exec($ch);
curl_close($ch);
print_r( $content);
If I manually visit this url it displays the data in xml form. but there is nothing to print in $content.
Is there any problem with code??
That's actually a HTTPS link, i.e. it uses SSL, and you need to get cacert.pem, and set up cURL for SSL to make that work.
You can get the certificate here!
and you would set it up like so:
$curl = curl_init();
$browser = $_SERVER['HTTP_USER_AGENT'];
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: ";
curl_setopt($curl, CURLOPT_FAILONERROR,true);
curl_setopt($curl, CURLOPT_USERAGENT, $browser);
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'http://www.google.com');
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, false);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 2);
curl_setopt($curl, CURLOPT_TIMEOUT, 20);
curl_setopt($curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY); //needed for SSL
curl_setopt($curl, CURLOPT_CAINFO, "/scripts/cacert.pem"); //path to file
curl_setopt($curl, CURLOPT_URL, $url_userInfo);
$content = curl_exec($curl);
echo curl_error($curl); //display errors under development
curl_close($ch);
print_r( $content );
Using a proper user agent, and authenticating with SSL and a certificate, just like the browser would.
Related
I find no way to PHP cURL this URL :
http://www.bvger.ch/publiws/pub/cache.jsf?displayName=A-1695/2006&decisionDate=2007-02-27
Can any of you help me ? I tried many ways without any success. For example :
FUNCTION get_data2($url)
{
$curl = curl_init();
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'http://www.google.com/bot.html');
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_TIMEOUT, 10);
$html = curl_exec($curl); // execute the curl command
curl_close($curl); // close the connection
return $html; // and finally, return $html
}
OR
FUNCTION get_data1($url)
{
$ch = curl_init();
$timeout = 5;
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1" );
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
Both return nothing echoed.
This worked fine for me...
function get_data2($url)
{
$curl = curl_init();
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'http://www.google.com/bot.html');
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_TIMEOUT, 10);
$html = curl_exec($curl); // execute the curl command
curl_close($curl); // close the connection
return $html; // and finally, return $html
}
$url = 'http://www.bvger.ch/publiws/pub/cache.jsf?displayName=A-1695/2006&decisionDate=2007-02-27';
echo get_data2($url);
Don't capitalize "function". Also you must have been cut-n-pasting and didn't correct one of the lines to match the others using $curl instead of $ch.
I have some problem with the curl function. I am fetching Instagram posts with curl, but suddenly the function stopped working. I tried various changes but I can't make it work.
This is the function:
function cek($url) {
$curl = curl_init();
/**
* Setup headers - I used the same headers from Firefox version 2.0.0.6
* below was split up because php.net said the line was too long.
*/
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'http://www.google.com');
curl_setopt($curl, CURLOPT_HEADER, 1);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($curl, CURLOPT_MAXREDIRS, 10); /* Max redirection to follow */
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
$html = curl_exec($curl); // execute the curl command
curl_close($curl); // close the connection
return $html; // and finally, return $html
}
function get_curl($url) {
if(function_exists('curl_init')) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL,$url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
$output = curl_exec($ch);
echo curl_error($ch);
curl_close($ch);
return $output;
} else {
return file_get_contents($url);
}
}
and this is where I call the function:
#$var = cek('https://api.instagram.com/v1/tags/'.instagram_tag.'/media/recent?client_id='.intagram_clientid.'&count=2');
$var = get_curl('https://api.instagram.com/v1/tags/'.instagram_tag.'/media/recent?max_tag_id=11&min_tag_id=1&access_token='.intagram_token.'&callback=?');
$cek = json_decode($var);
So I need to load all the txt files in: http://orcahub.com/unchecked-proxy-list/ as one txt file and go into my server which is a different one to Orcahub;
For some reason it wont work. I cant get it to actually get the HTML to even do regex.
What I tried:
<?php
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://orcahub.com/unchecked-proxy-list');
curl_setopt($ch, CURLOPT_HEADER, FALSE);
curl_setopt($ch, CURLOPT_NOBODY, FALSE); // remove body
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$st = curl_exec($ch);
//curl_close($ch);
//preg_match_all("/(.*\.txt)/", $st, $out);
var_dump($ch);
?>
UPDATE:
New issue, I get a Server Error 500 when I use the following script:
UPDATE: Found out this issue was from a newline after the URL.
<?php
function disguise_curl($url) {
//Prepare Curl;
$curl = curl_init();
//Setup Headers (Firefox 2.0.0.6);
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: ";
//Setup Curl;
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'http://orcahub.com/unchecked-proxy-list/');
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_TIMEOUT, 60);
//Execute Curl;
$html = curl_exec($curl);
//End Curl;
curl_close($curl);
//Output the HTML;
return $html;
}
function rem_href($x) { return substr(strstr($x, '>'), strlen('>')); }
$response = disguise_curl('http://orcahub.com/unchecked-proxy-list/');
preg_match_all("/<a[\s]+[^>]*?href[\s]?=[\s\"\']+"."(.*?)[\"\']+.*?>"."([^<]+|.*?)?<\/a>/", $response, $matches, PREG_SET_ORDER );
foreach($matches as $value) {
$proxylists[] = 'http://orcahub.com/unchecked-proxy-list/'.rem_href($value[0]);
};
echo $proxylists[0];
$response = disguise_curl($proxylists[0]);
//Server Error 500 Here;
echo $response;
?>
Came accross from php.net a function that add headers to disguise the call, a regex I added for parsing the response:
function disguise_curl($url)
{
$curl = curl_init();
// Setup headers - I used the same headers from Firefox version 2.0.0.6
// below was split up because php.net said the line was too long. :/
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'http://www.google.com');
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_TIMEOUT, 10);
$html = curl_exec($curl); // execute the curl command
curl_close($curl); // close the connection
return $html; // and finally, return $html
}
$response = disguise_curl('http://orcahub.com/unchecked-proxy-list/');
preg_match_all("/<a[\s]+[^>]*?href[\s]?=[\s\"\']+"."(.*?)[\"\']+.*?>"."([^<]+|.*?)?<\/a>/", $response, $matches, PREG_SET_ORDER );
foreach($matches as $value) {
var_dump($value);
};
I'm trying to download a file that is generated when a POST button is pressed on an HTTPS page. I'm having to pass the contents of a cookie to show that I'm logged in but I've tested that elsewhere on the same site and that's all working fine.
I'm successfully generating the POST request to request the data that I need and I'm sending it but I'm getting nothing back from the web server at all.
Here's my code:
// Build header
$header = array();
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: ";
$header[] = "Content-Type: application/x-www-form-urlencoded";
// Now do transfer
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://domain/pathtoscript");
// Send referer to make this look real
curl_setopt($ch, CURLOPT_REFERER, $referer);
// And disguise ourselves as a browser
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
// Return results as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Follow redirects
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_VERBOSE, 1);
// Now send post data
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
// Define where we can store cookies
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);
// Sender header and permitted encodings
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_ENCODING, '');
// Actually do it !
$result = curl_exec ($ch);
curl_close ($ch);
// Show result in browser (should be CSV text file)
echo $result;
What am I doing wrong ? I don't want a dialogue box to open to prompt me to save the file, I'd just like the content of the file in $result so that I can parse it later in my application.
Thanks in advance !
My code
<?php
$url='Search.jsp';
// disguises the curl using fake headers and a fake user agent.
function disguise_curl($url)
{
$curl = curl_init();
// Setup headers - I used the same headers from Firefox version 2.0.0.6
// below was split up because php.net said the line was too long. :/
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'https://lalpacweb.blackpool.gov.uk/protected/wca/publicRegisterVehicleSearch.jsp');
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_COOKIESESSION, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_COOKIEJAR, "cookies.txt");
curl_setopt($curl, CURLOPT_COOKIEFILE, "cookies.txt");
curl_setopt($curl, CURLOPT_HEADER, 1);
curl_setopt( $curl, CURLOPT_POST, 1);
curl_setopt ($curl, CURLOPT_POSTFIELDS, 'search.licenceTypeID=34&search.licenceLinkFileID=2&search.vehicleRegNumber=5&publicRegisterVehicle=Search');
$html = curl_exec($curl); // execute the curl command
echo curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl); // close the connection
return $html; // and finally, return $html
}
// uses the function and displays the text off the website
$text = disguise_curl($url);
echo $text;
?>
It returns the page, with the form filled, but it does not post it. The curl_getinfo response I get is..
200HTTP/1.1 200 OK Pragma: no-cache Cache-Control:
no-cache,no-store,must-revalidate Expires: Thu, 01 Jan 1970 00:00:00
GMT Content-Type: text/html;charset=ISO-8859-1 Content-Language: en-GB
Content-Length: 5901 Date: Sun, 19 Feb 2012 12:24:08 GMT Server:
Apache
Any ideas ?
Thanks for your help
There's a few things you'll probably want to do, firstly I believe it works better across different operating systems if you supply an absolute path to the cookiejar:
curl_setopt($curl, CURLOPT_COOKIEJAR, dirname(__FILE__) . "/cookies.txt");
curl_setopt($curl, CURLOPT_COOKIEFILE, dirname(__FILE__) . "/cookies.txt");
Also, you can have the script visit the homepage first to grab the session cookie:
disguise_curl("https://lalpacweb.blackpool.gov.uk");
Then you can post the form to https://lalpacweb.blackpool.gov.uk/protected/actions/PublicRegister.action (make sure cookies.txt exists):
<?php
// disguises the curl using fake headers and a fake user agent.
function disguise_curl($url, $post = false)
{
$curl = curl_init();
// Setup headers - I used the same headers from Firefox version 2.0.0.6
// below was split up because php.net said the line was too long. :/
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'https://lalpacweb.blackpool.gov.uk/protected/wca/publicRegisterVehicleSearch.jsp');
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_COOKIESESSION, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_COOKIEJAR, dirname(__FILE__) . "/cookies.txt");
curl_setopt($curl, CURLOPT_COOKIEFILE, dirname(__FILE__) . "/cookies.txt");
curl_setopt($curl, CURLOPT_HEADER, 1);
if ($post)
{
curl_setopt( $curl, CURLOPT_POST, 1);
curl_setopt ($curl, CURLOPT_POSTFIELDS, 'search.licenceTypeID=34&search.licenceLinkFileID=2&search.vehicleRegNumber=5&publicRegisterVehicle=Search');
}
$html = curl_exec($curl); // execute the curl command
//echo curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl); // close the connection
return $html; // and finally, return $html
}
// Visit the home-page first to get the session cookie
disguise_curl("https://lalpacweb.blackpool.gov.uk");
// uses the function and displays the text off the website
$url = 'https://lalpacweb.blackpool.gov.uk/protected/actions/PublicRegister.action';
$text = disguise_curl($url, true);
echo $text;
?>
When opening https://lalpacweb.blackpool.gov.uk/protected/wca/publicRegisterVehicleSearch.jsp with my browser, I'm redirected to https://lalpacweb.blackpool.gov.uk/sessiontimeout.jsp and presented with a "Session Timeout" error. Maybe you must make two requests. One to login (and possibly obtain the session cookie) and one to actually perform the search. curl should automatically send cookies it has received in previous requests within the same session. Otherwise set it curl_setopt($curl, CURLOPT_COOKIE, 'CookieName=CookieValue');.
$post = urlencode('search.licenceTypeID=34&search.licenceLinkFileID=2&search.vehicleRegNumber=5&publicRegisterVehicle=Search');
or
$post = array(
'search.licenceTypeID' => 34,
'search.licenceLinkFileID' => 2,
'search.vehicleRegNumber' => 5,
'publicRegisterVehicle' => 'Search'
)
curl_setopt ($init, CURLOPT_POSTFIELDS, $post);