I'm trying to download a file that is generated when a POST button is pressed on an HTTPS page. I'm having to pass the contents of a cookie to show that I'm logged in but I've tested that elsewhere on the same site and that's all working fine.
I'm successfully generating the POST request to request the data that I need and I'm sending it but I'm getting nothing back from the web server at all.
Here's my code:
// Build header
$header = array();
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: ";
$header[] = "Content-Type: application/x-www-form-urlencoded";
// Now do transfer
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://domain/pathtoscript");
// Send referer to make this look real
curl_setopt($ch, CURLOPT_REFERER, $referer);
// And disguise ourselves as a browser
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
// Return results as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Follow redirects
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_VERBOSE, 1);
// Now send post data
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
// Define where we can store cookies
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);
// Sender header and permitted encodings
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_ENCODING, '');
// Actually do it !
$result = curl_exec ($ch);
curl_close ($ch);
// Show result in browser (should be CSV text file)
echo $result;
What am I doing wrong ? I don't want a dialogue box to open to prompt me to save the file, I'd just like the content of the file in $result so that I can parse it later in my application.
Thanks in advance !
Related
i am trying to login on the page 'http://portal.demo.ascio.com/Logon.aspx' with cURL but i am getting this error "503 Service Unavailable
No server is available to handle this request. ".
I am sending right POST which i get from firebug by login to the normal page.
I have been searching for a while and figured out that maybe i am not sending right header.
Code is:
$url = 'http://portal.demo.ascio.com/Logon.aspx';
$login_string = 'THERE ARE MY POSTS WHICH CONTAINS PASSWORD...thats is definitelly right'
$headers = array();
$headers[] = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
$headers[] = "Connection: Keep-Alive";
$headers[] = "Host: portal.demo.ascio.com";
$headers[] = "Referer: http://portal.demo.ascio.com/Logon.aspx";
$headers[] = "Accept-Encoding: gzip, deflate";
$headers[] = "Accept-Language: en-US,en;q=0.5";
$headers[] = "Content-type: application/x-www-form-urlencoded";
//open connection
$ch = curl_init();
//set the url, number of POST vars, POST data
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, false);
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookie.txt');
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, 5);
curl_setopt($ch, CURLOPT_POSTFIELDS, $login_string);
$result = curl_exec($ch);
print $result;
curl_close($ch);
Thx for help
In general, if you get the error of 500 series, such as 503, it means your request made it to the server side, and something in your input has caused the server to "break", maybe by design of the server itself. Double-check what you are sending and maybe also compare against the service API/Specs.
I'm trying to fetch the meta title of a website through the PHP function below.
It works for all websites but Facebook, I'm getting this this error:
"Update Your Browser | Facebook"
function fetchMeta($url)
{
$ch = curl_init();
$header = array();
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $urlX);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
$sites_html = fetchMeta("https://www.facebook.com/stackexchange");
echo $sites_html;
What do I need to change to make it work?
Try sending your user agent with it, right now as it's blank (or cURL's default)
curl_setopt($ch, CURLOPT_USERAGENT, "Proper user agent string, maybe your browsers?");
Facebook is detecting you are using an unsupported/old browser as it's not able to detect a modern user agent.
TRY THIS
<?php
// Fetching the meta title of a Facebook profile through PHP
ini_set("user_agent","facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)"); // EXTRA
function fetchMeta($url)
{
$ch = curl_init();
$timeout = 5; // EXTRA
$header = array();
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] = "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
//curl_setopt($ch, CURLOPT_HEADER, FALSE);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
// EXTRA
curl_setopt($ch, CURLOPT_USERAGENT, "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)");
curl_setopt($ch, CURLOPT_REFERER, "http://facebook.com");
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // SSL TRUE or FALSE
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_POST, FALSE);
// EXTRA
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
$sites_html = fetchMeta("https://www.facebook.com/albdroid.official"); //Albdroid Test <Working OK>
//$sites_html = fetchMeta("https://www.facebook.com/stackexchange"); // stackexchange Test <Working OK>
echo $sites_html;
?>
This question already has answers here:
Closed 10 years ago.
Possible Duplicate:
Php - Debugging Curl
I am using curl to fetch the user data from youtube API
The code is
$url_userInfo = 'https://gdata.youtube.com/feeds/api/users/default?access_token=ya29.AHES6ZS7GMdZf91LbMtoOdhFSFOpTuHHT-t7pSggAp-tS0A;
print_r($url_userInfo);
$ch = curl_init($url_userInfo);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, '3');
$content = curl_exec($ch);
curl_close($ch);
print_r( $content);
If I manually visit this url it displays the data in xml form. but there is nothing to print in $content.
Is there any problem with code??
That's actually a HTTPS link, i.e. it uses SSL, and you need to get cacert.pem, and set up cURL for SSL to make that work.
You can get the certificate here!
and you would set it up like so:
$curl = curl_init();
$browser = $_SERVER['HTTP_USER_AGENT'];
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: ";
curl_setopt($curl, CURLOPT_FAILONERROR,true);
curl_setopt($curl, CURLOPT_USERAGENT, $browser);
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'http://www.google.com');
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, false);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 2);
curl_setopt($curl, CURLOPT_TIMEOUT, 20);
curl_setopt($curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY); //needed for SSL
curl_setopt($curl, CURLOPT_CAINFO, "/scripts/cacert.pem"); //path to file
curl_setopt($curl, CURLOPT_URL, $url_userInfo);
$content = curl_exec($curl);
echo curl_error($curl); //display errors under development
curl_close($ch);
print_r( $content );
Using a proper user agent, and authenticating with SSL and a certificate, just like the browser would.
My code
<?php
$url='Search.jsp';
// disguises the curl using fake headers and a fake user agent.
function disguise_curl($url)
{
$curl = curl_init();
// Setup headers - I used the same headers from Firefox version 2.0.0.6
// below was split up because php.net said the line was too long. :/
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'https://lalpacweb.blackpool.gov.uk/protected/wca/publicRegisterVehicleSearch.jsp');
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_COOKIESESSION, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_COOKIEJAR, "cookies.txt");
curl_setopt($curl, CURLOPT_COOKIEFILE, "cookies.txt");
curl_setopt($curl, CURLOPT_HEADER, 1);
curl_setopt( $curl, CURLOPT_POST, 1);
curl_setopt ($curl, CURLOPT_POSTFIELDS, 'search.licenceTypeID=34&search.licenceLinkFileID=2&search.vehicleRegNumber=5&publicRegisterVehicle=Search');
$html = curl_exec($curl); // execute the curl command
echo curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl); // close the connection
return $html; // and finally, return $html
}
// uses the function and displays the text off the website
$text = disguise_curl($url);
echo $text;
?>
It returns the page, with the form filled, but it does not post it. The curl_getinfo response I get is..
200HTTP/1.1 200 OK Pragma: no-cache Cache-Control:
no-cache,no-store,must-revalidate Expires: Thu, 01 Jan 1970 00:00:00
GMT Content-Type: text/html;charset=ISO-8859-1 Content-Language: en-GB
Content-Length: 5901 Date: Sun, 19 Feb 2012 12:24:08 GMT Server:
Apache
Any ideas ?
Thanks for your help
There's a few things you'll probably want to do, firstly I believe it works better across different operating systems if you supply an absolute path to the cookiejar:
curl_setopt($curl, CURLOPT_COOKIEJAR, dirname(__FILE__) . "/cookies.txt");
curl_setopt($curl, CURLOPT_COOKIEFILE, dirname(__FILE__) . "/cookies.txt");
Also, you can have the script visit the homepage first to grab the session cookie:
disguise_curl("https://lalpacweb.blackpool.gov.uk");
Then you can post the form to https://lalpacweb.blackpool.gov.uk/protected/actions/PublicRegister.action (make sure cookies.txt exists):
<?php
// disguises the curl using fake headers and a fake user agent.
function disguise_curl($url, $post = false)
{
$curl = curl_init();
// Setup headers - I used the same headers from Firefox version 2.0.0.6
// below was split up because php.net said the line was too long. :/
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_REFERER, 'https://lalpacweb.blackpool.gov.uk/protected/wca/publicRegisterVehicleSearch.jsp');
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_COOKIESESSION, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_COOKIEJAR, dirname(__FILE__) . "/cookies.txt");
curl_setopt($curl, CURLOPT_COOKIEFILE, dirname(__FILE__) . "/cookies.txt");
curl_setopt($curl, CURLOPT_HEADER, 1);
if ($post)
{
curl_setopt( $curl, CURLOPT_POST, 1);
curl_setopt ($curl, CURLOPT_POSTFIELDS, 'search.licenceTypeID=34&search.licenceLinkFileID=2&search.vehicleRegNumber=5&publicRegisterVehicle=Search');
}
$html = curl_exec($curl); // execute the curl command
//echo curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl); // close the connection
return $html; // and finally, return $html
}
// Visit the home-page first to get the session cookie
disguise_curl("https://lalpacweb.blackpool.gov.uk");
// uses the function and displays the text off the website
$url = 'https://lalpacweb.blackpool.gov.uk/protected/actions/PublicRegister.action';
$text = disguise_curl($url, true);
echo $text;
?>
When opening https://lalpacweb.blackpool.gov.uk/protected/wca/publicRegisterVehicleSearch.jsp with my browser, I'm redirected to https://lalpacweb.blackpool.gov.uk/sessiontimeout.jsp and presented with a "Session Timeout" error. Maybe you must make two requests. One to login (and possibly obtain the session cookie) and one to actually perform the search. curl should automatically send cookies it has received in previous requests within the same session. Otherwise set it curl_setopt($curl, CURLOPT_COOKIE, 'CookieName=CookieValue');.
$post = urlencode('search.licenceTypeID=34&search.licenceLinkFileID=2&search.vehicleRegNumber=5&publicRegisterVehicle=Search');
or
$post = array(
'search.licenceTypeID' => 34,
'search.licenceLinkFileID' => 2,
'search.vehicleRegNumber' => 5,
'publicRegisterVehicle' => 'Search'
)
curl_setopt ($init, CURLOPT_POSTFIELDS, $post);
I am trying to obtain some information from a webpage that requires login. I am using PHP/cURL to post username and password to the login page on the target website. The website uses relative links to redirect authenticated users to a members only area.
I am getting 200 OK and I can see that I am being successfully authenticated. My issue is that I don't know how to make it go to the actual member area in the target website (targetwebsite.com/memberarea) as opposed to mywebsite.com/memberarea. Is there a way to specify the base domain of the target website in cURL? Could you also tell me if I am doing something not recommended in the following code.
Here is what I am doing...
<?php
// INIT CURL
$ch = curl_init();
// SET URL FOR THE POST FORM LOGIN
curl_setopt($ch, CURLOPT_URL, 'https://targetwebsite.com/login.php');
curl_setopt($ch, CURLOPT_HEADER, 1);
// ENABLE HTTP POST
curl_setopt ($ch, CURLOPT_POST, 1);
// SET POST PARAMETERS : FORM VALUES FOR EACH FIELD
curl_setopt ($ch, CURLOPT_POSTFIELDS, 'username=someuser&password=mypassword');
curl_setopt ($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12');
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_REFERER, 'http://www.google.com');
curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
// IMITATE CLASSIC BROWSER'S BEHAVIOUR : HANDLE COOKIES
curl_setopt ($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
# Setting CURLOPT_RETURNTRANSFER variable to 1 will force cURL
# not to print out the results of its query.
# Instead, it will return the results as a string return value
# from curl_exec() instead of the usual true/false.
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 0);
// EXECUTE 1st REQUEST (FORM LOGIN)
$store = curl_exec ($ch);
// SET FILE TO DOWNLOAD
curl_setopt ($ch, CURLOPT_COOKIEFILE, 'cookie.txt');
curl_setopt($ch, CURLOPT_URL, 'https://targetwebsite.com?memberarea+welcome+UserFirstName');
// EXECUTE 2nd REQUEST (FILE DOWNLOAD)
$content = curl_exec ($ch);
//echo $store;
// CLOSE CURL
curl_close ($ch);
?>
Are you receiving a Location: redirect? If so, use CURLOPT_FOLLOWLOCATION
Also, Christian Sciberras recommends setting MAXREDIRECTS to at least 3. Proof