I have a problem with curl. I need to extract data from my profile with website (validity of the account). The problem is the lack of logging.
$login = "user";
$password = "passtest";
$ckfile = tempnam("./cookies", "cookies.txt");
$page_login = "http://host.tv/login.html";
$page_download = "http://host.tv/profil.html";
$post_data = "?login=$login&pass=$password";
$agent = "Mozilla/5.0 (X11; U; Linux i686; pl; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3";
$headers = array();
$headers[] = 'GET /login.html HTTP/1.1';
$headers[] = 'Host: host.tv';
$headers[] = 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8';
$headers[] = 'Accept-Language: pl-PL,pl;q=0.8,en-US;q=0.6,en;q=0.4';
$headers[] = 'Referer: http://host.tv/login.html';
$headers[] = 'Upgrade-Insecure-Requests: 1';
$headers[] = 'User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36';
$connect = curl_init ();
curl_setopt($connect, CURLOPT_URL, $page_login);
curl_setopt($connect, CURLOPT_COOKIESESSION, 1);
curl_setopt($connect, CURLOPT_COOKIEJAR, $ckfile);
curl_setopt($connect, CURLOPT_COOKIEFILE, $ckfile);
curl_setopt($connect, CURLOPT_TIMEOUT, 40);
curl_setopt($connect, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($connect, CURLOPT_HEADER, 0);
curl_setopt($connect, CURLOPT_HTTPHEADER, $headers);
curl_setopt($connect, CURLOPT_USERAGENT, $agent);
curl_setopt($connect, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($connect, CURLOPT_POST, 1);
curl_setopt($connect, CURLOPT_POSTFIELDS, $post_data);
curl_exec($connect);
curl_setopt($connect, CURLOPT_URL, $page_download);
$page = curl_exec($connect);
curl_close($connect);
echo $page;
The script does not logged do Accounts.
you are doing some mistakes here,
1: don't manually create the GET / header, let curl create it for you with CURLOPT_URL / CURLOPT_HTTPGET
2: logging in is not a GET operation, its a POST operation.
and there's a lack of error checking on the curl return values.
here's an example of logging in, going to the profile page, and extract the account type (i guess it says "free account" or something, but i dont speak that language), using hhb_curl from https://github.com/divinity76/hhb_.inc.php/blob/master/hhb_.inc.php
<?php
declare(strict_types=1);
require_once('hhb_.inc.php');
hhb_init();
$hc=new hhb_curl();
$hc->_setComfortableOptions();
//getting a session
$hc->exec('http://host.tv/login.html');
//logging in
$username='user';
$password='passtest';
$hc->setopt_array(array(
CURLOPT_POST=>true,
CURLOPT_POSTFIELDS=>http_build_query(array(
'login'=>$username,
'pass'=>$password,
'logged'=>''//??
))
));
$hc->exec('http://host.tv/login.html');
//TODO: confirm login was successful
$hc->exec('http://host.tv/profil.html');
//extract "account type" from your profile
$domd=#DOMDocument::loadHTML($hc->getResponseBody());
$accountType=trim(preg_replace('/\s+/', "\n", $domd->getElementById("content-top-right")->textContent));
echo $accountType.PHP_EOL;
output:
Konto:
Standard
Ważne
do:
Brak
Related
I'm trying by PHP and cURL to log in a website, but this website use a CSRF token. My PHP code is catching the token (checked with a echo) but I couldn't log in after that (CSRF token error).
Do you have any idea of how I could keep the same cURL session in order to have the same CSRF token (otherwise the server will generate a new one).
I have added comment to my code in order to make it more understandable
Thank you in advance for all your help
Here my code :
<?php
$url = "https://www.boursedirect.fr/fr/login";
// obtain the CSRF Token
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0");
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie'); // to handle cookies during login
curl_setopt($ch, CURLOPT_COOKIEFILE, 'tmp'); // idem
$headers = [
'Keep-Alive: timeout=5, max=100',
'Connection: keep-alive'
];
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
$res = curl_exec($ch);
$dom = new DomDocument();
# $dom->loadHTML($res);
$elem = $dom->getElementById("bd_auth_login_type__token");
$token = $elem->getAttribute("value");
/* extract the token value hidden in <input type="hidden" id="bd_auth_login_type__token" name="bd_auth_login_type[_token]" class="form-control" value="yDVyvTXUhIJjnAj9mTfBO3OKgRpI0zLCUZY2BM_O1E8" /> __ FULLY WORKING */
// second part, trying to log in (same URL)
$username = "X";
$password = "X";
curl_setopt($ch, CURLOPT_POST, true); // switching to POST
curl_setopt($ch, CURLOPT_POSTFIELDS, 'bd_auth_login_type[login]='.$username.'&bd_auth_login_type[password]='.$password.'&bd_auto_login_type[submit]=&bd_auth_login_type[token]='.$token);
$res = curl_exec($ch);
echo $res; // result is a CSRF token error, so unable to log in
curl_close($ch);
?>
Working script thanks to #miken32 help !
<?php
$url = "/";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0");
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$headers = [
'Keep-Alive: timeout=5, max=100',
'Connection: keep-alive'
];
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
$res = curl_exec($ch);
$dom = new DomDocument();
# $dom->loadHTML($res);
$elem = $dom->getElementById("bd_auth_login_type__token");
$token = $elem->getAttribute("value");
$data = array(
"bd_auth_login_type[login]" => "X",
"bd_auth_login_type[password]" => "X",
"bd_auto_login_type[submit]" => "",
"bd_auth_login_type[_token]" => $token
);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
$res = curl_exec($ch);
curl_close($ch);
?>
When i download the html using curl or file_get_contents I don't get the <img scr=....
It's a matter with the fact that images appear after some delay? here is the site: https://www.tumbex.com/memes.tumblr/posts?page=2
and code (first try)
$html = file_get_contents('https://www.tumbex.com/memes.tumblr/posts?page=2');
and code (second try)
$html = get_dataa('https://www.tumbex.com/memes.tumblr/posts?page=2');
echo($html);
function get_dataa($url) {
$ch = curl_init();
$timeout = 5;
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST,false);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER,false);
curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
just press F12 on your chrome, then go to Network tab.
there find the api url of tumbex, then copy that with the request header.
if done,you can use curl to that url (api url) to get response..
this my code
<?php
$page = 1; //change number of page here
$url = "https://api.1.tumbex.com/api/tumblr/posts?tumblr=memes&type=posts&page=$page&tag=";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$headers = array(
'Accept: */*',
'Authorization: Bearer 0fae0f237b33e781a6884295b39c6e903484ef1ee3190bd51f07dd9881bdccbd',
'content-type: application/json; charset=UTF-8',
'Referer: https://www.tumbex.com/',
'Accept: application/json, text/plain, */*',
'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
'x-csrf-token: MVMyb2hLTWtQdEJEYjJ0SER1dEwvZz09',
'x-requested-with: XMLHttpRequest'
);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
$res = curl_exec($ch); //result is json
$json = json_decode($res, true);
$edan = $json['response']['posts'];
for($i=0; $i<count($edan); $i++){
$get_post = $edan[$i];
$type = $get_post['detected_type']; //get type
//$get_photo = $get_post['blocks'][0]['content'][0]['hd']; -> get image url
//$get_video = $get_post['blocks'][0]['content'][0]['media']['url']; -> get video url
//$get_text1 = $get_post['blocks'][0]['content'][0]['text']; -> get text 1
//$get_text2 = $get_post['blocks'][0]['content'][1]['text']; -> get text 2
if($type == 'photo'){
$get_photo = $get_post['blocks'][0]['content'][0]['hd'];
echo "<img src='".$get_photo."' height='120' width='160'><br>";
}
}
You must know, the bearer token and x-csrf-token always changingif the result is blank, that means the Bearer token and x-csrf-token has expiredBut, You can solve that manually or use other curl to auto-fetch the bearer token and x-csrf-token..
I want to create App Pinterest with PHP Curl but this is not working. The application I follow the POST process: Postman Interceptor
Request:
curl 'https://developers.pinterest.com/apps/'
-H 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0'
-H 'Accept: */*'
-H 'Accept-Language: tr-TR,tr;q=0.8,en-US;q=0.5,en;q=0.3' --compressed
-H 'Content-Type: application/json'
-H 'X-CSRFToken: IjExYmYxYzZlZDgwM2RlMGQxNzEyZjdlZTRkZTk3NmQ0OTliZWQwYTki.XlzcOw.zET53I_suQKC2TbplvhpouiuaAM'
-H 'X-Requested-With: XMLHttpRequest'
-H 'Origin: https://developers.pinterest.com'
-H 'DNT: 1'
-H 'Connection: keep-alive'
-H 'Referer: https://developers.pinterest.com/apps/'
-H 'Cookie: _pinterest_sess=TWc9PSZESDJWQ0dCNmN0TERrZThaSEFLMnhrekhLVVRzaVMwKzBxY3YxaTJmMDdZMS9kR1V4SHB0UVorajRyY21TU1A4TCtXUElEdlpDMGVNd3BCcGhFYXg0Q3JjTm1uK2VxQ2dMTlo1UHJoQ2ZXc2t6YytLV01VYTNqNU5pL0tqQlZtWU9EMjQ0NGh4UG95VC93b1BPM21kRzNWSHlQT0tpaEdIZ2xNWmd1WUlvdUFDaHBrZDVLRHdYdWVUbnN6cEtRV1QrU1pUdzNTbElaZ0Y3VVc5VW81c1ArQzRZam41dEFCTVV6dEFCMngySnZ6dDZWeUNkeXA3cW5ZcGZUZkFGL1o4TjBnMFlCYUUzaEtEL0VwOTZIdkVHWXpQNjJ5NHRZUm5YNk5YU1hWWWgwV0FCWVJHbzFEb2Z3U24zR0FrbzZTcVJPaXZFYzduTCtCc3RPejYrVFFFd2pDOXM0czRpby96V3lzQlpFVzBLdEk1REVDVk0rajgxeGZRNldXUE9sc24xak50S3pPV3BzdWFXdkhEZ3Q5OUhiYlVjNHBPeUNsYkoySVpRMFpnd2QrcjNtNW0waUhZWkk4YjlXakFiTEtTaGJUOXpiRE9CWDJxcEUzLzZWYVNhZ1NmRDJFWTFDenhJZkZVdmoyeGI1WFZ6TldpZWFWeVhDNFozNkFpcEFsM0k3YlZUMkZiTEROZHpoNnFEdjdpdmJxMW14dVFSWnlETkRTUGFrWDNOd1MycUdETGZWYWNMT0JkbEVxS2N4TmhQekZiaE5zSkltWkVtQTZ6STV0Nm5yMzg2R2toREdiTHRPZ0ZlRXVPc1V2R3hIYUMrNjdNWGFOOGcxL1VHOStNWmZBRHFXd3JOZFJhRFVheXY2emVuRUszZVhuZFRkemdMS3hiL05aajd5WFYyMk5YQWR6MmcvYU9Pa1BtUnU1MTVZQjVmbDZyQjkyMDJXSGh1dXBjamFINGhwU2tYZXpRN1Z1UE1zbU83bytadkFnRURQSy9XcEF3d3VodDh5MXcwTU1LblFGbDUwdWk0a1VleC8vQTRlV3lzejNyeWpRc2sralV6dFZ3c0lMYjZjWFNRMTZreVh2R0Q4OHlSWG91JkE3eERJQXpWS1YyUG1GcldkZHhFeXpCdDlyST0=; _auth=1; session=eyJjc3JmX3Rva2VuIjoiMTFiZjFjNmVkODAzZGUwZDE3MTJmN2VlNGRlOTc2ZDQ5OWJlZDBhOSJ9.XlzdMQ.qu4RJUJgbv0UygBZFyPjw6-aGkw; csrftoken=lwBLncs70zm1Y2Xhx9D7M6w6pVZQw6ro; _dev_sess=TWc9PSZXMDN0NjdEK3FQSWw3WFFUaUNHSEdZVFdLYVgxME9WUkZDTlU1UGczUXRUeWpNZEVrUmZIbjFQSFlXK2V1NTBaS3JrNFZCaW1FVnY3WFRhODNVT3pwQ3ljUldQUXFmU1JFcVNuc2orNi9jSGpnRFAwVjlhRDdJUnBRekNGb3FZK21idk9IdFdlVXpJVzJkZnptaGZpRUtSQ2xpTnZPU2ZZMVdNNFY5MEM5cXV5ZUlzelYrM0tEL1JodU9yM3BBaUw2KzV2WEdJa3RJeEJQdTJSRjYrcUlIakFTS3BXRS9aNldUOXM1dmVLK2VYT1I5TENYQ0hSNGpzd21HcE9icU95M3poSVB4RDVyT1BQeUdnUnhjaGdVTFlvNHR4cUl6Zk9uZllmL1g5NEVZRDgzSzNxWjJPZFJjZXdQaVhuWnBkQzFhVUQzMm42NFplb2k5VUFMWTM3bnc9PSZEd0Y2VUlRQVlvOS9NbmMzcFBmaFNzdlQ4cGs9; country-code=TR'
-H 'TE: Trailers' --data '{"name":"test1","description":"test description"}'
Full Codes:
<?php
//preg_match('/csrftoken(.*?)[\b;\s]/i', $headers, $csrf_token);
error_reporting(E_ALL);
ini_set('display_errors', 1);
$appname = "deneme33";
$appdescription = "lorem ipsum deneme";
$username = "talithae.matson_53#hotmail.com";
$password = "SahsZGHx";
// this is the http post data for logging in - username & password are substituted in later
$login_post = array(
'source_url' => '/login/',
'data' => '{"options":{"username_or_email":"%s","password":"%s"},"context":{}}',
'module_path' => 'App()>LoginPage()>Login()>Button(class_name=primary, text=Log In, type=submit, size=large',
);
$create_app = array(
'data' => '{"name":"%s","description":"%s"}',
);
$pinterest_url = 'https://www.pinterest.com/'; //'https://www.pinterest.com/'; // pinterest home url
$developer_url = 'https://developers.pinterest.com/apps/'; // Developer home url
$login_url = $pinterest_url . 'login/'; // pinterest login page url
$login_post_url = $pinterest_url . 'resource/UserSessionResource/create/'; // pinterest login post url
// http headers to send with requests
$httpheaders = array(
'Connection: keep-alive',
'Pragma: no-cache',
'Cache-Control: no-cache',
'Accept-Language: en-US,en;q=0.5',
);
// http headers to send when logging in
$login_header = array(
'X-NEW-APP: 1',
'X-APP-VERSION: 08930e3', // THIS WILL UPDATE FREQUENTLY, CHANGE IT!!!
'X-Requested-With: XMLHttpRequest',
'Accept: application/json, text/javascript, */*; q=0.01');
$developer_header = array(
'Accept: */*',
'Sec-Fetch-Dest: empty',
'X-Requested-With: XMLHttpRequest',
'Content-Type: application/json');
// ----------------------------------------------------------------------------
// request home page to establish cookies and a session, set curl options
$ch = curl_init($pinterest_url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0');
curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookie.txt');
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
curl_setopt($ch, CURLOPT_VERBOSE, 1);
curl_setopt($ch, CURLOPT_STDERR, fopen('debug.txt', 'w+'));
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpheaders);
$data = curl_exec($ch);
sleep(5);
//echo "Giriş Datası".$data;
// ----------------------------------------------------------------------------
// parse the csrf token out of the cookies to set later when logging in
list($headers, $body) = explode("\r\n\r\n", $data, 2);
$headers = file_get_contents('cookie.txt', true);
//print_r($headers);
preg_match("/csrftoken+(.*?)\n/i",$headers,$matches);
$csrf_token=trim($matches[1]);
preg_match("/session+(.*?)\n/i",$headers,$matches);
$session=trim($matches[1]);
echo "Token:". $csrf_token.PHP_EOL;
echo "Session:". $session.PHP_EOL;
//die();
// next request the login page
curl_setopt($ch, CURLOPT_URL, $login_url);
$data = curl_exec($ch);
//Developer Cookie Get
$ch = curl_init($developer_url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0');
curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookie.txt');
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
curl_setopt($ch, CURLOPT_VERBOSE, 1);
curl_setopt($ch, CURLOPT_STDERR, fopen('debug.txt', 'w+'));
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpheaders);
$data = curl_exec($ch);
//Developer Cookie Get
//Make APP
$create_app['data'] = sprintf($create_app['data'], $appname, $appdescription);
$post = http_build_query($create_app);
print_r($post);
die();
curl_setopt($ch, CURLOPT_URL, $developer_url);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
curl_setopt($ch, CURLOPT_HTTPHEADER, array(
"Accept: */*",
"Sec-Fetch-Dest: empty",
"X-CSRFToken: ".$csrf_token,
"X-Requested-With: XMLHttpRequest",
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
"Content-Type: application/json",
"Cache-Control: no-cache",
"Host: developers.pinterest.com",
"Accept-Encoding: gzip, deflate, br",
"Cookie: session=".$session,
"Connection: keep-alive"
));
curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookie.txt'); // Cookie aware
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt'); // Cookie aware
curl_setopt($ch, CURLOPT_HEADER, 1);
$data = curl_exec($ch);
//Make APP
//echo "Baş Header".print_r($developer_header);
echo $data;
die();
$login_header[] = 'X-CSRFToken: ' . $csrf_token;
$login_post['data'] = sprintf($login_post['data'], $username, $password);
$post = http_build_query($login_post);
curl_setopt($ch, CURLOPT_URL, $login_post_url);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
curl_setopt($ch, CURLOPT_HTTPHEADER, array_merge($httpheaders, $login_header));
curl_setopt($ch, CURLOPT_REFERER, $login_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
$data = curl_exec($ch);
echo $data;
// ----------------------------------------------------------------------------
// perform login post
//print_r($csrf_token);
// check response and output status
if (curl_getinfo($ch, CURLINFO_HTTP_CODE) != 200) {
echo "Error logging in.<br />";
var_dump(curl_getinfo($ch));
} else {
$response = json_decode($data, true);
if ($response === null) {
echo "Failed to decode JSON response.<br /><br />";
var_dump($response);
} else if ($response['resource_response']['status'] === "success") {
print_r($response['resource_response']);
//print_r($response['resource_response']['data']);
//print_r($response['resource_response']['username']);
echo "Login successful, " . $response['resource_response']['user']['username'] . "<br /><br />";
echo "You have {$response['resource_response']['data']['follower_count']} followers, are following {$response['resource_response']['data']['following_count']} users. You have liked {$response['resource_response']['data']['like_count']} pins.";
}
}
I think there is a problem with the data I sent after //Make APP.
Cookies: https://i.imgur.com/OoyVh4C.jpg
Body: https://i.imgur.com/A02mfGe.jpg
Headers: https://i.imgur.com/Adc8Dmd.jpg
Result: https://i.imgur.com/sXBPQxM.jpg
I might be missing something during the post operation.
What am I missing in xhr post sending?
The URL you are trying to call is not part of their normal REST API it looks like. I believe you are supposed to visit that manually and create your app first. Then work with their API after that.
Source
Did you find documentation from them that allows you to do that, or are you trying to hack a method together to accomplish it?
I know this question has been dealt with on a few occasions but none of the fixes seem to work with my particular problem.
I am trying to grab any page from http://www.lewmar.com but some how they are managing to block all attempts. My latest script is as follows:
function curl_get_contents($url)
{
$ch = curl_init();
$browser_id = "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0";
$ip = $_SERVER["SERVER_ADDR"];
curl_setopt($ch, CURLOPT_USERAGENT, $browser_id);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_REFERER, $ip);
$headers = array();
$headers[] = 'Cache-Control: max-age=0';
$headers[] = 'Connection: keep-alive';
$headers[] = 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
$headers[] = 'Accept-Language: en-US,en;q=0.5';
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
$url = 'http://www.lewmar.com';
$contents = curl_get_contents($url);
echo strlen($contents);
I have tried to replicate most of the headers and the site doesn't seem to check for 'Javascript' compatibility but yet still can't get anything returned.
Does anyone have any idea how they might be recognizing cURL and blocking.
Cheers
When you first visit that site it checks to see if you have a cookie. If you don't, it will send you one and send a redirect (to the same page). You haven't got anything in your code to store cookies so you end up going round in a circle. Curl gives up after 20 redirects. Solution: enable cookies!
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookies.txt');
curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookies2.txt');
I am trying to login to ets.org/toefl account using php curl. But I am unable to login to the website. I usually get an error saying server is busy, but it works when I login using a browser. I have attached my code. Can anyone see what is wrong?
<?php
include('simple_html_dom.php');
$login_url = 'https://toefl-registration.ets.org/TOEFLWeb/logon.do';
$username='****';
$password='***';
$ck = 'cookie.txt';
$agent = 'Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20100101 Firefox/22.0';
// extra headers
$headers[] = "Connection: keep-alive";
//$headers[]= "Accept-Encoding: gzip, deflate";
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_COOKIEJAR, $ck);
curl_setopt ($ch, CURLOPT_COOKIEFILE, $ck);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
//curl_setopt($ch, CURLOPT_URL, 'https://toefl-registration.ets.org/TOEFLWebextISERLogonPrompt.do');
$output = curl_exec($ch);
//echo $output;
$html = new simple_html_dom();
$html = str_get_html($output);
$e = $html->find(".loginform");
$a = $e[0]->find('input');
$str = $a[0]->outertext;
preg_match("/value=\"(.*)\"/",$str,$match);
$h_attr = $match[1];
$fields['org.apache.struts.taglib.html.TOKEN'] = $h_attr;
$fields['currentLocale']= 'en_US';
$fields['username'] = $username;
$fields['password'] = $password;
$fields['x'] = 11;
$fields['y'] = 4;
//print_r($fields);
//echo "\r\n";
$POSTFIELDS = http_build_query($fields);
//echo $POSTFIELDS;
$headers[] = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
$headers[] = "Accept-Language: en-US,en;q=0.5";
$headers[]="Referer: https://toefl-registration.ets.org/TOEFLWeb/extISERLogonPrompt.do";
curl_setopt($ch, CURLOPT_URL, $login_url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_VERBOSE, true);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $POSTFIELDS);
$result = curl_exec($ch);
print $result;
(Update from comments)
Post by browser:
org.apache.struts.taglib.html.TOKEN=c1b88957e9914492fe8cc20b33ef1cdd¤tLocale=en_US&username=name&password=pass&x=23&y=3
By me.
org.apache.struts.taglib.html.TOKEN=345a9f935b2db8a69f55c5b4d3372190¤tLocale=en_US&username=name&password=pass&x=11&y=4
Post generated by php curl verbose:
POST /TOEFLWeb/logon.do HTTP/1.1 User-Agent: Mozilla/5.0 (Windows NT
6.1; rv:22.0) Gecko/20100101 Firefox/22.0 Host: toefl-registration.ets.org Cookie: au=MTM3Mjc4ODQwMg%3d%3d; server=3;
JSESSIONID=23C39022E2641B8F5AC944295837315E Connection: keep-alive
Accept: / Accept:
text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8
Accept-Language: en-US,en;q=0.5 Referer:
toefl-registration.ets.org/TOEFLWeb/extISERLogonPrompt.do
Content-Length: 134 Content-Type: application/x-www-form-urlencoded
Try comparing the HTTP headers sent by your CURL script to those headers sent by your browser (use chrome dev tools). Maybe the remote server is refusing you due to some missing header info.
Ensure cookie files have full permissions. From php.net:
When specifing CURLOPT_COOKIEFILE or CURLOPT_COOKIEJAR options, don't
forget to "chmod 777" that directory where cookie-file must be
created.
I got it working somehow... I added certificate verification to the code. Further i found that some delay needs to be present between the two functions get cookie and login. The working code is below
<?php
include('simple_html_dom.php');
$login_url = 'https://toefl-registration.ets.org/TOEFLWeb/logon.do';
$cookie_page = 'https://toefl-registration.ets.org/TOEFLWeb/extISERLogonPrompt.do';
$username='******';
$password='******';
//$ck = 'E:\Projects\Web Development\toefl_script\cookie.txt';
$ck = 'D:\Nikhil\Projects\Wamp\toeflscript\cookie.txt';
//$agent = 'Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20100101 Firefox/22.0';
$agent = 'Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0';
$headers[] = "Connection: keep-alive";
$headers[] = "Accept: */*";
/* Begin Program Execution */
init_curl();
get_cookie();
sleep(30);
login();
function get_cookie()
{
global $ch, $ck, $h_attr, $headers, $cookie_page;
global $ck;
curl_setopt($ch, CURLOPT_URL, $cookie_page);
//curl_setopt($ch, CURLOPT_VERBOSE, true);
$output = curl_exec($ch);
//echo $output;
/*
$html = new simple_html_dom();
$html = str_get_html($output);
$e = $html->find(".loginform");
$a = $e[0]->find('input');
$str = $a[0]->outertext;
preg_match("/value=\"(.*)\"/",$str,$match);
$h_attr = $match[1];
*/
}
function init_curl()
{
global $ch, $ck, $h_attr, $headers, $agent;
global $ck;
ini_set('max_execution_time', 300);
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_CAINFO, getcwd() . '/cacert.pem');
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_COOKIEJAR, $ck);
curl_setopt ($ch, CURLOPT_COOKIEFILE, $ck);
}
function login()
{
global $ch, $login_url, $password, $username, $ck, $h_attr, $headers;
//$fields['org.apache.struts.taglib.html.TOKEN'] = 'abc';//$h_attr;
$fields['currentLocale']= 'en_US';
$fields['username'] = $username;
$fields['password'] = $password;
$fields['x'] = 11;
$fields['y'] = 4;
$POSTFIELDS = http_build_query($fields);
//print_r($fields);
//echo $POSTFIELDS;
$headers[] = "Accept-Language: en-US,en;q=0.5";
$headers[]="Referer: https://toefl-registration.ets.org/TOEFLWeb/extISERLogonPrompt.do";
curl_setopt($ch, CURLOPT_URL, $login_url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_VERBOSE, true);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $POSTFIELDS);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$result = curl_exec($ch);
print $result;
}