There must be some form of redirect happening through JavaScript.
If you load this webpage, https://btc-e.com/index.php,
you will not actually get the page if you use cURL; you just get a bunch of JavaScript. How do I go about getting to the actual HTML so I can start a login process?
I know this website provides an API, but I need a cURL login method that uses the website and not the API.
here is all the code which i am using
<?php
// Fetch the BTC-e landing page through the Curl wrapper defined below and print it.
$curl = new Curl();
$curl->setSsl();
$curl->setCookieFile('whatever_cookie_file.cook');
$page = $curl->get("https://btc-e.com/index.php");
if ($page === false) {
    // get() returns false on a transport error or an empty body; report why
    // instead of silently printing nothing.
    $reason = $curl->getError();
    echo 'Request failed: ' . ($reason !== false ? $reason : 'empty response');
} else {
    echo $page;
}
/**
 * Small convenience wrapper around one reusable cURL handle.
 *
 * Sets browser-like default headers, manages a cookie jar, offers GET/POST
 * helpers and follows redirects itself when libcurl refuses to do so.
 */
class Curl {
    /** Upper bound on redirects followed by maybeFollow() before giving up. */
    const MAX_MANUAL_REDIRECTS = 10;

    /** @var mixed The underlying cURL handle created by curl_init(). */
    public $curl;
    /** @var bool True when this class, not libcurl, has to follow redirects. */
    public $manual_follow;
    /** @var string Redirect target of the last response, or '' if it was not a redirect. */
    public $redirect_url;
    /** @var string|null Path of the cookie jar set through setCookieFile(). */
    public $cookiefile = null;
    /** @var array Request headers currently applied to the handle. */
    public $headers = array();

    /** @var bool Mirrors CURLOPT_HEADER so maybeFollow() knows whether headers prefix the body. */
    protected $include_header = false;
    /** @var int Number of manual redirects currently being unwound. */
    protected $redirect_depth = 0;

    /**
     * Creates the handle and applies the default options.
     *
     * Declared as __construct(): a method named after the class is no longer
     * treated as a constructor on PHP 8, which would leave $this->curl unset.
     *
     * @param string|false $proxy Proxy to route requests through, or false for none.
     */
    public function __construct($proxy = false) {
        $this->curl = curl_init();
        $this->headers[] = "Accept: */*;q=0.5, text/javascript, application/javascript, application/ecmascript, application/x-ecmascript";
        $this->headers[] = "Cache-Control: max-age=0";
        $this->headers[] = "Connection: keep-alive";
        $this->headers[] = "Keep-Alive: 300";
        $this->headers[] = "Accept-Charset: utf-8;ISO-8859-1;iso-8859-2;q=0.7,*;q=0.7";
        $this->headers[] = "Accept-Language: en-us,en;q=0.5";
        $this->headers[] = "Pragma: "; // browsers keep this blank.
        // CURLOPT_USERAGENT takes the header value only; the "User-Agent: " prefix must not be part of it.
        curl_setopt($this->curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.14) Gecko/2009082707 Firefox/3.0.14 (.NET CLR 3.5.30729)');
        curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
        curl_setopt($this->curl, CURLOPT_VERBOSE, false);
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($this->curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
        curl_setopt($this->curl, CURLOPT_ENCODING, 'gzip,deflate');
        curl_setopt($this->curl, CURLOPT_AUTOREFERER, true);
        if ($proxy != false) {
            curl_setopt($this->curl, CURLOPT_PROXY, $proxy);
        }
        curl_setopt($this->curl, CURLOPT_HEADER, false);
        $this->include_header = false;
        curl_setopt($this->curl, CURLOPT_TIMEOUT, 30);
        // Decides between native and manual redirect following by asking libcurl directly.
        $this->setRedirect();
    }

    /** Appends one raw header line (e.g. "X-Foo: bar") to every subsequent request. */
    public function addHeader($header) {
        $this->headers[] = $header;
        curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
    }

    /** Turns inclusion of response headers in the raw transfer on or off. */
    public function header($val) {
        $this->include_header = (bool) $val;
        curl_setopt($this->curl, CURLOPT_HEADER, $val);
    }

    /** Removes the X-Requested-With header added by setAjax(). */
    public function noAjax() {
        $this->headers = array_values(array_diff($this->headers, array("X-Requested-With: XMLHttpRequest")));
        curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
    }

    /** Marks subsequent requests as XMLHttpRequest calls. */
    public function setAjax() {
        $this->headers[] = "X-Requested-With: XMLHttpRequest";
        curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
    }

    /**
     * Disables TLS certificate checks and optionally sets HTTP auth credentials.
     *
     * WARNING: with peer and host verification off the connection can be
     * intercepted; only use this against hosts you do not need to authenticate.
     */
    public function setSsl($username = null, $password = null) {
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($this->curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
        if ($username && $password) {
            curl_setopt($this->curl, CURLOPT_USERPWD, "$username:$password");
        }
    }

    /** Sets the user name and password sent with subsequent requests. */
    public function setBasicAuth($username, $password) {
        curl_setopt($this->curl, CURLOPT_HEADER, false);
        $this->include_header = false;
        curl_setopt($this->curl, CURLOPT_USERPWD, "$username:$password");
    }

    /**
     * Uses $file as both cookie source and cookie jar, creating it if missing.
     *
     * @param string $file Path of the cookie file.
     */
    public function setCookieFile($file) {
        if (!file_exists($file)) {
            $handle = fopen($file, 'w+');
            if ($handle === false) {
                print('The cookie file could not be opened. Make sure this directory has the correct permissions');
            } else {
                // Only close a handle that was actually opened.
                fclose($handle);
            }
        }
        curl_setopt($this->curl, CURLOPT_COOKIESESSION, true);
        curl_setopt($this->curl, CURLOPT_COOKIEJAR, $file);
        curl_setopt($this->curl, CURLOPT_COOKIEFILE, $file);
        $this->cookiefile = $file;
    }

    /**
     * Reads the cookie file and returns its cookies as name => value.
     *
     * Every line with more than three tab-separated fields is treated as a
     * cookie whose last two fields are the name and the value.
     *
     * @return array Empty when no cookie file is set or it cannot be read.
     */
    public function getCookies() {
        $cookies = array();
        if ($this->cookiefile === null || !is_readable($this->cookiefile)) {
            return $cookies;
        }
        $contents = file_get_contents($this->cookiefile);
        if (!$contents) {
            return $cookies;
        }
        foreach (explode("\n", $contents) as $line) {
            $fields = explode("\t", $line);
            $count = count($fields);
            if ($count > 3) {
                $cookies[$fields[$count - 2]] = str_replace(array("\n", "\r"), '', $fields[$count - 1]);
            }
        }
        return $cookies;
    }

    /** Passes $val through to CURLOPT_BINARYTRANSFER. */
    public function setDataMode($val) {
        curl_setopt($this->curl, CURLOPT_BINARYTRANSFER, $val);
    }

    /** Closes the underlying handle. */
    public function close() {
        curl_close($this->curl);
    }

    /** Returns curl_getinfo() for the last transfer. */
    public function getInfo() {
        return curl_getinfo($this->curl);
    }

    /**
     * Returns one shared Curl instance, creating it on first use.
     *
     * Static so it can be called as Curl::getInstance() without an object.
     */
    public static function getInstance() {
        static $instance = null;
        if ($instance === null) {
            $instance = new Curl;
        }
        return $instance;
    }

    /** Sets the connect timeout and the overall transfer timeout, both in seconds. */
    public function setTimeout($connect, $transfer) {
        curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, $connect);
        curl_setopt($this->curl, CURLOPT_TIMEOUT, $transfer);
    }

    /** Returns the last cURL error message, or false when there was none. */
    public function getError() {
        return curl_errno($this->curl) ? curl_error($this->curl) : false;
    }

    /** Stops redirects from being followed, natively or manually. */
    public function disableRedirect() {
        $this->setRedirect(false);
    }

    /**
     * Enables or disables redirect following.
     *
     * When enabling, manual following is switched on only if libcurl rejects
     * CURLOPT_FOLLOWLOCATION.
     */
    public function setRedirect($enable = true) {
        if ($enable) {
            $this->manual_follow = !curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
        } else {
            curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, false);
            $this->manual_follow = false;
        }
    }

    /** Returns the HTTP status code of the last transfer. */
    public function getHttpCode() {
        return curl_getinfo($this->curl, CURLINFO_HTTP_CODE);
    }

    /**
     * Builds a URL-encoded query string.
     *
     * @param array|string $data Key/value pairs, or an already encoded string.
     * @return string Encoded pairs joined with '&'; strings are returned unchanged.
     */
    public function makeQuery($data) {
        if (!is_array($data)) {
            return $data;
        }
        $pairs = array();
        foreach ($data as $key => $value) {
            // Keys need encoding too, otherwise spaces or '&' in a key corrupt the query.
            $pairs[] = urlencode($key) . '=' . urlencode($value);
        }
        return implode('&', $pairs);
    }

    /**
     * Post-processes a raw response: strips headers if they were requested,
     * records the redirect target and follows it when manual following is on.
     *
     * @param string $page Raw result of curl_exec().
     * @return string|false Response body, or false when a followed request fails
     *                      or MAX_MANUAL_REDIRECTS is exceeded.
     */
    public function maybeFollow($page) {
        if ($this->include_header) {
            // Cut exactly the header bytes libcurl reports instead of guessing at a
            // blank line, which would truncate bodies when headers are not included.
            $page = (string) substr($page, (int) curl_getinfo($this->curl, CURLINFO_HEADER_SIZE));
        }
        $code = $this->getHttpCode();
        if ($code > 300 && $code < 310) {
            // libcurl resolves relative and https targets for us.
            $target = curl_getinfo($this->curl, CURLINFO_REDIRECT_URL);
            $this->redirect_url = is_string($target) ? $target : '';
            if ($this->manual_follow && $this->redirect_url !== '') {
                if ($this->redirect_depth >= self::MAX_MANUAL_REDIRECTS) {
                    return false; // redirect loop or overly long chain
                }
                $this->redirect_depth++;
                $followed = $this->get($this->redirect_url);
                $this->redirect_depth--;
                return $followed;
            }
        } else {
            $this->redirect_url = '';
        }
        return $page;
    }

    /**
     * POSTs $data exactly as given.
     *
     * @param string       $url
     * @param string|array $data Passed to CURLOPT_POSTFIELDS unchanged.
     * @return string|false Response body, or false on error or empty response.
     */
    public function plainPost($url, $data) {
        return $this->sendPost($url, $data);
    }

    /**
     * POSTs $data as an application/x-www-form-urlencoded body.
     *
     * @param string       $url
     * @param array|string $data Encoded with makeQuery() first.
     * @return string|false Response body, or false on error or empty response.
     */
    public function post($url, $data) {
        return $this->sendPost($url, $this->makeQuery($data));
    }

    /**
     * Performs a GET request.
     *
     * @param string            $url
     * @param array|string|null $data Optional query parameters appended to $url.
     * @return string|false Response body, or false on error or empty response.
     */
    public function get($url, $data = null) {
        curl_setopt($this->curl, CURLOPT_FRESH_CONNECT, false);
        // Force GET: a previous POST on this handle would otherwise stick.
        curl_setopt($this->curl, CURLOPT_HTTPGET, true);
        if (!is_null($data)) {
            $separator = strpos($url, '?') === false ? '?' : '&';
            $url .= $separator . $this->makeQuery($data);
        }
        curl_setopt($this->curl, CURLOPT_URL, $url);
        $page = curl_exec($this->curl);
        if (curl_errno($this->curl) != CURLE_OK || $page === false || $page === '') {
            return false;
        }
        return $this->maybeFollow($page);
    }

    /** Shared implementation of post() and plainPost(). */
    protected function sendPost($url, $fields) {
        curl_setopt($this->curl, CURLOPT_URL, $url);
        curl_setopt($this->curl, CURLOPT_POST, true);
        curl_setopt($this->curl, CURLOPT_POSTFIELDS, $fields);
        $page = curl_exec($this->curl);
        if (curl_errno($this->curl) != CURLE_OK || $page === false || $page === '') {
            // Return before touching the handle again so getError() still reports this failure.
            return false;
        }
        // Switch back to GET. Setting CURLOPT_POSTFIELDS to '' would re-arm POST instead.
        curl_setopt($this->curl, CURLOPT_HTTPGET, true);
        return $this->maybeFollow($page);
    }
}
?>
The answer to this question was not specifically with curl.
I was unable to load the initial webpage because the website performs a very simple cookie verification.
To solve this problem, simply parse out all the needed values from the initially loaded webpage.
Once you have all the needed values, write a cookie that includes everything. After the cookie is passed to the server, you are allowed to see the content of the webpage.
For more advanced JavaScript manipulation, use a system such as PhantomJS with CasperJS, or a solution such as Selenium with PHPUnit in headless mode.
Hope this helps anyone who faced the same problem
Related
I have spent a lot of time on it, but did not find any working solution...
I have tried the following code, but the else case always runs: "didnt find login form1".
I have tried an API implemented by another coder, but it was also deprecated...
I found many other solutions, but not in PHP. I am looking for a solution in PHP.
/**
 * Logs in to Google with cURL, creates a Google Alert delivered as a feed and
 * returns the URL of that feed.
 */
class googleAlerts{
    /**
     * Creates an alert for $alert and looks up its feed URL.
     *
     * @param string $alert Search query of the alert.
     * @return string|false Feed URL, or false when a request fails, the login
     *                      response has no <title>, or the feed link is not found.
     */
    public function createAlert($alert){
        $ch = curl_init();
        $feedUrl = $this->runCreateAlert($ch, $alert);
        // Release the handle on every return path.
        curl_close($ch);
        return $feedUrl;
    }

    /** Performs the login / create / look-up sequence on an open handle. */
    private function runCreateAlert($ch, $alert)
    {
        $USERNAME = 'XXXXXX#gmail.com';
        $PASSWORD = 'YYYYYY';
        $COOKIEFILE = 'cookies.txt';

        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // Peer verification stays at its default (on): the password is sent over this connection.
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $COOKIEFILE);
        curl_setopt($ch, CURLOPT_COOKIEFILE, $COOKIEFILE);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
        curl_setopt($ch, CURLOPT_TIMEOUT, 120);

        // 1. Load the login page and collect its hidden form fields.
        curl_setopt($ch, CURLOPT_URL,
            'https://accounts.google.com/ServiceLogin?hl=en&service=alerts&continue=http://www.google.com/alerts/manage');
        $data = curl_exec($ch);
        if ($data === false) {
            return false;
        }
        $formFields = $this->getFormFields($data);
        $formFields['Email'] = $USERNAME;
        $formFields['Passwd'] = $PASSWORD;
        unset($formFields['PersistentCookie']);

        // 2. Submit the credentials.
        curl_setopt($ch, CURLOPT_URL, 'https://accounts.google.com/ServiceLoginAuth');
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $this->buildPostString($formFields));
        $result = curl_exec($ch);
        if ($result === false || strpos($result, '<title>') === false) {
            return false;
        }

        // 3. Visit the alerts pages with GET and read the "create" form.
        curl_setopt($ch, CURLOPT_HTTPGET, true);
        curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts');
        curl_exec($ch);
        curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts/create');
        $createPage = curl_exec($ch);
        if ($createPage === false) {
            return false;
        }
        $fields = $this->getFormFieldsCreate($createPage);
        $fields['q'] = $alert;
        $fields['t'] = '7';
        $fields['f'] = '1';
        $fields['l'] = '0';
        $fields['e'] = 'feed';
        unset($fields['PersistentCookie']);

        // 4. Submit the new alert.
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $this->buildPostString($fields));
        curl_exec($ch);

        // 5. Fetch the management page with GET (POST would otherwise still be armed)
        //    and pick out the feed link that belongs to the alert.
        curl_setopt($ch, CURLOPT_HTTPGET, true);
        curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts/manage');
        $managePage = curl_exec($ch);
        if ($managePage === false) {
            return false;
        }
        return $this->findFeedUrl($alert, $managePage);
    }

    /** Encodes name => value pairs as "a=1&b=2" with URL-encoded values. */
    private function buildPostString(array $fields)
    {
        $pairs = array();
        foreach ($fields as $key => $value) {
            $pairs[] = $key . '=' . urlencode($value);
        }
        return implode('&', $pairs);
    }

    /**
     * Finds the feed URL that follows the link labelled $alert in $html.
     *
     * @return string|false The feed URL, or false when it is not present.
     */
    private function findFeedUrl($alert, $html)
    {
        // The alert text is user data: quote it so characters such as '(', '+' or '%'
        // are matched literally instead of being parsed as regex syntax.
        $pattern = '%' . preg_quote($alert, '%') . '(?=</a>).*?<a href=[\'"]http://www.google.com/alerts/feeds/([^\'"]+)%i';
        if (preg_match($pattern, $html, $match)) {
            return 'http://www.google.com/alerts/feeds/' . $match[1];
        }
        return false;
    }

    /** Returns the inputs of the Google login form; terminates the script if the form is missing. */
    private function getFormFields($data)
    {
        if (preg_match('/(<form.*?id=.?gaia_loginform.*?<\/form>)/is', $data, $matches)) {
            return $this->getInputs($matches[1]);
        }
        die('didnt find login form');
    }

    /** Returns the inputs of the first form carrying a name attribute; terminates the script if none is found. */
    private function getFormFieldsCreate($data)
    {
        if (preg_match('/(<form.*?name=.?.*?<\/form>)/is', $data, $matches)) {
            return $this->getInputs($matches[1]);
        }
        die('didnt find login form1');
    }

    /**
     * Extracts name => value from every <input> tag in $form.
     *
     * Inputs without a name are skipped; a missing value becomes ''.
     */
    private function getInputs($form)
    {
        $inputs = array();
        if (!preg_match_all('/<input\b[^>]*>/is', $form, $tags)) {
            return $inputs;
        }
        foreach ($tags[0] as $tag) {
            $name = $this->readAttribute($tag, 'name');
            if ($name === null) {
                continue;
            }
            $value = $this->readAttribute($tag, 'value');
            $inputs[$name] = $value === null ? '' : $value;
        }
        return $inputs;
    }

    /**
     * Reads one attribute from a tag, honouring double, single or no quotes.
     *
     * @return string|null Entity-decoded value, or null when the attribute is absent.
     */
    private function readAttribute($tag, $attribute)
    {
        // The look-behind keeps e.g. "data-name" from being read as "name".
        $pattern = '/(?<![\w-])' . preg_quote($attribute, '/') . '\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]*))/i';
        if (!preg_match($pattern, $tag, $m)) {
            return null;
        }
        // Exactly one alternative matched; it is always the last captured group.
        return html_entity_decode($m[count($m) - 1], ENT_QUOTES, 'UTF-8');
    }
}
// Create an alert and print its feed URL. createAlert() returns false on
// failure, in which case nothing is printed.
$alert = new googleAlerts;
echo $alert->createAlert('YOUR ALERT');
You can't log in to Google Alerts with an email and password anymore. You have to pre-create the cookies by logging in to Google Alerts in a browser, copying them out of the dev console, and then passing them along when making the cURL request. Check out the Google Alerts API I have written in PHP; maybe it helps you out: https://github.com/Trivo25/google-alerts-api-php
I have a form which uses cURL to submit the apiKey to my server; the script on my server then verifies the key and returns true or false. But instead of a response, I'm getting "Trying to access array offset on value of type null". I want to know how to get the response from my server after the cURL submission.
Curl Submit
// Post the API key to the verification endpoint and decode its JSON reply.
// $response always ends up as an array with a 'success' key, so reading
// $response['success'] can no longer fail with "array offset on value of type null".
$post['apiKey'] = $apiKey;
$ch = curl_init();
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// TLS verification is left at its defaults (on): the API key is a secret.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_URL,"https://www.pawnhost.com/phevapi/verify_api.php");
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
$json = curl_exec($ch);
if ($json === false) {
    // Transport-level failure: there is no body to decode.
    $response = array('success' => false, 'error' => 'requestFailed: ' . curl_error($ch));
} else {
    $response = json_decode($json, true);
    if (!is_array($response)) {
        // Empty body or non-JSON output (e.g. an HTML error page).
        $response = array('success' => false, 'error' => 'invalidResponse');
    }
}
curl_close($ch);
Server Script
<?php
// Prefix of the redirect sent on validation errors. HTTP_REFERER is optional
// and client-supplied, so fall back to an empty base when it is absent.
define("ERROR_HEADER_URL", "Location: " . (isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '') . "?error=");
require("includes/initialize.php");

// header() only queues the redirect; each error path must exit explicitly or
// the script keeps running with invalid input.
if ($_SERVER['REQUEST_METHOD'] != 'POST') {
    header(ERROR_HEADER_URL . "invalidRequest");
    exit;
}
$postParams = allowedPOSTParams($allowed_params=['apiKey']);
if (!isset($postParams['apiKey'])) {
    header(ERROR_HEADER_URL . "verficationFailed");
    exit;
}
$apiKey = escape($postParams['apiKey']);
if (isInputEmpty($apiKey)) {
    header(ERROR_HEADER_URL . "emptyFields");
    exit;
}
// NOTE(review): the original test was `!$apiKey == 25`, which parses as
// `(!$apiKey) == 25` and never rejects a non-empty key. It is assumed to be a
// length check on a 25-character key; confirm the expected key length.
if (strlen($apiKey) != 25) {
    header(ERROR_HEADER_URL . urlencode("invalidKey"));
    exit;
}

$response = [];
// Look the key up once and reuse the result.
$details = getApiKeyUserDetails($apiKey, $connection);
if ($details) {
    if ($details['apiKeyUsed'] > 0) {
        $response['success'] = false;
        $response['error'] = 'apiKeyUsed';
    } else {
        makeApiKeyUsed($apiKey, $connection);
        $response['success'] = true;
    }
} else {
    $response['success'] = false;
    $response['error'] = 'invalidApiKey';
}
// A top-level `return` sends nothing to the client; the JSON has to be printed
// for the caller's curl_exec() to receive it.
header('Content-Type: application/json');
echo json_encode($response);
Allowed Post Params Function:
/**
 * Picks a whitelist of fields out of $_POST.
 *
 * @param array $allowed_params Names of the POST fields to read.
 * @return array One entry per requested name: the posted value, or NULL when
 *               the field is missing from $_POST.
 */
function allowedPOSTParams($allowed_params=[]) {
    $picked = [];
    foreach ($allowed_params as $name) {
        $picked[$name] = $_POST[$name] ?? NULL;
    }
    return $picked;
}
Replace
// Passes the bare key string as the POST body, so no field named "apiKey" is sent.
curl_setopt($ch, CURLOPT_POSTFIELDS, $apiKey);
with
// Passes the key under the field name "apiKey" so the server can read it as a POST parameter.
curl_setopt($ch, CURLOPT_POSTFIELDS, array('apiKey'=>$apiKey));
Then you will be able to find apiKey as a POST parameter.
Is it currently possible to send toast push notifications to a Windows 8 Store app from a PHP server-side script, as it is on Windows Phone?
I was able to find the PHP code below, which sends push notifications to a Windows 8 Store app, but it doesn't seem to support toast notifications.
<?php
/**
 * Notification kinds, as the values WPN::post_tile() places in the
 * X-WNS-Type request header.
 */
class WPNTypesEnum
{
    const Raw   = 'wns/raw';
    const Tile  = 'wns/tile';
    const Badge = 'wns/badge';
    const Toast = 'wns/toast';
}
/**
 * Outcome of one push request: a human-readable message, the HTTP status
 * code and a flag telling whether the request failed.
 */
class WPNResponse
{
    /** @var string Description of the outcome. */
    public $message = '';
    /** @var bool True when the request did not succeed. */
    public $error = false;
    /** @var mixed HTTP status code of the request. */
    public $httpCode = '';

    /**
     * @param string $message  Description of the outcome.
     * @param mixed  $httpCode HTTP status code of the request.
     * @param bool   $error    Whether the request failed.
     */
    function __construct($message, $httpCode, $error = false)
    {
        $this->error = $error;
        $this->httpCode = $httpCode;
        $this->message = $message;
    }
}
/**
 * Minimal Windows Push Notification Services (WNS) client: obtains an OAuth
 * access token with the app's SID and secret, then posts notification XML to
 * a channel URI.
 */
class WPN
{
    private $access_token = '';
    private $sid = '';
    private $secret = '';

    /**
     * @param string $sid    Package security identifier, sent as client_id.
     * @param string $secret Client secret, sent as client_secret.
     */
    function __construct($sid, $secret)
    {
        $this->sid = $sid;
        $this->secret = $secret;
    }

    /**
     * Requests an access token unless one is already cached.
     *
     * @throws Exception When the request fails or the response carries no token.
     */
    private function get_access_token()
    {
        if ($this->access_token != '') {
            return;
        }
        // Build the body with proper URL-encoding: the SID and secret may contain
        // reserved characters such as ':', '/' or '+'.
        $str = http_build_query(array(
            'grant_type'    => 'client_credentials',
            'client_id'     => $this->sid,
            'client_secret' => $this->secret,
            'scope'         => 'notify.windows.com',
        ), '', '&');
        $ch = curl_init("https://login.live.com/accesstoken.srf");
        // TLS verification is left at its defaults (on): the client secret is sent here.
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: application/x-www-form-urlencoded'));
        curl_setopt($ch, CURLOPT_POSTFIELDS, $str);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        $output = curl_exec($ch);
        $failure = $output === false ? curl_error($ch) : null;
        curl_close($ch);
        if ($failure !== null) {
            throw new Exception('Access token request failed: ' . $failure);
        }
        $output = json_decode($output);
        if (!is_object($output)) {
            throw new Exception('Access token response was not valid JSON');
        }
        if (isset($output->error)) {
            throw new Exception(isset($output->error_description) ? $output->error_description : $output->error);
        }
        if (!isset($output->access_token)) {
            throw new Exception('Access token response did not contain a token');
        }
        $this->access_token = $output->access_token;
    }

    /** Escapes a value for use in XML text or a quoted attribute. */
    private function xml_escape($value)
    {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_XML1, 'UTF-8');
    }

    /**
     * Builds a wide tile payload with one image and one line of text.
     *
     * @param string $title Text shown on the tile.
     * @param string $img   Image URL.
     * @return string Tile XML; send it with type WPNTypesEnum::Tile.
     */
    public function build_tile_xml($title, $img)
    {
        return '<?xml version="1.0" encoding="utf-16"?>'.
            '<tile>'.
            '<visual lang="en-US">'.
            '<binding template="TileWideImageAndText01">'.
            '<image id="1" src="'.$this->xml_escape($img).'"/>'.
            '<text id="1">'.$this->xml_escape($title).'</text>'.
            '</binding>'.
            '</visual>'.
            '</tile>';
    }

    /**
     * Builds a toast payload with a headline and a body line.
     *
     * @param string $title Headline text.
     * @param string $text  Body text.
     * @return string Toast XML; send it with post_tile($uri, $xml, WPNTypesEnum::Toast).
     */
    public function build_toast_xml($title, $text)
    {
        return '<?xml version="1.0" encoding="utf-16"?>'.
            '<toast>'.
            '<visual lang="en-US">'.
            '<binding template="ToastText02">'.
            '<text id="1">'.$this->xml_escape($title).'</text>'.
            '<text id="2">'.$this->xml_escape($text).'</text>'.
            '</binding>'.
            '</visual>'.
            '</toast>';
    }

    /**
     * Posts a notification payload to a channel URI.
     *
     * Despite the name this sends any notification kind; $type selects it.
     *
     * @param string $uri      Channel URI of the target app instance.
     * @param string $xml_data Notification payload.
     * @param string $type     One of the WPNTypesEnum constants.
     * @param string $tileTag  Optional value for the X-WNS-Tag header.
     * @return WPNResponse
     * @throws Exception When no access token can be obtained.
     */
    public function post_tile($uri, $xml_data, $type = WPNTypesEnum::Tile, $tileTag = '')
    {
        return $this->send($uri, $xml_data, $type, $tileTag, true);
    }

    /**
     * Sends the request. On 401 the cached token is dropped and the request is
     * retried once, so a permanently rejected token cannot recurse forever.
     */
    private function send($uri, $xml_data, $type, $tileTag, $retryOnUnauthorized)
    {
        if ($this->access_token == '') {
            $this->get_access_token();
        }
        $headers = array('Content-Type: text/xml', "Content-Length: " . strlen($xml_data), "X-WNS-Type: $type", "Authorization: Bearer $this->access_token");
        if ($tileTag != '') {
            array_push($headers, "X-WNS-Tag: $tileTag");
        }
        $ch = curl_init($uri);
        # Tiles: http://msdn.microsoft.com/en-us/library/windows/apps/xaml/hh868263.aspx
        # http://msdn.microsoft.com/en-us/library/windows/apps/hh465435.aspx
        // CURLOPT_VERBOSE is deliberately not enabled: its trace includes the
        // Authorization header and would leak the bearer token into logs.
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_POSTFIELDS, "$xml_data");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_exec($ch);
        $response = curl_getinfo($ch);
        curl_close($ch);
        $code = $response['http_code'];
        if ($code == 200) {
            return new WPNResponse('Successfully sent message', $code);
        }
        if ($code == 401) {
            $this->access_token = '';
            if ($retryOnUnauthorized) {
                return $this->send($uri, $xml_data, $type, $tileTag, false);
            }
            return new WPNResponse('Unauthorized: access token was rejected', $code, true);
        }
        if ($code == 410 || $code == 404) {
            return new WPNResponse('Expired or invalid URI', $code, true);
        }
        return new WPNResponse('Unknown error while sending message', $code, true);
    }
}
?>
I'm using cURL to scrape the HTML from URLs. It works great for 80% of the URLs I use, but some URLs don't seem "scrapeable". For example, when I try to scrape http://www.thefancy.com, it doesn't work: the website keeps loading and in the end it doesn't return a result. The problem can be tested at http://www.itemmized.com/test/test/. This is my code:
if (isset($_POST['submit']) && $_POST['submit']) {
    /**
     * Executes a cURL handle and follows redirects, natively when PHP allows
     * CURLOPT_FOLLOWLOCATION and by probing the Location headers otherwise.
     *
     * @param mixed    $ch          Handle with its URL already set.
     * @param int|null $maxredirect Maximum redirects to follow (default 5). When a
     *                              non-null value is passed and the limit is hit in
     *                              the manual path, it is set to 0.
     * @return mixed Result of curl_exec(), or false when the redirect limit is exceeded.
     */
    function curl_exec_follow($ch, &$maxredirect = null) {
        $mr = $maxredirect === null ? 5 : intval($maxredirect);

        // The old test `ini_get('safe_mode' == 'Off')` evaluated the comparison
        // first and then asked for a setting that does not exist, so the native
        // path was never taken.
        $safe_mode = strtolower((string) ini_get('safe_mode'));
        $native = ini_get('open_basedir') == '' && in_array($safe_mode, array('', '0', 'off'), true);

        if ($native) {
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $mr > 0);
            curl_setopt($ch, CURLOPT_MAXREDIRS, $mr);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            // Certificate verification is intentionally left enabled here.
            return curl_exec($ch);
        }

        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
        if ($mr > 0) {
            $newurl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
            // Probe with header-only requests on a copy so $ch itself stays untouched.
            $rch = curl_copy_handle($ch);
            curl_setopt($rch, CURLOPT_HEADER, true);
            curl_setopt($rch, CURLOPT_NOBODY, true);
            curl_setopt($rch, CURLOPT_FORBID_REUSE, false);
            curl_setopt($rch, CURLOPT_RETURNTRANSFER, true);
            do {
                curl_setopt($rch, CURLOPT_URL, $newurl);
                $header = curl_exec($rch);
                $code = 0;
                if (!curl_errno($rch)) {
                    $status = curl_getinfo($rch, CURLINFO_HTTP_CODE);
                    // Header names are case-insensitive (HTTP/2 sends "location:").
                    if (in_array($status, array(301, 302, 303, 307, 308), true)
                        && preg_match('/^Location:\s*(.*?)\s*$/mi', (string) $header, $matches)
                        && $matches[1] !== ''
                    ) {
                        $code = $status;
                        $location = $matches[1];
                        if (!preg_match('/^https?:/i', $location)) {
                            // Resolve a relative target against the URL that issued the redirect.
                            $base = parse_url($newurl);
                            $scheme = isset($base['scheme']) ? $base['scheme'] : 'http';
                            $origin = $scheme . '://' . (isset($base['host']) ? $base['host'] : '')
                                . (isset($base['port']) ? ':' . $base['port'] : '');
                            if (strpos($location, '//') === 0) {
                                $location = $scheme . ':' . $location;
                            } elseif ($location[0] === '/') {
                                $location = $origin . $location;
                            } else {
                                $dir = isset($base['path']) ? preg_replace('#/[^/]*$#', '/', $base['path']) : '/';
                                $location = $origin . $dir . $location;
                            }
                        }
                        $newurl = $location;
                    }
                }
            } while ($code && --$mr);
            curl_close($rch);
            if (!$mr) {
                if ($maxredirect === null) {
                    trigger_error('Too many redirects.', E_USER_WARNING);
                } else {
                    $maxredirect = 0;
                }
                return false;
            }
            curl_setopt($ch, CURLOPT_URL, $newurl);
        }
        return curl_exec($ch);
    }

    $url = isset($_POST['form_url']) ? trim($_POST['form_url']) : '';
    if (!preg_match('#^https?://#i', $url)) {
        // The URL comes from the form: refuse file://, gopher:// and the like.
        echo 'Please enter an http:// or https:// URL.';
    } else {
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // Keep redirects on HTTP(S) as well.
        curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        // Bound the request so an unresponsive site cannot hang the page.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        $data = curl_exec_follow($ch);
        if ($data === false) {
            echo 'Request failed: ' . htmlspecialchars(curl_error($ch));
        } else {
            echo $data;
        }
        curl_close($ch);
    }
}
Try this.. hope this helps...
<?php
/**
 * Minimal cURL helper that sends browser-like headers and keeps cookies in a
 * jar file between requests.
 */
class Curl
{
    /** @var string Path of the file used as cookie jar and cookie source. */
    public $cookieJar = "";
    /** @var mixed Handle of the most recent request, or null before the first one. */
    public $curl = null;

    /**
     * @param string $cookieJarFile Path of the cookie file.
     */
    public function __construct($cookieJarFile = 'cookies.txt') {
        $this->cookieJar = $cookieJarFile;
    }

    /** Applies the default headers, cookie and redirect options to the current handle. */
    function setup()
    {
        $header = array();
        $header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
        $header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
        $header[] = "Cache-Control: max-age=0";
        $header[] = "Connection: keep-alive";
        $header[] = "Keep-Alive: 300";
        $header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
        $header[] = "Accept-Language: en-us,en;q=0.5";
        $header[] = "Pragma: "; // browsers keep this blank.
        curl_setopt($this->curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.8.1.7) Gecko/20070914 Firefox/2.0.0.7');
        curl_setopt($this->curl, CURLOPT_HTTPHEADER, $header);
        // Must be the property: a bare $cookieJar is an undefined local here, so
        // no cookie file was ever used.
        curl_setopt($this->curl, CURLOPT_COOKIEJAR, $this->cookieJar);
        curl_setopt($this->curl, CURLOPT_COOKIEFILE, $this->cookieJar);
        curl_setopt($this->curl, CURLOPT_AUTOREFERER, true);
        curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true);
    }

    /** Starts a fresh handle for $url, closing the previous one first. */
    private function open($url)
    {
        if ($this->curl) {
            curl_close($this->curl);
        }
        $this->curl = curl_init($url);
        $this->setup();
    }

    /**
     * Fetches $url with GET.
     *
     * @return string|false Response body, or false on failure.
     */
    function get($url)
    {
        $this->open($url);
        return $this->request();
    }

    /**
     * Returns everything the first capture group of $reg matched in $str.
     *
     * @return array Empty when nothing matched or the pattern has no group.
     */
    function getAll($reg,$str)
    {
        preg_match_all($reg,$str,$matches);
        return isset($matches[1]) ? $matches[1] : array();
    }

    /**
     * Submits $fields to $url with POST.
     *
     * @param string       $url
     * @param array|string $fields  Passed to CURLOPT_POSTFIELDS unchanged.
     * @param string       $referer Value for CURLOPT_REFERER.
     * @return string|false Response body, or false on failure.
     */
    function postForm($url, $fields, $referer='')
    {
        $this->open($url);
        curl_setopt($this->curl, CURLOPT_POST, 1);
        curl_setopt($this->curl, CURLOPT_REFERER, $referer);
        curl_setopt($this->curl, CURLOPT_POSTFIELDS, $fields);
        return $this->request();
    }

    /**
     * Returns one piece of transfer information for the last request.
     *
     * @param mixed $info 'lasturl' for the effective URL, otherwise a CURLINFO_* constant.
     */
    function getInfo($info)
    {
        if ($info == 'lasturl') {
            return curl_getinfo($this->curl, CURLINFO_EFFECTIVE_URL);
        }
        return curl_getinfo($this->curl, $info);
    }

    /** Executes the current handle and returns the result of curl_exec(). */
    function request()
    {
        return curl_exec($this->curl);
    }
}
// Demo: download the page with the Curl helper and print whatever came back.
$curl = new Curl();
$html = $curl->get("http://www.thefancy.com");
echo $html;
?>
You are probably unable to scrape http://www.thefancy.com because new content loads every time you reach the bottom of the page, so you are effectively trying to fetch an enormous amount of information with cURL; that is probably where the problem is, and you just get a timeout. Try setting a larger timeout in php.ini and give it another try. It will probably take a while to load, but I think it will work this way.
I am trying to retrieve this page using curl in php. This page of course requires you to log in because it displays different apps for each user. I have been following the work done on this page, however am not having much success.
So far, in his example I am able to successfully populate the auth variable with the auth token. In the next step however (Below the comment for logging into Android Market) I run into troubles. The output variable that he says should have a 302 code results in a "The document has moved" page which links me back to the Google log in page.
Here is a pastebin to show exactly what I am trying. http://pastebin.com/9Fs9GWxk
Additionally if anyone knows what steps I need to do after this to actually get the page I need that would be amazing. Thanks
Here is something I came up with today for this question that has been modified to work for you:
<?php
// Log in to Google with a cookie-backed cURL session, then fetch the Android
// Market library page with the same session.
$USERNAME = 'you#gmail';
$PASSWORD = 'yourpasswd';
// COOKIEJAR was used without ever being defined, which is a fatal error on PHP 8.
if (!defined('COOKIEJAR')) {
    define('COOKIEJAR', 'cookies.txt');
}
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Peer verification stays at its default (on): the password is sent over this connection.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
curl_setopt($ch, CURLOPT_COOKIEFILE, COOKIEJAR);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
curl_setopt($ch, CURLOPT_TIMEOUT, 120);

// 1. Load the login page and collect its hidden form fields.
curl_setopt($ch, CURLOPT_URL,
    'https://accounts.google.com/ServiceLogin?hl=en&continue=https://market.android.com/mylibrary');
$data = curl_exec($ch);
if ($data === false) {
    die('Could not load the login page: ' . curl_error($ch));
}
$formFields = getFormFields($data);
$formFields['Email'] = $USERNAME;
$formFields['Passwd'] = $PASSWORD;
unset($formFields['PersistentCookie']);
// var_dump($formFields);

// 2. Submit the credentials as a URL-encoded form.
$pairs = array();
foreach ($formFields as $key => $value) {
    $pairs[] = $key . '=' . urlencode($value);
}
$post_string = implode('&', $pairs);
curl_setopt($ch, CURLOPT_URL, 'https://accounts.google.com/ServiceLoginAuth');
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
$result = curl_exec($ch);
//var_dump($result);

$status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($result === false || $status < 200 || $status >= 300) {
    // Dump the diagnostics before terminating; after die() they never ran.
    var_dump(curl_getinfo($ch), $result);
    die("Login failed");
}

// 3. Fetch the protected page with GET using the session cookies.
curl_setopt($ch, CURLOPT_URL, 'https://market.android.com/mylibrary');
curl_setopt($ch, CURLOPT_HTTPGET, true);
$result = curl_exec($ch);
curl_close($ch);
echo $result;
/**
 * Returns the inputs of the Google login form found in $data.
 *
 * The form is recognised by its id "gaia_loginform", wherever that attribute
 * appears inside the <form> tag. The script is terminated when no such form
 * exists.
 *
 * @param string $data HTML of the login page.
 * @return array Input name => value pairs as produced by getInputs().
 */
function getFormFields($data)
{
    // [^>]* lets other attributes precede the id; the previous pattern only
    // matched when id was the first attribute of the tag.
    if (!preg_match('/(<form\b[^>]*\bid=.?gaia_loginform.*?<\/form>)/is', $data, $matches)) {
        die('didnt find login form');
    }
    return getInputs($matches[1]);
}
/**
 * Extracts name => value from every <input> tag in $form.
 *
 * Attribute values may be double-quoted, single-quoted or unquoted. Quoted
 * values keep their spaces, and HTML entities are decoded so the values can be
 * re-submitted as a browser would send them. Inputs without a name are
 * skipped; a missing value becomes ''.
 *
 * @param string $form HTML fragment containing the inputs.
 * @return array Input name => value.
 */
function getInputs($form)
{
    $inputs = array();

    // Reads one attribute from a tag; returns null when it is absent.
    $readAttribute = function ($tag, $attribute) {
        // The look-behind keeps e.g. "data-name" from being read as "name".
        $pattern = '/(?<![\w-])' . $attribute . '\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]*))/i';
        if (!preg_match($pattern, $tag, $m)) {
            return null;
        }
        // Exactly one alternative matched; it is always the last captured group.
        return html_entity_decode($m[count($m) - 1], ENT_QUOTES, 'UTF-8');
    };

    if (preg_match_all('/<input\b[^>]*>/is', $form, $tags)) {
        foreach ($tags[0] as $tag) {
            $name = $readAttribute($tag, 'name');
            if ($name === null) {
                continue;
            }
            $value = $readAttribute($tag, 'value');
            $inputs[$name] = $value === null ? '' : $value;
        }
    }
    return $inputs;
}