Get last modified file from remote VPS with PHP - php

With this code I can get the modification time but I need to get last modified file name so can some one tell me how I can do that?
function filemtime_remote($uri)
{
$uri = parse_url($uri);
$handle = #fsockopen($uri['host'],80);
if(!$handle)
return 0;
fputs($handle,"GET $uri[path] HTTP/1.1\r\nHost: $uri[host]\r\n\r\n");
$result = 0;
while(!feof($handle))
{
$line = fgets($handle,1024);
if(!trim($line))
break;
$col = strpos($line,':');
if($col !== false)
{
$header = trim(substr($line,0,$col));
$value = trim(substr($line,$col+1));
if(strtolower($header) == 'last-modified')
{
$result = strtotime($value);
break;
}
}
}
fclose($handle);
return $result;
}
function get_data($url)
{
$ch = curl_init();
$timeout = 5;
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST,false);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER,false);
curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
$result = "1";
$html = get_data("http://mysite.ir/api.php?uid=". $result); //the result is r1&r2&r3&r4&....
$matches[1] = explode("&", $html);
if(!empty($matches[1])){
$errorMessage .= "DONE";
//Need Help here
$last_modified_file = filemtime_remote($matches[1]); //its just return modification time i need the last modified file name!!
}

You cannot do that unless the remote server provides a script that allows you to give it a filename and returns the mtime to you fetching it from the local filesystem.
It would IMHO be a disclosure issue if it was possible just like that.

Related

How to recover the redirection url to another page redirected in PHP? [duplicate]

I'm trying to make curl follow a redirect but I can't quite get it to work right. I have a string that I want to send as a GET param to a server and get the resulting URL.
Example:
String = Kobold Vermin
Url = www.wowhead.com/search?q=Kobold+Worker
If you go to that url it will redirect you to "www.wowhead.com/npc=257". I want curl to return this URL to my PHP code so that i can extract the "npc=257" and use it.
Current code:
function npcID($name) {
$urltopost = "http://www.wowhead.com/search?q=" . $name;
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");
curl_setopt($ch, CURLOPT_URL, $urltopost);
curl_setopt($ch, CURLOPT_REFERER, "http://www.wowhead.com");
curl_setopt($ch, CURLOPT_HTTPHEADER, Array("Content-Type:application/x-www-form-urlencoded"));
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
return curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
}
This however returns www.wowhead.com/search?q=Kobold+Worker and not www.wowhead.com/npc=257.
I suspect PHP is returning before the external redirect happens. How can I fix this?
To make cURL follow a redirect, use:
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
Erm... I don't think you're actually executing the curl... Try:
curl_exec($ch);
...after setting the options, and before the curl_getinfo() call.
EDIT: If you just want to find out where a page redirects to, I'd use the advice here, and just use Curl to grab the headers and extract the Location: header from them:
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$result = curl_exec($ch);
if (preg_match('~Location: (.*)~i', $result, $match)) {
$location = trim($match[1]);
}
Add this line to curl inizialization
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
and use getinfo before curl_close
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
es:
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_BINARYTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT ,0);
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
$html = curl_exec($ch);
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
curl_close($ch);
The answer above didn't work for me on one of my servers, something to to with basedir, so I re-hashed it a little. The code below works on all my servers.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$a = curl_exec($ch);
curl_close( $ch );
// the returned headers
$headers = explode("\n",$a);
// if there is no redirection this will be the final url
$redir = $url;
// loop through the headers and check for a Location: str
$j = count($headers);
for($i = 0; $i < $j; $i++){
// if we find the Location header strip it and fill the redir var
if(strpos($headers[$i],"Location:") !== false){
$redir = trim(str_replace("Location:","",$headers[$i]));
break;
}
}
// do whatever you want with the result
echo $redir;
The chosen answer here is decent but its case sensitive, doesn't protect against relative location: headers (which some sites do) or pages that might actually have the phrase Location: in their content... (which zillow currently does).
A bit sloppy, but a couple quick edits to make this a bit smarter are:
function getOriginalURL($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$result = curl_exec($ch);
$httpStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
// if it's not a redirection (3XX), move along
if ($httpStatus < 300 || $httpStatus >= 400)
return $url;
// look for a location: header to find the target URL
if(preg_match('/location: (.*)/i', $result, $r)) {
$location = trim($r[1]);
// if the location is a relative URL, attempt to make it absolute
if (preg_match('/^\/(.*)/', $location)) {
$urlParts = parse_url($url);
if ($urlParts['scheme'])
$baseURL = $urlParts['scheme'].'://';
if ($urlParts['host'])
$baseURL .= $urlParts['host'];
if ($urlParts['port'])
$baseURL .= ':'.$urlParts['port'];
return $baseURL.$location;
}
return $location;
}
return $url;
}
Note that this still only goes 1 redirection deep. To go deeper, you actually need to get the content and follow the redirects.
Sometimes you need to get HTTP headers but at the same time you don't want return those headers.**
This skeleton takes care of cookies and HTTP redirects using recursion. The main idea here is to avoid return HTTP headers to the client code.
You can build a very strong curl class over it. Add POST functionality, etc.
<?php
class curl {
static private $cookie_file = '';
static private $user_agent = '';
static private $max_redirects = 10;
static private $followlocation_allowed = true;
function __construct()
{
// set a file to store cookies
self::$cookie_file = 'cookies.txt';
// set some general User Agent
self::$user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';
if ( ! file_exists(self::$cookie_file) || ! is_writable(self::$cookie_file))
{
throw new Exception('Cookie file missing or not writable.');
}
// check for PHP settings that unfits
// correct functioning of CURLOPT_FOLLOWLOCATION
if (ini_get('open_basedir') != '' || ini_get('safe_mode') == 'On')
{
self::$followlocation_allowed = false;
}
}
/**
* Main method for GET requests
* #param string $url URI to get
* #return string request's body
*/
static public function get($url)
{
$process = curl_init($url);
self::_set_basic_options($process);
// this function is in charge of output request's body
// so DO NOT include HTTP headers
curl_setopt($process, CURLOPT_HEADER, 0);
if (self::$followlocation_allowed)
{
// if PHP settings allow it use AUTOMATIC REDIRECTION
curl_setopt($process, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($process, CURLOPT_MAXREDIRS, self::$max_redirects);
}
else
{
curl_setopt($process, CURLOPT_FOLLOWLOCATION, false);
}
$return = curl_exec($process);
if ($return === false)
{
throw new Exception('Curl error: ' . curl_error($process));
}
// test for redirection HTTP codes
$code = curl_getinfo($process, CURLINFO_HTTP_CODE);
if ($code == 301 || $code == 302)
{
curl_close($process);
try
{
// go to extract new Location URI
$location = self::_parse_redirection_header($url);
}
catch (Exception $e)
{
throw $e;
}
// IMPORTANT return
return self::get($location);
}
curl_close($process);
return $return;
}
static function _set_basic_options($process)
{
curl_setopt($process, CURLOPT_USERAGENT, self::$user_agent);
curl_setopt($process, CURLOPT_COOKIEFILE, self::$cookie_file);
curl_setopt($process, CURLOPT_COOKIEJAR, self::$cookie_file);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
// curl_setopt($process, CURLOPT_VERBOSE, 1);
// curl_setopt($process, CURLOPT_SSL_VERIFYHOST, false);
// curl_setopt($process, CURLOPT_SSL_VERIFYPEER, false);
}
static function _parse_redirection_header($url)
{
$process = curl_init($url);
self::_set_basic_options($process);
// NOW we need to parse HTTP headers
curl_setopt($process, CURLOPT_HEADER, 1);
$return = curl_exec($process);
if ($return === false)
{
throw new Exception('Curl error: ' . curl_error($process));
}
curl_close($process);
if ( ! preg_match('#Location: (.*)#', $return, $location))
{
throw new Exception('No Location found');
}
if (self::$max_redirects-- <= 0)
{
throw new Exception('Max redirections reached trying to get: ' . $url);
}
return trim($location[1]);
}
}
You can use:
$redirectURL = curl_getinfo($ch,CURLINFO_REDIRECT_URL);
Lot's of regex here, despite the fact i really like them this way might be more stable to me:
$resultCurl=curl_exec($curl); //get curl result
//Optional line if you want to store the http status code
$headerHttpCode=curl_getinfo($curl,CURLINFO_HTTP_CODE);
//let's use dom and xpath
$dom = new \DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML($resultCurl, LIBXML_HTML_NODEFDTD);
libxml_use_internal_errors(false);
$xpath = new \DOMXPath($dom);
$head=$xpath->query("/html/body/p/a/#href");
$newUrl=$head[0]->nodeValue;
The location part is a link in the HTML sent by apache. So Xpath is perfect to recover it.

Remove the line that found in array while processes a code PHP

i have the following code which getting some links from remote.
Now the code while processesing and found the hash [if($done==true)] keep sending request to the host even if the hash have been found in the link.
i need the code stop sending requests to the host that we found its hash, and keep working on the other hosts, so it will be exclude all founded hosts from the the second foreach [foreach (file("hosts.txt")] and not send more request to them.
please if any one can help to fix my code .
sorry about bad english.
$MAXPROCESS=50;
$execute=0;
$Arr = array();
foreach (file("hashes.txt") as $hashkey => $hash)
{
$hash = trim($hash);
$pid = pcntl_fork();
$execute++;
if ($execute >= $MAXPROCESS)
{
while (pcntl_waitpid(0, $status) != -1)
{
$status = pcntl_wexitstatus($status);
$execute =0;
//sleep(1);
flush();
//echo " [$ipkey] Child $status completed\n";
}
}
if (!$pid)
{
foreach (file("hosts.txt") as $hostkey => $hosts)
{
$host = trim($hosts);
if(!in_array($host,$Arr))
{
$done = CHECK_URL($host.$hash.'.xml');
if($done==true)
{
$Arr[] = $host;
echo "\n\r\n\r".$host." : Found [$hashkey] ------------\n\r\n\r";
}else{
echo $host." : error [$hashkey]\n";
}
}else{
return false;
}
flush();
}
exit;
}
}
function CHECK_URL($url)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1" );
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 5 );
curl_setopt( $ch, CURLOPT_TIMEOUT, 5 );
$result = curl_exec($ch);
$response = curl_getinfo($ch);
//if($response['http_code']=="200" || preg_match('/xml/i', $result))
if($response['http_code']=="200")
{
file_put_contents('hash.found.txt', $url."\n", FILE_APPEND);
return true;
}
curl_close($ch); // Close the curl stream
}
.............................

Cannot get an http markup of google [duplicate]

I'm trying to make curl follow a redirect but I can't quite get it to work right. I have a string that I want to send as a GET param to a server and get the resulting URL.
Example:
String = Kobold Vermin
Url = www.wowhead.com/search?q=Kobold+Worker
If you go to that url it will redirect you to "www.wowhead.com/npc=257". I want curl to return this URL to my PHP code so that i can extract the "npc=257" and use it.
Current code:
function npcID($name) {
$urltopost = "http://www.wowhead.com/search?q=" . $name;
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");
curl_setopt($ch, CURLOPT_URL, $urltopost);
curl_setopt($ch, CURLOPT_REFERER, "http://www.wowhead.com");
curl_setopt($ch, CURLOPT_HTTPHEADER, Array("Content-Type:application/x-www-form-urlencoded"));
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
return curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
}
This however returns www.wowhead.com/search?q=Kobold+Worker and not www.wowhead.com/npc=257.
I suspect PHP is returning before the external redirect happens. How can I fix this?
To make cURL follow a redirect, use:
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
Erm... I don't think you're actually executing the curl... Try:
curl_exec($ch);
...after setting the options, and before the curl_getinfo() call.
EDIT: If you just want to find out where a page redirects to, I'd use the advice here, and just use Curl to grab the headers and extract the Location: header from them:
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$result = curl_exec($ch);
if (preg_match('~Location: (.*)~i', $result, $match)) {
$location = trim($match[1]);
}
Add this line to curl inizialization
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
and use getinfo before curl_close
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
es:
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_BINARYTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT ,0);
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
$html = curl_exec($ch);
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
curl_close($ch);
The answer above didn't work for me on one of my servers, something to to with basedir, so I re-hashed it a little. The code below works on all my servers.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$a = curl_exec($ch);
curl_close( $ch );
// the returned headers
$headers = explode("\n",$a);
// if there is no redirection this will be the final url
$redir = $url;
// loop through the headers and check for a Location: str
$j = count($headers);
for($i = 0; $i < $j; $i++){
// if we find the Location header strip it and fill the redir var
if(strpos($headers[$i],"Location:") !== false){
$redir = trim(str_replace("Location:","",$headers[$i]));
break;
}
}
// do whatever you want with the result
echo $redir;
The chosen answer here is decent but its case sensitive, doesn't protect against relative location: headers (which some sites do) or pages that might actually have the phrase Location: in their content... (which zillow currently does).
A bit sloppy, but a couple quick edits to make this a bit smarter are:
function getOriginalURL($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$result = curl_exec($ch);
$httpStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
// if it's not a redirection (3XX), move along
if ($httpStatus < 300 || $httpStatus >= 400)
return $url;
// look for a location: header to find the target URL
if(preg_match('/location: (.*)/i', $result, $r)) {
$location = trim($r[1]);
// if the location is a relative URL, attempt to make it absolute
if (preg_match('/^\/(.*)/', $location)) {
$urlParts = parse_url($url);
if ($urlParts['scheme'])
$baseURL = $urlParts['scheme'].'://';
if ($urlParts['host'])
$baseURL .= $urlParts['host'];
if ($urlParts['port'])
$baseURL .= ':'.$urlParts['port'];
return $baseURL.$location;
}
return $location;
}
return $url;
}
Note that this still only goes 1 redirection deep. To go deeper, you actually need to get the content and follow the redirects.
Sometimes you need to get HTTP headers but at the same time you don't want return those headers.**
This skeleton takes care of cookies and HTTP redirects using recursion. The main idea here is to avoid return HTTP headers to the client code.
You can build a very strong curl class over it. Add POST functionality, etc.
<?php
class curl {
static private $cookie_file = '';
static private $user_agent = '';
static private $max_redirects = 10;
static private $followlocation_allowed = true;
function __construct()
{
// set a file to store cookies
self::$cookie_file = 'cookies.txt';
// set some general User Agent
self::$user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';
if ( ! file_exists(self::$cookie_file) || ! is_writable(self::$cookie_file))
{
throw new Exception('Cookie file missing or not writable.');
}
// check for PHP settings that unfits
// correct functioning of CURLOPT_FOLLOWLOCATION
if (ini_get('open_basedir') != '' || ini_get('safe_mode') == 'On')
{
self::$followlocation_allowed = false;
}
}
/**
* Main method for GET requests
* #param string $url URI to get
* #return string request's body
*/
static public function get($url)
{
$process = curl_init($url);
self::_set_basic_options($process);
// this function is in charge of output request's body
// so DO NOT include HTTP headers
curl_setopt($process, CURLOPT_HEADER, 0);
if (self::$followlocation_allowed)
{
// if PHP settings allow it use AUTOMATIC REDIRECTION
curl_setopt($process, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($process, CURLOPT_MAXREDIRS, self::$max_redirects);
}
else
{
curl_setopt($process, CURLOPT_FOLLOWLOCATION, false);
}
$return = curl_exec($process);
if ($return === false)
{
throw new Exception('Curl error: ' . curl_error($process));
}
// test for redirection HTTP codes
$code = curl_getinfo($process, CURLINFO_HTTP_CODE);
if ($code == 301 || $code == 302)
{
curl_close($process);
try
{
// go to extract new Location URI
$location = self::_parse_redirection_header($url);
}
catch (Exception $e)
{
throw $e;
}
// IMPORTANT return
return self::get($location);
}
curl_close($process);
return $return;
}
static function _set_basic_options($process)
{
curl_setopt($process, CURLOPT_USERAGENT, self::$user_agent);
curl_setopt($process, CURLOPT_COOKIEFILE, self::$cookie_file);
curl_setopt($process, CURLOPT_COOKIEJAR, self::$cookie_file);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
// curl_setopt($process, CURLOPT_VERBOSE, 1);
// curl_setopt($process, CURLOPT_SSL_VERIFYHOST, false);
// curl_setopt($process, CURLOPT_SSL_VERIFYPEER, false);
}
static function _parse_redirection_header($url)
{
$process = curl_init($url);
self::_set_basic_options($process);
// NOW we need to parse HTTP headers
curl_setopt($process, CURLOPT_HEADER, 1);
$return = curl_exec($process);
if ($return === false)
{
throw new Exception('Curl error: ' . curl_error($process));
}
curl_close($process);
if ( ! preg_match('#Location: (.*)#', $return, $location))
{
throw new Exception('No Location found');
}
if (self::$max_redirects-- <= 0)
{
throw new Exception('Max redirections reached trying to get: ' . $url);
}
return trim($location[1]);
}
}
You can use:
$redirectURL = curl_getinfo($ch,CURLINFO_REDIRECT_URL);
Lot's of regex here, despite the fact i really like them this way might be more stable to me:
$resultCurl=curl_exec($curl); //get curl result
//Optional line if you want to store the http status code
$headerHttpCode=curl_getinfo($curl,CURLINFO_HTTP_CODE);
//let's use dom and xpath
$dom = new \DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML($resultCurl, LIBXML_HTML_NODEFDTD);
libxml_use_internal_errors(false);
$xpath = new \DOMXPath($dom);
$head=$xpath->query("/html/body/p/a/#href");
$newUrl=$head[0]->nodeValue;
The location part is a link in the HTML sent by apache. So Xpath is perfect to recover it.

Retrieve Android Market mylibrary with curl

I am trying to retrieve this page using curl in php. This page of course requires you to log in because it displays different apps for each user. I have been following the work done on this page, however am not having much success.
So far, in his example I am able to successfully populate the auth variable with the auth token. In the next step however (Below the comment for logging into Android Market) I run into troubles. The output variable that he says should have a 302 code results in a "The document has moved" page which links me back to the Google log in page.
Here is a pastebin to show exactly what I am trying. http://pastebin.com/9Fs9GWxk
Additionally if anyone knows what steps I need to do after this to actually get the page I need that would be amazing. Thanks
Here is something I came up with today for this question that has been modified to work for you:
<?php
$USERNAME = 'you#gmail';
$PASSWORD = 'yourpasswd';
$ch = curl_init();
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
curl_setopt($ch, CURLOPT_COOKIEFILE, COOKIEJAR);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
curl_setopt($ch, CURLOPT_TIMEOUT, 120);
curl_setopt($ch, CURLOPT_URL,
'https://accounts.google.com/ServiceLogin?hl=en&continue=https://market.android.com/mylibrary');
$data = curl_exec($ch);
$formFields = getFormFields($data);
$formFields['Email'] = $USERNAME;
$formFields['Passwd'] = $PASSWORD;
unset($formFields['PersistentCookie']);
// var_dump($formFields);
$post_string = '';
foreach($formFields as $key => $value) {
$post_string .= $key . '=' . urlencode($value) . '&';
}
$post_string = substr($post_string, 0, -1);
curl_setopt($ch, CURLOPT_URL, 'https://accounts.google.com/ServiceLoginAuth');
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
$result = curl_exec($ch);
//var_dump($result);
if (preg_match('/^2\d{2}/', curl_getinfo($ch, CURLINFO_HTTP_CODE)) == false) {
die("Login failed");
var_dump(curl_getinfo($ch), $result);
} else {
curl_setopt($ch, CURLOPT_URL, 'https://market.android.com/mylibrary');
curl_setopt($ch, CURLOPT_POST, 0);
curl_setopt($ch, CURLOPT_HTTPGET, true);
$result = curl_exec($ch);
echo $result;
}
function getFormFields($data)
{
if (preg_match('/(<form id=.?gaia_loginform.*?<\/form>)/is', $data, $matches)) {
$inputs = getInputs($matches[1]);
return $inputs;
} else {
die('didnt find login form');
}
}
function getInputs($form)
{
$inputs = array();
$elements = preg_match_all('/(<input[^>]+>)/is', $form, $matches);
if ($elements > 0) {
for($i = 0; $i < $elements; $i++) {
$el = preg_replace('/\s{2,}/', ' ', $matches[1][$i]);
if (preg_match('/name=(?:["\'])?([^"\'\s]*)/i', $el, $name)) {
$name = $name[1];
$value = '';
if (preg_match('/value=(?:["\'])?([^"\'\s]*)/i', $el, $value)) {
$value = $value[1];
}
$inputs[$name] = $value;
}
}
}
return $inputs;
}

How can I find where I will be redirected using cURL in PHP?

I'm trying to make curl follow a redirect but I can't quite get it to work right. I have a string that I want to send as a GET param to a server and get the resulting URL.
Example:
String = Kobold Vermin
Url = www.wowhead.com/search?q=Kobold+Worker
If you go to that url it will redirect you to "www.wowhead.com/npc=257". I want curl to return this URL to my PHP code so that i can extract the "npc=257" and use it.
Current code:
function npcID($name) {
$urltopost = "http://www.wowhead.com/search?q=" . $name;
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");
curl_setopt($ch, CURLOPT_URL, $urltopost);
curl_setopt($ch, CURLOPT_REFERER, "http://www.wowhead.com");
curl_setopt($ch, CURLOPT_HTTPHEADER, Array("Content-Type:application/x-www-form-urlencoded"));
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
return curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
}
This however returns www.wowhead.com/search?q=Kobold+Worker and not www.wowhead.com/npc=257.
I suspect PHP is returning before the external redirect happens. How can I fix this?
To make cURL follow a redirect, use:
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
Erm... I don't think you're actually executing the curl... Try:
curl_exec($ch);
...after setting the options, and before the curl_getinfo() call.
EDIT: If you just want to find out where a page redirects to, I'd use the advice here, and just use Curl to grab the headers and extract the Location: header from them:
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$result = curl_exec($ch);
if (preg_match('~Location: (.*)~i', $result, $match)) {
$location = trim($match[1]);
}
Add this line to curl inizialization
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
and use getinfo before curl_close
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
es:
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_BINARYTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT ,0);
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
$html = curl_exec($ch);
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
curl_close($ch);
The answer above didn't work for me on one of my servers, something to to with basedir, so I re-hashed it a little. The code below works on all my servers.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$a = curl_exec($ch);
curl_close( $ch );
// the returned headers
$headers = explode("\n",$a);
// if there is no redirection this will be the final url
$redir = $url;
// loop through the headers and check for a Location: str
$j = count($headers);
for($i = 0; $i < $j; $i++){
// if we find the Location header strip it and fill the redir var
if(strpos($headers[$i],"Location:") !== false){
$redir = trim(str_replace("Location:","",$headers[$i]));
break;
}
}
// do whatever you want with the result
echo $redir;
The chosen answer here is decent but its case sensitive, doesn't protect against relative location: headers (which some sites do) or pages that might actually have the phrase Location: in their content... (which zillow currently does).
A bit sloppy, but a couple quick edits to make this a bit smarter are:
function getOriginalURL($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$result = curl_exec($ch);
$httpStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
// if it's not a redirection (3XX), move along
if ($httpStatus < 300 || $httpStatus >= 400)
return $url;
// look for a location: header to find the target URL
if(preg_match('/location: (.*)/i', $result, $r)) {
$location = trim($r[1]);
// if the location is a relative URL, attempt to make it absolute
if (preg_match('/^\/(.*)/', $location)) {
$urlParts = parse_url($url);
if ($urlParts['scheme'])
$baseURL = $urlParts['scheme'].'://';
if ($urlParts['host'])
$baseURL .= $urlParts['host'];
if ($urlParts['port'])
$baseURL .= ':'.$urlParts['port'];
return $baseURL.$location;
}
return $location;
}
return $url;
}
Note that this still only goes 1 redirection deep. To go deeper, you actually need to get the content and follow the redirects.
Sometimes you need to get HTTP headers but at the same time you don't want return those headers.**
This skeleton takes care of cookies and HTTP redirects using recursion. The main idea here is to avoid return HTTP headers to the client code.
You can build a very strong curl class over it. Add POST functionality, etc.
<?php
class curl {
static private $cookie_file = '';
static private $user_agent = '';
static private $max_redirects = 10;
static private $followlocation_allowed = true;
function __construct()
{
// set a file to store cookies
self::$cookie_file = 'cookies.txt';
// set some general User Agent
self::$user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';
if ( ! file_exists(self::$cookie_file) || ! is_writable(self::$cookie_file))
{
throw new Exception('Cookie file missing or not writable.');
}
// check for PHP settings that unfits
// correct functioning of CURLOPT_FOLLOWLOCATION
if (ini_get('open_basedir') != '' || ini_get('safe_mode') == 'On')
{
self::$followlocation_allowed = false;
}
}
/**
* Main method for GET requests
* #param string $url URI to get
* #return string request's body
*/
static public function get($url)
{
$process = curl_init($url);
self::_set_basic_options($process);
// this function is in charge of output request's body
// so DO NOT include HTTP headers
curl_setopt($process, CURLOPT_HEADER, 0);
if (self::$followlocation_allowed)
{
// if PHP settings allow it use AUTOMATIC REDIRECTION
curl_setopt($process, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($process, CURLOPT_MAXREDIRS, self::$max_redirects);
}
else
{
curl_setopt($process, CURLOPT_FOLLOWLOCATION, false);
}
$return = curl_exec($process);
if ($return === false)
{
throw new Exception('Curl error: ' . curl_error($process));
}
// test for redirection HTTP codes
$code = curl_getinfo($process, CURLINFO_HTTP_CODE);
if ($code == 301 || $code == 302)
{
curl_close($process);
try
{
// go to extract new Location URI
$location = self::_parse_redirection_header($url);
}
catch (Exception $e)
{
throw $e;
}
// IMPORTANT return
return self::get($location);
}
curl_close($process);
return $return;
}
static function _set_basic_options($process)
{
curl_setopt($process, CURLOPT_USERAGENT, self::$user_agent);
curl_setopt($process, CURLOPT_COOKIEFILE, self::$cookie_file);
curl_setopt($process, CURLOPT_COOKIEJAR, self::$cookie_file);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
// curl_setopt($process, CURLOPT_VERBOSE, 1);
// curl_setopt($process, CURLOPT_SSL_VERIFYHOST, false);
// curl_setopt($process, CURLOPT_SSL_VERIFYPEER, false);
}
static function _parse_redirection_header($url)
{
$process = curl_init($url);
self::_set_basic_options($process);
// NOW we need to parse HTTP headers
curl_setopt($process, CURLOPT_HEADER, 1);
$return = curl_exec($process);
if ($return === false)
{
throw new Exception('Curl error: ' . curl_error($process));
}
curl_close($process);
if ( ! preg_match('#Location: (.*)#', $return, $location))
{
throw new Exception('No Location found');
}
if (self::$max_redirects-- <= 0)
{
throw new Exception('Max redirections reached trying to get: ' . $url);
}
return trim($location[1]);
}
}
You can use:
$redirectURL = curl_getinfo($ch,CURLINFO_REDIRECT_URL);
Lot's of regex here, despite the fact i really like them this way might be more stable to me:
$resultCurl=curl_exec($curl); //get curl result
//Optional line if you want to store the http status code
$headerHttpCode=curl_getinfo($curl,CURLINFO_HTTP_CODE);
//let's use dom and xpath
$dom = new \DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML($resultCurl, LIBXML_HTML_NODEFDTD);
libxml_use_internal_errors(false);
$xpath = new \DOMXPath($dom);
$head=$xpath->query("/html/body/p/a/#href");
$newUrl=$head[0]->nodeValue;
The location part is a link in the HTML sent by apache. So Xpath is perfect to recover it.

Categories