PHP cURL using my IP address instead of the Server? - php

I want to make a cURL request to a private page where it automatically checks my IP/language to deliver content in that language.
$url = 'https://example.com/';
$ch = curl_init($url);
if ($ch === false)
{
throw new CurlException('Curl Init return `false`.');
}
if ( ! ini_get('open_basedir'))
{
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
}
if (isset($options['curlHeaders']))
{
curl_setopt($ch, CURLOPT_HTTPHEADER, $options['curlHeaders']);
}
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36');
curl_setopt($ch, CURLOPT_URL, $url);
$content = curl_exec($ch);
if ($content === false)
{
// there was a problem
$error = curl_error($ch);
throw new CurlException('Error retrieving "'.$url.'" ('.$error.')');
}
elseif ($content === true)
{
throw new CurlException('Unexpected return value of content set to true.');
}
return $content;
The problem is that the content I get back is always in the same language as my public IP address is. Let's say I want the returned content in English, but it loads in French. The Locale on my computer is in English, the only thing that is French is my IP address.
I have tried it locally and uploaded the code to a server and used a VPN also. I got the same results though.
What am I missing here? Is there any caching when sending cURL requests?

So it turned out I had to set a language header on the cURL settings which solved this issue completely:
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Accept-Language: en-US;q=0.8,en;q=0.4"));

Related

How to recover the redirection url to another page redirected in PHP? [duplicate]

I'm trying to make curl follow a redirect but I can't quite get it to work right. I have a string that I want to send as a GET param to a server and get the resulting URL.
Example:
String = Kobold Vermin
Url = www.wowhead.com/search?q=Kobold+Worker
If you go to that url it will redirect you to "www.wowhead.com/npc=257". I want curl to return this URL to my PHP code so that i can extract the "npc=257" and use it.
Current code:
function npcID($name) {
$urltopost = "http://www.wowhead.com/search?q=" . $name;
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");
curl_setopt($ch, CURLOPT_URL, $urltopost);
curl_setopt($ch, CURLOPT_REFERER, "http://www.wowhead.com");
curl_setopt($ch, CURLOPT_HTTPHEADER, Array("Content-Type:application/x-www-form-urlencoded"));
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
return curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
}
This however returns www.wowhead.com/search?q=Kobold+Worker and not www.wowhead.com/npc=257.
I suspect PHP is returning before the external redirect happens. How can I fix this?
To make cURL follow a redirect, use:
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
Erm... I don't think you're actually executing the curl... Try:
curl_exec($ch);
...after setting the options, and before the curl_getinfo() call.
EDIT: If you just want to find out where a page redirects to, I'd use the advice here, and just use Curl to grab the headers and extract the Location: header from them:
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$result = curl_exec($ch);
if (preg_match('~Location: (.*)~i', $result, $match)) {
$location = trim($match[1]);
}
Add this line to curl inizialization
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
and use getinfo before curl_close
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
es:
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_BINARYTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT ,0);
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
$html = curl_exec($ch);
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
curl_close($ch);
The answer above didn't work for me on one of my servers, something to to with basedir, so I re-hashed it a little. The code below works on all my servers.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$a = curl_exec($ch);
curl_close( $ch );
// the returned headers
$headers = explode("\n",$a);
// if there is no redirection this will be the final url
$redir = $url;
// loop through the headers and check for a Location: str
$j = count($headers);
for($i = 0; $i < $j; $i++){
// if we find the Location header strip it and fill the redir var
if(strpos($headers[$i],"Location:") !== false){
$redir = trim(str_replace("Location:","",$headers[$i]));
break;
}
}
// do whatever you want with the result
echo $redir;
The chosen answer here is decent but its case sensitive, doesn't protect against relative location: headers (which some sites do) or pages that might actually have the phrase Location: in their content... (which zillow currently does).
A bit sloppy, but a couple quick edits to make this a bit smarter are:
function getOriginalURL($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$result = curl_exec($ch);
$httpStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
// if it's not a redirection (3XX), move along
if ($httpStatus < 300 || $httpStatus >= 400)
return $url;
// look for a location: header to find the target URL
if(preg_match('/location: (.*)/i', $result, $r)) {
$location = trim($r[1]);
// if the location is a relative URL, attempt to make it absolute
if (preg_match('/^\/(.*)/', $location)) {
$urlParts = parse_url($url);
if ($urlParts['scheme'])
$baseURL = $urlParts['scheme'].'://';
if ($urlParts['host'])
$baseURL .= $urlParts['host'];
if ($urlParts['port'])
$baseURL .= ':'.$urlParts['port'];
return $baseURL.$location;
}
return $location;
}
return $url;
}
Note that this still only goes 1 redirection deep. To go deeper, you actually need to get the content and follow the redirects.
Sometimes you need to get HTTP headers but at the same time you don't want return those headers.**
This skeleton takes care of cookies and HTTP redirects using recursion. The main idea here is to avoid return HTTP headers to the client code.
You can build a very strong curl class over it. Add POST functionality, etc.
<?php
class curl {
static private $cookie_file = '';
static private $user_agent = '';
static private $max_redirects = 10;
static private $followlocation_allowed = true;
function __construct()
{
// set a file to store cookies
self::$cookie_file = 'cookies.txt';
// set some general User Agent
self::$user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';
if ( ! file_exists(self::$cookie_file) || ! is_writable(self::$cookie_file))
{
throw new Exception('Cookie file missing or not writable.');
}
// check for PHP settings that unfits
// correct functioning of CURLOPT_FOLLOWLOCATION
if (ini_get('open_basedir') != '' || ini_get('safe_mode') == 'On')
{
self::$followlocation_allowed = false;
}
}
/**
* Main method for GET requests
* #param string $url URI to get
* #return string request's body
*/
static public function get($url)
{
$process = curl_init($url);
self::_set_basic_options($process);
// this function is in charge of output request's body
// so DO NOT include HTTP headers
curl_setopt($process, CURLOPT_HEADER, 0);
if (self::$followlocation_allowed)
{
// if PHP settings allow it use AUTOMATIC REDIRECTION
curl_setopt($process, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($process, CURLOPT_MAXREDIRS, self::$max_redirects);
}
else
{
curl_setopt($process, CURLOPT_FOLLOWLOCATION, false);
}
$return = curl_exec($process);
if ($return === false)
{
throw new Exception('Curl error: ' . curl_error($process));
}
// test for redirection HTTP codes
$code = curl_getinfo($process, CURLINFO_HTTP_CODE);
if ($code == 301 || $code == 302)
{
curl_close($process);
try
{
// go to extract new Location URI
$location = self::_parse_redirection_header($url);
}
catch (Exception $e)
{
throw $e;
}
// IMPORTANT return
return self::get($location);
}
curl_close($process);
return $return;
}
static function _set_basic_options($process)
{
curl_setopt($process, CURLOPT_USERAGENT, self::$user_agent);
curl_setopt($process, CURLOPT_COOKIEFILE, self::$cookie_file);
curl_setopt($process, CURLOPT_COOKIEJAR, self::$cookie_file);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
// curl_setopt($process, CURLOPT_VERBOSE, 1);
// curl_setopt($process, CURLOPT_SSL_VERIFYHOST, false);
// curl_setopt($process, CURLOPT_SSL_VERIFYPEER, false);
}
static function _parse_redirection_header($url)
{
$process = curl_init($url);
self::_set_basic_options($process);
// NOW we need to parse HTTP headers
curl_setopt($process, CURLOPT_HEADER, 1);
$return = curl_exec($process);
if ($return === false)
{
throw new Exception('Curl error: ' . curl_error($process));
}
curl_close($process);
if ( ! preg_match('#Location: (.*)#', $return, $location))
{
throw new Exception('No Location found');
}
if (self::$max_redirects-- <= 0)
{
throw new Exception('Max redirections reached trying to get: ' . $url);
}
return trim($location[1]);
}
}
You can use:
$redirectURL = curl_getinfo($ch,CURLINFO_REDIRECT_URL);
Lot's of regex here, despite the fact i really like them this way might be more stable to me:
$resultCurl=curl_exec($curl); //get curl result
//Optional line if you want to store the http status code
$headerHttpCode=curl_getinfo($curl,CURLINFO_HTTP_CODE);
//let's use dom and xpath
$dom = new \DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML($resultCurl, LIBXML_HTML_NODEFDTD);
libxml_use_internal_errors(false);
$xpath = new \DOMXPath($dom);
$head=$xpath->query("/html/body/p/a/#href");
$newUrl=$head[0]->nodeValue;
The location part is a link in the HTML sent by apache. So Xpath is perfect to recover it.

why am i getting Forbidden when i try to use curl in php file

I m trying to integrate a payment method on a website, the first thing I did, I tried a curl code to test it using git console and it works just fine, then I tried to execute the curl command using PHP. I created a file then I used this code:
<?php
$endpoint_url = 'https://secure.payinspect.com';
$params = [
'action'=>'SALE',
'order_id'=>'ORDER12345',
'order_amount'=>'1.99',
'order_currency'=>'USD',
'order_description'=>'Product',
'card_number'=>'4111111111111111',
'card_exp_month'=>'05',
'card_exp_year'=>'2020',
'card_cvv2'=>'000',
'payer_first_name'=>'John',
'payer_last_name'=>'Doe',
'payer_address'=>'BigStreet',
'payer_country'=>'US',
'payer_state'=>'CA',
'payer_city'=>'City',
'payer_zip'=>'123456',
'payer_email'=>'doe#example',
'payer_phone'=>'199999999',
'payer_ip'=>'123.123.123.123',
'term_url_3ds'=>'http://client.site.com/return.php',
'recurring_init'=>'N',
'hash'=>'e3dd86f469f40a5cfedf96a82ff257af'
];
$buff = [];
foreach ($params as $k => $v) {
array_push($buff, "{$k}={$v}");
}
$url = $endpoint_url . implode('&', $buff);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_close($ch);
if ($result===false){ print curl_error($curl); }
$response = json_decode($result, true);
echo $result;
?>
but I got this error :
Forbidden
You don't have permission to access
I googled for this error and I tried to add this line
curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36');
$result = curl_exec($ch);
but i still got the same error . so what causes this problem and how cauld i fix it
Assuming everything else is correct - this might fix the problem.
<?php
$endpoint_url = 'https://secure.payinspect.com';
$params = [
'action'=>'SALE',
'order_id'=>'ORDER12345',
'order_amount'=>'1.99',
'order_currency'=>'USD',
'order_description'=>'Product',
'card_number'=>'4111111111111111',
'card_exp_month'=>'05',
'card_exp_year'=>'2020',
'card_cvv2'=>'000',
'payer_first_name'=>'John',
'payer_last_name'=>'Doe',
'payer_address'=>'BigStreet',
'payer_country'=>'US',
'payer_state'=>'CA',
'payer_city'=>'City',
'payer_zip'=>'123456',
'payer_email'=>'doe#example',
'payer_phone'=>'199999999',
'payer_ip'=>'123.123.123.123',
'term_url_3ds'=>'http://client.site.com/return.php',
'recurring_init'=>'N',
'hash'=>'e3dd86f469f40a5cfedf96a82ff257af'
];
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $endpoint_url);
// -- this sets the request method to POST ----
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($params));
// --- end ----
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_close($ch);
if ($result===false) { print curl_error($curl); }
$response = json_decode($result, true);
echo $result;
I can't say for certain (which means this isn't a great answer) but by suffixing the parameters to the URL, in the way you're doing currently, you're creating a GET request rather than a POST one.
It's quite likely that the receiving service is expecting to see your hash value (and everything else) in the POST data - and as it doesn't see it there, it rejects your request completely.

Final Effective URL - PHP (Proxy)

I apologize in advance for my English. I have small problem.
I want to get Final Effective URL from page
streamuj.tv/video/00e276bf5841bf77c8de?streamuj=original&authorize=ac13bb77d3d863ca362315b9b4dcdf3e
When you put a link into the browser gives me to .flv file
But when I put it through PHP gives me s3.streamuj.tv/unauthorized.flv
When I try it through this: getlinkinfo.com/info?link=http%3A%2F%2Fwww.streamuj.tv%2Fvideo%2F00e276bf5841bf77c8de%3Fstreamuj%3Doriginal%26authorize%3Dac13bb77d3d863ca362315b9b4dcdf3e&x=49&y=11
So everything is fine indicates that
s4.streamuj.tv:8080/vid/d0fe77e1020b6414a16aa5316c759add/58aaf1dd/00e276bf5841bf77c8de_hd.flv?start=0
My PHP CODE:
<?php
session_start();
include "simple_html_dom.php";
$proxy = array("189.3.93.114:8080");
$proxyNum = 0;
$proxy = explode(':', $proxy[$proxyNum]);
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, 'http://www.streamuj.tv/video/00e276bf5841bf77c8de?streamuj=original&authorize=ac13bb77d3d863ca362315b9b4dcdf3e');
curl_setopt($curl, CURLOPT_FILETIME, true);
curl_setopt($curl, CURLOPT_NOBODY, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_HEADER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, TRUE);
curl_setopt($curl, CURLOPT_PROXY, $proxy[0]);
curl_setopt($curl, CURLOPT_PROXYPORT, $proxy[1]);
$header = curl_exec($curl);
$info = curl_getinfo($curl);
curl_close($curl);
$u1 = $info['url'];
echo "u1: $u1</br>";
$u2 = str_replace("flv?start=0","flv",$u1);
echo $u2;
?>
Where is the problem? Why it makes unauthorized.flv?
Solution
Server was checking client legitimacy via user-agent HTTP header parameter.
Using custom user-agent solved the problem.
curl_setopt($curl, CURLOPT_HTTPHEADER, array( 'user-agent:Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2950.0 Iron Safari/537.36' ));
Original post:
Most likely the generated flv URL is not pointing to static place. It
probably uses sessionID + cookie / verifies IP (one of these, or
both).
Without knowing what header you have to request with via CURL, you
probably won't get a relevant response.

Jquery >> PHP >> cURL and browser refresh

I have a problem with one website i wrote a few weeks ago.
my website communicates with another website_2 via API hosted on website_2
the curl operation is requested via Query POST to a PHP file.
if for some reason the operation took a longer time (which i can't determine the reason for) and the user hits refresh.. the command sent to the API is done yet my server doesn't get any result so can't log or do anything with that result..
is there a way to preserve the integrity of such transaction?
below is my code and i still get FAILED no matter what the result was on website_2
function doCommit($url_)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url_);
curl_setopt($ch, CURLOPT_USERAGENT, 'Opera/9.23 (Windows NT 5.1; U; en)');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_TIMEOUT,5);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT,5);
$commit = curl_exec($ch);
curl_close($ch);
if(!curl_exec($ch))
{
$ERROR="<Transaction>
<Result>Failed</Result>
<Reason>Operation Timed Out</Reason>
</Transaction>";
$oXML = new SimpleXMLElement($ERROR);
return $oXML;
}
else{
$oXML = new SimpleXMLElement($commit);
return $oXML;
}
// return $oXML->Reason;
}
You can use curl parameters to solve your problem setting a "request timeout":
CURLOPT_TIMEOUT - Sets The number of seconds to wait before a curl individual function timeouts.
CURLOPT_CONNECTTIMEOUT - Sets the maximum time before curl connection timeouts.
...and then you could return a text if curl_exec fails:
if(curl_exec($curl) === false)
{
echo 'ERROR: ' . curl_error($curl);
}
Solved it by the following code
function doCommit($url_)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url_);
curl_setopt($ch, CURLOPT_USERAGENT, 'Opera/9.23 (Windows NT 5.1; U; en)');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_TIMEOUT,2);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT,2);
$commit = curl_exec($ch);
curl_close($ch);
if(curl_errno($ch) == 0)
{
$oXML = new SimpleXMLElement($commit);
return $oXML;
}
else{
$ERROR="<Transaction>
<Result>Failed</Result>
<Reason>Operation Timed Out</Reason>
</Transaction>";
$oXML = new SimpleXMLElement($ERROR);
return $oXML;
}
// return $oXML->Reason;
}

How can I find where I will be redirected using cURL in PHP?

I'm trying to make curl follow a redirect but I can't quite get it to work right. I have a string that I want to send as a GET param to a server and get the resulting URL.
Example:
String = Kobold Vermin
Url = www.wowhead.com/search?q=Kobold+Worker
If you go to that url it will redirect you to "www.wowhead.com/npc=257". I want curl to return this URL to my PHP code so that i can extract the "npc=257" and use it.
Current code:
function npcID($name) {
$urltopost = "http://www.wowhead.com/search?q=" . $name;
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");
curl_setopt($ch, CURLOPT_URL, $urltopost);
curl_setopt($ch, CURLOPT_REFERER, "http://www.wowhead.com");
curl_setopt($ch, CURLOPT_HTTPHEADER, Array("Content-Type:application/x-www-form-urlencoded"));
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
return curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
}
This however returns www.wowhead.com/search?q=Kobold+Worker and not www.wowhead.com/npc=257.
I suspect PHP is returning before the external redirect happens. How can I fix this?
To make cURL follow a redirect, use:
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
Erm... I don't think you're actually executing the curl... Try:
curl_exec($ch);
...after setting the options, and before the curl_getinfo() call.
EDIT: If you just want to find out where a page redirects to, I'd use the advice here, and just use Curl to grab the headers and extract the Location: header from them:
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$result = curl_exec($ch);
if (preg_match('~Location: (.*)~i', $result, $match)) {
$location = trim($match[1]);
}
Add this line to curl inizialization
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
and use getinfo before curl_close
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
es:
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_BINARYTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT ,0);
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
$html = curl_exec($ch);
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
curl_close($ch);
The answer above didn't work for me on one of my servers, something to to with basedir, so I re-hashed it a little. The code below works on all my servers.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$a = curl_exec($ch);
curl_close( $ch );
// the returned headers
$headers = explode("\n",$a);
// if there is no redirection this will be the final url
$redir = $url;
// loop through the headers and check for a Location: str
$j = count($headers);
for($i = 0; $i < $j; $i++){
// if we find the Location header strip it and fill the redir var
if(strpos($headers[$i],"Location:") !== false){
$redir = trim(str_replace("Location:","",$headers[$i]));
break;
}
}
// do whatever you want with the result
echo $redir;
The chosen answer here is decent but its case sensitive, doesn't protect against relative location: headers (which some sites do) or pages that might actually have the phrase Location: in their content... (which zillow currently does).
A bit sloppy, but a couple quick edits to make this a bit smarter are:
function getOriginalURL($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$result = curl_exec($ch);
$httpStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
// if it's not a redirection (3XX), move along
if ($httpStatus < 300 || $httpStatus >= 400)
return $url;
// look for a location: header to find the target URL
if(preg_match('/location: (.*)/i', $result, $r)) {
$location = trim($r[1]);
// if the location is a relative URL, attempt to make it absolute
if (preg_match('/^\/(.*)/', $location)) {
$urlParts = parse_url($url);
if ($urlParts['scheme'])
$baseURL = $urlParts['scheme'].'://';
if ($urlParts['host'])
$baseURL .= $urlParts['host'];
if ($urlParts['port'])
$baseURL .= ':'.$urlParts['port'];
return $baseURL.$location;
}
return $location;
}
return $url;
}
Note that this still only goes 1 redirection deep. To go deeper, you actually need to get the content and follow the redirects.
Sometimes you need to get HTTP headers but at the same time you don't want return those headers.**
This skeleton takes care of cookies and HTTP redirects using recursion. The main idea here is to avoid return HTTP headers to the client code.
You can build a very strong curl class over it. Add POST functionality, etc.
<?php
class curl {
static private $cookie_file = '';
static private $user_agent = '';
static private $max_redirects = 10;
static private $followlocation_allowed = true;
function __construct()
{
// set a file to store cookies
self::$cookie_file = 'cookies.txt';
// set some general User Agent
self::$user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';
if ( ! file_exists(self::$cookie_file) || ! is_writable(self::$cookie_file))
{
throw new Exception('Cookie file missing or not writable.');
}
// check for PHP settings that unfits
// correct functioning of CURLOPT_FOLLOWLOCATION
if (ini_get('open_basedir') != '' || ini_get('safe_mode') == 'On')
{
self::$followlocation_allowed = false;
}
}
/**
* Main method for GET requests
* #param string $url URI to get
* #return string request's body
*/
static public function get($url)
{
$process = curl_init($url);
self::_set_basic_options($process);
// this function is in charge of output request's body
// so DO NOT include HTTP headers
curl_setopt($process, CURLOPT_HEADER, 0);
if (self::$followlocation_allowed)
{
// if PHP settings allow it use AUTOMATIC REDIRECTION
curl_setopt($process, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($process, CURLOPT_MAXREDIRS, self::$max_redirects);
}
else
{
curl_setopt($process, CURLOPT_FOLLOWLOCATION, false);
}
$return = curl_exec($process);
if ($return === false)
{
throw new Exception('Curl error: ' . curl_error($process));
}
// test for redirection HTTP codes
$code = curl_getinfo($process, CURLINFO_HTTP_CODE);
if ($code == 301 || $code == 302)
{
curl_close($process);
try
{
// go to extract new Location URI
$location = self::_parse_redirection_header($url);
}
catch (Exception $e)
{
throw $e;
}
// IMPORTANT return
return self::get($location);
}
curl_close($process);
return $return;
}
static function _set_basic_options($process)
{
curl_setopt($process, CURLOPT_USERAGENT, self::$user_agent);
curl_setopt($process, CURLOPT_COOKIEFILE, self::$cookie_file);
curl_setopt($process, CURLOPT_COOKIEJAR, self::$cookie_file);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
// curl_setopt($process, CURLOPT_VERBOSE, 1);
// curl_setopt($process, CURLOPT_SSL_VERIFYHOST, false);
// curl_setopt($process, CURLOPT_SSL_VERIFYPEER, false);
}
static function _parse_redirection_header($url)
{
$process = curl_init($url);
self::_set_basic_options($process);
// NOW we need to parse HTTP headers
curl_setopt($process, CURLOPT_HEADER, 1);
$return = curl_exec($process);
if ($return === false)
{
throw new Exception('Curl error: ' . curl_error($process));
}
curl_close($process);
if ( ! preg_match('#Location: (.*)#', $return, $location))
{
throw new Exception('No Location found');
}
if (self::$max_redirects-- <= 0)
{
throw new Exception('Max redirections reached trying to get: ' . $url);
}
return trim($location[1]);
}
}
You can use:
$redirectURL = curl_getinfo($ch,CURLINFO_REDIRECT_URL);
Lot's of regex here, despite the fact i really like them this way might be more stable to me:
$resultCurl=curl_exec($curl); //get curl result
//Optional line if you want to store the http status code
$headerHttpCode=curl_getinfo($curl,CURLINFO_HTTP_CODE);
//let's use dom and xpath
$dom = new \DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML($resultCurl, LIBXML_HTML_NODEFDTD);
libxml_use_internal_errors(false);
$xpath = new \DOMXPath($dom);
$head=$xpath->query("/html/body/p/a/#href");
$newUrl=$head[0]->nodeValue;
The location part is a link in the HTML sent by apache. So Xpath is perfect to recover it.

Categories