I am scraping content with cURL from a URL that is hosted in the UK. When I view the site in my browser from the US, it shows the product pricing in dollars, but when I use cURL to retrieve the content, the prices come back in euros. I need the prices returned in US dollars, as if the page were being viewed from a browser in the US. Below is the code I am using:
/**
 * Fetch a URL with cURL and return the response.
 *
 * @param string $url             URL to request.
 * @param string $agent           User-Agent to send. When empty, a built-in Firefox
 *                                User-Agent header is sent instead.
 * @param string $cookie          Path of a cookie file used both to load and to save
 *                                cookies; '' disables cookie persistence.
 * @param string $referer         Referer header value; '' sends none.
 * @param mixed  $post_fields     When non-empty, the request is sent as a POST with this body.
 * @param int    $return_transfer Non-zero: return the response; zero: let cURL print it.
 * @param int    $follow_location Non-zero: follow HTTP "Location:" redirects.
 * @param mixed  $ssl             When truthy, CURLOPT_SSL_VERIFYHOST is set to 2 explicitly.
 * @param int    $curlopt_header  Non-zero: include the response headers in the output.
 *
 * @return string|bool The response when $return_transfer is set, otherwise true;
 *                     false when the transfer fails.
 */
function LoadCURLPage($url, $agent = "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-us; rv:1.4) Gecko/20030624 Netscape/7.1 (ax)",
    $cookie = '', $referer = '', $post_fields = '', $return_transfer = 1,
    $follow_location = 1, $ssl = '', $curlopt_header = 1)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);

    if ($ssl) {
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    }

    curl_setopt($ch, CURLOPT_HEADER, $curlopt_header);

    $headers = array(
        'Accept-language: en-us,en;q=0.7,bn;q=0.3',
        'Accept-charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    );

    if ($agent) {
        // A line break inside a header value would corrupt the request, so
        // collapse any CR/LF (e.g. from a wrapped string literal) into one space.
        $agent = trim(preg_replace('/\s*[\r\n]+\s*/', ' ', $agent));
        curl_setopt($ch, CURLOPT_USERAGENT, $agent);
    } else {
        // A "User-agent:" entry in CURLOPT_HTTPHEADER replaces the header libcurl
        // generates itself, so it is only added when no $agent was supplied;
        // otherwise the $agent argument would be silently ignored.
        $headers[] = 'User-agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.16) Gecko/20080702 Firefox/2.0.0.16';
    }
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

    if ($post_fields) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
    }

    // Honour the caller's flags (both default to 1, matching the old hard-coded values).
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, (bool) $return_transfer);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, (bool) $follow_location);

    if ($referer) {
        curl_setopt($ch, CURLOPT_REFERER, $referer);
    }

    if ($cookie) {
        // Same file for reading stored cookies and for writing them back on close.
        curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
    }

    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}
// Product page to download.
$url = 'http://us.asos.com/Adidas-Honey-Silver-Mid-Sneakers/ysrqb/?iid=2212284';
// Fetch the page and print whatever LoadCURLPage() returns.
print LoadCURLPage($url);
It's in a cookie. So either visit the page that sets that cookie, or edit your cookiejar file.
Related
I am trying to find out where I'll be redirected to. I tried two functions for this, but neither of them works properly.
The link is here; when you try to open it, you will be redirected:
https://lions-mansion.jp/MA141070/
so I tried use cURL,
/**
 * Request $url, letting cURL follow HTTP redirects, and report the URL that
 * was finally requested.
 *
 * Only redirects cURL itself follows (HTTP "Location:" responses) are reflected
 * in the result.
 *
 * @param string $url URL to start from.
 *
 * @return string|false The effective URL as reported by curl_getinfo().
 */
function redirect1($url) {
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
    // Keep the body out of the script output; it is not needed here.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // CURLOPT_BINARYTRANSFER was dropped: it has had no effect since PHP 5.1.3
    // and the constant is deprecated as of PHP 8.4.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);

    // The response body is irrelevant; the request is made only so the handle
    // records where it ended up.
    curl_exec($ch);
    $effectiveUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
    curl_close($ch);

    return $effectiveUrl;
}
and also this:
/**
 * Request $url without following redirects and return the target announced
 * in the response's "Location:" header.
 *
 * @param string $url URL to probe.
 *
 * @return string|null The Location header value, or null when the request
 *                     failed or the response carries no Location header.
 */
function redirect($url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // Headers are needed in the output so the Location line can be read.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $result = curl_exec($ch);
    $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    curl_close($ch);

    if ($result === false) {
        return null;
    }

    // Search only the header section so "Location:" text in the body cannot match.
    $headers = substr($result, 0, (int) $headerSize);
    if (preg_match('~^Location:\s*(.*)$~mi', $headers, $match)) {
        // trim() also removes the trailing "\r" left over from the CRLF line ending.
        return trim($match[1]);
    }

    return null;
}
But I couldn't find the redirected url.
This page does not use a redirect scheme that libcurl understands: it uses an HTML <meta http-equiv="REFRESH"> redirect, which libcurl does not support. As a result, libcurl can neither tell you where you are being redirected nor follow the redirect automatically.
You need to parse the redirect URL out of the HTML yourself, e.g.:
/**
 * Download $url and return the target of its HTML meta-refresh redirect.
 *
 * HTTP-level redirects are followed by cURL first; the final document is then
 * parsed for a <meta http-equiv="refresh" content="N;URL=..."> element.
 *
 * @param string $url Page to inspect.
 *
 * @return string|null The URL from the meta-refresh tag, or null when the
 *                     download fails or no such tag / URL is present.
 */
function redirect1($url) {
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // CURLOPT_BINARYTRANSFER was dropped: it has had no effect since PHP 5.1.3
    // and the constant is deprecated as of PHP 8.4.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    $data = curl_exec($ch);
    curl_close($ch);

    // loadHTML() rejects an empty document, so stop early on failure/empty body.
    if ($data === false || trim($data) === '') {
        return null;
    }

    // Real-world HTML is rarely valid; collect parser warnings internally
    // instead of emitting them, then restore the previous libxml setting.
    $previous = libxml_use_internal_errors(true);
    $domd = new DOMDocument();
    $loaded = $domd->loadHTML($data);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded) {
        return null;
    }

    $xp = new DOMXPath($domd);
    // <META http-equiv="REFRESH" content="0;URL=http://sumai.tokyu-land.co.jp/branz/roppongi4/?iad=daikyo" />
    // XPath 1.0 has no lower-case(), so translate() is used to compare the
    // attribute value case-insensitively ("REFRESH", "Refresh", "refresh").
    $meta = $xp->query(
        "//meta[translate(@http-equiv,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz')='refresh']"
    )->item(0);
    if ($meta === null) {
        return null;
    }

    // 0;URL=http://sumai.tokyu-land.co.jp/branz/roppongi4/?iad=daikyo
    $location = $meta->getAttribute("content");
    $pos = stripos($location, 'URL=');
    if ($pos === false) {
        // A bare refresh (e.g. content="5") reloads the same page; no target URL.
        return null;
    }

    // Drop surrounding whitespace and optional quotes around the URL.
    return trim(substr($location, $pos + 4), " \t\n\r\0\x0B'\"");
}
// Resolve the sample page's redirect target and dump it.
$resolved = redirect1('https://lions-mansion.jp/MA141070/');
var_dump($resolved);
output:
C:\projects\misc>php re.php
string(57) "http://sumai.tokyu-land.co.jp/branz/roppongi4/?iad=daikyo"
If you keep CURLOPT_RETURNTRANSFER set to true, then after executing the cURL request you can use this call to get the effective (post-redirect) URL:
// Read the effective URL recorded on the handle (CURLINFO_EFFECTIVE_URL).
// NOTE(review): presumably $ch has already been passed to curl_exec() and not yet closed — confirm at the call site.
$finalUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
I am trying to use cURL on a web page that has a captcha. I fetch the captcha image, show it to the user, and then submit the form, but I get a "session expired" error, not even an "incorrect captcha" error.
// Fetch the captcha page and collect the cookies it sets, keyed by cookie name.
// NOTE(review): $url is expected to be defined by the surrounding script.
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0");
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Response headers must be part of the output, otherwise there are no
// "Set-Cookie:" lines for the regex below to find.
curl_setopt($ch, CURLOPT_HEADER, 1);
$result = curl_exec($ch);

$cookies = array();
if ($result !== false) {
    // Capture "name=value" up to the first ';' (attributes such as path/expires are ignored).
    preg_match_all('/^Set-Cookie:\s*([^;\r\n]*)/mi', $result, $matches);
    foreach ($matches[1] as $item) {
        // Split on the first '=' only. parse_str() is not used because it
        // URL-decodes values and rewrites names (e.g. '.' becomes '_'), so the
        // cookie sent back would no longer match what the server issued.
        $parts = explode('=', trim($item), 2);
        if ($parts[0] === '') {
            continue;
        }
        $cookies[$parts[0]] = isset($parts[1]) ? $parts[1] : '';
    }
}
Then if code is submitted;
// On form submission, prepare the POST to $url1 and send back every captured cookie.
// NOTE(review): $ch, $url1, $postinfo and $cookies are expected to come from the surrounding script.
if (isset($_POST['submit'])) {
    curl_setopt($ch, CURLOPT_URL, $url1);
    // CURLOPT_POST alone selects the POST method. CURLOPT_CUSTOMREQUEST "POST"
    // was removed because it also forces POST on every followed redirect.
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $postinfo);

    // CURLOPT_COOKIE holds one complete Cookie header value; setting it again
    // replaces the previous value. Build "a=1; b=2" once, using the cookie
    // value ($val) for each name.
    $cookiePairs = array();
    foreach ($cookies as $key => $val) {
        $cookiePairs[] = $key . '=' . $val;
    }
    if ($cookiePairs) {
        curl_setopt($ch, CURLOPT_COOKIE, implode('; ', $cookiePairs));
    }
}
I have tried almost everything I could find on the internet, but I couldn't get anything to work.
I'm trying to log in to ws1.com using cURL. However, whenever I set CURLOPT_POST to true I get the error "Bad Request". This is the code that I tried:
<?php
// POST a fixed, pre-encoded login form to the ws1 login URL and print the response.
$LOGINURL = "https://secure2.ws1.com/login";
$agent = "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4) Gecko/20030624 Netscape/7.1 (ax)";

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => $LOGINURL,
    CURLOPT_USERAGENT      => $agent,
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_FOLLOWLOCATION => 1,
    CURLOPT_POST           => 1,
    // Request body, including the _csrf value, is sent exactly as written here.
    CURLOPT_POSTFIELDS     => "_csrf=QTRvNkJKaUoWBFYDBHkLDHFkP0MdMhAPOUZCASR9Xh4ZRDx7BC8LGA%3D%3D&LoginForm%5Bemail%5D=naczzalid%40hotmail.com&LoginForm%5Bpassword%5D=csc1233&LoginForm%5BrememberMe%5D=0&login-button=",
    // Certificate and host-name verification are both switched off.
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_SSL_VERIFYHOST => 0,
    // One file is used both to load and to store cookies.
    CURLOPT_COOKIEFILE     => "cookies.txt",
    CURLOPT_COOKIEJAR      => "cookies.txt",
));

$result = curl_exec($ch);
echo $result;
curl_close($ch);
?>
Can anyone please explain what the problem is, so I can learn how to do this?
I tested some things with this form and if the csrf code is incorrect, then it gives a bad request.
The csrf value changes for every request and is tied to your cookies. So you need to fetch the login page first and extract the correct csrf code before submitting.
Working code:
<?php
// Two-step login: GET the login page to obtain a fresh CSRF token (and the
// cookies it is tied to), then POST the credentials with that token on the
// same handle so the cookies are reused.
$LOGINURL = "https://secure2.ws1.com/login";
$agent = "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4) Gecko/20030624 Netscape/7.1 (ax)";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL,$LOGINURL);
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_COOKIEFILE, "cookies.txt");
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookies.txt");

// Step 1: fetch the login form.
$result = curl_exec ($ch);
if ($result === false) {
    $error = curl_error($ch);
    curl_close($ch);
    throw new RuntimeException('Could not fetch the login page: ' . $error);
}

// extract csrf token
if (!preg_match('/<input type="hidden" name="_csrf" value="([^"]+)">/i', $result, $csrfMatch)) {
    // Posting without a token only produces "Bad Request", so stop here.
    curl_close($ch);
    throw new RuntimeException('CSRF token not found in the login page');
}
// The token is inserted into a hand-built form body, so it must be URL-encoded.
$csrf = urlencode($csrfMatch[1]);

// Step 2: submit the form with the fresh token.
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, "_csrf={$csrf}&LoginForm%5Bemail%5D=naczzalid%40hotmail.com&LoginForm%5Bpassword%5D=csc1233&LoginForm%5BrememberMe%5D=0&login-button=");
$result = curl_exec($ch);
curl_close ($ch);
var_dump($result);
I'm trying to log in to my website using cURL. Everything works fine; the only problem is that when I open the file cookie.txt it is empty. This is the code that I tried:
<?php
// POST the WordPress login form and keep the session cookies in cookie.txt
// next to this script.
$url = "http://security-dz.com/wp-login.php"; // URL
$POSTFIELDS = 'log=testtest&pwd=test1234';
// Kept on one line: a line break inside the literal would be sent as part of the header.
$agent = "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4) Gecko/20030624 Netscape/7.1 (ax)";
// Absolute path, so the file does not depend on the process's working directory.
$cookieFile = dirname(__FILE__) . DIRECTORY_SEPARATOR . "cookie.txt";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS,$POSTFIELDS);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$result = curl_exec($ch);
// The cookie jar is written out when the handle is closed.
curl_close($ch);
echo $result;
?>
I created a file called cookie.txt in the same folder. When I run this code I can access my website normally; the only problem is that when I open cookie.txt it is empty. What can I do about this?
If you want to use the full path, you can build it this way:
// Absolute path of cookie.txt next to this script. DIRECTORY_SEPARATOR keeps
// the path valid on non-Windows systems too, where "\" is not a separator.
$cookie = dirname(__FILE__) . DIRECTORY_SEPARATOR . "cookie.txt";
Then you can use it like this:
// Point both cookie options at the same file: one loads cookies from it, the other saves to it.
curl_setopt_array($ch, array(
    CURLOPT_COOKIEFILE => $cookie,
    CURLOPT_COOKIEJAR  => $cookie,
));
It works for me.
Try this:
<?php
// POST the WordPress login form, storing cookies at an absolute path.
$url = "http://snipercoder.com/wp-login.php"; // URL
$POSTFIELDS = 'log=testtest&pwd=test1234';
// Kept on one line: a line break inside the literal would be sent as part of the header.
$agent = "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4) Gecko/20030624 Netscape/7.1 (ax)";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS,$POSTFIELDS);
// Replace the placeholder folder with the real location of cookie.txt.
curl_setopt($ch, CURLOPT_COOKIEFILE, "C:/yourfolderserver/www/cookie.txt");
curl_setopt($ch, CURLOPT_COOKIEJAR, "C:/yourfolderserver/www/cookie.txt");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$result = curl_exec($ch);
// The cookie jar is written out when the handle is closed.
curl_close($ch);
echo $result;
?>
CURLOPT_COOKIEFILE/CURLOPT_COOKIEJAR options must be set with absolute path value. "cookie.txt" is a relative path.
If you are in a localhost try this:
<?php
// POST the WordPress login form from a local WAMP install, storing cookies
// under the WAMP web root.
$url = "http://security-dz.com/wp-login.php"; // URL
$POSTFIELDS = 'log=testtest&pwd=test1234';
// Kept on one line: a line break inside the literal would be sent as part of the header.
$agent = "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4) Gecko/20030624 Netscape/7.1 (ax)";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS,$POSTFIELDS);
curl_setopt($ch, CURLOPT_COOKIEFILE, "C:/wamp/www/cookie.txt");
curl_setopt($ch, CURLOPT_COOKIEJAR, "C:/wamp/www/cookie.txt");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$result = curl_exec($ch);
// The cookie jar is written out when the handle is closed.
curl_close($ch);
echo $result;
?>
I've been playing with this curl facebook login script for a while just trying to get to grips with some of the features in curl, but it seems that I can not get the cookies to register:
php script
/**
 * POST the login form to Facebook's login.php and print the response body.
 *
 * Cookies are loaded from and saved to "cookies.txt" (relative path).
 * Nothing is returned; the page is echoed directly.
 */
function facebookLogin(){
    $email = 'email';
    $password = 'pass';

    // Form body with both credentials URL-encoded.
    $formBody = 'email='.urlencode($email).'&pass='.urlencode($password).'&login=Login';

    $ch = curl_init();
    curl_setopt_array($ch, array(
        CURLOPT_URL            => 'http://www.facebook.com/login.php',
        CURLOPT_POSTFIELDS     => $formBody,
        CURLOPT_POST           => 1,
        CURLOPT_HEADER         => 0,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_COOKIEJAR      => "cookies.txt",
        CURLOPT_COOKIEFILE     => "cookies.txt",
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3",
        CURLOPT_REFERER        => "http://www.facebook.com",
    ));

    echo curl_exec($ch);
}
I have a text file called cookies.txt which is in the same directory as the script, but after running this script nothing is written into the file and therefore no cookies are created, this is a big issue when trying to explore other web pages on the same website as you have to keep logging in.
Where am I going wrong?
OK, it turns out the cookies are registered even if the cookies.txt file looks empty, but you need to make sure you load this file when you explore other parts of the site, e.g.:
/**
 * Log in via facebookLogin(), then request the messages page with the cookies
 * stored in "cookies.txt" and print the response body.
 */
function facebookGoToMessages(){
    // Log in first; the request below reads its cookies from cookies.txt.
    facebookLogin();

    $ch = curl_init("http://www.facebook.com/messages");
    curl_setopt_array($ch, array(
        CURLOPT_COOKIEFILE     => "cookies.txt",
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3",
        CURLOPT_REFERER        => "http://www.facebook.com",
    ));

    echo curl_exec($ch);
}