Related
I'm new to using cURL and its hard to find good resources for it.
What I'm trying to do is login to a remote site, by having curl do the login form and then send back that it was successful.
The code I have doesn't seem to work and only tries to show the main page of the site.
$username="mylogin#gmail.com";
$password="mypassword";
$url="http://www.myremotesite.com/index.php?page=login";
$cookie="cookie.txt";
$postdata = "email=".$username."&password=".$password;
$ch = curl_init();
curl_setopt ($ch, CURLOPT_URL, $url);
curl_setopt ($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt ($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6");
curl_setopt ($ch, CURLOPT_TIMEOUT, 60);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, 0);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt ($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt ($ch, CURLOPT_REFERER, $url);
curl_setopt ($ch, CURLOPT_POSTFIELDS, $postdata);
curl_setopt ($ch, CURLOPT_POST, 1);
$result = curl_exec ($ch);
echo $result;
curl_close($ch);
What am I doing wrong. After this is working I want to redirect to another page and get content from my site.
I had let this go for a good while but revisited it later. Since this question is viewed regularly. This is eventually what I ended up using that worked for me.
define("DOC_ROOT","/path/to/html");
//username and password of account
$username = trim($values["email"]);
$password = trim($values["password"]);
//set the directory for the cookie using defined document root var
$path = DOC_ROOT."/ctemp";
//build a unique path with every request to store. the info per user with custom func. I used this function to build unique paths based on member ID, that was for my use case. It can be a regular dir.
//$path = build_unique_path($path); // this was for my use case
//login form action url
$url="https://www.example.com/login/action";
$postinfo = "email=".$username."&password=".$password;
$cookie_file_path = $path."/cookie.txt";
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_NOBODY, false);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path);
//set the cookie the site has for certain features, this is optional
curl_setopt($ch, CURLOPT_COOKIE, "cookiename=0");
curl_setopt($ch, CURLOPT_USERAGENT,
"Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $_SERVER['REQUEST_URI']);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postinfo);
curl_exec($ch);
//page with the content I want to grab
curl_setopt($ch, CURLOPT_URL, "http://www.example.com/page/");
//do stuff with the info with DomDocument() etc
$html = curl_exec($ch);
curl_close($ch);
Update: This code was never meant to be a copy and paste. It was to show how I used it for my specific use case. You should adapt it to your code as needed. Such as directories, vars etc
I had same question and I found this answer on this website.
And I changed it just a little bit (the curl_close at last line)
$username = 'myuser';
$password = 'mypass';
$loginUrl = 'http://www.example.com/login/';
//init curl
$ch = curl_init();
//Set the URL to work with
curl_setopt($ch, CURLOPT_URL, $loginUrl);
// ENABLE HTTP POST
curl_setopt($ch, CURLOPT_POST, 1);
//Set the post parameters
curl_setopt($ch, CURLOPT_POSTFIELDS, 'user='.$username.'&pass='.$password);
//Handle cookies for the login
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
//Setting CURLOPT_RETURNTRANSFER variable to 1 will force cURL
//not to print out the results of its query.
//Instead, it will return the results as a string return value
//from curl_exec() instead of the usual true/false.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
//execute the request (the login)
$store = curl_exec($ch);
//the login is now done and you can continue to get the
//protected content.
//set the URL to the protected file
curl_setopt($ch, CURLOPT_URL, 'http://www.example.com/protected/download.zip');
//execute the request
$content = curl_exec($ch);
curl_close($ch);
//save the data to disk
file_put_contents('~/download.zip', $content);
I think this was what you were looking for.Am I right?
And one useful related question. About how to keep a session alive in cUrl: https://stackoverflow.com/a/13020494/2226796
View the source of the login page. Look for the form HTML tag. Within that tag is something that will look like action= Use that value as $url, not the URL of the form itself.
Also, while you are there, verify the input boxes are named what you have them listed as.
For example, a basic login form will look similar to:
<form method='post' action='postlogin.php'>
Email Address: <input type='text' name='email'>
Password: <input type='password' name='password'>
</form>
Using the above form as an example, change your value of $url to:
$url="http://www.myremotesite.com/postlogin.php";
Verify the values you have listed in $postdata:
$postdata = "email=".$username."&password=".$password;
and it should work just fine.
This is how I solved this in ImpressPages:
//initial request with login data
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://www.example.com/login.php');
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/32.0.1700.107 Chrome/32.0.1700.107 Safari/537.36');
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, "username=XXXXX&password=XXXXX");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie-name'); //could be empty, but cause problems on some hosts
curl_setopt($ch, CURLOPT_COOKIEFILE, '/var/www/ip4.x/file/tmp'); //could be empty, but cause problems on some hosts
$answer = curl_exec($ch);
if (curl_error($ch)) {
echo curl_error($ch);
}
//another request preserving the session
curl_setopt($ch, CURLOPT_URL, 'http://www.example.com/profile');
curl_setopt($ch, CURLOPT_POST, false);
curl_setopt($ch, CURLOPT_POSTFIELDS, "");
$answer = curl_exec($ch);
if (curl_error($ch)) {
echo curl_error($ch);
}
Panama Jack Example not work for me - Give Fatal error: Call to undefined function build_unique_path(). I used this code - (more simple - my opinion) :
// options$login_email = 'alabala#gmail.com';$login_pass = 'alabala4807';$cookie_file_path = "/tmp/cookies.txt";$LOGINURL = "http://alabala.com/index.php?route=account/login"; $agent = "Nokia-Communicator-WWW-Browser/2.0 (Geos 3.0 Nokia-9000i)";// begin script$ch = curl_init();// extra headers$headers[] = "Accept: */*";$headers[] = "Connection: Keep-Alive";// basic curl options for all requestscurl_setopt($ch, CURLOPT_HTTPHEADER, $headers);curl_setopt($ch, CURLOPT_HEADER, 0);curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); curl_setopt($ch, CURLOPT_USERAGENT, $agent); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file_path); curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path); // set first URLcurl_setopt($ch, CURLOPT_URL, $LOGINURL);// execute session to get cookies and required form inputs$content = curl_exec($ch); // grab the hidden inputs from the form required to login$fields = getFormFields($content);$fields['email'] = $login_email;$fields['password'] = $login_pass;// set postfields using what we extracted from the form$POSTFIELDS = http_build_query($fields); // change URL to login URLcurl_setopt($ch, CURLOPT_URL, $LOGINURL); // set post optionscurl_setopt($ch, CURLOPT_POST, 1); curl_setopt($ch, CURLOPT_POSTFIELDS, $POSTFIELDS); // perform login$result = curl_exec($ch); print $result; function getFormFields($data){ if (preg_match('/()/is', $data, $matches)) { $inputs = getInputs($matches[1]); return $inputs; } else { die('didnt find login form'); }}function getInputs($form){ $inputs = array(); $elements = preg_match_all("/(]+>)/is", $form, $matches); if ($elements > 0) { for($i = 0;$i $el = preg_replace('/\s{2,}/', ' ', $matches[1][$i]); if (preg_match('/name=(?:["\'])?([^"\'\s]*)/i', $el, $name)) { $name = $name[1]; $value = ''; if (preg_match('/value=(?:["\'])?([^"\'\s]*)/i', $el, $value)) { $value = $value[1]; } $inputs[$name] = $value; } } } return $inputs;}$grab_url='http://grab.url/alabala';//page with the content I want to grabcurl_setopt($ch, CURLOPT_URL, $grab_url);//do stuff with the info with DomDocument() etc$html = curl_exec($ch);curl_close($ch);var_dump($html); die;
Hey I am trying to login to the NJIT site to check if username and password are correct. For some reason I keep getting rejected even if I use correct credentials. Also how do I strip the $result to check if it contains "Fail" which would mean the credentials were wrong. Here is my code.
Main:
<?PHP
session_start();
require_once('functions.php');
//$UCID=$_POST['UCID'];
//$Pass=$_POST['Pass'];
$UCID="jko328";
$Pass="password";
$credentialsNJIT="user=".$UCID."&pass=".$Pass;
$njit_url="https://cp4.njit.edu/cp/home/login";
$njit_result=goCurlNJIT($credentialsNJIT, $njit_url);
echo $result;
?>
Here is the cURL function:
function goCurlNJIT($postdata, $url){
session_start();
$ch = curl_init();
curl_setopt ($ch, CURLOPT_URL, $url);
curl_setopt ($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt ($ch, CURLOPT_TIMEOUT, 60);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTPS);
curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt ($ch, CURLOPT_REFERER, $url);
curl_setopt ($ch, CURLOPT_POSTFIELDS, $postdata);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt ($ch, CURLOPT_POST, true);
$result = curl_exec($ch);
curl_close($ch);
if(strpos($result, "Failed") === false){
$response = "NJIT did not like credentials";
}
else{
$response = "NJIT liked your credentials";
}
echo $response;
}
Actually when we load a page it saves the cookie and send it . So to sign in you first need to acess the page without credential and save the cookies . In next request you need to send the cookies . To avoid bot script login usually websites have dynamic hidden fields and other securities .. in this case you cant log on .
I'm updating the function too much and making it way more flexible. --You can update it further more if you want.
First and most importantly, you need to create a text file named cookie.txt in the directory where your scrapping file is.
function goCurlNJIT($header = array(), $url, $post = false)
{
$cookie = "cookie.txt";
$ch = curl_init();
curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate,sdch');
if (isset($header) && !empty($header))
{
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
}
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, 200);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36");
curl_setopt($ch, CURLOPT_COOKIEJAR, realpath($cookie));
curl_setopt($ch, CURLOPT_COOKIEFILE, realpath($cookie));
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_REFERER, $url);
//if it's a POST request instead of GET
if (isset($post) && !empty($post) && $post)
{
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
} //endif
$data = curl_exec($ch);
curl_close($ch);
if($info['http_code'] == 200){
return ($data); //this will return page on successful, false otherwise
}else{
return false;
}
}
if you'd paid a close look to Requested Headers, you can notice that there is one unusual header there, that is Upgrade-Insecure-Requests:1(I personally haven't seen this before, so it's wise to send this along with request).
Next the request that you're posting is not like as it should be, you're missing stuff.
$UCID="jko328";
$Pass="password";
$credentialsNJIT="user=".$UCID."&pass=".$Pass; // where is uuid?
it should be something like this. You're skipping uuid from post string.
pass=password&user=jko328&uuid=0xACA021
so putting altogether,
$post['user'] = "jko328";
$post['pass'] = "password";
$post['uuid'] = '0xACA021';
$urlToPost = "https://cp4.njit.edu/cp/home/login";
$header['Upgrade-Insecure-Requests'] = 1;
//Now make call to function, and it'll work fine.
echo goCurlNJIT($header, $urlToPost, http_build_query($post));
and this will work fine. Make sure you've created cookie.txt file in the directory where your this scripts is.
Here is the login page:
http://www.ifreewind.net/iFreeWind.aspx
I need content of this page which need login first:
http://www.ifreewind.net/Users/Search.aspx?R=1&P=00102&i=2
On case you need, my post data content is here:
$data = "__VIEWSTATE=%2FwEPDwULLTE3NjQ3MDc3NDQPZBYCAgMPZBYCAgEPFgIeB1Zpc2libGVoZBgBBR5fX0NvbnRyb2xzUmVxdWlyZVBvc3RCYWNrS2V5X18WAQUSUmVtZW1iZXJNZUNoZWNrQm94r57YdIUtbSps%2FGLW1PUtjxcILdE%3D&__EVENTVALIDATION=%2FwEWBQLKivfjBgLw2N3fDgLC9%2FChAwLxuKbKAgL%2BjNCfDwU6DJjH4Q2acTlGVXmDrSv2Nn4G&UserNameTextBox=myemailaddress%40gmail.com&PasswordTextBox=mypassword&LoginButton=%E7%99%BB%E9%99%86";
curl_setopt($datapost, CURLOPT_POSTFIELDS, $data);
My code is here, but not work:
$site = "http://www.ifreewind.net/Users/Search.aspx?R=1&P=00102&i=2";
$ch = curl_init();
$headers = array('Host:www.ifreewind.net',
'User-Agent:Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language:zh-cn,zh;q=0.5',
'Accept-Encoding:gzip, deflate',
'Accept-Charset:GB2312,utf-8;q=0.7,*;q=0.7',
'Connection:keep-alive',
'Referer:http://www.ifreewind.net/Users/Search.aspx?R=1&P=00102&i=2',
'Cookie:Hm_lvt_7fa3bcf45d96b91c6a87d1433c045849=1327324205986; VisitUrl_-1=ok; ASP.NET_SessionId=4smyrujt3m3cnu2sxbh55z55; Hm_lpvt_7fa3bcf45d96b91c6a87d1433c045849=1327324205986; MyId=7087; iTechAuthen=FDDE38649ADA11A5C73923D4D9437097226833721D48739F39720A91C49A95DCC345C8E9DE670B71D837808619CBF23213C6252AE82112A06CE37271D7D1A3466979E2B264845C8C75B7E1791DDB49C910178DA0BC6D5BD4D6AC536842279D41FA2866DA5B4F278BAB6443D2F370B96F1E5723C685AA015BE611317F40F66965DD2CF0FD5E7C1DB794D7172CC784EF1C2B773CFCAE05772EE611B6F82EF6894F8B32EA932D01F81F70F73C18F1CB8C6F3DDC5E44',
'Cache-Control:max-age=0'
);
curl_setopt($ch, CURLOPT_URL, $site);
curl_setopt($ch, CURLOPT_TIMEOUT, 6000);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 60);
curl_setopt($ch, CURLOPT_HEADER, FALSE);
//curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
curl_setopt($ch, CURLOPT_POST, TRUE);
//curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
curl_setopt($ch, CURLOPT_COOKIEFILE, "cookie.txt");
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);//add
ob_start();
return curl_exec($ch);
ob_end_clean();
curl_close($ch);
unset($ch);
Actually my code will meet strange results in web browser like a lot of "����ܰ��", I tried to switch language charset in firefox, but utf-8 and others just doest't show well, so please help.
You can use a header parser to get back the PHPSESSID. This makes for cleaner future curl requests. Heres an example.
This was part of some object oriented programming where I could CURL into different parts of the site, and send my sessionID to track the session.
function login($email,$password){
$login=false;
$post='member[email]='.urlencode($email).'&member[password]='.urlencode($password);
$ch = curl_init();
curl_setopt ($ch, CURLOPT_URL, 'http://signon.page/login.php');
curl_setopt($ch,CURLOPT_USERAGENT,$this->useragent);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
//POST
curl_setopt($ch,CURLOPT_POST,4);
curl_setopt($ch,CURLOPT_POSTFIELDS,$post);
curl_setopt($ch, CURLOPT_HEADER, 1);
$output=curl_exec($ch);
//get cookies in map array
$rows=explode("\n",$output);
foreach($rows as $num=>$row){
$trim=substr($row,0,5);
$trim2=substr($row,0,29);
if ($trim2=="Location: /public/member/home")$login=true;
/* if the site sends back a header redirect my login worked.*/
if ($trim=="Set-C") {$rownum=$num;}}
$cookies=$rows[$rownum];
$cookies=substr($cookies,12);/*RAW COOKIE*/
$cookies=explode("; ",$cookies);
$arr=array();
foreach ($cookies as $n=>$v){
$s=explode("=",$v);
$arr[$s[0]]=$s[1];}
$cookies=$arr;
$_SESSION['SN']=$cookies['PHPSESSID'];
curl_close($ch);
$_SESSION['auth']=$login;
return $login;}//end isLoggedIn
I am trying to make a API call to wikipedia through: http://en.wikipedia.org/w/api.php?action=parse&page=Petunia&format=xml, but the xml is full with html and css tags.
Is there a way to fetch only plain text without tags? Thanks!
*Edit 1:
$json = json_decode(file_get_contents('http://en.wikipedia.org/w/api.php?action=parse&page=Petunia&format=json'));
$txt = strip_tags($json->text);
var_dump($json);
Null displayed.
Question was partially answered here
$url = 'http://en.wikipedia.org/w/api.php?action=parse&page=Petunia&format=json&prop=text';
$ch = curl_init($url);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt ($ch, CURLOPT_USERAGENT, "TestScript"); // required by wikipedia.org server
$c = curl_exec($ch);
$json = json_decode($c);
var_dump(strip_tags($json->{'parse'}->{'text'}->{'*'}))
I was not able to use file_get_contents but it works fine with cURL.
it is possible to fetch info or description from wikipedia by using xml.
$url = "http://en.wikipedia.org/w/api.php?action=opensearch&search=".$term."&format=xml&limit=1";
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_HTTPGET, TRUE);
curl_setopt($ch, CURLOPT_POST, FALSE);
curl_setopt($ch, CURLOPT_HEADER, false); // Include head as needed
curl_setopt($ch, CURLOPT_NOBODY, FALSE); // Return body
curl_setopt($ch, CURLOPT_VERBOSE, FALSE); // Minimize logs
curl_setopt($ch, CURLOPT_REFERER, ""); // Referer value
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); // No certificate
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); // Follow redirects
curl_setopt($ch, CURLOPT_MAXREDIRS, 4); // Limit redirections to four
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); // Return in string
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.1; he; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"); // Webbot name
$page = curl_exec($ch);
$xml = simplexml_load_string($page);
if((string)$xml->Section->Item->Description) {
print_r(array((string)$xml->Section->Item->Text,
(string)$xml->Section->Item->Description,
(string)$xml->Section->Item->Url));
} else {
echo "sorry";
}
But curl must be install on server... have a nice day...
I'm new to using cURL and its hard to find good resources for it.
What I'm trying to do is login to a remote site, by having curl do the login form and then send back that it was successful.
The code I have doesn't seem to work and only tries to show the main page of the site.
$username="mylogin#gmail.com";
$password="mypassword";
$url="http://www.myremotesite.com/index.php?page=login";
$cookie="cookie.txt";
$postdata = "email=".$username."&password=".$password;
$ch = curl_init();
curl_setopt ($ch, CURLOPT_URL, $url);
curl_setopt ($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt ($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6");
curl_setopt ($ch, CURLOPT_TIMEOUT, 60);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, 0);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt ($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt ($ch, CURLOPT_REFERER, $url);
curl_setopt ($ch, CURLOPT_POSTFIELDS, $postdata);
curl_setopt ($ch, CURLOPT_POST, 1);
$result = curl_exec ($ch);
echo $result;
curl_close($ch);
What am I doing wrong. After this is working I want to redirect to another page and get content from my site.
I had let this go for a good while but revisited it later. Since this question is viewed regularly. This is eventually what I ended up using that worked for me.
define("DOC_ROOT","/path/to/html");
//username and password of account
$username = trim($values["email"]);
$password = trim($values["password"]);
//set the directory for the cookie using defined document root var
$path = DOC_ROOT."/ctemp";
//build a unique path with every request to store. the info per user with custom func. I used this function to build unique paths based on member ID, that was for my use case. It can be a regular dir.
//$path = build_unique_path($path); // this was for my use case
//login form action url
$url="https://www.example.com/login/action";
$postinfo = "email=".$username."&password=".$password;
$cookie_file_path = $path."/cookie.txt";
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_NOBODY, false);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path);
//set the cookie the site has for certain features, this is optional
curl_setopt($ch, CURLOPT_COOKIE, "cookiename=0");
curl_setopt($ch, CURLOPT_USERAGENT,
"Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $_SERVER['REQUEST_URI']);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postinfo);
curl_exec($ch);
//page with the content I want to grab
curl_setopt($ch, CURLOPT_URL, "http://www.example.com/page/");
//do stuff with the info with DomDocument() etc
$html = curl_exec($ch);
curl_close($ch);
Update: This code was never meant to be a copy and paste. It was to show how I used it for my specific use case. You should adapt it to your code as needed. Such as directories, vars etc
I had same question and I found this answer on this website.
And I changed it just a little bit (the curl_close at last line)
$username = 'myuser';
$password = 'mypass';
$loginUrl = 'http://www.example.com/login/';
//init curl
$ch = curl_init();
//Set the URL to work with
curl_setopt($ch, CURLOPT_URL, $loginUrl);
// ENABLE HTTP POST
curl_setopt($ch, CURLOPT_POST, 1);
//Set the post parameters
curl_setopt($ch, CURLOPT_POSTFIELDS, 'user='.$username.'&pass='.$password);
//Handle cookies for the login
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
//Setting CURLOPT_RETURNTRANSFER variable to 1 will force cURL
//not to print out the results of its query.
//Instead, it will return the results as a string return value
//from curl_exec() instead of the usual true/false.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
//execute the request (the login)
$store = curl_exec($ch);
//the login is now done and you can continue to get the
//protected content.
//set the URL to the protected file
curl_setopt($ch, CURLOPT_URL, 'http://www.example.com/protected/download.zip');
//execute the request
$content = curl_exec($ch);
curl_close($ch);
//save the data to disk
file_put_contents('~/download.zip', $content);
I think this was what you were looking for.Am I right?
And one useful related question. About how to keep a session alive in cUrl: https://stackoverflow.com/a/13020494/2226796
View the source of the login page. Look for the form HTML tag. Within that tag is something that will look like action= Use that value as $url, not the URL of the form itself.
Also, while you are there, verify the input boxes are named what you have them listed as.
For example, a basic login form will look similar to:
<form method='post' action='postlogin.php'>
Email Address: <input type='text' name='email'>
Password: <input type='password' name='password'>
</form>
Using the above form as an example, change your value of $url to:
$url="http://www.myremotesite.com/postlogin.php";
Verify the values you have listed in $postdata:
$postdata = "email=".$username."&password=".$password;
and it should work just fine.
This is how I solved this in ImpressPages:
//initial request with login data
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://www.example.com/login.php');
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/32.0.1700.107 Chrome/32.0.1700.107 Safari/537.36');
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, "username=XXXXX&password=XXXXX");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie-name'); //could be empty, but cause problems on some hosts
curl_setopt($ch, CURLOPT_COOKIEFILE, '/var/www/ip4.x/file/tmp'); //could be empty, but cause problems on some hosts
$answer = curl_exec($ch);
if (curl_error($ch)) {
echo curl_error($ch);
}
//another request preserving the session
curl_setopt($ch, CURLOPT_URL, 'http://www.example.com/profile');
curl_setopt($ch, CURLOPT_POST, false);
curl_setopt($ch, CURLOPT_POSTFIELDS, "");
$answer = curl_exec($ch);
if (curl_error($ch)) {
echo curl_error($ch);
}
Panama Jack Example not work for me - Give Fatal error: Call to undefined function build_unique_path(). I used this code - (more simple - my opinion) :
// options$login_email = 'alabala#gmail.com';$login_pass = 'alabala4807';$cookie_file_path = "/tmp/cookies.txt";$LOGINURL = "http://alabala.com/index.php?route=account/login"; $agent = "Nokia-Communicator-WWW-Browser/2.0 (Geos 3.0 Nokia-9000i)";// begin script$ch = curl_init();// extra headers$headers[] = "Accept: */*";$headers[] = "Connection: Keep-Alive";// basic curl options for all requestscurl_setopt($ch, CURLOPT_HTTPHEADER, $headers);curl_setopt($ch, CURLOPT_HEADER, 0);curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); curl_setopt($ch, CURLOPT_USERAGENT, $agent); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file_path); curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path); // set first URLcurl_setopt($ch, CURLOPT_URL, $LOGINURL);// execute session to get cookies and required form inputs$content = curl_exec($ch); // grab the hidden inputs from the form required to login$fields = getFormFields($content);$fields['email'] = $login_email;$fields['password'] = $login_pass;// set postfields using what we extracted from the form$POSTFIELDS = http_build_query($fields); // change URL to login URLcurl_setopt($ch, CURLOPT_URL, $LOGINURL); // set post optionscurl_setopt($ch, CURLOPT_POST, 1); curl_setopt($ch, CURLOPT_POSTFIELDS, $POSTFIELDS); // perform login$result = curl_exec($ch); print $result; function getFormFields($data){ if (preg_match('/()/is', $data, $matches)) { $inputs = getInputs($matches[1]); return $inputs; } else { die('didnt find login form'); }}function getInputs($form){ $inputs = array(); $elements = preg_match_all("/(]+>)/is", $form, $matches); if ($elements > 0) { for($i = 0;$i $el = preg_replace('/\s{2,}/', ' ', $matches[1][$i]); if (preg_match('/name=(?:["\'])?([^"\'\s]*)/i', $el, $name)) { $name = $name[1]; $value = ''; if (preg_match('/value=(?:["\'])?([^"\'\s]*)/i', $el, $value)) { $value = $value[1]; } $inputs[$name] = $value; } } } return $inputs;}$grab_url='http://grab.url/alabala';//page with the content I want to grabcurl_setopt($ch, CURLOPT_URL, $grab_url);//do stuff with the info with DomDocument() etc$html = curl_exec($ch);curl_close($ch);var_dump($html); die;