CURL using GET method instead of POST - php

I have a data submission system that post values via curl to a url like this:
$URL="http://some_url/";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL,$URL);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt ($ch, CURLOPT_HTTPHEADER, array ('Content-Type:application/x-www-form-urlencoded;application/xml'));
curl_setopt($ch, CURLOPT_POSTFIELDS, "First Name=".$firstname."&Last Name=".$lastname."&Daytime Phone=".$dayphone."&Evening Phone=".$evephone."&DeliveryCode=".$deliveryCode."&username=".$username."&password=".$password);
echo curl_exec ($ch);
curl_close ($ch);
When i inspect the network console, it shows that a GET request was performed instead of POST as desired. What can be the issue.

I don't know why your code does not work, but I give you a code that does work; It may not be the solution, but it will help you to understand cUrl;
Adapt the code as you need it;
<?php
class CurlTool {
public static $userAgents = array(
'FireFox3' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; pl; rv:1.9) Gecko/2008052906 Firefox/3.0',
'GoogleBot' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'IE7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
'Netscape' => 'Mozilla/4.8 [en] (Windows NT 6.0; U)',
'Opera' => 'Opera/9.25 (Windows NT 6.0; U; en)'
);
public static $options = array(
CURLOPT_USERAGENT => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
CURLOPT_AUTOREFERER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_FRESH_CONNECT => true,
CURLOPT_COOKIEJAR => "cookies.txt",
CURLOPT_COOKIEFILE => "cookies.txt",
CURLOPT_SSL_VERIFYPEER => false,
//CURLOPT_COOKIESESSION => false,
);
private static $proxyServers = array();
private static $proxyCount = 0;
private static $currentProxyIndex = 0;
public static $getinfo;
public static function addProxyServer($url) {
self::$proxyServers[] = $url;
++self::$proxyCount;
}
public function curl_redirect_exec($ch, &$redirects, $curlopt_header = false) {
/*
curl_setopt($ch, CURLOPT_USERAGENT ,'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)');
curl_setopt($ch, CURLOPT_AUTOREFERER ,true);
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookies.txt");
curl_setopt($ch, CURLOPT_COOKIEFILE,"cookies.txt");
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER ,false);
*/
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
echo $redirects." ";
if ($redirects>0) {
echo "rr: ".$url;
exit;
}
$data = curl_exec($ch);
$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($http_code == 301 || $http_code == 302) {
list($header) = explode("\r\n\r\n", $data, 2);
$matches = array();
//this part has been changes from the original
preg_match("/(Location:|URI:)[^(\n)]*/", $header, $matches);
$url = trim(str_replace($matches[1],"",$matches[0]));
//end changes
$url_parsed = parse_url($url);
if (isset($url_parsed)) {
$ok=curl_setopt($ch, CURLOPT_URL, $url);
if (!$ok) {
echo "doesn't work";
exit;
}
print_r($url_parsed);
exit;
$redirects++;
if($redirects>10)die('live is hard');
return curl_redirect_exec($ch, $redirects,$curlopt_header);
}
}
if ($curlopt_header)
return $data;
else {
list(,$body) = explode("\r\n\r\n", $data, 2);
return $body;
}
}
public static function fetchContent($url, $fields = null, $verbose = false) {
//print '*'.$fields.'*';
//
if (($curl = curl_init($url)) == false) {
throw new Exception("curl_init error for url $url.");
}
if (self::$proxyCount > 0) {
$proxy = self::$proxyServers[self::$currentProxyIndex++ % self::$proxyCount];
curl_setopt($curl, CURLOPT_PROXY, $proxy);
if ($verbose === true) {
echo "Reading $url [Proxy: $proxy] ... ";
}
} else if ($verbose === true) {
echo "Reading $url ... ";
}
//$verbose=TRUE;
//print_r($fields);
// debug_print_backtrace();
//url-ify the data for the POST
$fields_string = '';
if (is_array($fields))
foreach ($fields as $key => $value) {
if (empty($key))
continue;
$fields_string .= $key . '=' . urlencode($value) . '&';
if ($verbose === true) {
echo $key . ": " . $value;
}
}
rtrim($fields_string, '&');
if (count($fields) > 0) {
curl_setopt($curl, CURLOPT_POST, count($fields));
curl_setopt($curl, CURLOPT_POSTFIELDS, $fields_string);
}
if ($verbose === true) {
echo "Fields string $fields_string ... ";
}
//print_r(self::$options);
//echo phpinfo();
curl_setopt_array($curl, self::$options);
//curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
/*foreach (self::$options as $option => $value) {
if (!curl_setopt($curl, $option, $value)) {
//die('cannot set'.$option.':'.$value);
//return false;
}
}
*/
$html = curl_exec($curl);
$redirects = 0 ;
//echo $html;
//exit;
//$html = self::curl_redirect_exec($curl, &$redirects);
//die(2);
self::$getinfo = curl_getinfo($curl);
if ($html === false) {
throw new Exception("curl_exec error for url $url " . curl_error($curl));
}
curl_close($curl);
if ($verbose === true) {
echo "Done.\n";
}
$html = preg_replace('#\n+#', ' ', $html);
$html = preg_replace('#\s+#', ' ', $html);
return $html;
}
public static function downloadFile($url, $fileName, $fields = null, $verbose = false) {
if (($curl = curl_init($url)) == false) {
throw new Exception("curl_init error for url $url.");
}
if (self::$proxyCount > 0) {
$proxy = self::$proxyServers[self::$currentProxyIndex++ % self::$proxyCount];
curl_setopt($curl, CURLOPT_PROXY, $proxy);
if ($verbose === true) {
echo "Downloading $url [Proxy: $proxy] ... ";
}
} else if ($verbose === true) {
echo "Downloading $url ... ";
}
//url-ify the data for the POST
$fields_string = '';
if (is_array($fields))
foreach ($fields as $key => $value) {
if (empty($key))
continue;
$fields_string .= $key . '=' . urlencode($value) . '&';
}
rtrim($fields_string, '&');
curl_setopt($curl, CURLOPT_POST, count($fields));
curl_setopt($curl, CURLOPT_POSTFIELDS, $fields_string);
curl_setopt_array($curl, self::$options);
if (is_file($fileName)) {
$contents = file_get_contents($fileName, false, null, -1, 3 * 1024);
$pattern = "__VIEWSTATE";
if (strpos($contents, $pattern) === false) {
return $fileName;
}
}
// if (is_file($fileName)) {
// // make a HEAD request and try to get the file size HEAD
// // if they differ then redownload the file, otherwise no need
// curl_setopt($curl, CURLOPT_NOBODY, true);
// curl_setopt($curl, CURLOPT_HEADER, true);
// $ret = curl_exec($curl);
// //echo $fileName;
// $size = curl_getinfo($curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD);
// if ($size == filesize($fileName)) {
// return $fileName;
// } else {
// unlink($fileName);
// return self::downloadFile($url, $fileName, $fields, $verbose);
// }
// }
//echo getcwd();
if (substr($fileName, -1) == '/') {
$targetDir = $fileName;
$fileName = tempnam(sys_get_temp_dir(), 'c_');
}
if (($fp = fopen('../files/'.$fileName, "w")) === false) {
throw new Exception("fopen error for filename $fileName");
}
curl_setopt($curl, CURLOPT_FILE, $fp);
curl_setopt($curl, CURLOPT_BINARYTRANSFER, true);
$ret = curl_exec($curl);
self::$getinfo = curl_getinfo($curl);
if ($ret === false) {
fclose($fp);
unlink('../files/'.$fileName);
echo curl_error($curl).'<br/>';
echo curl_errno($curl).'<br/>';
if(curl_errno($curl)!=6)
{
throw new Exception("curl_exec error for url $url.");
}
} elseif (isset($targetDir)) {
$eurl = curl_getinfo($curl, CURLINFO_EFFECTIVE_URL);
preg_match('#^.*/(.+)$#', $eurl, $match);
fclose($fp);
rename($fileName, "$targetDir{$match[1]}");
$fileName = "$targetDir{$match[1]}";
} else {
fclose($fp);
}
curl_close($curl);
if ($verbose === true) {
echo "Done.\n";
}
return $fileName;
}
}

Related

optimize foreach loop php

I've got double foreach loop. Script takes urls from one file and tries to find it in html code of pages from another file. Of course that reading so many pages is pretty hard for server so I want to optimize script but how can I do it?
Here is the code:
<?php
$sites_raw = file('https://earnmoneysafe.com/script/sites.txt');
$sites = array_map('trim', $sites_raw);
$urls_raw = file('https://earnmoneysafe.com/script/4toiskatj.txt');
$urls = array_map('trim', $urls_raw);
function file_get_contents_curl($url) {
$ch = curl_init();
$config['useragent'] = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0';
curl_setopt($curl, CURLOPT_USERAGENT, $config['useragent']);
curl_setopt($ch, CURLOPT_AUTOREFERER, TRUE);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
foreach ($sites as $site){
$homepage = file_get_contents_curl($site);
foreach ($urls as $url){
$needle = $url;
if (strpos($homepage, $needle) !== false) {
echo 'true';
}
}
}
?>
Use curl_multi_exec() to fetch all the URLs in parallel.
$urls = file('https://earnmoneysafe.com/script/4toiskatj.txt', FILE_IGNORE_NEW_LINES);
$sites = file('https://earnmoneysafe.com/script/sites.txt', FILE_IGNORE_NEW_LINES);
foreach ($sites as $site) {
$curl_handles[$site] = get_curl($site);
}
$mh = curl_multi_init();
foreach ($curl_handles as $ch) {
curl_multi_add_handle($mh, $ch);
}
do {
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
foreach ($curl_handles as $site => $ch) {
$homepage = curl_multi_getcontent($ch);
foreach ($urls as $needle) {
if (strpos($homepage, $needle) !== false) {
echo 'true';
}
}
curl_multi_remove_handle($mh, $ch);
}
curl_multi_close($mh);
function get_curl($url) {
$ch = curl_init();
$config['useragent'] = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0';
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); // edited
curl_setopt($ch, CURLOPT_AUTOREFERER, TRUE);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
return $ch;
}
I think this, This code is cleaner
<?php
const SITES_URL = 'https://earnmoneysafe.com/script/sites.txt';
const URLS_URL = 'https://earnmoneysafe.com/script/4toiskatj.txt';
function readFileLines($url) {
$file_contents = file_get_contents($url);
$lines = explode("\n", $file_contents);
$filtered_lines = array_filter($lines, function($line) {
return !empty(trim($line));
});
return $filtered_lines;
}
function checkSiteUrls($site, $urls) {
$homepage = file_get_contents($site);
foreach ($urls as $url) {
if (strpos($homepage, $url) !== false) {
echo 'true';
}
}
}
$sites = readFileLines(SITES_URL);
$urls = readFileLines(URLS_URL);
foreach ($sites as $site) {
checkSiteUrls($site, $urls);
}
?>

How can I create Google Alert in php

I have spend much of time on it, but did not found any working solution ...
I have tried the following code .. but always else case is running "didnt find login form1"
I have tried another coders11 inplemented api but it was also deprecated...
I found many other solutions but not in php ... I am looking for solution in php...
class googleAlerts{
public function createAlert($alert){
$USERNAME = 'XXXXXX#gmail.com';
$PASSWORD = 'YYYYYY';
$COOKIEFILE = 'cookies.txt';
$ch = curl_init();
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_COOKIEJAR, $COOKIEFILE);
curl_setopt($ch, CURLOPT_COOKIEFILE, $COOKIEFILE);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
curl_setopt($ch, CURLOPT_TIMEOUT, 120);
curl_setopt($ch, CURLOPT_URL,
'https://accounts.google.com/ServiceLogin?hl=en&service=alerts&continue=http://www.google.com/alerts/manage');
$data = curl_exec($ch);
$formFields = $this->getFormFields($data);
$formFields['Email'] = $USERNAME;
$formFields['Passwd'] = $PASSWORD;
unset($formFields['PersistentCookie']);
$post_string = '';
foreach($formFields as $key => $value) {
$post_string .= $key . '=' . urlencode($value) . '&';
}
$post_string = substr($post_string, 0, -1);
curl_setopt($ch, CURLOPT_URL, 'https://accounts.google.com/ServiceLoginAuth');
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
$result = curl_exec($ch);
if (strpos($result, '<title>') === false) {
return false;
} else {
curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts');
curl_setopt($ch, CURLOPT_POST, 0);
curl_setopt($ch, CURLOPT_POSTFIELDS, null);
$result = curl_exec($ch);
curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts/create');
curl_setopt($ch, CURLOPT_POST, 0);
$result = curl_exec($ch);
//var_dump($result);
$result = $this->getFormFieldsCreate($result);
$result['q'] = $alert;
$result['t'] = '7';
$result['f'] = '1';
$result['l'] = '0';
$result['e'] = 'feed';
unset($result['PersistentCookie']);
$post_string = '';
foreach($result as $key => $value) {
$post_string .= $key . '=' . urlencode($value) . '&';
}
$post_string = substr($post_string, 0, -1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
$result = curl_exec($ch);
curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts/manage');
$result = curl_exec($ch);
if (preg_match_all('%'.$alert.'(?=</a>).*?<a href=[\'"]http://www.google.com/alerts/feeds/([^\'"]+)%i', $result, $matches)) {
return ('http://www.google.com/alerts/feeds/'.$matches[1][0]);
} else {
return false;
}
}
}
private function getFormFields($data)
{
if (preg_match('/(<form.*?id=.?gaia_loginform.*?<\/form>)/is', $data, $matches)) {
$inputs = $this->getInputs($matches[1]);
return $inputs;
} else {
die('didnt find login form');
}
}
private function getFormFieldsCreate($data)
{
if (preg_match('/(<form.*?name=.?.*?<\/form>)/is', $data, $matches)) {
$inputs = $this->getInputs($matches[1]);
return $inputs;
} else {
die('didnt find login form1');
}
}
private function getInputs($form)
{
$inputs = array();
$elements = preg_match_all('/(<input[^>]+>)/is', $form, $matches);
if ($elements > 0) {
for($i = 0; $i < $elements; $i++) {
$el = preg_replace('/\s{2,}/', ' ', $matches[1][$i]);
if (preg_match('/name=(?:["\'])?([^"\'\s]*)/i', $el, $name)) {
$name = $name[1];
$value = '';
if (preg_match('/value=(?:["\'])?([^"\'\s]*)/i', $el, $value)) {
$value = $value[1];
}
$inputs[$name] = $value;
}
}
}
return $inputs;
}
}
$alert = new googleAlerts;
echo $alert->createAlert('YOUR ALERT');```
You can't login into google alerts with password and email anymore, you would have to pre-create cookies by login into google alerts and copying them out of the dev console and then passing them as argument when doing a curl request. Check out my google alerts api i have written in php. Maybe that helps you out https://github.com/Trivo25/google-alerts-api-php

How can I scrape LinkedIn company pages with cURL and PHP?

I want to scrape some LinkedIn company pages with cURL and PHP with login Credentials. I tried this code. But I got error like
Unauthorized
You must be authenticated to access this page.
Before scraping the company page I have to sign in at LinkedIn with a personal account via cURL, but it doesn't seems to work.
Instead of using simple_html_dom we used above fetch_value.
function fetch_value($str, $find_start = '', $find_end = '') {
if ($find_start == '') {
return '';
}
$start = strpos($str, $find_start);
if ($start === false) {
return '';
}
$length = strlen($find_start);
$substr = substr($str, $start + $length);
if ($find_end == '') {
return $substr;
}
$end = strpos($substr, $find_end);
if ($end === false) {
return $substr;
}
return substr($substr, 0, $end);
}
$linkedin_login_page = "https://www.linkedin.com/uas/login";
$linkedin_ref = "https://www.linkedin.com";
$username = 'username';
$password = 'password';
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $linkedin_login_page);
curl_setopt($ch, CURLOPT_REFERER, $linkedin_ref);
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7)');
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookie.txt');
$login_content = curl_exec($ch);
if (curl_error($ch)) {
echo 'error:' . curl_error($ch);
}
$var = array(
'isJsEnabled' => 'false',
'source_app' => '',
'clickedSuggestion' => 'false',
'session_key' => trim($username),
'session_password' => trim($password),
'signin' => 'Sign In',
'session_redirect' => '',
'trk' => '',
'fromEmail' => ''
);
$var['loginCsrfParam'] = fetch_value($login_content, 'type="hidden" name="loginCsrfParam" value="', '"');
$var['csrfToken'] = fetch_value($login_content, 'type="hidden" name="csrfToken" value="', '"');
$var['sourceAlias'] = fetch_value($login_content, 'input type="hidden" name="sourceAlias" value="', '"');
$post_array = array();
foreach ($var as $key => $value) {
$post_array[] = urlencode($key) . '=' . urlencode($value);
}
$post_string = implode('&', $post_array);
curl_setopt($ch, CURLOPT_URL, "https://www.linkedin.com/uas/login-submit");
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
$store = curl_exec($ch);
if (stripos($store, "session_password-login-error") !== false) {
$err = trim(strip_tags(fetch_value($store, '<span class="error" id="session_password-login-error">', '</span>')));
echo "Login error : ".$err;
} elseif (stripos($store, 'profile-nav-item') !== false) {
curl_setopt($ch, CURLOPT_URL, 'https://www.linkedin.com/company-beta/10667/?pathWildcard=10667');
curl_setopt($ch, CURLOPT_POST, false);
curl_setopt($ch, CURLOPT_POSTFIELDS, "");
$content = curl_exec($ch);
curl_close($ch);
echo $content;
} else {
echo "unknown error";
}
Any suggestion please help?
Thanks!

Remove the line that found in array while processes a code PHP

i have the following code which getting some links from remote.
Now the code while processesing and found the hash [if($done==true)] keep sending request to the host even if the hash have been found in the link.
i need the code stop sending requests to the host that we found its hash, and keep working on the other hosts, so it will be exclude all founded hosts from the the second foreach [foreach (file("hosts.txt")] and not send more request to them.
please if any one can help to fix my code .
sorry about bad english.
$MAXPROCESS=50;
$execute=0;
$Arr = array();
foreach (file("hashes.txt") as $hashkey => $hash)
{
$hash = trim($hash);
$pid = pcntl_fork();
$execute++;
if ($execute >= $MAXPROCESS)
{
while (pcntl_waitpid(0, $status) != -1)
{
$status = pcntl_wexitstatus($status);
$execute =0;
//sleep(1);
flush();
//echo " [$ipkey] Child $status completed\n";
}
}
if (!$pid)
{
foreach (file("hosts.txt") as $hostkey => $hosts)
{
$host = trim($hosts);
if(!in_array($host,$Arr))
{
$done = CHECK_URL($host.$hash.'.xml');
if($done==true)
{
$Arr[] = $host;
echo "\n\r\n\r".$host." : Found [$hashkey] ------------\n\r\n\r";
}else{
echo $host." : error [$hashkey]\n";
}
}else{
return false;
}
flush();
}
exit;
}
}
function CHECK_URL($url)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1" );
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 5 );
curl_setopt( $ch, CURLOPT_TIMEOUT, 5 );
$result = curl_exec($ch);
$response = curl_getinfo($ch);
//if($response['http_code']=="200" || preg_match('/xml/i', $result))
if($response['http_code']=="200")
{
file_put_contents('hash.found.txt', $url."\n", FILE_APPEND);
return true;
}
curl_close($ch); // Close the curl stream
}
.............................

Curl, Redirection not loading actual webpage

There has to be some form of redirect that is happening through java.
If you load this webpage https://btc-e.com/index.php
you will not actually get the webpage if you use curl. you get just a bunch of java. How do i go about getting to the actual HTML so i can start a login process.
I know this website provides an API, but i need a CURL login method, that uses the website and not the API.
here is all the code which i am using
<?php
$curl = new Curl();
$curl->setSsl();
$curl->setCookieFile('whatever_cookie_file.cook');
$page = $curl->get("https://btc-e.com/index.php");
echo $page;
class Curl {
public $curl;
public $manual_follow;
public $redirect_url;
public $cookiefile = null;
public $headers = array();
function Curl($proxy=false) {
$this->curl = curl_init();
$this->headers[] = "Accept: */*;q=0.5, text/javascript, application/javascript, application/ecmascript, application/x-ecmascript";
$this->headers[] = "Cache-Control: max-age=0";
$this->headers[] = "Connection: keep-alive";
$this->headers[] = "Keep-Alive: 300";
$this->headers[] = "Accept-Charset: utf-8;ISO-8859-1;iso-8859-2;q=0.7,*;q=0.7";
$this->headers[] = "Accept-Language: en-us,en;q=0.5";
$this->headers[] = "Pragma: "; // browsers keep this blank.
curl_setopt($this->curl, CURLOPT_USERAGENT, 'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.14) Gecko/2009082707 Firefox/3.0.14 (.NET CLR 3.5.30729)');
curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
curl_setopt($this->curl, CURLOPT_VERBOSE, false);
curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($this->curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($this->curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($this->curl, CURLOPT_AUTOREFERER, true);
if($proxy != false){
curl_setopt($this->curl, CURLOPT_PROXY,$proxy);
}// end if proxy != false
if (ini_get('open_basedir') == '' && ini_get('safe_mode' == 'Off')){
curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
} else {
$this->manual_follow = true;
}
curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($this->curl, CURLOPT_HEADER, false);
curl_setopt($this->curl, CURLOPT_TIMEOUT, 30);
$this->setRedirect();
}
function addHeader($header){
$this->headers[] = $header;
curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
}
function header($val){
curl_setopt($this->curl, CURLOPT_HEADER, $val);
}
function noAjax(){
foreach($this->headers as $key => $val){
if ($val == "X-Requested-With: XMLHttpRequest"){
unset($this->headers[$key]);
}
}
curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
}
function setAjax(){
$this->headers[] = "X-Requested-With: XMLHttpRequest";
curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
}
function setSsl($username = null, $password = null){
curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($this->curl, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($this->curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
if ($username && $password){
curl_setopt($this->curl, CURLOPT_USERPWD, "$username:$password");
}
}
function setBasicAuth($username,$password){
curl_setopt($this->curl, CURLOPT_HEADER, false);
curl_setopt($this->curl, CURLOPT_USERPWD, "$username:$password");
}
function setCookieFile($file){
if (file_exists($file)) {
} else {
$handle = fopen($file, 'w+') or print('The cookie file could not be opened. Make sure this directory has the correct permissions');
fclose($handle);
}
curl_setopt($this->curl, CURLOPT_COOKIESESSION, true);
curl_setopt($this->curl, CURLOPT_COOKIEJAR, $file);
curl_setopt($this->curl, CURLOPT_COOKIEFILE, $file);
$this->cookiefile = $file;
}
function getCookies(){
$contents = file_get_contents($this->cookiefile);
$cookies = array();
if ($contents){
$lines = explode("\n",$contents);
if (count($lines)){
foreach($lines as $key=>$val){
$tmp = explode("\t",$val);
if (count($tmp)>3){
$tmp[count($tmp)-1] = str_replace("\n","",$tmp[count($tmp)-1]);
$tmp[count($tmp)-1] = str_replace("\r","",$tmp[count($tmp)-1]);
$cookies[$tmp[count($tmp)-2]]=$tmp[count($tmp)-1];
}
}
}
}
return $cookies;
}
function setDataMode($val){
curl_setopt($this->curl, CURLOPT_BINARYTRANSFER, $val);
}
function close() {
curl_close($this->curl);
}
function getInfo(){
return curl_getinfo($this->curl);
}
function getInstance() {
static $instance;
if (!isset($instance)) {
$curl = new Curl;
$instance = array($curl);
}
return $instance[0];
}
function setTimeout($connect, $transfer) {
curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, $connect);
curl_setopt($this->curl, CURLOPT_TIMEOUT, $transfer);
}
function getError() {
return curl_errno($this->curl) ? curl_error($this->curl) : false;
}
function disableRedirect() {
$this->setRedirect(false);
}
function setRedirect($enable = true) {
if ($enable) {
$this->manual_follow = !curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
} else {
curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, false);
$this->manual_follow = false;
}
}
function getHttpCode() {
return curl_getinfo($this->curl, CURLINFO_HTTP_CODE);
}
function makeQuery($data) {
if (is_array($data)) {
$fields = array();
foreach ($data as $key => $value) {
$fields[] = $key . '=' . urlencode($value);
}
$fields = implode('&', $fields);
} else {
$fields = $data;
}
return $fields;
}
// FOLLOWLOCATION manually if we need to
function maybeFollow($page) {
if (strpos($page, "\r\n\r\n") !== false) {
list($headers, $page) = explode("\r\n\r\n", $page, 2);
}
$code = $this->getHttpCode();
if ($code > 300 && $code < 310) {
$info = $this->getInfo();
preg_match("#Location: ?(.*)#i", $headers, $match);
$this->redirect_url = trim($match[1]);
if (substr_count($this->redirect_url,"http://") == 0 && isset($info['url']) && substr_count($info['url'],"http://")){
$url_parts = parse_url($info['url']);
if (isset($url_parts['host']) && $url_parts['host']){
$this->redirect_url = "http://".$url_parts['host'].$this->redirect_url;
}
}
if ($this->manual_follow) {
return $this->get($this->redirect_url);
}
} else {
$this->redirect_url = '';
}
return $page;
}
function plainPost($url,$data){
curl_setopt($this->curl, CURLOPT_URL, $url);
curl_setopt($this->curl, CURLOPT_POST, true);
curl_setopt($this->curl, CURLOPT_POSTFIELDS, $data);
$page = curl_exec($this->curl);
$error = curl_errno($this->curl);
if ($error != CURLE_OK || empty($page)) {
return false;
}
curl_setopt($this->curl, CURLOPT_POST, false);
curl_setopt($this->curl, CURLOPT_POSTFIELDS, '');
return $this->maybeFollow($page);
}
function post($url, $data) {
$fields = $this->makeQuery($data);
//var_dump($fields);
curl_setopt($this->curl, CURLOPT_URL, $url);
curl_setopt($this->curl, CURLOPT_POST, true);
curl_setopt($this->curl, CURLOPT_POSTFIELDS, $fields);
$page = curl_exec($this->curl);
$error = curl_errno($this->curl);
if ($error != CURLE_OK || empty($page)) {
return false;
}
curl_setopt($this->curl, CURLOPT_POST, false);
curl_setopt($this->curl, CURLOPT_POSTFIELDS, '');
return $this->maybeFollow($page);
}
function get($url, $data = null) {
curl_setopt($this->curl, CURLOPT_FRESH_CONNECT, false);
if (!is_null($data)) {
$fields = $this->makeQuery($data);
$url .= '?' . $fields;
}
curl_setopt($this->curl, CURLOPT_URL, $url);
$page = curl_exec($this->curl);
$error = curl_errno($this->curl);
if ($error != CURLE_OK || empty($page)) {
return false;
}
return $this->maybeFollow($page);
}
}
?>
The answer to this question was not specifically with curl.
Due to very simple cookie verification of the website this is why i was unable to load the initial webpage.
To solve this problem simply parse out all needed values from the initially loaded webpage.
Once you have all needed values just simply write a cookie with everything included. After The cookie is passed to server you are now allowed to see the content of the webpage.
To solve this problem further and to do more advanced java script manipulation a system such as phantomjs with casperjs and or using a solution such as Selenium with PHP_unit headless mode.
Hope this helps anyone who faced the same problem

Categories