I've got double foreach loop. Script takes urls from one file and tries to find it in html code of pages from another file. Of course that reading so many pages is pretty hard for server so I want to optimize script but how can I do it?
Here is the code:
<?php
$sites_raw = file('https://earnmoneysafe.com/script/sites.txt');
$sites = array_map('trim', $sites_raw);
$urls_raw = file('https://earnmoneysafe.com/script/4toiskatj.txt');
$urls = array_map('trim', $urls_raw);
function file_get_contents_curl($url) {
$ch = curl_init();
$config['useragent'] = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0';
curl_setopt($curl, CURLOPT_USERAGENT, $config['useragent']);
curl_setopt($ch, CURLOPT_AUTOREFERER, TRUE);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
foreach ($sites as $site){
$homepage = file_get_contents_curl($site);
foreach ($urls as $url){
$needle = $url;
if (strpos($homepage, $needle) !== false) {
echo 'true';
}
}
}
?>
Use curl_multi_exec() to fetch all the URLs in parallel.
$urls = file('https://earnmoneysafe.com/script/4toiskatj.txt', FILE_IGNORE_NEW_LINES);
$sites = file('https://earnmoneysafe.com/script/sites.txt', FILE_IGNORE_NEW_LINES);
foreach ($sites as $site) {
$curl_handles[$site] = get_curl($site);
}
$mh = curl_multi_init();
foreach ($curl_handles as $ch) {
curl_multi_add_handle($mh, $ch);
}
do {
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
foreach ($curl_handles as $site => $ch) {
$homepage = curl_multi_getcontent($ch);
foreach ($urls as $needle) {
if (strpos($homepage, $needle) !== false) {
echo 'true';
}
}
curl_multi_remove_handle($mh, $ch);
}
curl_multi_close($mh);
function get_curl($url) {
$ch = curl_init();
$config['useragent'] = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0';
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); // edited
curl_setopt($ch, CURLOPT_AUTOREFERER, TRUE);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
return $ch;
}
I think this, This code is cleaner
<?php
const SITES_URL = 'https://earnmoneysafe.com/script/sites.txt';
const URLS_URL = 'https://earnmoneysafe.com/script/4toiskatj.txt';
function readFileLines($url) {
$file_contents = file_get_contents($url);
$lines = explode("\n", $file_contents);
$filtered_lines = array_filter($lines, function($line) {
return !empty(trim($line));
});
return $filtered_lines;
}
function checkSiteUrls($site, $urls) {
$homepage = file_get_contents($site);
foreach ($urls as $url) {
if (strpos($homepage, $url) !== false) {
echo 'true';
}
}
}
$sites = readFileLines(SITES_URL);
$urls = readFileLines(URLS_URL);
foreach ($sites as $site) {
checkSiteUrls($site, $urls);
}
?>
Related
i have the following code which getting some links from remote.
Now the code while processesing and found the hash [if($done==true)] keep sending request to the host even if the hash have been found in the link.
i need the code stop sending requests to the host that we found its hash, and keep working on the other hosts, so it will be exclude all founded hosts from the the second foreach [foreach (file("hosts.txt")] and not send more request to them.
please if any one can help to fix my code .
sorry about bad english.
$MAXPROCESS=50;
$execute=0;
$Arr = array();
foreach (file("hashes.txt") as $hashkey => $hash)
{
$hash = trim($hash);
$pid = pcntl_fork();
$execute++;
if ($execute >= $MAXPROCESS)
{
while (pcntl_waitpid(0, $status) != -1)
{
$status = pcntl_wexitstatus($status);
$execute =0;
//sleep(1);
flush();
//echo " [$ipkey] Child $status completed\n";
}
}
if (!$pid)
{
foreach (file("hosts.txt") as $hostkey => $hosts)
{
$host = trim($hosts);
if(!in_array($host,$Arr))
{
$done = CHECK_URL($host.$hash.'.xml');
if($done==true)
{
$Arr[] = $host;
echo "\n\r\n\r".$host." : Found [$hashkey] ------------\n\r\n\r";
}else{
echo $host." : error [$hashkey]\n";
}
}else{
return false;
}
flush();
}
exit;
}
}
function CHECK_URL($url)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1" );
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 5 );
curl_setopt( $ch, CURLOPT_TIMEOUT, 5 );
$result = curl_exec($ch);
$response = curl_getinfo($ch);
//if($response['http_code']=="200" || preg_match('/xml/i', $result))
if($response['http_code']=="200")
{
file_put_contents('hash.found.txt', $url."\n", FILE_APPEND);
return true;
}
curl_close($ch); // Close the curl stream
}
.............................
How can I get all url-s from https://api.tenor.com/v1/trending?key=LIVDSRZULELA&limit=8 (results->media->nanomp4->url)
if($json = cURLGetContents("https://api.tenor.com/v1/trending?key=LIVDSRZULELA&limit=8")) {
$obj = json_decode($json);
echo $obj->results->{"media"}->{"nanomp4"}->{"url"};
}
Function cURLGetContents($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
On a circulate basis:
$json = cURLGetContents("https://api.tenor.com/v1/trending?key=LIVDSRZULELA&limit=8");
$urlList = getAllUrls((array) json_decode($json, true));
function getAllUrls($input, $list = []) {
foreach ($input as $key => $data) {
if (is_array($data)) {
$list = getAllUrls($data, $list);
} elseif ($key === 'url') {
$list[] = $data;
}
}
return $list;
}
$endereco = "https://api.tenor.com/v1/search?key=MBDPHCT6LA4H&q=sexo&limit=2";
$GrabURL = cURLGetContents($endereco);
$searchResponse = json_decode($GrabURL, true);
foreach ($searchResponse["results"] as $searchResult) {
print_r($searchResult["media"][0]["tinygif"]["url"]);
}
function cURLGetContents($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
I am new to OOP. I am just learning and I have to use it to find the actual/final URL of a link that redirects.
Class ABC {
public function getWebPage($url, $redirectcallback = null){
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_NOBODY, false);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1) Gecko/20061024 BonEcho/2.0");
$html = curl_exec($ch);
$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($http_code == 301 || $http_code == 302) {
list($httpheader) = explode("\r\n\r\n", $html, 2);
$matches = array();
preg_match('/(Location:|URI:)(.*?)\n/', $httpheader, $matches);
$nurl = trim(array_pop($matches));
$url_parsed = parse_url($nurl);
if (isset($url_parsed)) {
if($redirectcallback){ // callback
$redirectcallback($nurl, $url);
}
$html = $this->getWebPage($nurl, $redirectcallback);
}
}
return $html;
}
}
The above function inside class call the same function again and again to find the actual url. However I am already calling the class in some other file
$obj = new ABC;
$url = "http://www.anrdoezrs.net/asd/?ak=123";
$someurl = $obj->getWebPage($url);
But this does not work. Please suggest.
Class ABC {
public function xyz($url){
$html = $this->xyz($url);
return $html;
}
}
In another class call it with
$obj = new ABC;
$url = "Some value";
$someurl = $obj->xyz($url);
There has to be some form of redirect that is happening through java.
If you load this webpage https://btc-e.com/index.php
you will not actually get the webpage if you use curl. you get just a bunch of java. How do i go about getting to the actual HTML so i can start a login process.
I know this website provides an API, but i need a CURL login method, that uses the website and not the API.
here is all the code which i am using
<?php
$curl = new Curl();
$curl->setSsl();
$curl->setCookieFile('whatever_cookie_file.cook');
$page = $curl->get("https://btc-e.com/index.php");
echo $page;
class Curl {
public $curl;
public $manual_follow;
public $redirect_url;
public $cookiefile = null;
public $headers = array();
function Curl($proxy=false) {
$this->curl = curl_init();
$this->headers[] = "Accept: */*;q=0.5, text/javascript, application/javascript, application/ecmascript, application/x-ecmascript";
$this->headers[] = "Cache-Control: max-age=0";
$this->headers[] = "Connection: keep-alive";
$this->headers[] = "Keep-Alive: 300";
$this->headers[] = "Accept-Charset: utf-8;ISO-8859-1;iso-8859-2;q=0.7,*;q=0.7";
$this->headers[] = "Accept-Language: en-us,en;q=0.5";
$this->headers[] = "Pragma: "; // browsers keep this blank.
curl_setopt($this->curl, CURLOPT_USERAGENT, 'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.14) Gecko/2009082707 Firefox/3.0.14 (.NET CLR 3.5.30729)');
curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
curl_setopt($this->curl, CURLOPT_VERBOSE, false);
curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($this->curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($this->curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($this->curl, CURLOPT_AUTOREFERER, true);
if($proxy != false){
curl_setopt($this->curl, CURLOPT_PROXY,$proxy);
}// end if proxy != false
if (ini_get('open_basedir') == '' && ini_get('safe_mode' == 'Off')){
curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
} else {
$this->manual_follow = true;
}
curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($this->curl, CURLOPT_HEADER, false);
curl_setopt($this->curl, CURLOPT_TIMEOUT, 30);
$this->setRedirect();
}
function addHeader($header){
$this->headers[] = $header;
curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
}
function header($val){
curl_setopt($this->curl, CURLOPT_HEADER, $val);
}
function noAjax(){
foreach($this->headers as $key => $val){
if ($val == "X-Requested-With: XMLHttpRequest"){
unset($this->headers[$key]);
}
}
curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
}
function setAjax(){
$this->headers[] = "X-Requested-With: XMLHttpRequest";
curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
}
function setSsl($username = null, $password = null){
curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($this->curl, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($this->curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
if ($username && $password){
curl_setopt($this->curl, CURLOPT_USERPWD, "$username:$password");
}
}
function setBasicAuth($username,$password){
curl_setopt($this->curl, CURLOPT_HEADER, false);
curl_setopt($this->curl, CURLOPT_USERPWD, "$username:$password");
}
function setCookieFile($file){
if (file_exists($file)) {
} else {
$handle = fopen($file, 'w+') or print('The cookie file could not be opened. Make sure this directory has the correct permissions');
fclose($handle);
}
curl_setopt($this->curl, CURLOPT_COOKIESESSION, true);
curl_setopt($this->curl, CURLOPT_COOKIEJAR, $file);
curl_setopt($this->curl, CURLOPT_COOKIEFILE, $file);
$this->cookiefile = $file;
}
function getCookies(){
$contents = file_get_contents($this->cookiefile);
$cookies = array();
if ($contents){
$lines = explode("\n",$contents);
if (count($lines)){
foreach($lines as $key=>$val){
$tmp = explode("\t",$val);
if (count($tmp)>3){
$tmp[count($tmp)-1] = str_replace("\n","",$tmp[count($tmp)-1]);
$tmp[count($tmp)-1] = str_replace("\r","",$tmp[count($tmp)-1]);
$cookies[$tmp[count($tmp)-2]]=$tmp[count($tmp)-1];
}
}
}
}
return $cookies;
}
function setDataMode($val){
curl_setopt($this->curl, CURLOPT_BINARYTRANSFER, $val);
}
function close() {
curl_close($this->curl);
}
function getInfo(){
return curl_getinfo($this->curl);
}
function getInstance() {
static $instance;
if (!isset($instance)) {
$curl = new Curl;
$instance = array($curl);
}
return $instance[0];
}
function setTimeout($connect, $transfer) {
curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, $connect);
curl_setopt($this->curl, CURLOPT_TIMEOUT, $transfer);
}
function getError() {
return curl_errno($this->curl) ? curl_error($this->curl) : false;
}
function disableRedirect() {
$this->setRedirect(false);
}
function setRedirect($enable = true) {
if ($enable) {
$this->manual_follow = !curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
} else {
curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, false);
$this->manual_follow = false;
}
}
function getHttpCode() {
return curl_getinfo($this->curl, CURLINFO_HTTP_CODE);
}
function makeQuery($data) {
if (is_array($data)) {
$fields = array();
foreach ($data as $key => $value) {
$fields[] = $key . '=' . urlencode($value);
}
$fields = implode('&', $fields);
} else {
$fields = $data;
}
return $fields;
}
// FOLLOWLOCATION manually if we need to
function maybeFollow($page) {
if (strpos($page, "\r\n\r\n") !== false) {
list($headers, $page) = explode("\r\n\r\n", $page, 2);
}
$code = $this->getHttpCode();
if ($code > 300 && $code < 310) {
$info = $this->getInfo();
preg_match("#Location: ?(.*)#i", $headers, $match);
$this->redirect_url = trim($match[1]);
if (substr_count($this->redirect_url,"http://") == 0 && isset($info['url']) && substr_count($info['url'],"http://")){
$url_parts = parse_url($info['url']);
if (isset($url_parts['host']) && $url_parts['host']){
$this->redirect_url = "http://".$url_parts['host'].$this->redirect_url;
}
}
if ($this->manual_follow) {
return $this->get($this->redirect_url);
}
} else {
$this->redirect_url = '';
}
return $page;
}
function plainPost($url,$data){
curl_setopt($this->curl, CURLOPT_URL, $url);
curl_setopt($this->curl, CURLOPT_POST, true);
curl_setopt($this->curl, CURLOPT_POSTFIELDS, $data);
$page = curl_exec($this->curl);
$error = curl_errno($this->curl);
if ($error != CURLE_OK || empty($page)) {
return false;
}
curl_setopt($this->curl, CURLOPT_POST, false);
curl_setopt($this->curl, CURLOPT_POSTFIELDS, '');
return $this->maybeFollow($page);
}
function post($url, $data) {
$fields = $this->makeQuery($data);
//var_dump($fields);
curl_setopt($this->curl, CURLOPT_URL, $url);
curl_setopt($this->curl, CURLOPT_POST, true);
curl_setopt($this->curl, CURLOPT_POSTFIELDS, $fields);
$page = curl_exec($this->curl);
$error = curl_errno($this->curl);
if ($error != CURLE_OK || empty($page)) {
return false;
}
curl_setopt($this->curl, CURLOPT_POST, false);
curl_setopt($this->curl, CURLOPT_POSTFIELDS, '');
return $this->maybeFollow($page);
}
function get($url, $data = null) {
curl_setopt($this->curl, CURLOPT_FRESH_CONNECT, false);
if (!is_null($data)) {
$fields = $this->makeQuery($data);
$url .= '?' . $fields;
}
curl_setopt($this->curl, CURLOPT_URL, $url);
$page = curl_exec($this->curl);
$error = curl_errno($this->curl);
if ($error != CURLE_OK || empty($page)) {
return false;
}
return $this->maybeFollow($page);
}
}
?>
The answer to this question was not specifically with curl.
Due to very simple cookie verification of the website this is why i was unable to load the initial webpage.
To solve this problem simply parse out all needed values from the initially loaded webpage.
Once you have all needed values just simply write a cookie with everything included. After The cookie is passed to server you are now allowed to see the content of the webpage.
To solve this problem further and to do more advanced java script manipulation a system such as phantomjs with casperjs and or using a solution such as Selenium with PHP_unit headless mode.
Hope this helps anyone who faced the same problem
I have a data submission system that post values via curl to a url like this:
$URL="http://some_url/";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL,$URL);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt ($ch, CURLOPT_HTTPHEADER, array ('Content-Type:application/x-www-form-urlencoded;application/xml'));
curl_setopt($ch, CURLOPT_POSTFIELDS, "First Name=".$firstname."&Last Name=".$lastname."&Daytime Phone=".$dayphone."&Evening Phone=".$evephone."&DeliveryCode=".$deliveryCode."&username=".$username."&password=".$password);
echo curl_exec ($ch);
curl_close ($ch);
When i inspect the network console, it shows that a GET request was performed instead of POST as desired. What can be the issue.
I don't know why your code does not work, but I give you a code that does work; It may not be the solution, but it will help you to understand cUrl;
Adapt the code as you need it;
<?php
class CurlTool {
public static $userAgents = array(
'FireFox3' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; pl; rv:1.9) Gecko/2008052906 Firefox/3.0',
'GoogleBot' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'IE7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
'Netscape' => 'Mozilla/4.8 [en] (Windows NT 6.0; U)',
'Opera' => 'Opera/9.25 (Windows NT 6.0; U; en)'
);
public static $options = array(
CURLOPT_USERAGENT => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
CURLOPT_AUTOREFERER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_FRESH_CONNECT => true,
CURLOPT_COOKIEJAR => "cookies.txt",
CURLOPT_COOKIEFILE => "cookies.txt",
CURLOPT_SSL_VERIFYPEER => false,
//CURLOPT_COOKIESESSION => false,
);
private static $proxyServers = array();
private static $proxyCount = 0;
private static $currentProxyIndex = 0;
public static $getinfo;
public static function addProxyServer($url) {
self::$proxyServers[] = $url;
++self::$proxyCount;
}
public function curl_redirect_exec($ch, &$redirects, $curlopt_header = false) {
/*
curl_setopt($ch, CURLOPT_USERAGENT ,'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)');
curl_setopt($ch, CURLOPT_AUTOREFERER ,true);
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookies.txt");
curl_setopt($ch, CURLOPT_COOKIEFILE,"cookies.txt");
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER ,false);
*/
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
echo $redirects." ";
if ($redirects>0) {
echo "rr: ".$url;
exit;
}
$data = curl_exec($ch);
$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($http_code == 301 || $http_code == 302) {
list($header) = explode("\r\n\r\n", $data, 2);
$matches = array();
//this part has been changes from the original
preg_match("/(Location:|URI:)[^(\n)]*/", $header, $matches);
$url = trim(str_replace($matches[1],"",$matches[0]));
//end changes
$url_parsed = parse_url($url);
if (isset($url_parsed)) {
$ok=curl_setopt($ch, CURLOPT_URL, $url);
if (!$ok) {
echo "doesn't work";
exit;
}
print_r($url_parsed);
exit;
$redirects++;
if($redirects>10)die('live is hard');
return curl_redirect_exec($ch, $redirects,$curlopt_header);
}
}
if ($curlopt_header)
return $data;
else {
list(,$body) = explode("\r\n\r\n", $data, 2);
return $body;
}
}
public static function fetchContent($url, $fields = null, $verbose = false) {
//print '*'.$fields.'*';
//
if (($curl = curl_init($url)) == false) {
throw new Exception("curl_init error for url $url.");
}
if (self::$proxyCount > 0) {
$proxy = self::$proxyServers[self::$currentProxyIndex++ % self::$proxyCount];
curl_setopt($curl, CURLOPT_PROXY, $proxy);
if ($verbose === true) {
echo "Reading $url [Proxy: $proxy] ... ";
}
} else if ($verbose === true) {
echo "Reading $url ... ";
}
//$verbose=TRUE;
//print_r($fields);
// debug_print_backtrace();
//url-ify the data for the POST
$fields_string = '';
if (is_array($fields))
foreach ($fields as $key => $value) {
if (empty($key))
continue;
$fields_string .= $key . '=' . urlencode($value) . '&';
if ($verbose === true) {
echo $key . ": " . $value;
}
}
rtrim($fields_string, '&');
if (count($fields) > 0) {
curl_setopt($curl, CURLOPT_POST, count($fields));
curl_setopt($curl, CURLOPT_POSTFIELDS, $fields_string);
}
if ($verbose === true) {
echo "Fields string $fields_string ... ";
}
//print_r(self::$options);
//echo phpinfo();
curl_setopt_array($curl, self::$options);
//curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
/*foreach (self::$options as $option => $value) {
if (!curl_setopt($curl, $option, $value)) {
//die('cannot set'.$option.':'.$value);
//return false;
}
}
*/
$html = curl_exec($curl);
$redirects = 0 ;
//echo $html;
//exit;
//$html = self::curl_redirect_exec($curl, &$redirects);
//die(2);
self::$getinfo = curl_getinfo($curl);
if ($html === false) {
throw new Exception("curl_exec error for url $url " . curl_error($curl));
}
curl_close($curl);
if ($verbose === true) {
echo "Done.\n";
}
$html = preg_replace('#\n+#', ' ', $html);
$html = preg_replace('#\s+#', ' ', $html);
return $html;
}
public static function downloadFile($url, $fileName, $fields = null, $verbose = false) {
if (($curl = curl_init($url)) == false) {
throw new Exception("curl_init error for url $url.");
}
if (self::$proxyCount > 0) {
$proxy = self::$proxyServers[self::$currentProxyIndex++ % self::$proxyCount];
curl_setopt($curl, CURLOPT_PROXY, $proxy);
if ($verbose === true) {
echo "Downloading $url [Proxy: $proxy] ... ";
}
} else if ($verbose === true) {
echo "Downloading $url ... ";
}
//url-ify the data for the POST
$fields_string = '';
if (is_array($fields))
foreach ($fields as $key => $value) {
if (empty($key))
continue;
$fields_string .= $key . '=' . urlencode($value) . '&';
}
rtrim($fields_string, '&');
curl_setopt($curl, CURLOPT_POST, count($fields));
curl_setopt($curl, CURLOPT_POSTFIELDS, $fields_string);
curl_setopt_array($curl, self::$options);
if (is_file($fileName)) {
$contents = file_get_contents($fileName, false, null, -1, 3 * 1024);
$pattern = "__VIEWSTATE";
if (strpos($contents, $pattern) === false) {
return $fileName;
}
}
// if (is_file($fileName)) {
// // make a HEAD request and try to get the file size HEAD
// // if they differ then redownload the file, otherwise no need
// curl_setopt($curl, CURLOPT_NOBODY, true);
// curl_setopt($curl, CURLOPT_HEADER, true);
// $ret = curl_exec($curl);
// //echo $fileName;
// $size = curl_getinfo($curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD);
// if ($size == filesize($fileName)) {
// return $fileName;
// } else {
// unlink($fileName);
// return self::downloadFile($url, $fileName, $fields, $verbose);
// }
// }
//echo getcwd();
if (substr($fileName, -1) == '/') {
$targetDir = $fileName;
$fileName = tempnam(sys_get_temp_dir(), 'c_');
}
if (($fp = fopen('../files/'.$fileName, "w")) === false) {
throw new Exception("fopen error for filename $fileName");
}
curl_setopt($curl, CURLOPT_FILE, $fp);
curl_setopt($curl, CURLOPT_BINARYTRANSFER, true);
$ret = curl_exec($curl);
self::$getinfo = curl_getinfo($curl);
if ($ret === false) {
fclose($fp);
unlink('../files/'.$fileName);
echo curl_error($curl).'<br/>';
echo curl_errno($curl).'<br/>';
if(curl_errno($curl)!=6)
{
throw new Exception("curl_exec error for url $url.");
}
} elseif (isset($targetDir)) {
$eurl = curl_getinfo($curl, CURLINFO_EFFECTIVE_URL);
preg_match('#^.*/(.+)$#', $eurl, $match);
fclose($fp);
rename($fileName, "$targetDir{$match[1]}");
$fileName = "$targetDir{$match[1]}";
} else {
fclose($fp);
}
curl_close($curl);
if ($verbose === true) {
echo "Done.\n";
}
return $fileName;
}
}