I am trying to download the source file below using cURL, but it only creates the destination file with 0 bytes; it never downloads the original file and writes the resource data. How can I solve this problem? I tried the example from this link: http://www.w3bees.com/2013/09/download-file-from-remote-server-with.html, but it gives the same result.
Can anybody help? My code is:
if (isset($_POST[$data_file_name_url])) {
    $source = 'http://mr-greens-class.wikispaces.com/file/messages/Calender.ics';
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $source);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_SSLVERSION, 3);
    $data = curl_exec($ch);
    $error = curl_error($ch);
    curl_close($ch);
    $destination = $dir . '/test.ics';
    $file = fopen($destination, "w+");
    fputs($file, $data);
    //file_put_contents($destination, $data);
    fclose($file);
}
You should remove CURLOPT_SSLVERSION if you are not using SSL (your URL starts with http://, so I assume you aren't):
$path = 'myfile.ics';
$final_url = 'http://mr-greens-class.wikispaces.com/file/messages/Calender.ics';
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $final_url);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
//curl_setopt($ch, CURLOPT_PORT, 80);
curl_setopt($ch, CURLOPT_HEADER, 0);
// sometimes needed to prevent problem by bot filtering
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.103 Safari/537.36');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$data = curl_exec($ch);
if ($data === false) { // strict check: an empty but successful response is also falsy
    $errno = curl_errno($ch);
    $error_message = curl_error($ch);
    //error_log("cURL error ({$errno}):\n<br/> {$error_message}");
}
curl_close($ch);
file_put_contents($path, $data);
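If the destination file is still 0 bytes after this, the transfer itself is failing rather than the file write. A minimal diagnostic sketch (reusing the $ch and $path from above; the status handling is an illustration, not part of the original answer):

$data = curl_exec($ch);
$errno = curl_errno($ch);
$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
if ($data === false || $errno !== 0) {
    echo "cURL error ({$errno})";
} elseif ($http_code < 200 || $http_code >= 300) {
    echo "Server returned HTTP {$http_code}; not writing the file";
} else {
    file_put_contents($path, $data); // only write when the transfer actually succeeded
}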
Interestingly, I have not been able to find a working example of this. Using PHP, I'm trying to scrape and re-display all the images from a given URL onto another website. I know how to do this with text, but I'm not sure about images. Does anyone know a good working example? I understand how to grab the whole page's contents, but not specifically just the images. For example, this grabs the whole page:
<?php
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, "https://en.wikipedia.org/wiki/Wikipedia:Picture_of_the_day");
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
$result = curl_exec($curl);
curl_close($curl);
echo $result;
?>
Thanks much. -Wilson
*Ideally this would actually just grab the first image, such as in the example above, but I won't get ahead of myself; I'm just trying to get this function down.
You can save the result directly to a file by handing cURL a file handle:
$fp = fopen($filename, 'a+');
$begin_size = 0; // resuming offset; 0 when starting a fresh download
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:29.0) Gecko/20100101 Firefox/29.0');
curl_setopt($ch, CURLOPT_ENCODING, 'gzip, deflate');
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($ch, CURLOPT_FILE, $fp); // stream the response body straight into $fp
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($dltotal, $dlnow, $ultotal, $ulnow) {
    // optional: report download progress here
});
// abort if the transfer averages below 1 byte/s for 8 seconds
curl_setopt($ch, CURLOPT_LOW_SPEED_LIMIT, 1);
curl_setopt($ch, CURLOPT_LOW_SPEED_TIME, 8);
curl_exec($ch);
$error = curl_error($ch);
$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
$end_size = $begin_size + curl_getinfo($ch, CURLINFO_SIZE_DOWNLOAD);
error_log('end_size=' . $end_size); // Log::info() in the original assumed a framework logger
curl_close($ch);
fclose($fp);
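Note that handing cURL a file handle via CURLOPT_FILE streams the response straight to disk instead of buffering the whole body in memory, which matters for large files; the CURLOPT_LOW_SPEED_* pair aborts the transfer if it averages below 1 byte/s for 8 seconds, so a stalled download fails instead of hanging forever.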
You can use PHP's DOMXPath class to parse your scraped result. Add the following script right after your code:
$dom = new DOMDocument();
@$dom->loadHTML($result); // @ silences warnings about malformed HTML
$xpath = new DOMXPath($dom);
// get the src attribute of every image
$images = $xpath->query('//img/@src');
$img = array();
foreach ($images as $image) {
    $img[] = $image->nodeValue;
}
print_r($img);
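Since you said you ideally just want the first image: a hedged sketch building on the $img array above. Wikipedia serves protocol-relative URLs (//upload.wikimedia.org/...), so a scheme is prepended first; the output filename is an arbitrary choice for illustration.

// Sketch: fetch only the first scraped image URL.
if (!empty($img)) {
    $src = $img[0];
    if (strpos($src, '//') === 0) {
        $src = 'https:' . $src; // protocol-relative URL: prepend a scheme
    }
    $curl = curl_init($src);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    $imageData = curl_exec($curl);
    curl_close($curl);
    if ($imageData !== false) {
        file_put_contents('first_image.jpg', $imageData);
    }
}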
** Edit
Try changing your cURL code to this:
$url = 'https://en.wikipedia.org/wiki/Wikipedia:Picture_of_the_day';
$curl = curl_init($url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10');
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 0); // for https
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, 0); // for https
//curl_setopt($curl, CURLOPT_ENCODING , 'gzip');
$html = curl_exec($curl);
if (curl_error($curl)) {
    echo 'Curl error: ' . curl_error($curl);
    $result = ''; // return empty on error
} else {
    $result = $html;
}
curl_close($curl); // close the handle once we are done
The images save with a size of 0 KB.
The code works for other sites, but not for Facebook.
function imagedownload($url, $saveto){
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $args);
    $result = parse_url($url);
    curl_setopt($ch, CURLOPT_REFERER, $result['scheme'].'://'.$result['host']);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0');
    $raw = curl_exec($ch);
    curl_close($ch);
    if (file_exists($saveto)) {
        unlink($saveto);
    }
    $fp = fopen($saveto, 'x');
    fwrite($fp, $raw);
    fclose($fp);
}
$url="http://scontent-frt3-1.xx.fbcdn.net/v/t1.0-0/cp0/e15/q65/p320x320/13700038_850487491753797_6227258625184891703_n.jpg?oh=793ecde8db1a8e65789534907d08b25e&oe=57F1DDFF";
$konum="images/"
$yolla=imagedownload($url,$konum);
It works if you remove the POST options and do a regular GET request:
<?php
function imagedownload($url, $saveto)
{
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$result = parse_url($url);
curl_setopt($ch, CURLOPT_REFERER, $result['scheme'] . '://' . $result['host']);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0');
$raw = curl_exec($ch);
curl_close($ch);
if (file_exists($saveto)) {
unlink($saveto);
}
$fp = fopen($saveto, 'x');
fwrite($fp, $raw);
fclose($fp);
}
$url = "http://scontent-frt3-1.xx.fbcdn.net/v/t1.0-0/cp0/e15/q65/p320x320/13700038_850487491753797_6227258625184891703_n.jpg?oh=793ecde8db1a8e65789534907d08b25e&oe=57F1DDFF";
$konum = "test.jpg";
$yolla = imagedownload($url, $konum);
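To confirm the download really produced an image and not an HTML error page, you could check the saved file afterwards; a small sketch using getimagesize() (this verification is my addition, not part of the original answer):

clearstatcache(); // make sure filesize() sees the freshly written file
if (filesize($konum) === 0) {
    echo "Download failed: the file is empty\n";
} elseif (getimagesize($konum) === false) {
    echo "Downloaded file is not a valid image\n";
} else {
    echo "Image saved successfully\n";
}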
I tried this code on my localhost and it works well.
<?php
ini_set('display_errors', 1);
function imagedownload($url, $saveto){
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
    $raw = curl_exec($ch);
    curl_close($ch);
    if (file_exists($saveto)) {
        unlink($saveto);
    }
    $fp = fopen($saveto, 'x');
    fwrite($fp, $raw);
    fclose($fp);
}
$url = "http://scontent-frt3-1.xx.fbcdn.net/v/t1.0-0/cp0/e15/q65/p320x320/13700038_850487491753797_6227258625184891703_n.jpg?oh=793ecde8db1a8e65789534907d08b25e&oe=57F1DDFF";
$konum = "/var/www/html/jsPDF-master/examples/test.jpg";
$yolla = imagedownload($url, $konum);
?>
And the result: the image saved correctly (screenshot omitted).
And one note: make sure the directory you save the image to has write permission.
Example: chmod -R 777 /var/www/html/jsPDF-master/examples
Or, if you fetch the URL with file_get_contents() instead of cURL, ensure that allow_url_fopen is enabled in php.ini.
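If in doubt, you can test the permission from PHP before attempting the download; a tiny sketch (assuming the $konum path from the code above):

$dir = dirname($konum);
if (!is_writable($dir)) {
    die("Cannot write to $dir - fix the directory permissions first");
}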
cURL stopped working on my server, but it works perfectly on another server.
Here is my script:
<?php
$proxy_ip = '165.139.149.169';
$proxy_port = '3128';
$url = 'https://www.google.com/search?q=skinny+fiber+ingredients&btnG=Search&client=ubuntu&channel=fs&num=100&ie=utf-8&oe=utf-8&gfe_rd=cr&ei=sw9CVbCuPKaA8QfX0ICYBA&gws_rd=cr';
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, 'https://www.google.com');
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0');
curl_setopt($ch, CURLOPT_MAXREDIRS, 5); // Good leeway for redirections.
curl_setopt($ch, CURLOPT_PROXYPORT, $proxy_port);
curl_setopt($ch, CURLOPT_PROXYTYPE, 'HTTP');
curl_setopt($ch, CURLOPT_PROXY, $proxy_ip);
//curl_setopt($ch, CURLOPT_PROXYUSERPWD, $loginpassw);
$data = curl_exec($ch);
curl_close($ch);
echo "abc";
echo $data;
?>
http://serpbull.com/dev/curl.php (script not working here)
http://imarkdev.com/serp/curl.php (script working here)
Edit:
The non-working script takes a very long time to respond (cURL timeout) and prints an empty response.
Use the curl_error() function to get more information about the failure:
$data = curl_exec($ch);
if ($data === false) {
    echo 'Curl error: ' . curl_error($ch);
} else {
    echo 'Response: ' . $data;
}
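Beyond curl_error(), bounding the wait and inspecting curl_getinfo() can show where the request stalls (connecting to the proxy vs. transferring data); a sketch of extra options and diagnostics, with illustrative timeout values:

curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10); // give up quickly if the proxy is unreachable
curl_setopt($ch, CURLOPT_TIMEOUT, 30);        // cap the total transfer time
$data = curl_exec($ch);
$info = curl_getinfo($ch);
echo 'HTTP code: ' . $info['http_code'] . "\n";
echo 'Connect time: ' . $info['connect_time'] . "s\n";
echo 'Total time: ' . $info['total_time'] . "s\n";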
I am trying to download a file behind a login using cURL and PHP. I am able to connect to the server and log in properly using CURLOPT_POST, and my file downloads. However, the contents are corrupted and cannot be opened. On a side note, the file downloads onto the server; how would I use cURL to deliver the file to a user's local machine if this tool were ever on a public web server?
// POST data from JSTOR Tool
$username = $_POST['username'];
$password = $_POST['password'];
$start = $_POST['start'];
$end = $_POST['end'];
$loginUrl="https://www.jstor.org/action/doLogin";
print "Hello $username, your articles are downloading.";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'https://www.jstor.org/action/doLogin');
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/32.0.1700.107 Chrome/32.0.1700.107 Safari/537.36');
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, "login=".$username."&password=".$password);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie-name');
curl_setopt($ch, CURLOPT_COOKIEFILE, '/var/www/ip4.x/file/tmp');
$answer = curl_exec($ch);
if (curl_error($ch)) {
    echo curl_error($ch);
}
//another request to download the article, but preserves the session
curl_setopt($ch, CURLOPT_URL, 'http://www.jstor.org/stable/pdf/1.pdf');
curl_setopt($ch, CURLOPT_POST, false);
curl_setopt($ch, CURLOPT_POSTFIELDS, "");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSLVERSION,3);
$data = curl_exec ($ch);
print_r($data);
$error = curl_error($ch);
curl_close ($ch);
$destination = "./files/test.pdf";
$file = fopen($destination, "w");
fputs($file, $data);
fclose($file);
print " Did it work?";
Thanks for the help!
I figured it out. I just needed curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); to keep redirects from throwing me off.
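For the side question (sending the fetched PDF to the visitor's browser instead of saving it on the server), one common approach is to emit download headers and echo the body; a sketch assuming $data holds the PDF bytes and no other output has been sent yet (the filename is illustrative):

header('Content-Type: application/pdf');
header('Content-Disposition: attachment; filename="article.pdf"');
header('Content-Length: ' . strlen($data));
echo $data;
exit;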
I am writing a script to download files from a password-protected members area. I have it working right now with a single script that uses cURL to log in and then download. The change I am trying to make is to have one script log in and save the cookie, and then a separate script use that cookie to download the file. I am not sure if this is possible.
Here is my working code:
$cookie_file_path = "downloads/cookie.txt";
$fp = fopen($cookie_file_path, "w");
fclose($fp);
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_NOBODY, false);
curl_setopt($ch, CURLOPT_URL, $loginUrl);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path);
curl_setopt($ch, CURLOPT_USERAGENT,
"Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $loginPostInfo);
curl_exec($ch);
// hardcode some known data
$downloadSize = 244626770;
$chunkSize = 1024 * 2048;
$filePath = "downloads/file.avi";
$file = fopen($filePath, "w");
$downloaded = 0;
$startTime = microtime(true);
while ($downloaded < $downloadSize) {
    // download the next chunk via an HTTP Range request
    curl_setopt($ch, CURLOPT_RANGE, $downloaded . "-" . ($downloaded + $chunkSize - 1));
    curl_setopt($ch, CURLOPT_URL, $downloadUrl);
    $result = curl_exec($ch);
    $nowTime = microtime(true);
    fwrite($file, $result);
    echo "\n\nprogress: " . $downloaded . "/" . $downloadSize . " - %" . (round($downloaded / $downloadSize, 4) * 100);
    $downloaded += $chunkSize;
    // calculate kbps
    $totalTime = $nowTime - $startTime;
    $kbps = $downloaded / $totalTime;
    echo "\ndownloaded: " . $downloaded . " bytes";
    echo "\ntime: " . round($totalTime, 2);
    echo "\nkbps: " . round($kbps / 1024, 2);
}
fclose($file);
curl_close($ch);
So, is it possible to close cURL after the login curl_exec() and then open a new cURL call later that downloads the file using the cookie I saved during login?
Yes, it's possible.
CURLOPT_COOKIEJAR is the write path for cookies, while CURLOPT_COOKIEFILE is the read path for cookies. If you provide CURLOPT_COOKIEFILE with the same path as you did with CURLOPT_COOKIEJAR, cURL will persist the cookies across requests:
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file_path);
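Applied to your case, the login script and the download script just need to point at the same cookie file; a minimal sketch under that assumption, reusing $loginUrl, $loginPostInfo and $downloadUrl from your code (the script names are made up):

// login.php - authenticate and persist the session cookie
$cookie_file_path = __DIR__ . '/downloads/cookie.txt';
$ch = curl_init($loginUrl);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $loginPostInfo);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path); // cookies are written here
curl_exec($ch);
curl_close($ch); // the jar is flushed to disk when the handle closes

// download.php - a separate run reuses the saved cookie
$cookie_file_path = __DIR__ . '/downloads/cookie.txt';
$file = fopen('downloads/file.avi', 'w');
$ch = curl_init($downloadUrl);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file_path); // cookies are read from here
curl_setopt($ch, CURLOPT_FILE, $file); // stream the download straight to disk
curl_exec($ch);
curl_close($ch);
fclose($file);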
On ImpressPages I've done it this way:
//initial request with login data
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://www.example.com/login.php');
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/32.0.1700.107 Chrome/32.0.1700.107 Safari/537.36');
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, "username=XXXXX&password=XXXXX");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie-name'); // could be empty, but leaving it out causes problems on some hosts
curl_setopt($ch, CURLOPT_COOKIEFILE, '/var/www/ip4.x/file/tmp'); // could be empty, but leaving it out causes problems on some hosts
$answer = curl_exec($ch);
if (curl_error($ch)) {
    echo curl_error($ch);
}
//another request preserving the session
curl_setopt($ch, CURLOPT_URL, 'http://www.example.com/profile');
curl_setopt($ch, CURLOPT_POST, false);
curl_setopt($ch, CURLOPT_POSTFIELDS, "");
$answer = curl_exec($ch);
if (curl_error($ch)) {
    echo curl_error($ch);
}
Yes, please look at CURLOPT_COOKIEJAR and CURLOPT_COOKIEFILE. I see you already use CURLOPT_COOKIEJAR, so you should probably only dive into *_COOKIEFILE.