Copy images from url to server, delete all images after - php

I have an affiliate link with a product image
http://www.myaffiliate.com/products/product.jpg
I want to copy the imagine to my website, in a folder called lets say products. It's hosted on shared sherver, not my pc. I think the php copy() does not work, well tried it and I think my hosts doen't support the method. How can I do it, curl maybe? Then, I need a php command to delete all the images from the products folder - I'll do this with a cronjob.
<?php
$table = 'cron';
$feed = 'myfeed';
$xml = simplexml_load_file($feed);
mysql_query("TRUNCATE TABLE ".$table."", $dbh1);
mysql_query("TRUNCATE TABLE ".$table."", $dbh2);
function getimg($url) {
$headers[] = 'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg';
$headers[] = 'Connection: Keep-Alive';
$headers[] = 'Content-type: application/x-www-form-urlencoded;charset=UTF-8';
$user_agent = 'php';
$process = curl_init($url);
curl_setopt($process, CURLOPT_HTTPHEADER, $headers);
curl_setopt($process, CURLOPT_HEADER, 0);
curl_setopt($process, CURLOPT_USERAGENT, $useragent);
curl_setopt($process, CURLOPT_TIMEOUT, 30);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
$return = curl_exec($process);
curl_close($process);
return $return;
}
foreach( $xml->performerinfo as $performerinfo )
{
$performerid = $performerinfo->performerid;
$category = $performerinfo->category;
$subcategory = $performerinfo->subcategory;
$bio = $performerinfo->bio;
$turnons = $performerinfo->turnons;
$willing = $performerinfo->willingness;
$willingness = str_replace(',', ', ', $willing);
$age = $performerinfo->age;
$build = $performerinfo->build;
$height = $performerinfo->height;
$weight = $performerinfo->weight;
$breastsize = $performerinfo->breastsize;
$haircolor = $performerinfo->haircolor;
$hairlength = $performerinfo->hairlength;
$eyecolor = $performerinfo->eyecolor;
$ethnicity = $performerinfo->ethnicity;
$sexpref = $performerinfo->sexpref;
$pic0 = $performerinfo->picture[0];
$pic1 = $performerinfo->picture[1];
$pic2 = $performerinfo->picture[2];
$pic3 = $performerinfo->picture[3];
$pic4 = $performerinfo->picture[4];
$test = $performerinfo->picture[0];
$imgurl = 'http://www.foodtest.ru/images/big_img/sausage_3.jpg';
$imagename= basename($imgurl);
if(file_exists('./tmp/'.$imagename)){continue;}
$image = getimg($imgurl);
file_put_contents('tmp/'.$imagename,$image);
}
//baracuda reloaded
mysql_query("TRUNCATE TABLE reloaded", $dbh1);
mysql_query("TRUNCATE TABLE reloaded", $dbh2);
mysql_query("INSERT INTO reloaded SELECT * FROM ".$table."", $dbh1);
mysql_query("INSERT INTO reloaded SELECT * FROM ".$table."", $dbh2);
?>

use file_get_contents or cURL in a loop
<?php
//loop with a list of images
loop{
$imgurl = 'url to image';
$imagename= basename($imgurl);
if(file_exists('./images_folder/'.$imagename)){continue;}
$image = file_get_contents($imgurl);
file_put_contents('images_folder/'.$imagename,$image);
}
?>
or cURL
<?php
//loop this
loop{
$imgurl = 'url to image';
$imagename= basename($imgurl);
if(file_exists('./images_folder/'.$imagename)){continue;}
$image = getimg($imgurl);
file_put_contents('images_folder/'.$imagename,$image);
}
?>
<?php
//cURL function (outside of loop) to get img
function getimg($url) {
$headers[] = 'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg';
$headers[] = 'Connection: Keep-Alive';
$headers[] = 'Content-type: application/x-www-form-urlencoded;charset=UTF-8';
$user_agent = 'php';
$process = curl_init($url);
curl_setopt($process, CURLOPT_HTTPHEADER, $headers);
curl_setopt($process, CURLOPT_HEADER, 0);
curl_setopt($process, CURLOPT_USERAGENT, $useragent);
curl_setopt($process, CURLOPT_TIMEOUT, 30);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
$return = curl_exec($process);
curl_close($process);
return $return;
}
?>
in a file on your server that you want deleted put this in it and call it at END of your loops once you got all the images
<?php
function rrmdir($dir) {
if (is_dir($dir)) {
$objects = scandir($dir);
foreach ($objects as $object) {
if ($object != "." && $object != "..") {
if (filetype($dir."/".$object) == "dir") rrmdir($dir."/".$object); else unlink($dir."/".$object);
}
}
reset($objects);
rmdir($dir);
mkdir($dir);
}
}
rrmrir('./products');
?>

Related

refrence a variable inside a instances

This is our class that is called
<?php
class Nest
{
public $debug;
private $username;
private $password;
private $cookieFile;
public function __construct($username, $password, $debug = false)
{
// Set the properties
$this->debug = $debug;
$this->username = $username;
$this->password = $password;
$this->useragent = 'Nest/1.1.0.10 CFNetwork/548.0.4';
$this->cookieFile = tempnam('/tmp', 'nest-cookie');
// Login
$response = $this->curlPost('https://home.nest.com/user/login', 'username=' . urlencode($username) . '&password=' . urlencode($password));
if (($json = json_decode($response)) === false)
throw new Exception('Unable to connect to Nest');
// Stash information needed to make subsequence requests
$this->access_token = $json->access_token;
$this->user_id = $json->userid;
$this->transport_url = $json->urls->transport_url;
}
public function house_state_set($state)
{
switch ($state)
{
case 'away':
$away = true;
break;
case 'home':
$away = false;
break;
default:
throw new Exception('Invalid state given: "' . $state . '"');
}
$status = $this->status_get();
$structure_id = $status->user->{$this->user_id}->structures[0];
$payload = json_encode(array('away_timestamp' => time(), 'away' => $away, 'away_setter' => 0));
return $this->curlPost($this->transport_url . '/v2/put/' . $structure_id, $payload);
}
public function house_state_get()
{
$status = $this->status_get();
$structure = $status->user->{$this->user_id}->structures[0];
list (,$structure_id) = explode('.', $structure);
return ($status->structure->{$structure_id}->away ? 'away' : 'home');
}
public function temperature_set(&$temp)
{
$status = $this->status_get();
$structure = $status->user->{$this->user_id}->structures[0];
list (,$structure_id) = explode('.', $structure);
$device = $status->structure->{$structure_id}->devices[0];
list (,$device_serial) = explode('.', $device);
$temperature_scale = $status->device->{$device_serial}->temperature_scale;
if ($temperature_scale == "F")
{
$target_temp_celsius = (($temp - 32) / 1.8);
}
else
{
$target_temp_celsius = $temp;
}
$payload = json_encode(array('target_change_pending' => true, 'target_temperature' => $target_temp_celsius));
return $this->curlPost($this->transport_url . '/v2/put/shared.' . $device_serial, $payload);
}
public function status_get()
{
$response = $this->curlGet($this->transport_url . '/v2/mobile/user.' . $this->user_id);
if (($json = json_decode($response)) === false)
throw new Exception('Unable to gather the status from Nest');
return $json;
}
private function curlGet($url, $referer = null, $headers = null)
{
$headers[] = 'Authorization: Basic ' . $this->access_token;
$headers[] = 'X-nl-user-id:' . $this->user_id;
$headers[] = 'X-nl-protocol-version: 1';
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookieFile);
curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookieFile);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $this->useragent);
if(!is_null($referer)) curl_setopt($ch, CURLOPT_REFERER, $referer);
if(!is_null($headers)) curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
// curl_setopt($ch, CURLOPT_VERBOSE, true);
$html = curl_exec($ch);
if(curl_errno($ch) != 0)
{
throw new Exception("Error during GET of '$url': " . curl_error($ch));
}
$this->lastURL = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
$this->lastStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
return $html;
}
private function curlPost($url, $post_vars = '', $referer = null)
{
if (isset($this->access_token)) $headers[] = 'Authorization: Basic ' . $this->access_token;
if (isset($this->user_id)) $headers[] = 'X-nl-user-id:' . $this->user_id;
$headers[] = 'X-nl-protocol-version: 1';
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookieFile);
curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookieFile);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $this->useragent);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_vars);
if(!is_null($headers)) curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
// curl_setopt($ch, CURLOPT_VERBOSE, true);
$html = curl_exec($ch);
$this->lastURL = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
$this->lastStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
return $html;
}
}
and is called with:
<?php
$new = $_GET['t'];
include_once('nest.php');
$nest = new Nest('usr', 'pwd');
$nest->temperature_set($new);
?>
If I replace $new with a number and then call the function it works but not in it current form. I have tried using the class to call outside variables, tried to just call but cant figure where and why its not working.
I suspect it is not working because your device is configured to use Celsius.
PHP is a dynamically typed language, as is JavaScript, but sometimes people forget that both will change types based on context (and sometimes the type conversion will have unexpected results).
All $_REQUEST variables are strings. Try changing:
$target_temp_celsius = $temp;
To
$target_temp_celsius = $temp + 0;
(JSON is strongly typed)

php curl not saving file to location

I have been searching for an answer for this all day, but with no luck!
I want to download/copy an image from the web to a location on my server, The code below doesn't seam to throw any errors other than the image is just not saving to the required or any directory.
As you can see I am using cURL to get the image and the variable $contents is returning true (1) so I am assuming the script works but I am actually missing something.
Many thanks in advance for your help. :-)
$dir = URL::base() . "/img/products/";
$imgSrc = "an image on the web";
$file = fopen($dir, "wb");
$headers[] = 'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg';
$headers[] = 'Connection: Keep-Alive';
$headers[] = 'Content-type: application/x-www-form-urlencoded;charset=UTF-8';
$user_agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)';
$ch = curl_init($imgSrc);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FILE, $file); // location to write to
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 60);
$contents = curl_exec($ch);
$curl_errno = curl_errno($ch);
$curl_error = curl_error($ch);
curl_close($ch);
fclose($lfile);
if ($curl_errno > 0)
{
Log::write("CURL", "cURL Error (".$curl_errno."): ".$curl_error);
}
else
{
Log::write("CURL", "Data received: " . $contents);
}
return;
Provide the file the writing access to PHP FILE using curl to store the contents. This can be done in three ways:
If you have the terminal access then use chmod to provide the writing access
If you have the CPanel access then use directory explorer then provide the writing access to the file by changing file properties.
You must have the access to FTP and change the file access attributes and provide the writing access.
Don't use curl.
If all you need to do is download an image, go for "file_get_contents" instead.
It's dead easy:
$fileContents = file_get_contents("https://www.google.com/images/srpr/logo4w.png");
File::put('where/to/store/the/image.jpg', $fileContents);
function saveImageToFile($image_url,$output_filename)
{
$ch = curl_init ($url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_BINARYTRANSFER,1);
$raw=curl_exec($ch);
curl_close ($ch);
if(file_exists($saveto))
{
unlink($saveto); //Saves over files
}
$fp = fopen($saveto,'x');
fwrite($fp, $raw);
fclose($fp);
}
Your problem is quite simple, and I have no idea how everyone else ignored it. It is Laravel-specific. Your $dir variable returns an HTTP resource identifier. What you need is a filesystem identifier.
For laravel, change your URL::to() to path("public") to tell Laravel to stop using HTTP URIs and instead take the local path to the public folder (/your/laravel/setup/path/public/).
code
$dir = path("public") . "img/products/";
$imgSrc = "an image on the web";
$file = fopen($dir . substr($imgSrc,strrpos("/",$imgSrc)+1), "wb");
$headers[] = 'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg';
$headers[] = 'Connection: Keep-Alive';
$headers[] = 'Content-type: application/x-www-form-urlencoded;charset=UTF-8';
$user_agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)';
$ch = curl_init($imgSrc);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FILE, $file); // location to write to
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 60);
$contents = curl_exec($ch);
$curl_errno = curl_errno($ch);
$curl_error = curl_error($ch);
curl_close($ch);
if ($curl_errno > 0)
{
Log::write("CURL", "cURL Error (".$curl_errno."): ".$curl_error);
}
else
{
Log::write("CURL", "Data received: " . $contents);
fwrite($file,$contents);
fclose($file);
}
return;
OK, finally got it all working and here is the code if anyone else ever tries to do the same sort of thing!
I was missing these parts:
$dir = $_SERVER['DOCUMENT_ROOT'] . "/img/products/";
and
fwrite($file,$contents);
So here is my final code... credit to Sébastien for pointing me in the right direction. Thanks.
if($method == 'save')
{
$productId = Input::get('pId');
$removeProductImages = DB::table('product_ref_images')->where('product_id', '=', $productId)->delete();
$imagesData = Input::get('imageRefs');
$dir = $_SERVER['DOCUMENT_ROOT'] . "/img/products/";
$sortOrder = 0;
for ($i=0; $i < count($imagesData); $i++) {
$imgSrc = trim($imagesData[$i]['imgSrc']);
$imgId = trim($imagesData[$i]['imgId']);
$file = fopen($dir . basename($imgSrc), "wb");
$headers[] = 'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg';
$headers[] = 'Connection: Keep-Alive';
$headers[] = 'Content-type: application/x-www-form-urlencoded;charset=UTF-8';
$user_agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)';
$ch = curl_init($imgSrc);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FILE, $file);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 60);
$contents = curl_exec($ch);
$curl_errno = curl_errno($ch);
$curl_error = curl_error($ch);
curl_close($ch);
if ($curl_errno > 0)
{
Log::write("CURL", "cURL Error (".$curl_errno."): ".$curl_error);
break;
}
else
{
fwrite($file,$contents);
fclose($file);
$imageIds = DB::table('product_ref_images')->order_by('image_id', 'desc')->first();
if($imageIds == null)
{
$imageIds = 0;
}
else
{
$imageIds = $imageIds->image_id;
}
$updateImages = DB::table('product_ref_images')
->insert(array(
'image_id' => $imageIds + 1,
'product_id' => $productId,
'flickr_image_id' => $imgId,
'sort_order' => $sortOrder++,
'local_storage_url' => $dir . basename($imgSrc),
'created_at' => date("Y-m-d H:i:s"),
'updated_at' => date("Y-m-d H:i:s")
));
}
}
return Response::json('Complete');
}
Remove this line:
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
You're storing the response to a file, not to the return variable. Otherwise, you have to save it yourself (like you did in the other solution).

PHP Display Image (SSL PROXY)

I am trying to make a PHP script that accepts a URL and displays it (trying to keep SSL on my project)
However, I am unable to get anything to output with my script. No errors are being displayed and I am at a loss of words. What am I not seeing?
<?php
//if parameter is empty
if((!isset($_GET['img'])) or ($_GET['img'] == '') or (!isset($per['scheme'])))
{
exit;
}
$per = parse_url($_GET['img']);
print_r ($per);
$imgurl = $_GET['img'];
print_r ($imgurl);
$imgurl = str_replace(' ', "%20", $imgurl);
$aFile = getimagesize($imgurl);
print_r ($aFile);
//check file extension
if($aFile == 'jpg' or $aFile == 'jpeg'){
header('Content-Type: image/jpeg');
imagejpeg(getimg($file));
} elseif($aFile == 'png') {
header('Content-Type: image/png');
imagepng(getimg($file));
} elseif($aFile == 'gif') {
header('Content-Type: image/gif');
imagegif(getimg($file));
} else {
die('not supported');
}
$imgurl = $_GET['img'];
$imgurl = str_replace(' ', "%20", $imgurl);
function getimg($imageurl) {
$cache_expire = 60*60*24*365;
$headers[] = 'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg, image/png';
$headers[] = 'Cache-Control: maxage=. $cache_expire';
$headers[] = 'Pragma: public';
$headers[] = 'Accept-Encoding: None';
$headers[] = 'Content-type: application/x-www-form-urlencoded;charset=UTF-8';
$user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6';
$process = curl_init($imageurl);
curl_setopt($process, CURLOPT_HTTPHEADER, $headers);
curl_setopt($process, CURLOPT_REFERER, $_GET['img']);
curl_setopt($process, CURLOPT_HEADER, 0);
curl_setopt($process, CURLOPT_HTTPGET, true);
curl_setopt($process, CURLOPT_USERAGENT, $user_agent);
curl_setopt($process, CURLOPT_TIMEOUT, 30);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
$return = curl_exec($process);
curl_close($process);
return $return;
}
exit;
?>
Here is an edited version of the script that outputs garbled text when I use readfile:
<?php
error_reporting(E_ALL);
ini_set('display_errors', '1');
//if parameter is empty
if((!isset($_GET['img'])) or ($_GET['img'] == ''))
{
exit;
}
$per = parse_url($_GET['img']);
$imgurl = $_GET['img'];
$imgurl = str_replace(' ', "%20", $imgurl);
$aFile = getimagesize($imgurl);
//check file extension
$checkmime = getimagesize($imgurl);
if($checkmime['mime'] != 'image/png' && $checkmime['mime'] != 'image/gif' && $checkmime['mime'] != 'image/jpeg') {
die('not supported');
} else {
readfile("$imgurl");
}
function getimg($imageurl) {
$cache_expire = 60*60*24*365;
$headers[] = 'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg, image/png';
$headers[] = 'Cache-Control: maxage=. $cache_expire';
$headers[] = 'Pragma: public';
$headers[] = 'Accept-Encoding: None';
$headers[] = 'Content-type: application/x-www-form-urlencoded;charset=UTF-8';
$user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6';
$process = curl_init($imageurl);
curl_setopt($process, CURLOPT_HTTPHEADER, $headers);
curl_setopt($process, CURLOPT_REFERER, $_GET['img']);
curl_setopt($process, CURLOPT_HEADER, 0);
curl_setopt($process, CURLOPT_HTTPGET, true);
curl_setopt($process, CURLOPT_USERAGENT, $user_agent);
curl_setopt($process, CURLOPT_TIMEOUT, 30);
curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
$return = curl_exec($process);
curl_close($process);
return $return;
}
exit;
?>
If this truly is your entire script your problem first lies with $per['scheme'] not being set so it will trip the first if() statement and have the script exit.
You check if it's not set here: !isset($per['scheme']) so it will make the expression TRUE and thus end the script, reason no output even from print_r();
Use this instead:
if (empty($_GET['img'])) // Check if $_GET['img'] is not set AND is = '' (empty)
{
exit;
}
Use:
} else {
header('Content-Type: ' . $checkmime['mime']);
readfile($imgurl);
}

How To Load And Save Facebook Profile Picture Of User

I Want direct link of user's profile picture
i Have Following Code
$img = file_get_contents("https://graph.facebook.com/" . $fid . "/picture?type=large");
$file = dirname(__file__). "/" . $fid . ".jpg";
file_put_contents($file, $img);
But https://graph.facebook.com/" . $fid . "/picture?type=large Have a Redirection. How do i follow the redirection? is there any way to do it via file_get_contents? I know i can do it via curl But It Seems complicated i got error that safe_mode is on and i don't know how to off it.
Thanks You
You should be able follow redirects with file_get_contents by giving it a third parameter $context – in which you set the HTTP context option follow_location to 1.
(Although this should already be the default, and in my test getting the image data worked with file_get_contents alone already.)
Her is the code that I am using and it works perfect for me. It also saves the picture to my server so I then have a local url (which can then be posted either back to the same user's profile or to the wall of another user/page/event/etc.) All you have to do is place it in your code where $user has a value and it should work just fine.
<?
$uid = $user;
function GetTheImage($linky) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_POST, 0);
curl_setopt($ch, CURLOPT_URL, $linky);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
# ADDED LINE:
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$result = curl_exec($ch);
curl_close($ch);
return $result;
}
$userpicture = "http://graph.facebook.com/$uid/picture?type=large";
$sourceurl = GetTheImage($userpicture);
$save = fopen("/home/arose/public_html/mydomain.com/tmp/$uid-large.jpg", "w"); //this is name of new file that i save
fwrite($save, $sourceurl);
fclose($save);
?>
<html>
<head>
</head>
<body>
<img src="./tmp/<? echo $uid; ?>-large.jpg" />
</body>
</html>
<?php
$fbid=$user_profile['id'];
$pic = file_get_contents("https://graph.facebook.com/$fbid/picture?type=large");
$filename = $fbid.".jpg";
$path="images/".$filename;
file_put_contents($path, $pic);
?>
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
function curl_redir_exec($ch)
{
static $curl_loops = 0;
static $curl_max_loops = 20;
if ($curl_loops++ >= $curl_max_loops)
{
$curl_loops = 0;
return FALSE;
}
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$data = curl_exec($ch);
#list($header, $data) = #explode("\n\n", $data, 2);
$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($http_code == 301 || $http_code == 302)
{
$matches = array();
preg_match('/Location:(.*?)\n/', $header, $matches);
$url = #parse_url(trim(array_pop($matches)));
if (!$url)
{
//couldn't process the url to redirect to
$curl_loops = 0;
return $data;
}
$last_url = parse_url(curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
if (!$url['scheme'])
$url['scheme'] = $last_url['scheme'];
if (!$url['host'])
$url['host'] = $last_url['host'];
if (!$url['path'])
$url['path'] = $last_url['path'];
$new_url = $url['scheme'] . '://' . $url['host'] . $url['path'] . (#$url['query']?'?'.$url['query']:'');
return $new_url;
} else {
$curl_loops=0;
return $data;
}
}
function get_right_url($url) {
$curl = curl_init($url);
curl_setopt($curl, CURLOPT_HEADER, false);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
return curl_redir_exec($curl);
}
$url = 'http://graph.facebook.com/' . $fbid . '/picture?type=large';
$file_handler = fopen('img/avatar/'.$fbid.'.jpg', 'w');
$curl = curl_init(get_right_url($url));
curl_setopt($curl, CURLOPT_FILE, $file_handler);
curl_setopt($curl, CURLOPT_HEADER, false);
curl_exec($curl);
curl_close($curl);
fclose($file_handler);

Even CURL function can't scrape some urls

I'm using CURL to scrape the html from url's. It works great in 80% of the urls I use. But some url's don't seem "scrapeable". For example, when I try to scrape http://www.thefancy.com , it doesn't work. the website keeps loading and at the end it doesn't return a result. the problem is testable at: http://www.itemmized.com/test/test/ this is my code:
if($_POST['submit']) {
function curl_exec_follow($ch, &$maxredirect = null) {
$mr = $maxredirect === null ? 5 : intval($maxredirect);
if (ini_get('open_basedir') == '' && ini_get('safe_mode' == 'Off')) {
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $mr > 0);
curl_setopt($ch, CURLOPT_MAXREDIRS, $mr);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
} else {
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
if ($mr > 0)
{
$original_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
$newurl = $original_url;
$rch = curl_copy_handle($ch);
curl_setopt($rch, CURLOPT_HEADER, true);
curl_setopt($rch, CURLOPT_NOBODY, true);
curl_setopt($rch, CURLOPT_FORBID_REUSE, false);
do
{
curl_setopt($rch, CURLOPT_URL, $newurl);
$header = curl_exec($rch);
if (curl_errno($rch)) {
$code = 0;
} else {
$code = curl_getinfo($rch, CURLINFO_HTTP_CODE);
if ($code == 301 || $code == 302) {
preg_match('/Location:(.*?)\n/', $header, $matches);
$newurl = trim(array_pop($matches));
// if no scheme is present then the new url is a
// relative path and thus needs some extra care
if(!preg_match("/^https?:/i", $newurl)){
$newurl = $original_url . $newurl;
}
} else {
$code = 0;
}
}
} while ($code && --$mr);
curl_close($rch);
if (!$mr)
{
if ($maxredirect === null)
trigger_error('Too many redirects.', E_USER_WARNING);
else
$maxredirect = 0;
return false;
}
curl_setopt($ch, CURLOPT_URL, $newurl);
}
}
return curl_exec($ch);
}
$ch = curl_init($_POST['form_url']);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$data = curl_exec_follow($ch);
curl_close($ch);
echo $data;
Try this.. hope this helps...
<?php
class Curl
{
public $cookieJar = "";
public function __construct($cookieJarFile = 'cookies.txt') {
$this->cookieJar = $cookieJarFile;
}
function setup()
{
$header = array();
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($this->curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.8.1.7) Gecko/20070914 Firefox/2.0.0.7');
curl_setopt($this->curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($this->curl,CURLOPT_COOKIEJAR, $cookieJar);
curl_setopt($this->curl,CURLOPT_COOKIEFILE, $cookieJar);
curl_setopt($this->curl,CURLOPT_AUTOREFERER, true);
curl_setopt($this->curl,CURLOPT_FOLLOWLOCATION, true);
curl_setopt($this->curl,CURLOPT_RETURNTRANSFER, true);
}
function get($url)
{
$this->curl = curl_init($url);
$this->setup();
return $this->request();
}
function getAll($reg,$str)
{
preg_match_all($reg,$str,$matches);
return $matches[1];
}
function postForm($url, $fields, $referer='')
{
$this->curl = curl_init($url);
$this->setup();
curl_setopt($this->curl, CURLOPT_URL, $url);
curl_setopt($this->curl, CURLOPT_POST, 1);
curl_setopt($this->curl, CURLOPT_REFERER, $referer);
curl_setopt($this->curl, CURLOPT_POSTFIELDS, $fields);
return $this->request();
}
function getInfo($info)
{
$info = ($info == 'lasturl') ? curl_getinfo($this->curl, CURLINFO_EFFECTIVE_URL) : curl_getinfo($this->curl, $info);
return $info;
}
function request()
{
return curl_exec($this->curl);
}
}
{
$curl = new Curl();
$html = $curl->get("http://www.thefancy.com");
echo "$html";
}
?>
Probably you're unable to scrape http://www.thefancy.com because every time you reach the bottom of the page new content is loading so actually you are trying to get an enormous amount of information with the cUrl probably that's where the problem is. You just get a timeout try setting the timeout in php.ini with a larger number and give it a try again. Probably its gona take a while to load but I think this way it's going to work.

Categories