I am using this code to get page from internet, but I get result status 0:
$url='http://www.jiwlp.com';
$this->url = $url;
if (isset($this->url)) {
// start cURL instance
$this->ch = curl_init ();
// this tells cUrl to return the data
curl_setopt ($this->ch, CURLOPT_RETURNTRANSFER, 1);
// set the url to download
curl_setopt ($this->ch, CURLOPT_URL, $this->url);
// follow redirects if any
curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION, true);
// tell cURL if the data is binary data or not
curl_setopt($this->ch, CURLOPT_BINARYTRANSFER, $this->binary);
$useragent="Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1";
curl_setopt($this->ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($this->ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($this->ch, CURLOPT_VERBOSE, 1);
curl_setopt($this->ch, CURLOPT_USERAGENT, $useragent);
curl_setopt($this->ch, CURLOPT_TIMEOUT, 5);
// grabs the webpage from the internet
$this->html = curl_exec($this->ch);
$this->status = curl_getinfo($this->ch, CURLINFO_HTTP_CODE);
print_r(curl_getinfo($this->ch)); // closes the connection
curl_close ($this->ch);
}
What am I doing wrong?
this version works for me, removed the oo
$url = 'http://www.jiwlp.com';
if(isset($url)){
$ch = curl_init();
curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch,CURLOPT_URL,$url);
curl_setopt($ch,CURLOPT_FOLLOWLOCATION,true);
curl_setopt($ch,CURLOPT_BINARYTRANSFER,$binary);
$useragent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US;
rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1";
curl_setopt($ch,CURLOPT_SSL_VERIFYPEER,true);
curl_setopt($ch,CURLOPT_SSL_VERIFYHOST,false);
curl_setopt($ch,CURLOPT_VERBOSE,1);
curl_setopt($ch,CURLOPT_USERAGENT,$useragent);
curl_setopt($ch,CURLOPT_TIMEOUT,5);
curl_exec($ch);
$status = curl_getinfo($ch,CURLINFO_HTTP_CODE);
print_r(curl_getinfo($ch));
curl_close($ch);
}
The response status is already stored in $this->status, I'm assuming you are referring to HTTP response status codes, so instead of
// grabs the webpage from the internet
$this->html = curl_exec($this->ch);
$this->status = curl_getinfo($this->ch, CURLINFO_HTTP_CODE);
print_r(curl_getinfo($this->ch));
Try printing out $this->status instead.
// grabs the webpage from the internet
$this->html = curl_exec($this->ch);
$this->status = curl_getinfo($this->ch, CURLINFO_HTTP_CODE);
print_r($this->status);
check CURL in php disabled function.
if CURL_EXEC is disabled in the webserver, php wont give error instead will [http_code] => 0 header_size] => 0 .......
run phpinfo() from a php page is one of a way to get the list of disabled php functions.
I use this function to get http site/link status:
<?php
function get_link_status($url, $timeout = 10)
{
$ch = curl_init();
// set cURL options
$opts = array(CURLOPT_RETURNTRANSFER => true, // do not output to browser
CURLOPT_URL => $url, // set URL
CURLOPT_NOBODY => true, // do a HEAD request only
CURLOPT_TIMEOUT => $timeout); // set timeout
curl_setopt_array($ch, $opts);
curl_exec($ch); // do it!
$status = curl_getinfo($ch, CURLINFO_HTTP_CODE); // find HTTP status
curl_close($ch); // close handle
echo $status; //or return $status;
//example of check
if ($status == '301') { echo 'This is redirected';}
}
get_link_status('http://example.com');
?>
Related
I have been working on setting up the frase api. and created the following curl snippet.
<?php
$url = 'http://api.frase.io/api/v1/process_url';
//The data you want to send via POST
$fields = ['url' => 'https://firstsiteguide.com/best-gaming-blogs/', 'token' => "dd528796a9924dae9962bc5bd7ccdb20"];
$ch = curl_init();
curl_setopt($ch,CURLOPT_URL,$url);
curl_setopt($ch,CURLOPT_POST, 1);
curl_setopt($ch,CURLOPT_POSTFIELDS,http_build_query($fields));
curl_setopt($ch,CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FAILONERROR, true);
curl_setopt($ch,CURLOPT_CONNECTTIMEOUT ,3);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0');
curl_setopt($ch,CURLOPT_TIMEOUT, 20);
$response = curl_exec($ch);
if (curl_errno($ch)) {
$error_msg = curl_error($ch);
echo "<br/>CURL ERROR: ". $error_msg ."<br/>";
}else{
print "curl response is:" . $response ;
}
curl_close ($ch);
?>
I am not sure why, But I am receiving the following error for the same
The requested URL returned error: 400 Bad Request
Can help me identify what part of code I am missing or doing wrong. Thank you so much in advance.
You're passing the token as a body parameter instead of a header. The body parameter needs to be sent as a JSON encoded string as mentioned on the API documentation page. Also, you need to remove or at least increase the cURL timeout value as it takes time to fetch, process and return a value from the API end. Note that the API will return the response in JSON format.
So, the complete code should be as:
<?php
$url = 'http://api.frase.io/api/v1/process_url'; //The endpoint url you want to send data via POST
$headers = ['token: dd528796a9924dae9962bc5bd7ccdb20']; // add this line, headers
$fields = ['url' => 'https://firstsiteguide.com/best-gaming-blogs/']; // modify this line, body
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); // add this line
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($fields)); // modify this
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0');
$response = curl_exec($ch);
if (curl_errno($ch))
{
$error_msg = curl_error($ch);
echo "<br/>CURL ERROR: " . $error_msg . "<br/>";
}
else
{
print($response);
// $values = json_decode($response, true); // this is an array with all the values
}
curl_close($ch);
?>
I have a problem with one website i wrote a few weeks ago.
my website communicates with another website_2 via API hosted on website_2
the curl operation is requested via Query POST to a PHP file.
if for some reason the operation took a longer time (which i can't determine the reason for) and the user hits refresh.. the command sent to the API is done yet my server doesn't get any result so can't log or do anything with that result..
is there a way to preserve the integrity of such transaction?
below is my code and i still get FAILED no matter what the result was on website_2
function doCommit($url_)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url_);
curl_setopt($ch, CURLOPT_USERAGENT, 'Opera/9.23 (Windows NT 5.1; U; en)');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_TIMEOUT,5);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT,5);
$commit = curl_exec($ch);
curl_close($ch);
if(!curl_exec($ch))
{
$ERROR="<Transaction>
<Result>Failed</Result>
<Reason>Operation Timed Out</Reason>
</Transaction>";
$oXML = new SimpleXMLElement($ERROR);
return $oXML;
}
else{
$oXML = new SimpleXMLElement($commit);
return $oXML;
}
// return $oXML->Reason;
}
You can use curl parameters to solve your problem setting a "request timeout":
CURLOPT_TIMEOUT - Sets The number of seconds to wait before a curl individual function timeouts.
CURLOPT_CONNECTTIMEOUT - Sets the maximum time before curl connection timeouts.
...and then you could return a text if curl_exec fails:
if(curl_exec($curl) === false)
{
echo 'ERROR: ' . curl_error($curl);
}
Solved it by the following code
function doCommit($url_)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url_);
curl_setopt($ch, CURLOPT_USERAGENT, 'Opera/9.23 (Windows NT 5.1; U; en)');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_TIMEOUT,2);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT,2);
$commit = curl_exec($ch);
curl_close($ch);
if(curl_errno($ch) == 0)
{
$oXML = new SimpleXMLElement($commit);
return $oXML;
}
else{
$ERROR="<Transaction>
<Result>Failed</Result>
<Reason>Operation Timed Out</Reason>
</Transaction>";
$oXML = new SimpleXMLElement($ERROR);
return $oXML;
}
// return $oXML->Reason;
}
I'm currently using cURL to try and get the URL from a redirect for a website scraper. I only need the url from the website. I've researched on stackoverflow and other sites for the past couple days and have been unsuccessful. The code I'm currently using is from this website:
$url = "http://www.someredirect.com";
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_NOBODY, true);
$response = curl_exec($ch);
preg_match_all('/^Location:(.*)$/mi', $response, $matches);
curl_close($ch);
echo !empty($matches[1]) ? trim($matches[1][0]) : 'No redirect found';
Any help would be greatly appreciated!
In your particular case, the server is checking for certain user-agent strings.
When a server checks the user-agent string, it will only respond with a 302 redirect status code when the server sees a "valid" (according to the server) user-agent. Any "invalid" user-agents will not receive the 302 redirect status code response or Location: header.
In your particular case, when the server receives a request from an "invalid" user-agent it responds with a 200 OK status code with no text in the response body.
(Note: in the code below, the actual URLs provided have been replaced with examples.)
Let's say that http://www.example.com's server checks the User-Agent string and that http://www.example.com/product/123/ redirects to http://www.example.org/abc.
In PHP your solution would be:
<?php
$url = 'http://www.example.com/product/123/';
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (X11; Linux x86_64; rv:21.0) Gecko/20100101 Firefox/21.0"); // Necessary. The server checks for a valid User-Agent.
curl_exec($ch);
$response = curl_exec($ch);
preg_match_all('/^Location:(.*)$/mi', $response, $matches);
curl_close($ch);
echo !empty($matches[1]) ? trim($matches[1][0]) : 'No redirect found';
And, the output of this script would be: http://www.example.org/abc.
Try using this code:
function curl_last_url(/*resource*/ $ch, /*int*/ &$maxredirect = null) {
$mr = $maxredirect === null ? 5 : intval($maxredirect);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
if ($mr > 0) {
echo $mr;
echo $newurl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
$rch = curl_copy_handle($ch);
curl_setopt($rch, CURLOPT_HEADER, true);
curl_setopt($rch, CURLOPT_NOBODY, true);
curl_setopt($rch, CURLOPT_FORBID_REUSE, false);
curl_setopt($rch, CURLOPT_RETURNTRANSFER, true);
do {
curl_setopt($rch, CURLOPT_URL, $newurl);
$header = curl_exec($rch);
if (curl_errno($rch)) {
$code = 0;
} else {
$code = curl_getinfo($rch, CURLINFO_HTTP_CODE);
echo $code;
if ($code == 301 || $code == 302) {
preg_match('/Location:(.*?)\n/', $header, $matches);
$newurl = trim(array_pop($matches));
} else {
$code = 0;
}
}
} while ($code && --$mr);
curl_close($rch);
if (!$mr) {
if ($maxredirect === null) {
trigger_error('Too many redirects. When following redirects, libcurl hit the maximum amount.', E_USER_WARNING);
} else {
$maxredirect = 0;
}
return false;
}
curl_setopt($ch, CURLOPT_URL, $newurl);
}
return $newurl;
}
A similar question has been posted at but i could not find the solution there
Curl error Could not resolve host: saved_report.xml; No data record of requested type"
<?php
$url="http://en.wikipedia.org/wiki/Pakistan";
$ch = curl_init(urlencode($url));
echo $ch;
// used to spoof that coming from a real browser so we don't get blocked by some sites
$useragent="Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1";
curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 4);
curl_setopt($ch, CURLOPT_TIMEOUT, 8);
curl_setopt($ch, CURLOPT_LOW_SPEED_TIME, 10);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$content = curl_exec($ch);
$info = curl_getinfo($ch);
if ($content === false || $info['http_code'] != 200) {
$content = "No cURL data returned for $url [". $info['http_code']. "]";
if (curl_error($ch))
$content .= "\n". curl_error($ch);
}
else {
// 'OK' status; format $output data if necessary here:
echo "...";
}
echo $content;
curl_close($ch);
?>
when i paste the same address in browser i am able to access the webpage. but when i run this script i get the error message. Can anyone please help me.
Thanks
Remove the urlencode call.
remove the urlencode($url) it should be:
$ch = curl_init($url);
Well.
If you remove urlencode() with instantiating your $ch-var, you go just fine. urlencode() is definitely wrong here.
Good:
$ch = curl_init($url);
Bad:
$ch = curl_init(urlencode($url));
$ch = curl_init($url);
instead of
$ch = curl_init(urlencode($url));
UPDATE: Turns out my code works. Browser was caching previous failed response. Thanks for the pointers.
I'm building a prototype and one thing I'd like to do is perform a service if the user is a valid member of NYTimes.com by providing their credentials.
Using curl, I'm trying to perform a login to the site, and then check for success or failure.
My code, below, doesn't return errors but drops me back at the login page:
<?php
class Login {
function Verify() {
print $this->getContent();
}
function getContent() {
$url = 'http://www.nytimes.com/auth/login';
// URI can be any NYT web page to be redirected to upon successful login
// SAVEOPTION and Submit2 are Optional but in original web form so included here
$fields = array(
'is_continue'=> 'true',
'USERID' => urlencode('ENTER_YOUR_USERNAME'),
'PASSWORD' => urlencode('ENTER_A_PASSWORD'),
'URI' => urlencode('http://www.nytimes.com/robots.txt'),
'OQ' => '',
'OP' => '',
'SAVEOPTION' => 'YES',
'Submit2' => 'Log In'
);
$fields_string = '';
if(!$curld = curl_init($url)) {
echo "Could not connect to the specified resource";
exit;
}
$ch = curl_init();
$useragent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1";
foreach($fields as $key=>$value) { $fields_string .= $key.'='.$value.'&'; }
rtrim($fields_string,'&');
curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_COOKIEJAR, "curl_login_cookie.txt");
curl_setopt($ch ,CURLOPT_POST, count($fields));
curl_setopt($ch, CURLOPT_POSTFIELDS, $fields_string);
ob_start();
curl_exec ($ch);
curl_close ($ch);
$result = ob_get_contents();
ob_end_clean();
return $result;
}
}
$login = new Login;
$result = $login->Verify();
?>
Any pointers, or suggestions welcome.
You need to check that the result of curl_exec is not false (which it'll be if the execution failed).
If it's succeeding, try checking the results of curl_errno and curl_getinfo to see information about the operation.
Also, it might be better to set the CURLOPT_RETURNTRANSFER option on your curl object instead of using output buffering to capture the result.