I'm trying to get some data from this website: https://stubhub.com.
1- With file_get_contents:
$url= 'https://www.stubhub.com';
$html = file_get_contents($url);
echo $html;
I get:
Warning: file_get_contents(https://stubhub.com): failed to open stream: HTTP request failed! HTTP/1.0 405 Method Not Allowed
2- With CURL:
$url= 'https://www.stubhub.com';
$curl = curl_init();
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curl, CURLOPT_HEADER, true);
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_REFERER, $url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
$html = curl_exec($curl);
$response = curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl);
var_dump($html);
var_dump($response);
But I get:
bool(false) int(0)
I tried adding some options like a User-Agent header and a proxy:
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201');
$proxy = '185.135.226.159:23500';
curl_setopt($curl, CURLOPT_PROXY, $proxy);
But again I get the same result.
I have allow_url_fopen=On, so what's wrong?
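For completeness, file_get_contents can also send request headers via a stream context - a minimal sketch, where the header values are assumptions rather than anything the site is known to require:
$context = stream_context_create(array(
    'http' => array(
        'method' => 'GET',
        // assumed headers - adjust as needed
        'header' => "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64)\r\n" .
                    "Accept: text/html\r\n"
    )
));
$html = file_get_contents('https://www.stubhub.com', false, $context);
var_dump($html);
var_dump($http_response_header); // response headers, if a response was received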
function curl( $url=NULL, $options=NULL ){
$cacert='c:/wwwroot/cacert.pem'; # <----- download your own copy and configure this path
$vbh = fopen('php://temp', 'w+');
$res=array(
'response' => NULL,
'info' => array( 'http_code' => 100 ),
'headers' => NULL,
'errors' => NULL
);
if( is_null( $url ) ) return (object)$res;
session_write_close();
/* Initialise curl request object */
$curl=curl_init();
if( parse_url( $url,PHP_URL_SCHEME )=='https' ){
curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, true );
curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
curl_setopt( $curl, CURLOPT_CAINFO, $cacert );
}
/* Define standard options */
curl_setopt( $curl, CURLOPT_URL,trim( $url ) );
curl_setopt( $curl, CURLOPT_AUTOREFERER, true );
curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );
curl_setopt( $curl, CURLOPT_FAILONERROR, true );
curl_setopt( $curl, CURLOPT_HEADER, false );
curl_setopt( $curl, CURLINFO_HEADER_OUT, false );
curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
curl_setopt( $curl, CURLOPT_BINARYTRANSFER, true );
curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 20 );
curl_setopt( $curl, CURLOPT_TIMEOUT, 60 );
curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36' );
curl_setopt( $curl, CURLOPT_MAXREDIRS, 10 );
curl_setopt( $curl, CURLOPT_ENCODING, '' );
curl_setopt( $curl, CURLOPT_VERBOSE, true );
curl_setopt( $curl, CURLOPT_NOPROGRESS, true );
curl_setopt( $curl, CURLOPT_STDERR, $vbh );
/* Assign runtime parameters as options */
if( isset( $options ) && is_array( $options ) ){
foreach( $options as $param => $value ) curl_setopt( $curl, $param, $value );
}
/* Execute the request and store responses */
$res=(object)array(
'response' => curl_exec( $curl ),
'info' => (object)curl_getinfo( $curl ),
'errors' => curl_error( $curl )
);
rewind( $vbh );
$res->verbose=stream_get_contents( $vbh );
fclose( $vbh );
curl_close( $curl );
return $res;
}
$url='https://www.stubhub.com/';
$res = curl( $url );
if( $res->info->http_code==200 ){
printf('<pre>%s</pre>',print_r( $res->info,true ));
printf('<pre>%s</pre>',print_r( $res->verbose,true ));
}
This will output:
stdClass Object
(
[url] => https://www.stubhub.com/
[content_type] => text/html
[http_code] => 200
[header_size] => 1304
[request_size] => 214
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 0
[total_time] => 0.609
[namelookup_time] => 0.25
[connect_time] => 0.265
[pretransfer_time] => 0.39
[size_upload] => 0
[size_download] => 1194
[speed_download] => 1960
[speed_upload] => 0
[download_content_length] => 1194
[upload_content_length] => -1
[starttransfer_time] => 0.609
[redirect_time] => 0
[redirect_url] =>
[primary_ip] => 23.43.75.46
[certinfo] => Array
(
)
[primary_port] => 443
[local_ip] => 192.168.0.56
[local_port] => 5042
)
* Trying 23.43.75.46...
* TCP_NODELAY set
* Connected to www.stubhub.com (23.43.75.46) port 443 (#0)
* ALPN, offering http/1.1
* successfully set certificate verify locations:
CAfile: c:/wwwroot/cacert.pem
CApath: none
* SSL connection using TLSv1.2 / ECDHE-ECDSA-AES256-GCM-SHA384
* ALPN, server accepted to use http/1.1
* Server certificate:
* subject: C=US; ST=California; L=San Francisco; O=Stubhub, Inc.; OU=Technology; CN=www.stubhub.com
* start date: Jun 11 00:00:00 2018 GMT
* expire date: Jan 9 12:00:00 2020 GMT
* subjectAltName: host "www.stubhub.com" matched cert's "www.stubhub.com"
* issuer: C=US; O=DigiCert Inc; CN=DigiCert ECC Secure Server CA
* SSL certificate verify ok.
> GET / HTTP/1.1
Host: www.stubhub.com
User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36
Accept: */*
Accept-Encoding: deflate, gzip
< HTTP/1.1 200 OK
< Server: nginx
< Content-Type: text/html
< Expires: Thu, 01 Jan 1970 00:00:01 GMT
< Cache-Control: private, no-cache, no-store, must-revalidate
< Surrogate-Control: no-store, bypass-cache
< Content-Encoding: gzip
< X-EdgeConnect-MidMile-RTT: 163
< X-EdgeConnect-Origin-MEX-Latency: 24
< X-Akamai-Transformed: 9 624 0 pmb=mTOE,1mRUM,1
< Date: Sat, 20 Oct 2018 16:25:57 GMT
< Content-Length: 1194
< Connection: keep-alive
< Vary: Accept-Encoding
< Set-Cookie: DC=lvs31;Path=/;Domain=stubhub.com;Expires=Sat, 20-Oct-2018 16:55:56 GMT;Max-Age=1800
< Set-Cookie: akacd_PCF_Prod=1540053357~rv=98~id=53e183ee10a83152497c9102c8c7dee7; path=/; Expires=Sat, 20 Oct 2018 16:35:57 GMT
< Strict-Transport-Security: max-age=31536000; includeSubDomains
< Set-Cookie: _abck=10D08E1267D29C2EDBEA32445BD116805C7A3616AB3500001557CB5B9AD22713~-1~e+BGOJkoD/UwtPOWH75YXUSo6Kzyd7sF6nTkkw89JfE=~-1~-1; expires=Sun, 20 Oct 2019 16:25:57 GMT; max-age=31536000; path=/; domain=.stubhub.com
< Set-Cookie: bm_sz=7C06CFF7557E22DEC7855EC89DF628B0~QAAQFjZ6XGg5goBmAQAAIypMkhVJRZxwtVU8097T7Q8Z2TcGPZR0XRtAVFY3TBHGsR4EW51MqZlCAyk3cMPDJEmukVvLunM36/5Kn1gtoxarUtgkqBvlfudWZBJb2xc1rHdnMhdsAXoHWLaGt0NwROSXckDe48kkqu2Kw3suRgrWcqDlj7Y1akARK8OYnoa6; Domain=.stubhub.com; Path=/; Expires=Sat, 20 Oct 2018 20:25:56 GMT; Max-Age=14399; HttpOnly
<
* Connection #0 to host www.stubhub.com left intact
To access the actual response body you would process $res->response - load it into DOMDocument or whatever you intend to do... good luck
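For example, a minimal sketch of loading the body into DOMDocument (libxml warnings are suppressed because real-world HTML is rarely well formed):
$res = curl( 'https://www.stubhub.com/' );
if( $res->info->http_code==200 ){
    $dom = new DOMDocument();
    libxml_use_internal_errors( true );   // don't let malformed HTML flood the output with warnings
    $dom->loadHTML( $res->response );
    libxml_clear_errors();
    $titles = $dom->getElementsByTagName('title');
    if( $titles->length ) echo $titles->item(0)->textContent;
}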
Related
I am sending a POST request in PHP via cURL to a REST API that uses XML. When I use Postman or Advanced REST Client, I get an XML response to my POST request. However, when I use PHP and cURL I do not seem to be able to get the XML responses back. What do I need to do to get these back? Eventually I need to retrieve a token that I can then use to process INSERTs, UPDATEs and GETs through this API via XML.
Here is the code that I am currently using:
$curl = curl_init();
curl_setopt_array($curl, array(
CURLOPT_URL => 'https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => '',
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 0,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => 'POST',
CURLOPT_HTTPHEADER => array(
'xxxxxx-Username: xxx',
'xxxxxx-Password: xxx',
'content-type: application/xml'
),
));
$response = curl_exec($curl);
curl_close($curl);
echo $response;
and currently I am getting a blank page. I have tried quite a few solutions, like the following:
//header("Content-Type: text/xml");
//header('Content-type: application/xml');
//$decoded = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $response);
//echo $decoded;
//echo $response;
//print_r($response);
// set up your xml result
$xml = new SimpleXMLElement($response, LIBXML_NOCDATA);
// loop through the results
$cnt = count($xml->Result);
for($i=0; $i<$cnt; $i++){
echo 'XML : First Name: = ';
}
but nothing seems to give me back what I get from Postman or Advanced REST Client, which for this particular command is the following:
<?xml version="1.0" encoding="UTF-8"?>
<AuthInfo>
<token/>
<AuthStatus>
<Id>503</Id>
<Description>There's no proapi manager running with the given company code: crmapp</Description>
</AuthStatus>
</AuthInfo>
I understand that at this stage there is an issue with my URL that I need to fix, but I should still be able to receive that error back as XML.
Can anyone please help me get this XML response back so that I can progress my interface?
Thank you in advance,
Adri
Thanks again, Professor. Here is the full debug output with the latest versions of PHP and cURL.
Verbose debug info
* Trying xxx.xx.xxx.xxx:443...
* Connected to xxxxx-xx-xx.xxxxxxxx.com.au (xxx.xx.xxx.xxx) port 443 (#0)
* ALPN, offering h2
* ALPN, offering http/1.1
* successfully set certificate verify locations:
* CAfile: D:/Adri/PHP/MoW/famac/cacert.pem
* CApath: D:/Adri/PHP/MoW/famac/cacert.pem
* SSL connection using TLSv1.2 / ECDHE-RSA-AES256-GCM-SHA384
* ALPN, server did not agree to a protocol
* Server certificate:
* subject: CN=*.prontohosted.com.au
* start date: Jun 2 00:00:00 2020 GMT
* expire date: Sep 4 00:00:00 2022 GMT
* subjectAltName: host "xxxxx-xx-xx.xxxxxxxx.com.au" matched cert's "*.xxxxxxxx.com.au"
* issuer: C=GB; ST=Greater Manchester; L=Salford; O=Sectigo Limited; CN=Sectigo RSA Domain Validation Secure Server CA
* SSL certificate verify ok.
> GET /xxxxx/rest/xxx.xxx/login HTTP/1.1
Host: xxxxx-xx-xx.xxxxxxxx.com.au
User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.38 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.38
Accept: */*
Accept-Encoding: deflate, gzip
xxxxxx-Username: xxx
xxxxxx-Password: xxx
Content-Type: application/xml
* Mark bundle as not supporting multiuse
< HTTP/1.1 404 Not Found
< Date: Tue, 09 Nov 2021 11:34:57 GMT
< Server: Apache
< Referrer-Policy: origin-when-cross-origin, strict-origin-when-cross-origin
< X-Frame-Options: SAMEORIGIN
< X-XSS-Protection: 1; mode=block
< X-Content-Type-Options: nosniff
< Content-Security-Policy: img-src 'self' *.xxxxx.net *.xxxxx.com.au https://www.google.com https://*.googleapis.com/ www.google-analytics.com stats.g.doubleclick.net http://*.xxxxx-xxxxx.com *.twitter.com *.twimg.com data: blob: https://*.google.com https://*.gstatic.com https://*.googleapis.com; frame-src * blob:; script-src 'self' 'unsafe-inline' 'unsafe-eval' *.xxxxx.net *.xxxxx.com.au https://*.google.com www.google-analytics.com *.twitter.com *.twimg.com https://*.googleapis.com https://jawj.github.io https://*.gstatic.com; connect-src 'self' wss: blob: *.twitter.com www.google-analytics.com stats.g.doubleclick.net; base-uri 'none'; style-src 'self' 'unsafe-inline' *.twitter.com *.twimg.com https://*.google.com *.googleapis.com https://*.gstatic.com; font-src 'self' data: https://*.googleapis.com https://fonts.gstatic.com; child-src * blob:; object-src 'none'; default-src 'self' blob:
< X-Permitted-Cross-Domain-Policies: master-only
< Content-Type: text/html; charset=UTF-8
< Content-Length: 994
* The requested URL returned error: 404
* Closing connection 0
Info
stdClass Object
(
[url] => https://xxxxx-xx-xx.xxxxxxxx.com.au/xxxxx/rest/xxx.xxx/login
[content_type] => text/html; charset=UTF-8
[http_code] => 404
[header_size] => 1271
[request_size] => 350
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 0
[total_time] => 0.232624
[namelookup_time] => 0.029367
[connect_time] => 0.05058
[pretransfer_time] => 0.162497
[size_upload] => 0
[size_download] => 0
[speed_download] => 0
[speed_upload] => 0
[download_content_length] => 994
[upload_content_length] => 0
[starttransfer_time] => 0.232609
[redirect_time] => 0
[redirect_url] =>
[primary_ip] => xxx.xx.xxx.xxx
[certinfo] => Array
(
)
[primary_port] => 443
[local_ip] => xxx.xxx.x.xxx
[local_port] => 52711
[http_version] => 2
[protocol] => 2
[ssl_verifyresult] => 0
[scheme] => HTTPS
[appconnect_time_us] => 162464
[connect_time_us] => 50580
[namelookup_time_us] => 29367
[pretransfer_time_us] => 162497
[redirect_time_us] => 0
[starttransfer_time_us] => 232609
[total_time_us] => 232624
)
Can you please let me know what you think of this? While I am no longer getting the previous error, I still seem unable to receive the XML response back. :(
Thank you in advance, Adri
The curl function I use is as follows. It has extra debugging information in the output and the default settings can be easily overridden at runtime by supplying a different $options argument. I'm not suggesting this is the answer but with a better set of options configured and better debug info you should get closer.
function curl( $url=NULL, $options=NULL, $headers=false ){
$cacert='c:/wwwroot/cacert.pem';
$vbh = fopen('php://temp', 'w+');
/*
Download a copy of CACERT.pem from
https://curl.haxx.se/docs/caextract.html
save to webserver and modify the $cacert variable
to suit - ensuring that the path you choose is
readable.
*/
$res=array(
'response' => NULL,
'info' => array( 'http_code' => 100 ),
'headers' => NULL,
'errors' => NULL
);
if( is_null( $url ) ) return (object)$res;
session_write_close();
/* Initialise curl request object - these should be OK as-is */
$curl=curl_init();
if( parse_url( $url,PHP_URL_SCHEME )=='https' ){
curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, true );
curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
curl_setopt( $curl, CURLOPT_CAINFO, $cacert );
curl_setopt( $curl, CURLOPT_CAPATH, $cacert ); # NB: CURLOPT_CAPATH expects a directory of CA certificates - with a single bundle file, CURLOPT_CAINFO above is sufficient
}
/* Define standard options */
curl_setopt( $curl, CURLOPT_URL,trim( $url ) );
curl_setopt( $curl, CURLOPT_AUTOREFERER, true );
curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );
curl_setopt( $curl, CURLOPT_FAILONERROR, true );
curl_setopt( $curl, CURLOPT_HEADER, false );
curl_setopt( $curl, CURLINFO_HEADER_OUT, false );
curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
curl_setopt( $curl, CURLOPT_BINARYTRANSFER, true );
curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 20 );
curl_setopt( $curl, CURLOPT_TIMEOUT, 60 );
curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.38 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.38' );
curl_setopt( $curl, CURLOPT_MAXREDIRS, 10 );
curl_setopt( $curl, CURLOPT_ENCODING, '' );
/* enhanced debug */
curl_setopt( $curl, CURLOPT_VERBOSE, true );
curl_setopt( $curl, CURLOPT_NOPROGRESS, true );
curl_setopt( $curl, CURLOPT_STDERR, $vbh );
/* Assign runtime parameters as options to override defaults if needed. */
if( isset( $options ) && is_array( $options ) ){
foreach( $options as $param => $value ) curl_setopt( $curl, $param, $value );
}
/* send any headers with the request that are needed */
if( $headers && is_array( $headers ) ){
curl_setopt( $curl, CURLOPT_HTTPHEADER, $headers );
}
/* Execute the request and store responses */
$res=(object)array(
'response' => curl_exec( $curl ),
'info' => (object)curl_getinfo( $curl ),
'errors' => curl_error( $curl )
);
rewind( $vbh );
$res->verbose=stream_get_contents( $vbh );
fclose( $vbh );
curl_close( $curl );
return $res;
}
Then, to use it:
$url='https://www.example.com/api/';
$args=array();
$headers=array(
'xxxxxx-Username: xxx',
'xxxxxx-Password: xxx',
'Content-Type: application/xml'
);
$res=curl( $url, $args, $headers );
if( $res->info->http_code==200 ){
#cool - use $res->response in further processing
print_r( $res->response ); # without a second argument print_r actually outputs the data
}else{
# useful information will be displayed here...
printf('<h1>Verbose debug info</h1><pre>%s</pre>',print_r($res->verbose,true));
printf('<h1>Info</h1><pre>%s</pre>',print_r($res->info,true));
}
Update to indicate how to send POST data:
You use the $options parameter to supply different runtime configuration to the curl request, like so:
$url='https://www.example.com/api/';
$args=array(
CURLOPT_POST => true,
CURLOPT_POSTFIELDS => $send_body
);
$headers=array(
'xxxxxx-Username: xxx',
'xxxxxx-Password: xxx',
'Content-Type: application/xml'
);
$res=curl( $url, $args, $headers );
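Here $send_body is assumed to hold the raw XML payload, so it has to exist before $args is built. A minimal, self-contained sketch - the payload element names are pure assumptions, while the response element names follow the sample AuthInfo document shown earlier:
$send_body = '<?xml version="1.0" encoding="UTF-8"?><login><company>crmapp</company></login>'; // assumed payload - element names depend on the actual API
$args=array(
    CURLOPT_POST        => true,
    CURLOPT_POSTFIELDS  => $send_body
);
$res = curl( $url, $args, $headers );
/* Parse whatever XML comes back - even an error such as AuthStatus 503 is valid XML */
$xml = simplexml_load_string( $res->response );
if( $xml !== false ){
    printf( '%s - %s', $xml->AuthStatus->Id, $xml->AuthStatus->Description );
}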
I'm trying to simulate a real browser request using cURL with proxy rotation. I searched about it, but none of the answers I found worked.
Here is the code:
$url= 'https://www.stubhub.com/';
$proxy = '1.10.185.133:30207';
$userAgent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36';
$curl = curl_init();
curl_setopt( $curl, CURLOPT_URL, trim($url) );
curl_setopt($curl, CURLOPT_REFERER, trim($url));
curl_setopt( $curl, CURLOPT_RETURNTRANSFER, TRUE );
curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, TRUE );
curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 0 );
curl_setopt( $curl, CURLOPT_TIMEOUT, 0 );
curl_setopt( $curl, CURLOPT_AUTOREFERER, TRUE );
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
$cacert='C:/xampp/htdocs/cacert.pem';
curl_setopt( $curl, CURLOPT_CAINFO, $cacert );
curl_setopt($curl, CURLOPT_COOKIEFILE,__DIR__."/cookies.txt");
curl_setopt ($curl, CURLOPT_COOKIEJAR, dirname(__FILE__) . '/cookies.txt');
curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
curl_setopt( $curl, CURLOPT_USERAGENT, $userAgent );
//Headers
$header = array();
$header[] = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
$header[] = "Accept-Language: cs,en-US;q=0.7,en;q=0.3";
$header[] = "Accept-Encoding: utf-8";
$header[] = "Connection: keep-alive";
$header[] = "Host: www.gumtree.com";
$header[] = "Origin: https://www.stubhub.com";
$header[] = "Referer: https://www.stubhub.com";
curl_setopt( $curl, CURLOPT_HEADER, $header );
curl_setopt($curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
curl_setopt($curl, CURLOPT_HTTPPROXYTUNNEL, TRUE);
curl_setopt($curl, CURLOPT_PROXY, $proxy);
curl_setopt($curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
$data = curl_exec( $curl );
$info = curl_getinfo( $curl );
$error = curl_error( $curl );
echo '<pre>';
$all = array( 'data' => $data, 'info' => $info, 'error' => $error ); // collect the results for inspection
print_r($all);
echo '</pre>';
Here is what I get when I run the script:
Array
(
[data] => HTTP/1.1 200 OK
HTTP/1.0 405 Method Not Allowed
Server: nginx
Content-Type: text/html; charset=UTF-8
Accept-Ranges: bytes
Expires: Thu, 01 Jan 1970 00:00:01 GMT
Cache-Control: private, no-cache, no-store, must-revalidate
Surrogate-Control: no-store, bypass-cache
Content-Length: 9411
X-EdgeConnect-MidMile-RTT: 203
X-EdgeConnect-Origin-MEX-Latency: 24
Date: Sat, 03 Nov 2018 17:15:56 GMT
Connection: close
Strict-Transport-Security: max-age=31536000; includeSubDomains
[info] => Array
(
[url] => https://www.stubhub.com/
[content_type] => text/html; charset=UTF-8
[http_code] => 405
[header_size] => 487
[request_size] => 608
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 0
[total_time] => 38.484
[namelookup_time] => 0
[connect_time] => 2.219
[pretransfer_time] => 17.062
[size_upload] => 0
[size_download] => 9411
[speed_download] => 244
[speed_upload] => 0
[download_content_length] => 9411
[upload_content_length] => -1
[starttransfer_time] => 23.859
[redirect_time] => 0
[redirect_url] =>
[primary_ip] => 1.10.186.132
[certinfo] => Array
(
)
[primary_port] => 42150
[local_ip] => 192.168.1.25
[local_port] => 59320
)
[error] =>
)
I also get a reCAPTCHA, which says:
Due to high volume of activity from your computer, our anti-robot software has blocked your access to stubhub.com. Please solve the puzzle below and you will immediately regain access.
When I visit the website using any browser, the page is displayed.
But with the above script, it's not.
So what am I missing to make the cURL request look like a real browser request and not be detected as a bot?
Or if there is an API/library that could do it, please mention it.
Would Guzzle or similar fix this issue?
"So what am I missing to make the curl request like a real browser request"
My guess is they are using a simple cookie check. There are more sophisticated methods that allow recognizing automation such as cURL with a high degree of reliability, especially if coupled with lists of proxy IP addresses or IPs of known bangers.
Your first step is to intercept the outgoing browser request using pcap or something similar, then try and replicate it using cURL.
One other simple thing to check is whether your cookie jar has been seeded with some telltale. I routinely do that too, since most scripts on the Internet are just copy-pastes and don't pay much attention to these details.
The thing that would for sure make you bounce from any of my systems is that you're sending a referer, but you don't seem to actually have connected to the first page. You're practically saying "Well met again" to a server that is seeing you for the first time. You might have saved a cookie from that first encounter, and the cookie has now been invalidated (actually been marked "evil") by some other action. At least in the beginning, always replicate the visiting sequence from a clean slate.
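A minimal sketch of that clean-slate sequence with cURL: fetch the landing page first with an empty cookie jar, reuse the same jar for the next request, and only then send a Referer that is actually true (the follow-up URL is a placeholder):
function visit( $url, $jar, $referer=null ){
    $curl = curl_init( $url );
    curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
    curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );
    curl_setopt( $curl, CURLOPT_COOKIEFILE, $jar );   // read cookies collected so far
    curl_setopt( $curl, CURLOPT_COOKIEJAR, $jar );    // write any new cookies back
    curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' );
    if( $referer ) curl_setopt( $curl, CURLOPT_REFERER, $referer );
    $body = curl_exec( $curl );
    curl_close( $curl );
    return $body;
}
$jar = __DIR__ . '/cookies_' . uniqid() . '.txt';      // a fresh, empty jar = a clean slate
visit( 'https://www.stubhub.com/', $jar );             // first contact: no Referer, no cookies yet
$page = visit( 'https://www.stubhub.com/some-event/', $jar, 'https://www.stubhub.com/' ); // placeholder follow-up URL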
You might try and adapt this answer, also cURL-based. Always verify actual traffic using a MitM SSL-decoding proxy.
Now, the real answer - what do you need that information for? Can you get it somewhere else? Can you ask for it explicitly, maybe reach an agreement with the source site?
I wrote code to send hit requests through a list of proxy server IP addresses, but this code is giving the error
504 Gateway Time-out
I also tried increasing the timeout in php.ini, but that is not working either. Here is the code I am trying to use:
<?php
$curl = curl_init();
$timeout = 300;
$proxies = file("proxy.txt");
$r="https://www.youtube.com/watch?v=iglQXfPXJHE";
//$r ="https://www.youtube.com/watch?v=rcWMxmKbj7c";
// Not more than 2 at a time
for($x=0;$x<2000; $x++){
//setting time limit to zero will ensure the script doesn't get timed out
set_time_limit(300);
//now we will separate proxy address from the port
//$PROXY_URL=$proxies[$getrand[$x]];
echo $proxies[$x];
curl_setopt($curl, CURLOPT_URL,$r);
curl_setopt($curl , CURLOPT_PROXY , preg_replace('/\s+/', '',$proxies[$x]));
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5");
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, $timeout);
curl_setopt($curl, CURLOPT_REFERER, "http://google.com/");
$text = curl_exec($curl);
echo "Hit Generated:";
echo htmlentities($x);
}
?>
Any help is appreciated. Thank you
Given a text file, called proxy.txt, with the following content
198.110.57.6 8080 US United States anonymous no yes 1 minute ago
35.193.215.131 8080 US United States anonymous no yes 1 minute ago
198.50.219.239 80 CA Canada anonymous no yes 1 minute ago
217.61.124.144 80 IT Italy anonymous no yes 1 minute ago
171.255.199.5 80 VN Vietnam anonymous no no 1 minute ago
And the following PHP code
define('ROOT','c:/wwwroot');
function curlproxy( $url, $ip, $port, $https ){
$cacert=ROOT . '/cacert.pem';
$curl=curl_init();
if( $https===true || trim( (string)$https )==='yes' ){ # accept a boolean true or the yes/no column from proxy.txt
curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, 0 );
curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
curl_setopt( $curl, CURLOPT_CAINFO, realpath( $cacert ) );
$proxy='https://'.$ip .':' . $port;
} else {
$proxy='http://'.$ip .':' . $port;
}
$vbh = fopen('php://temp', 'w+');
curl_setopt( $curl, CURLOPT_URL, $url );
curl_setopt( $curl, CURLOPT_AUTOREFERER, TRUE );
curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, TRUE );
curl_setopt( $curl, CURLOPT_FRESH_CONNECT, TRUE );
curl_setopt( $curl, CURLOPT_FORBID_REUSE, TRUE );
curl_setopt( $curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1 );
# curl_setopt( $curl, CURLOPT_CLOSEPOLICY, CURLCLOSEPOLICY_OLDEST ); # removed in PHP 5.6 - omit on modern PHP
curl_setopt( $curl, CURLOPT_MAXCONNECTS, 1 );
curl_setopt( $curl, CURLOPT_FAILONERROR, TRUE );
curl_setopt( $curl, CURLOPT_RETURNTRANSFER, TRUE );
curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 20 );
curl_setopt( $curl, CURLOPT_TIMEOUT, 20 );
curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36' );
curl_setopt( $curl, CURLINFO_HEADER_OUT, FALSE );
curl_setopt( $curl, CURLOPT_NOBODY, TRUE );
curl_setopt( $curl, CURLOPT_PROXY, $proxy );
curl_setopt( $curl, CURLOPT_HTTPPROXYTUNNEL, TRUE );
curl_setopt( $curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP );
curl_setopt( $curl, CURLOPT_VERBOSE, TRUE );
curl_setopt( $curl, CURLOPT_NOPROGRESS, TRUE );
curl_setopt( $curl, CURLOPT_STDERR, $vbh );
$payload=(object)array_filter( array(
'response' => curl_exec( $curl ),
'info' => (object)curl_getinfo( $curl ),
'errors' => curl_error( $curl ),
'request' => array(
'url' => $url,
'ip' => $ip,
'port' => $port,
'https' => $https,
'proxy' => $proxy
)
)
);
curl_close( $curl );
rewind( $vbh );
$payload->verbose=stream_get_contents( $vbh );
fclose( $vbh );
return $payload;
}
$data=array();
$url='https://www.youtube.com/watch?v=iglQXfPXJHE';
$list = file('c:/temp/proxy.txt');
foreach( $list as $i => $line ){
list($ip,$port,$code,$country,$anonymous,$google,$https,$up)=explode(chr(9),$line);
$data[]=curlproxy( $url, $ip, $port, $https );
}
echo '<pre>',print_r($data,true),'</pre>';
This gave reasonable results for certain proxies chosen randomly from free-proxy-list.net, of which a small snippet is shown here:
Array
(
[0] => stdClass Object
(
[info] => stdClass Object
(
[url] => https://lightspeed.ravennaschools.org/access?YT91X4Q5J1HNFABCRE8BZ5FZ22WS4KQ2
[content_type] => text/html
[http_code] => 200
[header_size] => 721
[request_size] => 970
[filetime] => -1
[ssl_verify_result] => 19
[redirect_count] => 1
[total_time] => 1.763
[namelookup_time] => 0
[connect_time] => 0.14
[pretransfer_time] => 0.624
[size_upload] => 0
[size_download] => 0
[speed_download] => 0
[speed_upload] => 0
[download_content_length] => 0
[upload_content_length] => 0
[starttransfer_time] => 0.764
[redirect_time] => 0.999
[certinfo] => Array
(
)
)
[request] => Array
(
[url] => https://www.youtube.com/watch?v=iglQXfPXJHE
[ip] => 198.110.57.6
[port] => 8080
[https] => yes
[proxy] => https://198.110.57.6:8080
)
[verbose] => * About to connect() to proxy 198.110.57.6 port 8080 (#0)
* Trying 198.110.57.6... * connected
* Connected to 198.110.57.6 (198.110.57.6) port 8080 (#0)
* Establish HTTP proxy tunnel to www.youtube.com:443
> CONNECT www.youtube.com:443 HTTP/1.1
Host: www.youtube.com:443
User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36
Proxy-Connection: Keep-Alive
< HTTP/1.1 200 Connection established
<
* Proxy replied OK to CONNECT request
* successfully set certificate verify locations:
* CAfile: C:\wwwroot\cacert.pem
CApath: none
* SSL connection using AES256-SHA
* Server certificate:
* subject: C=US; ST=California; L=Mountain View; O=Google Inc; CN=*.google.com
* start date: 2017-07-25 08:46:44 GMT
* expire date: 2017-10-17 08:28:00 GMT
* subjectAltName: www.youtube.com matched
* issuer: C=US; ST=California; L=Bakersfield; O=Lightspeed Systems; OU=Support; CN=Lightspeed Rocket; emailAddress=support@lightspeedsystems.com
* SSL certificate verify result: self signed certificate in certificate chain (19), continuing anyway.
> HEAD /watch?v=iglQXfPXJHE HTTP/1.1
User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36
Host: www.youtube.com
Accept: */*
< HTTP/1.1 302 Moved Temporarily
< Server: squid/3.3.13
< Date: Thu, 10 Aug 2017 06:52:52 GMT
< Content-Length: 0
< Location: https://lightspeed.ravennaschools.org/access?YT91X4Q5J1HNFABCRE8BZ5FZ22WS4KQ2
< X-Cache: MISS from lightspeed.ravennaschools.org
< Via: 1.1 lightspeed.ravennaschools.org (squid/3.3.13)
< Connection: close
<
* Closing connection #0
* Issue another request to this URL: 'https://lightspeed.ravennaschools.org/access?YT91X4Q5J1HNFABCRE8BZ5FZ22WS4KQ2'
* About to connect() to proxy 198.110.57.6 port 8080 (#0)
* Trying 198.110.57.6... * connected
* Connected to 198.110.57.6 (198.110.57.6) port 8080 (#0)
* Establish HTTP proxy tunnel to lightspeed.ravennaschools.org:443
> CONNECT lightspeed.ravennaschools.org:443 HTTP/1.1
Host: lightspeed.ravennaschools.org:443
User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36
Proxy-Connection: Keep-Alive
< HTTP/1.1 200 Connection established
<
* Proxy replied OK to CONNECT request
* successfully set certificate verify locations:
* CAfile: C:\wwwroot\cacert.pem
CApath: none
* SSL connection using AES256-SHA
* Server certificate:
* subject: OU=Domain Control Validated; CN=lightspeed.ravennaschools.org
* start date: 2017-08-01 16:01:01 GMT
* expire date: 2020-08-01 16:01:01 GMT
* subjectAltName: lightspeed.ravennaschools.org matched
* issuer: C=US; ST=California; L=Bakersfield; O=Lightspeed Systems; OU=Support; CN=Lightspeed Rocket; emailAddress=support@lightspeedsystems.com
* SSL certificate verify result: self signed certificate in certificate chain (19), continuing anyway.
> HEAD /access?YT91X4Q5J1HNFABCRE8BZ5FZ22WS4KQ2 HTTP/1.1
User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36
Host: lightspeed.ravennaschools.org
Accept: */*
Referer: https://www.youtube.com/watch?v=iglQXfPXJHE
< HTTP/1.1 200 OK
< Server: nginx/1.10.0
< Date: Thu, 10 Aug 2017 06:52:53 GMT
< Content-Type: text/html
< Expires: Thu, 10 Aug 2017 06:52:52 GMT
< Cache-Control: no-cache
< Cache-Control: no-cache
< Pragma: no-cache
< X-UA-Compatible: IE=Edge,chrome=1
< X-Lightspeed: suite
< X-Cache: MISS from lightspeed.ravennaschools.org
< Via: 1.1 lightspeed.ravennaschools.org (squid/3.3.13)
< Connection: keep-alive
* no chunk, no close, no size. Assume close to signal end
<
* Closing connection #0
)
If, however, the sole aim of this script is to increase the hit counter, you may need to rethink: that counter does not seem to be affected. But perhaps the above will be of use.
I am trying to use cURL to post the following fields to PandaDoc, but for some reason I am getting an error that the values are not being received on their side.
This is the error I am getting:
"type": "validation_error", "detail": {"url": ["This field is required."], "name": ["This field is required."]}}
I am posting with the following:
$docurl = "myurl.com/document.pdf";
$headr = array();
$headr[] = 'Content-length: 0';
$headr[] = 'Content-Type: application/json;charset=UTF-8';
$headr[] = "Authorization: Bearer $ACCESS_TOKEN";
$url = 'https://api.pandadoc.com/public/v1/documents';
$ch = curl_init();
curl_setopt($ch, CURLOPT_HTTPHEADER,$headr);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, FALSE);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_VERBOSE, 1);
curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
$postfields = array();
$postfields['name'] = 'PSA';
$postfields['url'] = $docurl;
$postfields['recipients'] = array ([0]=>array(
['email'] => ['dondon#gmail.com'],
['first_name'] => ['don'],
['last_name'] => ['jones'],
['role']=>['u1'] ));
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query( $postfields) );
$ret = curl_exec($ch); //
When I print_r($postfields)
I get
Array ( [name] => PSA [url] => https://api.pandadoc.com/public/v1/documents [recipients] => Array ( ) )
so all the fields aren't getting posted.
But what's weird is that the URL and NAME are in the array, but not the other fields, yet the error is complaining about not receiving NAME and URL.
Dazed and confused...
* Hostname was found in DNS cache
* Trying 54.190.72.92...
* Connected to api.pandadoc.com (54.190.72.92) port 443 (#28)
* SSL connection using TLSv1.2 / ECDHE-RSA-AES256-GCM-SHA384
* Server certificate:
* subject: OU=GT83522468; OU=See www.rapidssl.com/resources/cps (c)14; OU=Domain Control Validated - RapidSSL(R); CN=*.pandadoc.com
* start date: 2014-11-09 00:32:24 GMT
* expire date: 2016-10-11 09:34:58 GMT
* subjectAltName: api.pandadoc.com matched
* issuer: C=US; O=GeoTrust Inc.; CN=RapidSSL SHA256 CA - G3
* SSL certificate verify result: unable to get local issuer certificate (20), continuing anyway.
> POST /public/v1/documents HTTP/1.1
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.3 Safari/533.2
Host: api.pandadoc.com
Accept: */*
Content-length: 0
Content-Type: application/json;charset=UTF-8
Authorization: Bearer [ACCESS TOKEN]
* upload completely sent off: 37 out of 37 bytes
< HTTP/1.1 400 BAD REQUEST
* Server nginx/1.4.6 (Ubuntu) is not blacklisted
< Server: nginx/1.4.6 (Ubuntu)
< Date: Fri, 06 Mar 2015 19:52:53 GMT
< Content-Type: application/json
< Transfer-Encoding: chunked
< Connection: keep-alive
< Vary: Accept
< Allow: GET, POST, DELETE, HEAD, OPTIONS
<
* Connection #28 to host api.pandadoc.com left intact
$string is not defined.
Add
$string = http_build_query( $postfields );
after
$postfields = array();
$postfields['name'] = 'PSA';
$postfields['url'] = $docurl;
// This is invalid array
$postfields['recipients'] = array ([0]=>array(
['email'] => ['dondon#gmail.com'],
['first_name'] => ['don'],
['last_name'] => ['jones'],
['role']=>['u1'] ));
http://php.net/manual/en/function.http-build-query.php
UPDATE
I just read the PandaDoc API docs. They accept JSON data, and your data was invalid, as was the content type.
This should work:
<?php
$url = 'https://api.pandadoc.com/public/v1/documents';
$docurl = "myurl.com/document.pdf";
$postfields = array();
$postfields['name'] = 'PSA';
$postfields['url'] = $docurl;
$postfields['recipients'] = array(
array(
'email' => 'dondon#gmail.com',
'first_name' => 'don',
'last_name' => 'jones',
'role' => 'u1'
)
);
$data_string = json_encode( $postfields );
$headr = array();
$headr[] = 'Content-length: '.strlen( $data_string );
$headr[] = 'Content-type: application/json';
$headr[] = "Authorization: Bearer $ACCESS_TOKEN";
$ch = curl_init( $url );
curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
curl_setopt( $ch, CURLOPT_VERBOSE, 1 );
curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, "POST" );
curl_setopt( $ch, CURLOPT_POSTFIELDS, $data_string );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
curl_setopt( $ch, CURLOPT_HTTPHEADER, $headr );
$result = curl_exec( $ch );
?>
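Once the request goes through, the JSON reply can be inspected in the same script (i.e. just before the closing tag above); a minimal sketch, where the field names follow the validation error shown earlier and everything else is an assumption:
$status  = curl_getinfo( $ch, CURLINFO_HTTP_CODE );
$decoded = json_decode( $result, true );
if( $status >= 400 && isset( $decoded['type'], $decoded['detail'] ) ){
    echo $decoded['type'] . ': ';   // e.g. "validation_error" with a per-field breakdown
    print_r( $decoded['detail'] );
} else {
    print_r( $decoded );            // the successful response payload
}
curl_close( $ch );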
function get_fcontent( $url, $javascript_loop = 0, $timeout = 5 ) {
$url = str_replace( "&", "&", urldecode(trim($url)) );
$cookie = tempnam ("/tmp", "CURLCOOKIE");
$ch = curl_init();
curl_setopt( $ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1" );
curl_setopt( $ch, CURLOPT_URL, $url );
curl_setopt( $ch, CURLOPT_COOKIEJAR, $cookie );
curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
curl_setopt( $ch, CURLOPT_ENCODING, "" );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
curl_setopt( $ch, CURLOPT_AUTOREFERER, true );
curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false ); # required for https urls
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, $timeout );
curl_setopt( $ch, CURLOPT_TIMEOUT, $timeout );
curl_setopt( $ch, CURLOPT_MAXREDIRS, 10 );
$content = curl_exec( $ch );
$response = curl_getinfo( $ch );
curl_close ( $ch );
if ($response['http_code'] == 301 || $response['http_code'] == 302) {
ini_set("user_agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1");
if ( $headers = get_headers($response['url']) ) {
foreach( $headers as $value ) {
if ( substr( strtolower($value), 0, 9 ) == "location:" )
return get_fcontent( trim( substr( $value, 9, strlen($value) ) ) );
}
}
}
if ( ( preg_match("/>[[:space:]]+window\.location\.replace\('(.*)'\)/i", $content, $value) || preg_match("/>[[:space:]]+window\.location\=\"(.*)\"/i", $content, $value) ) && $javascript_loop < 5) {
return get_fcontent( $value[1], $javascript_loop+1 );
} else {
return array( $content, $response );
}
}
$url="http://lp.hm.com/hmprod?set=key[source],value[/model/2013/2AE 0165378 025 37 2916.jpg]&set=key[rotate],value[]&set=key[width],value[]&set=key[height],value[]&set=key[x],value[]&set=key[y],value[]&set=key[type],value[STILL_LIFE_FRONT]&hmver=0&call=url[file:/product/large]";
$image= get_fcontent($url);
print_r($image);
This returns http_code 400 and blank content, but only for this particular site; all other sites' images are fetched properly.
Array
(
[url] => http://lp.hm.com/hmprod?set=key[source],value[/model/2013/2AE 0165378 025 37 2916.jpg]&set=key[rotate],value[]&set=key[width],value[]&set=key[height],value[]&set=key[x],value[]&set=key[y],value[]&set=key[type],value[STILL_LIFE_FRONT]&hmver=0&call=url[file:/product/large]
[content_type] =>
[http_code] => 400
[header_size] => 166
[request_size] => 334
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 0
[total_time] => 0.39
[namelookup_time] => 0.047
[connect_time] => 0.187
[pretransfer_time] => 0.187
[size_upload] => 0
[size_download] => 0
[speed_download] => 0
[speed_upload] => 0
[download_content_length] => 0
[upload_content_length] => 0
[starttransfer_time] => 0.39
[redirect_time] => 0
[certinfo] => Array
(
)
[primary_ip] => 95.100.71.34
[primary_port] => 80
[local_ip] => 192.168.1.42
[local_port] => 62061
[redirect_url] =>
)
Replace the spaces in your URL and it will work:
$url = str_replace(" ", '%20', $url);
Encode the URL values.
Try this:
$url="http://lp.hm.com/hmprod?set=key[source],value[/model/2013/2AE%200165378%20025%2037%202916.jpg]&set=key[rotate],value[]&set=key[width],value[]&set=key[height],value[]&set=key[x],value[]&set=key[y],value[]&set=key[type],value[STILL_LIFE_FRONT]&hmver=0&call=url[file:/product/large]";
$image= get_fcontent($url);
print_r($image);
Here is sample code:
$url= "http://lp.hm.com/hmprod?set=key[source],value[/model/2013/2AE%200165378%20025%2037%202916.jpg]&set=key[rotate],value[]&set=key[width],value[]&set=key[height],value[]&set=key[x],value[]&set=key[y],value[]&set=key[type],value[STILL_LIFE_FRONT]&hmver=0&call=url[file:/product/large]";
$ch=curl_init();
curl_setopt( $ch, CURLOPT_URL, $url );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt( $ch, CURLOPT_BINARYTRANSFER, true);
$content = curl_exec( $ch );
$response = curl_getinfo( $ch );
curl_close ( $ch );
echo "<pre>";
print_r( $response);
echo "<pre>";