I am receiving an invalid cookie string when trying to capture the cookie using file_get_contents and curl. The cookie received while browsing directly from the browser is valid/active, but the cookie captured via file_get_contents or curl seems to be invalid.
I am trying to capture it with file_get_contents like this:
// Request options for the HTTP stream wrapper; each 'header' entry is one raw header line.
$context = array(
    'http' => array(
        'method' => 'GET',
        'header' => array(
            // Single-quoted PHP strings keep "\/" verbatim, so the wildcard
            // media type has to be written as a plain */*.
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'User-Agent:Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/63.0.3239.84 Chrome/63.0.3239.84 Safari/537.36',
        ),
    ),
);
$cxContext = stream_context_create($context);
file_get_contents($url, false, $cxContext);

// Collect the "name=value" part of every Set-Cookie response header.
$cookies = array();
// $http_response_header is only populated when the HTTP wrapper received a response.
if (isset($http_response_header)) {
    foreach ($http_response_header as $hdr) {
        // Header names are case-insensitive, hence the /i modifier.
        if (preg_match('/^Set-Cookie:\s*([^;]+)/i', $hdr, $matches)) {
            // Append instead of overwrite so that every cookie is kept, not just the last one.
            $cookies[] = $matches[1];
        }
    }
}
return $cookies;
I tried playing around with this by setting headers, but the cookie returned is always either expired or simply invalid.
Through a browser, however, the cookie I get is always valid.
Has anyone faced a similar problem? I don't know how to tackle this issue.
There are several unanswered questions from my above comment, but I'll share this bit of code for example purposes. It's what I've used in the past as a base class for browser emulation using cURL:
<?php
// Abort immediately when curl_init() is unavailable, i.e. the cURL extension is not loaded.
if (function_exists("curl_init") === false) {
    throw new Exception("CurlBrowser requires the cURL extension, which is not enabled!");
}
/**
 * Small browser-like wrapper around cURL.
 *
 * Every request sends a configurable User-Agent, optionally goes through a
 * proxy, optionally sends custom headers, and reads/writes cookies from a
 * cookie file so that a session survives across requests.
 */
class CurlBrowser
{
    /** User-Agent string sent with every request. */
    public $userAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
    /*
    Other user agents that can be assigned to $userAgent:
    Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)
    Mozilla/5.0 (Windows NT 6.1; WOW64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1
    Mozilla/5.0 (Windows NT 6.1; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0
    */

    /** Path of the cookie file, or null when cookies are disabled. */
    public $cookiesFile = null;
    /** Proxy passed to CURLOPT_PROXY, or null for a direct connection. */
    public $proxyURL = null;
    /** When non-empty, the body of each successful response is also written to this path. */
    public $saveLastOutput = "";
    /** CA bundle handed to CURLOPT_CAINFO when the file exists. */
    public $caBundle = "cacert.pem";
    /** Extra raw header lines passed to CURLOPT_HTTPHEADER. */
    public $httpHeaders = array();

    /**
     * @param bool|string $UseCookies true  = use "cookies.txt" next to this file,
     *                                string = use that path as the cookie file,
     *                                anything else = no cookie file.
     */
    public function __construct($UseCookies = true)
    {
        if(is_bool($UseCookies) && $UseCookies)
        {
            $this->cookiesFile = dirname(__FILE__)."/cookies.txt";
        }
        elseif(is_string($UseCookies) && ($UseCookies != ""))
        {
            $this->cookiesFile = $UseCookies;
        }
    }

    /**
     * Replaces the list of custom request headers.
     *
     * @param array $arrHeaders raw header lines, e.g. array("Accept: text/html")
     */
    public function SetCustomHTTPHeaders($arrHeaders)
    {
        $this->httpHeaders = $arrHeaders;
    }

    /**
     * Sets the proxy used for subsequent requests.
     *
     * @param string $proxy value for CURLOPT_PROXY
     */
    public function SetProxy($proxy)
    {
        $this->proxyURL = $proxy;
    }

    /**
     * Performs a GET request.
     *
     * @param string $url
     * @return string response body
     * @throws Exception when the transfer fails
     */
    public function Get($url)
    {
        return $this->_request($url);
    }

    /**
     * Performs a POST request.
     *
     * @param string       $url
     * @param array|string $data value for CURLOPT_POSTFIELDS
     * @return string response body
     * @throws Exception when the transfer fails
     */
    public function Post($url,$data = array())
    {
        return $this->_request($url,$data);
    }

    /**
     * Executes one request; a non-null $data turns it into a POST.
     *
     * @param string            $form_url
     * @param array|string|null $data
     * @return string response body
     * @throws Exception carrying the cURL error message when the transfer fails
     */
    private function _request($form_url,$data = null)
    {
        $ch = curl_init($form_url);

        // CA bundle
        $caBundle = $this->caBundle;
        if(file_exists($caBundle))
        {
            // A bare file name is resolved against the current working directory
            if(basename($caBundle) == $caBundle)
            {
                $caBundle = getcwd() . DIRECTORY_SEPARATOR . $caBundle;
            }
            curl_setopt($ch, CURLOPT_CAINFO, $caBundle);
        }

        // Cookies: read from and write back to the same file
        if($this->cookiesFile !== null)
        {
            curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookiesFile);
            curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookiesFile);
        }

        // User Agent
        curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);

        // Misc: body only, no automatic redirects, return instead of print
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_ENCODING, "gzip, deflate");

        // Optional proxy
        if($this->proxyURL !== null)
        {
            curl_setopt($ch, CURLOPT_PROXY, $this->proxyURL);
        }

        // Custom HTTP headers
        if(count($this->httpHeaders))
        {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $this->httpHeaders);
        }

        // POST data
        if($data !== null)
        {
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
        }

        // Run operation
        $result = curl_exec($ch);

        // Capture the error text first, then always close the handle: closing
        // is what makes cURL write the cookie jar, so the next request made by
        // the same script can pick the cookies up again.
        $error = ($result === false) ? curl_error($ch) : null;
        curl_close($ch);

        if($error !== null)
        {
            throw new Exception($error);
        }

        if(!empty($this->saveLastOutput))
        {
            file_put_contents($this->saveLastOutput,$result);
        }
        return $result;
    }
}
?>
You'd use it like so:
<?php
$browser = new CurlBrowser();
$html = $browser->Get("https://....");
...etc...
My gut guess is that you're simply missing a cookie jar in your original code, but that's mostly based on gut feeling, since we don't have all your problem code at this time.
Related
I am using two functions to get the URL of a video for playback.
1. To extract the TikTok video with a watermark:
/**
 * Fetches the page at $this->url and extracts video details from the
 * JSON fragments embedded in it.
 *
 * @return array keys: 'video', 'withoutWaterMark', 'user', 'thumb' (strings on
 *               success, false on failure), 'error' (bool) and 'message'
 *               (false on success, an error text on failure)
 */
public function getDetails()
{
    $url = $this->url;
    $resp = (string) $this->getContent($url);

    // Returns the text between the first $open marker and the next double
    // quote, or null when the marker does not occur in $haystack.
    $between = function ($haystack, $open) {
        $parts = explode($open, $haystack, 2);
        if (count($parts) < 2) {
            return null;
        }
        return explode("\"", $parts[1], 2)[0];
    };

    $video = $between($resp, "\"contentUrl\":\"");
    if ($video === null) {
        return [
            'video'=>false,
            'withoutWaterMark'=>false,
            'user'=>false,
            'thumb'=>false,
            'error'=>true,
            'message'=>"Please double check your url and try again."
        ];
    }

    // The optional fields fall back to an empty string instead of raising
    // undefined-offset errors when their marker is missing from the page.
    $thumb = $between($resp, "\"thumbnailUrl\":[\"");
    if ($thumb === null) {
        $thumb = '';
    }

    // The user name is the path segment that follows "@" in the page URL,
    // e.g. ".../@user/video/123" -> "user".
    $username = '';
    $pageUrl = $between($resp, "\"url\":\"");
    if ($pageUrl !== null) {
        // NOTE(review): un-escapes "\/" in case the embedded JSON escapes slashes — confirm against a live page.
        $pageUrl = str_replace('\\/', '/', $pageUrl);
        $afterAt = explode("@", $pageUrl, 2);
        if (count($afterAt) === 2) {
            $username = explode("/", $afterAt[1], 2)[0];
        }
    }

    return [
        'video'=>$video,
        'withoutWaterMark'=>$this->WithoutWatermark($url),
        'user'=>$username,
        'thumb'=>$thumb,
        'error'=>false,
        'message'=>false
    ];
}
/**
 * Downloads $url with a desktop Chrome User-Agent, following redirects.
 *
 * @param string $url
 * @return string|bool whatever curl_exec() returns: the body, or false on failure
 */
private function cUrl($url)
{
    $handle = curl_init();
    curl_setopt_array($handle, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
        CURLOPT_FOLLOWLOCATION => true,
    ]);

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}
And the other function, which gets the video URL without a watermark, is:
/**
 * Builds the playback URL from the id that follows "video/" in $url.
 *
 * Only the path segment itself is used, so a trailing slash, query string or
 * fragment (e.g. ".../video/6795008547961752326?lang=en") no longer ends up in
 * the video_id parameter. When $url has no "video/" segment the id is empty.
 *
 * NOTE(review): whether this endpoint accepts the id taken from the page URL
 * is not something this code can establish — verify against the API.
 *
 * @param string $url
 * @return string
 */
private function WithoutWatermark($url)
{
    $videoId = '';
    if (preg_match('~video/([^/?#]+)~', (string) $url, $match)) {
        $videoId = $match[1];
    }
    return "https://api2.musical.ly/aweme/v1/playwm/?video_id=" . $videoId;
}
Please help me find the TikTok video ID, or any other way to create a download link for a video without a watermark. How can I find the video ID so that I can use it to build a download link such as "https://api2.musical.ly/aweme/v1/playwm/?video_id=v09044b90000bpfdj5q91d8vtcnie6o0"?
Your function WithoutWatermark doesn't work.
If you have a URL like: tiktok.com/@user/video/123456
then you can make a curl:
$data = cUrl($url)
You'll get a page from tiktok, with regex you can extract url video:
https://v16.muscdn.com/123etc
Then make another curl request to that URL; the response is binary data, and with a regex you can find something like vid:yourvideoid inside it.
I have the class below, in order to analyze a URL
<?php
/**
 * Fetches a URL with cURL and keeps the response header lines.
 */
class URLFetcher {
    /** cURL handle of the request in progress; null when idle. */
    private $ch;
    /** URL given to the constructor. */
    private $url = '';
    /** Trimmed, non-empty header lines collected by the last fetch(). */
    private $headers = [];

    public function __construct(string $url) {
        $this->url = $url;
    }

    /**
     * Executes the request and stores the response header lines.
     *
     * @return URLFetcher $this, for chaining
     * @throws \RuntimeException when the transfer fails
     */
    public function fetch(): URLFetcher {
        $headers = [];
        $this->ch = curl_init($this->url);
        curl_setopt_array($this->ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
            // Called once per header line; must return the number of bytes handled.
            CURLOPT_HEADERFUNCTION => function($curl, $header) use (&$headers) {
                $header2 = trim($header);
                if($header2 !== '') $headers[] = $header2;
                return strlen($header);
            }
        ]);

        // The header callback only runs while the transfer is being executed.
        $body = curl_exec($this->ch);
        $error = ($body === false) ? curl_error($this->ch) : null;
        curl_close($this->ch);
        $this->ch = null;

        if ($error !== null) {
            throw new \RuntimeException("cURL request to {$this->url} failed: {$error}");
        }

        $this->headers = $headers;
        return $this;
    }

    /**
     * @return array header lines collected by the last successful fetch()
     */
    public function getHeaders(): array {
        return $this->headers;
    }
}
?>
I use var_dump in the closure in order to see the headers, as they come from CURL, and at the end, I use var_dump again in order to see all headers.
The problem is that there are no headers printed in the function and at the end, the $headers is printing the empty array.
If I do this without the class, everything works correctly. My question is what am I missing and the code does not work? I hypothesize that it has to do with the closure.
You simply forgot curl_exec($this->ch); at the end of the method.
/**
 * Executes the request for $this->url and dumps the response header lines.
 *
 * @return URLFetcher
 * @throws \RuntimeException when the transfer fails
 */
public function fetch(): URLFetcher {
    $headers = [];
    $this->ch = curl_init($this->url);
    curl_setopt_array($this->ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
        // Called once per header line; must return the number of bytes handled.
        CURLOPT_HEADERFUNCTION => function($curl, $header) use (&$headers) {
            $header2 = trim($header);
            if($header2 !== '') $headers[] = $header2;
            return strlen($header);
        }
    ]);
    // Without this call no transfer happens and the header callback never runs.
    // A failed transfer is reported instead of silently dumping an empty array.
    if (curl_exec($this->ch) === false) {
        throw new \RuntimeException('cURL request failed: ' . curl_error($this->ch));
    }
    // Debug output: dumps the collected headers and ends the script, so the
    // return below is not reached while this line is present.
    exit(var_dump($headers));
    return $this;
}
My function:
/**
 * Downloads $url, cuts out the fragment between the green heading and the
 * "</main>" comment, and extracts the href targets of the links inside it.
 *
 * @param string $url
 * @return array result of preg_match_all(): index 0 holds the full <a ...>
 *               tags, index 1 the href values; both lists are empty when the
 *               page could not be downloaded
 */
function raspislinks($url)
{
    $noMatches = array(array(), array());
    $startMarker = '<strong><em><font color="green"> <h1>';

    $chs = curl_init($url);
    if ($chs === false) {
        return $noMatches;
    }
    curl_setopt($chs, CURLOPT_COOKIEFILE, 'cookies.txt'); // read cookies from this file
    curl_setopt($chs, CURLOPT_COOKIEJAR, 'cookies.txt'); // and save them back to it
    curl_setopt($chs, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($chs, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36 OPR/29.0.1795.60");
    // NOTE(review): certificate and host verification are switched off, which
    // permits man-in-the-middle attacks; enable both if the site's certificate is valid.
    curl_setopt($chs, CURLOPT_SSL_VERIFYPEER, 0); // do not verify the SSL certificate
    curl_setopt($chs, CURLOPT_SSL_VERIFYHOST, 0); // do not verify the certificate's host name
    $htmll = curl_exec($chs);
    curl_close($chs);

    if ($htmll === false) {
        return $noMatches;
    }

    // Drop everything before the first heading marker (keep the page as is when there is none).
    $start = strpos($htmll, $startMarker);
    if ($start !== false) {
        $htmll = substr($htmll, $start);
    }

    // Keep only the part before the end marker; as before, a page without the
    // end marker yields no content at all.
    $stop = strpos($htmll, '<!-- </main>-->');
    $htmll = ($stop === false) ? '' : substr($htmll, 0, $stop);

    // Continue with the text that follows the last heading marker.
    $last = strrpos($htmll, $startMarker);
    if ($last !== false) {
        $htmll = substr($htmll, $last + strlen($startMarker));
    }

    // Turn the relative links into absolute ones.
    $htmll = str_replace('<a href ="', '<a href ="https://nfbgu.ru/timetable/fulltime/', $htmll);

    preg_match_all("/<[Aa][ \r\n\t]{1}[^>]*[Hh][Rr][Ee][Ff][^=]*=[ '\"\n\r\t]*([^ \"'>\r\n\t#]+)[^>]*>/", $htmll, $urls);

    // Hand the matches back to the caller: $urls = raspislinks($url);
    return $urls;
}
How can I use the variable $urls outside the function? It is an array.
"return $urls" is not working, or am I doing something wrong? Please help me.
If you store the matches in $GLOBALS['urls'] inside the function, you can then use $urls in code outside this function.
The $GLOBALS array holds one entry for each variable available in the global scope, so once $GLOBALS['urls'] has been assigned a value, that value can also be referenced as $urls.
Like
function raspislinks($url) {
...
//$GLOBALS['urls'];
preg_match_all("/<[Aa][ \r\n\t]{1}[^>]*[Hh][Rr][Ee][Ff][^=]*=[ '\"\n\r\t]*([^ \"'>\r\n\t#]+)[^>]*>/",
$htmll,
$GLOBALS['urls']
);
}
raspislinks('google.com');
foreach ( $urls as $url) {
}
A simpler way would be to put the data in a local variable and return it from the function
function raspislinks($url) {
...
//$GLOBALS['urls'];
preg_match_all("/<[Aa][ \r\n\t]{1}[^>]*[Hh][Rr][Ee][Ff][^=]*=[ '\"\n\r\t]*([^ \"'>\r\n\t#]+)[^>]*>/",
$htmll,
$t
);
return $t;
}
$urls = raspislinks('google.com');
foreach ( $urls as $url) {
}
I have an open curl session with a cookie file set, so that it remembers the session ID (from logging in). But how do I now modify those cookies?
curl_setopt($curl, CURLOPT_HTTPHEADER, array('Cookie:blabla=bleble'));
Does not seem to act correctly, as it adds another Cookie header, so if for example I had between request headers:
...
Cookie:blabla=uhuhu;tralala=ahahaha
...
And I run this:
curl_setopt($curl, CURLOPT_HTTPHEADER, array('Cookie:blabla=bleble'));
It will simply add another Cookie header with same variable but different value:
...
Cookie:blabla=uhuhu;tralala=ahahaha
...
Cookie:blabla=bleble
So how do I modify existing Cookies instead of adding another Cookie header?
For example, when I log in to the site, keep the session cookies in a file, and then want to replace one in a second request, I get something like this (being sent):
When replacing by CURLOPT_HTTPHEADER
// CURLOPT_HTTPHEADER takes an array of raw header lines, not a single string.
curl_setopt($curl, CURLOPT_HTTPHEADER, array('Cookie: 46db1eb1/sessid=blabla; 46db1eb1/zoom-accessibility=small'));
POST /sysbus/NeMo/Intf/data:setFirstParameter HTTP/1.1
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1
Host: 192.168.1.1
Accept: */*
Cookie: 46db1eb1/sessid=YHGDba31faykfyTJraQMewP+
Content-Type:application/x-sah-ws-1-call+json; charset=UTF-8
X-Context:n1psjUEjqs5j7Bb5vvT0J3wXMunQ2e8c3ksb5Uikcdt9Ct1XOJUmAh8azkOqmMqe
X-Prototype-Version:1.7
X-Requested-With:XMLHttpRequest
Referer:http://192.168.1.1/advConfigAccessType.html
Cookie:46db1eb1/sessid=blabla; 46db1eb1/zoom-accessibility=small
Content-Length: 73
As you can see - cookies are doubled.
When replacing with CURLOPT_COOKIE
curl_setopt($curl, CURLOPT_COOKIE, '46db1eb1/sessid=blabla; 46db1eb1/zoom-accessibility=small');
POST /sysbus/NeMo/Intf/data:setFirstParameter HTTP/1.1
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1
Host: 192.168.1.1
Accept: */*
Cookie: 46db1eb1/sessid=FHjCFhn/VKgkC09y7772fXpp; 46db1eb1/sessid=blabla; 46db1eb1/zoom-accessibility=small
Content-Type:application/x-sah-ws-1-call+json; charset=UTF-8
X-Context:VCtJDc4PrVjybyFcw8jGAc3anO5mXm7GPnfgn8VES54TqwUKr4fP68PhJHtuSn2x
X-Prototype-Version:1.7
X-Requested-With:XMLHttpRequest
Referer:http://192.168.1.1/advConfigAccessType.html
Content-Length: 73
Values are doubled in same cookie header.
curl 7.26.0, Raspbian (Debian 7.8)
You can manipulate the CURLOPT_COOKIEJAR file. Something like this:
<?php
/**
 * Creates a cURL handle for http://localhost/a.php that loads its cookies
 * from, and saves them to, the file $f.
 *
 * @param string $f path of the cookie file
 * @return mixed the handle returned by curl_init()
 */
function init($f)
{
    $handle = curl_init();
    curl_setopt_array($handle, [
        CURLOPT_URL => "http://localhost/a.php",
        CURLOPT_COOKIEJAR => $f,
        CURLOPT_COOKIEFILE => $f,
    ]);

    return $handle;
}
/**
 * Reads a tab-separated cookie file into a list of rows.
 *
 * Lines are split on tabs only: no CSV quote handling is applied and there is
 * no line-length limit, so values containing quotes or long values are read
 * back unchanged. Comment and blank lines are returned too, as rows with a
 * single field.
 *
 * @param string $f path of the cookie file
 * @return array list of rows, each a list of string fields; empty when the
 *               file does not exist or cannot be read
 */
function readCookies($f)
{
    $cookies = [];
    if (!is_readable($f)) {
        return $cookies;
    }

    $lines = file($f, FILE_IGNORE_NEW_LINES);
    if ($lines === false) {
        return $cookies;
    }

    foreach ($lines as $line) {
        // Strip a trailing carriage return left over from CRLF line endings.
        $cookies[] = explode("\t", rtrim($line, "\r"));
    }
    return $cookies;
}
/**
 * Writes rows back to a tab-separated cookie file, one row per line.
 *
 * Fields are joined with plain tabs. No CSV quoting is applied, so a field
 * that contains a space (a comment line, a cookie value) is written verbatim
 * instead of being wrapped in double quotes.
 *
 * @param string $f path of the cookie file
 * @param array  $c list of rows, each a list of fields
 * @throws \RuntimeException when the file cannot be written
 */
function writeCookies($f, $c)
{
    $contents = '';
    foreach ($c as $cookie) {
        $contents .= implode("\t", $cookie) . "\n";
    }

    if (file_put_contents($f, $contents, LOCK_EX) === false) {
        throw new \RuntimeException("Unable to write cookie file: {$f}");
    }
}
$f = './c.tmp';

// First request: lets the server set its cookies.
$curl = init($f);
curl_exec($curl);
// cURL writes the cookie jar when the handle is closed or destroyed, so the
// handle has to go away before the file is read.
curl_close($curl);
unset($curl);

// playing with cookies:
$newCookie = ['blabla', '123'];
$cookies = readCookies($f);

$stored = false;   // true once the new value has been written into the jar
$found = false;    // row index of an existing cookie with the same name
$template = null;  // first real cookie row, used as a pattern for a new row

foreach ($cookies as $key => $val) {
    // ignore comments and empty lines: cookie rows have exactly 7 fields
    if (count($val) != 7) {
        continue;
    }
    if ($template === null) {
        $template = $val;
    }
    if ($val[5] == $newCookie[0]) {
        $found = $key;
        break;
    }
}

// Strict comparison: row index 0 is a valid match and must not count as "not found".
if ($found !== false) {
    $cookies[$found][6] = $newCookie[1];
    $stored = true;
} elseif ($template !== null) {
    // The first cookie row serves as a template, but you may need to set all
    // fields explicitly as they may differ.
    $cookie = $template;
    $cookie[5] = $newCookie[0];
    $cookie[6] = $newCookie[1];
    $cookies[] = $cookie;
    $stored = true;
}

if ($stored) {
    writeCookies($f, $cookies);
}

// Second request: picks the modified jar up again.
$curl = init($f);
if (!$stored) {
    // No usable jar yet: send the cookie as a plain request header instead.
    curl_setopt($curl, CURLOPT_HTTPHEADER, array('Cookie:' . $newCookie[0] . '=' . $newCookie[1]));
}
curl_exec($curl);
While trying to use cUrl with the Post method in Yii 2, I receive a 400 error code.
Bad Request (#400)
Unable to verify your data submission.
The above error occurred while the Web server was processing your request.
Please contact us if you think this is a server error. Thank you.
This is my code, where i instantiate the CurlTool class:
/**
 * Loads the submitted form into a Licitatie model and, when loading succeeds,
 * posts the model attributes to the "evrika" action and prints the response.
 */
public function actionSend() {
    $licitatie = new \app\models\Licitatie;
    $submitted = $licitatie->load(Yii::$app->request->post());
    if (!$submitted) {
        return;
    }

    $fetcher = new \common\components\CurlTool();
    $endpoint = 'http://www.william.ro/licitatia_bursa/frontend/web/index.php/organizator/licitatie/evrika';
    $response = $fetcher->fetchContent($endpoint, $licitatie->attributes);
    print_r($response);
}
/**
 * Returns a fixed JSON document.
 *
 * @return string JSON-encoded payload
 */
public function actionEvrika() {
    $payload = ['a' => 'b'];

    return json_encode($payload);
}
this is the curltool class code:
<?php
namespace common\components;
/**
 * Static helper that downloads a URL with cURL, optionally POSTing form
 * fields and rotating through a list of proxy servers.
 */
class CurlTool {

    /** Named User-Agent strings that can be copied into $options. */
    public static $userAgents = array(
        'FireFox3' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; pl; rv:1.9) Gecko/2008052906 Firefox/3.0',
        'GoogleBot' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
        'IE7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
        'Netscape' => 'Mozilla/4.8 [en] (Windows NT 6.0; U)',
        'Opera' => 'Opera/9.25 (Windows NT 6.0; U; en)'
    );

    /**
     * cURL options applied to every request, after the proxy and POST options.
     *
     * NOTE(review): CURLOPT_SSL_VERIFYPEER is disabled here, which turns off
     * certificate validation for HTTPS requests — confirm this is intended.
     */
    public static $options = array(
        CURLOPT_USERAGENT => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
        CURLOPT_AUTOREFERER => true,
        CURLOPT_FOLLOWLOCATION => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FRESH_CONNECT => true,
        CURLOPT_COOKIEJAR => "cookies.txt",
        CURLOPT_COOKIEFILE => "cookies.txt",
        CURLOPT_SSL_VERIFYPEER => false,
        //CURLOPT_COOKIESESSION => false,
    );

    private static $proxyServers = array();
    private static $proxyCount = 0;
    private static $currentProxyIndex = 0;

    /** Result of curl_getinfo() for the most recent request. */
    public static $getinfo;

    /**
     * Adds a proxy to the rotation used by fetchContent().
     *
     * @param string $url value for CURLOPT_PROXY
     */
    public static function addProxyServer($url) {
        self::$proxyServers[] = $url;
        ++self::$proxyCount;
    }

    /**
     * Downloads $url and returns the body with every run of whitespace
     * collapsed into a single space.
     *
     * @param string            $url
     * @param array|string|null $fields  array of form fields to POST (entries
     *                                   with an empty key are skipped), an
     *                                   already encoded request body, or null
     *                                   for a plain GET
     * @param bool              $verbose echo progress information
     * @return string
     * @throws \Exception when the handle cannot be created or the transfer fails
     */
    public static function fetchContent($url, $fields = null, $verbose = false) {
        // The leading backslash matters: inside a namespace a bare "Exception"
        // would resolve to a class of that namespace.
        if (($curl = curl_init($url)) == false) {
            throw new \Exception("curl_init error for url $url.");
        }

        if (self::$proxyCount > 0) {
            // Round-robin over the registered proxies
            $proxy = self::$proxyServers[self::$currentProxyIndex++ % self::$proxyCount];
            curl_setopt($curl, CURLOPT_PROXY, $proxy);
            if ($verbose === true) {
                echo "Reading $url [Proxy: $proxy] ... ";
            }
        } else if ($verbose === true) {
            echo "Reading $url ... ";
        }

        // url-ify the data for the POST
        $pairs = array();
        if (is_array($fields)) {
            foreach ($fields as $key => $value) {
                if (empty($key)) {
                    continue;
                }
                if (is_array($value)) {
                    // Nested values are encoded as key[index]=value pairs
                    $encoded = http_build_query(array($key => $value));
                    if ($encoded !== '') {
                        $pairs[] = $encoded;
                    }
                } else {
                    $pairs[] = urlencode($key) . '=' . urlencode((string) $value);
                }
                if ($verbose === true) {
                    echo $key . ": " . print_r($value, true);
                }
            }
        }
        // Joining the pairs leaves no trailing "&" behind.
        $fields_string = implode('&', $pairs);

        $isPost = is_array($fields) && count($fields) > 0;
        if (is_string($fields) && $fields !== '') {
            // A string is sent as the request body exactly as given
            $fields_string = $fields;
            $isPost = true;
        }

        if ($isPost) {
            curl_setopt($curl, CURLOPT_POST, true);
            curl_setopt($curl, CURLOPT_POSTFIELDS, $fields_string);
        }

        if ($verbose === true) {
            echo "Fields string $fields_string ... ";
        }

        curl_setopt_array($curl, self::$options);

        $content = curl_exec($curl);
        self::$getinfo = curl_getinfo($curl);

        // Read the error before closing, and close on the failure path as well.
        $error = ($content === false) ? curl_error($curl) : null;
        curl_close($curl);

        if ($error !== null) {
            throw new \Exception("curl_exec error for url $url " . $error);
        }

        if ($verbose === true) {
            echo "Done.\n";
        }

        // \s already covers newlines, so a single pass collapses all whitespace runs.
        $content = preg_replace('#\s+#', ' ', $content);

        return $content;
    }

}
class Controller extends \yii\base\Controller
{
/**
* @var boolean whether to enable CSRF validation for the actions in this controller.
* CSRF validation is enabled only when both this property and [[Request::enableCsrfValidation]] are true.
*/
public $enableCsrfValidation = false; <- set this to false
...
Be careful: I just found out that changing this setting inside the action itself may not work,
because the request can already fail in beforeAction(), before the action runs.
You can disable CSRF validation in beforeAction() instead:
/**
 * Turns CSRF validation off for the "source-in" action only.
 *
 * The parent implementation is always called, so only the CSRF check is
 * skipped for that action rather than the whole of parent::beforeAction().
 *
 * @param object $action the action about to run; only its id is read here
 * @return bool whether the action should continue to run
 */
public function beforeAction($action)
{
    if ($action->id === 'source-in') {
        // Must be set before delegating to the parent, which reads this flag.
        $this->enableCsrfValidation = false;
    }
    return parent::beforeAction($action);
}
Is there another way to fix it? Maybe generate a new CSRF token and send it in the POST data?