<?php
// Question script: download a page, strip its <script> blocks, then try to collect
// every anchor's label and href into two parallel arrays.
// NOTE(review): this is the failing version under discussion; the notes below mark
// the calls and properties that the DOM extension does not provide.
$url='http://edition.cnn.com/?fbid=4OofUbASN5k';
$var = fread_url($url);// function calling to get the page from curl
$search = array('#<script[^>]*?>.*?</script>#si'); // Strip out javascript
$var = preg_replace($search, "\n", html_entity_decode($var)); // Strip out javascript
$linklabel = array();
$link = array();
// NOTE(review): DOMDocument's constructor takes (version, encoding), not markup,
// so passing $var here does not parse the page.
$dom = new DOMDocument($var);
// The parse call below is commented out, so $dom never receives the HTML.
#$dom->loadHTML($var);
$xpath = new DOMXPath($dom);// Grab the DOM nodes
// NOTE(review): DOMXPath has no find() method (query() is the lookup call),
// so the script aborts on this line.
foreach($xpath->find('a') as $element) {
// NOTE(review): innerText and href are not DOMElement properties; textContent and
// getAttribute("href") are the DOM equivalents.
array_push($linklabel, $element->innerText);
// print on an array emits the word "Array", not the collected values.
print $linklabel;
array_push($link, $element->href);
print $link.'<br>';
}
/**
 * Download the document at $url and return its contents.
 *
 * Uses the cURL extension when it is loaded (following redirects and writing
 * cookies to cookie.txt); otherwise falls back to reading the URL through fopen().
 *
 * @param string $url Address of the page to download.
 * @return string|false The downloaded text, or false when the transfer or the
 *                      stream open fails.
 */
function fread_url($url) {
    if (function_exists("curl_init")) {
        $user_agent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
        curl_setopt($ch, CURLOPT_HTTPGET, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
        // With CURLOPT_RETURNTRANSFER set, curl_exec() yields the body or false.
        $html = curl_exec($ch);
        curl_close($ch);
        return $html;
    }

    // Fallback for installations without the cURL extension.
    $hfile = fopen($url, "r");
    if (!$hfile) {
        return false;
    }

    // Start from an empty string so the append below never touches an undefined variable.
    $html = '';
    while (!feof($hfile)) {
        $chunk = fgets($hfile, 1024);
        if ($chunk === false) {
            // Read error or end of data: stop instead of spinning on a dead stream.
            break;
        }
        $html .= $chunk;
    }
    fclose($hfile);

    return $html;
}
I need to separate links and link labels into two separate arrays. I followed several forum posts and put this code together, but it raises an error. I don't know about the find function used in the code.
Several problems, mainly calls to nonexistent functions and references to nonexistent properties. Correct version:
<?php
// Sample markup to parse; substitute the downloaded page here.
$var = <<<EOD
<html>
sdfd
</html>
EOD;
// Start from empty lists so both variables exist even when the page has no links.
$linklabel = array();
$link = array();
$dom = new DOMDocument();
// Real-world HTML is rarely valid: have libxml collect its parse warnings
// internally instead of printing them, then restore the previous setting.
$previous = libxml_use_internal_errors(true);
$dom->loadHTML($var);
libxml_clear_errors();
libxml_use_internal_errors($previous);
$xpath = new DOMXPath($dom);
// '//a' selects every anchor element anywhere in the document.
foreach ($xpath->query('//a') as $element) {
    $linklabel[] = $element->textContent;
    $link[] = $element->getAttribute("href");
}
var_dump($linklabel);
var_dump($link);
Related
I know, there is already a thread about this ... see How to pass the steam age check using curl? ... but I'm a new user and can't comment in an existing thread and the answer marked as solution there doesn't work anymore.
I had my own code that worked fine in the past (around 2017), but doesn't work anymore as well.
Here is my code that worked in the past:
/**
 * POST the age-check form fields on an existing cURL handle and follow
 * 301/302 redirects manually, re-issuing the request at each new location.
 *
 * The function never sets the initial URL itself; the caller is expected to
 * have configured it on $ch.
 *
 * @param resource|CurlHandle $ch             cURL handle to execute.
 * @param int                 $redirects      Incremented (by reference) for every redirect followed.
 * @param bool                $curlopt_header True to return headers and body, false for the body only.
 * @param string              $gameid         Optional app id appended to the cookie path (empty by default).
 * @return string|false Response text, or false when the transfer fails.
 */
function curl_redirect_exec2($ch, &$redirects, $curlopt_header = false, $gameid = '') {
    // Upper bound so a redirect loop cannot recurse forever.
    $max_redirects = 20;

    // Note: a fresh cookie jar file is created for every call of this function.
    $ckfile = tempnam(sys_get_temp_dir(), "CURLCOOKIE");
    curl_setopt($ch, CURLOPT_COOKIEJAR, $ckfile);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5");
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, 'snr=1_agecheck _agecheck__age-gate&ageDay=1&ageMonth=May&ageYear=1990');
    curl_setopt($ch, CURLOPT_FRESH_CONNECT, true);
    curl_setopt($ch, CURLOPT_UNRESTRICTED_AUTH, true);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $ckfile);
    //new start
    curl_setopt($ch, CURLOPT_COOKIE, 'mature_content=1; path=/app/'.$gameid.';');
    //new end
    // Headers are always requested so the redirect target can be read from them.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $data = curl_exec($ch);
    if ($data === false) {
        return false;
    }

    $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    if (($http_code == 301 || $http_code == 302) && $redirects < $max_redirects) {
        // libcurl reports the absolute redirect target when it is not following redirects itself.
        $url = (string) curl_getinfo($ch, CURLINFO_REDIRECT_URL);
        if ($url === '') {
            // Fall back to the raw header; the match is case-insensitive because
            // header names may arrive in lower case.
            list($header) = explode("\r\n\r\n", $data, 2);
            $matches = array();
            if (preg_match('/^(?:Location|URI):\s*(\S.*?)\s*$/mi', $header, $matches)) {
                $url = $matches[1];
            }
        }
        if ($url !== '') {
            curl_setopt($ch, CURLOPT_URL, $url);
            $redirects++;
            // Forward the caller's options so the final hop honours them as well.
            return curl_redirect_exec2($ch, $redirects, $curlopt_header, $gameid);
        }
    }

    if ($curlopt_header) {
        return $data;
    }

    // Strip the header block; a response without a blank-line separator has no body.
    $parts = explode("\r\n\r\n", $data, 2);
    return isset($parts[1]) ? $parts[1] : '';
}
And here is the code sample from the thread linked above that also seemed to work in the past (but doesn't anymore):
<?php
// Sample from the linked thread: POST to a Steam store page while sending a
// "mature_content" cookie, follow redirects, and print whatever comes back.
$url = "http://store.steampowered.com/app/312660/";
// Cookie-file and form-field setup left disabled in the quoted sample:
// $file = __DIR__ . DIRECTORY_SEPARATOR . "cookie.txt";
// $postData = array(
// 'ageDay' => '31',
// 'ageMonth' => 'July',
// 'ageYear' => '1993'
// );
$ch = curl_init();
curl_setopt($ch,CURLOPT_URL,$url);
curl_setopt($ch,CURLOPT_POST,true);
curl_setopt($ch,CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13");
// NOTE(review): $postData is only defined inside the commented-out block above,
// so as written an undefined variable is passed as the POST body.
curl_setopt($ch,CURLOPT_POSTFIELDS,$postData);
// curl_setopt($ch,CURLOPT_COOKIESESSION, true);
// curl_setopt($ch,CURLOPT_COOKIEJAR,$file);
// curl_setopt($ch,CURLOPT_COOKIEFILE,$file);
// The cookie is supplied directly as a string instead of through the cookie-file options above.
$strCookie = 'mature_content=' . 1 . '; path=/';
curl_setopt( $ch, CURLOPT_COOKIE, $strCookie );
curl_setopt($ch,CURLOPT_FOLLOWLOCATION,true);
// Return the response from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$data = curl_exec($ch);
curl_close($ch);
echo $data;
?>
What I tested so far:
You can use the game "RUST" as an example: https://store.steampowered.com/app/252490/
Page redirects to age check: https://store.steampowered.com/agecheck/app/252490/
I saw that the cookie set uses other names now ("wants_mature_content" instead of "mature_content" in the JavaScript), but even after changing the PHP to use the new name, it doesn't work.
JavaScript code from Steam page:
/**
 * Dismiss the age gate: store the "wants_mature_content" cookie and then
 * navigate to the game's store page.
 */
function HideAgeGate( )
{
	// Fixed to false in this page, so the app-specific cookie path is used.
	var hideEverywhere = false;
	console.log(hideEverywhere);

	var cookiePath;
	if ( hideEverywhere )
		cookiePath = '/';
	else
		cookiePath = "\/app\/252490";

	// Arguments: cookie name, value, lifetime in days, path.
	V_SetCookie( 'wants_mature_content', 1, 365, cookiePath );
	document.location = "https:\/\/store.steampowered.com\/app\/252490\/Rust\/?snr=";
}
Edit: I also found the function "V_SetCookie" ... in https://store.akamai.steamstatic.com/public/shared/javascript/shared_global.js ... that is called by the code above:
/**
 * Write a cookie through document.cookie.
 *
 * @param strCookieName Cookie name.
 * @param strValue      Cookie value, concatenated as given.
 * @param expiryInDays  Lifetime in days; when missing or falsy no expires attribute is written.
 * @param path          Cookie path; '/' is used when missing or falsy.
 */
function V_SetCookie( strCookieName, strValue, expiryInDays, path )
{
	var cookiePath = path ? path : '/';

	var expiresPart = '';
	if ( typeof expiryInDays != 'undefined' && expiryInDays )
	{
		var msPerDay = 1000 * 60 * 60 * 24;
		var expiry = new Date();
		expiry.setTime( expiry.getTime() + msPerDay * expiryInDays );
		expiresPart = '; expires=' + expiry.toGMTString();
	}

	document.cookie = strCookieName + '=' + strValue + expiresPart + ';path=' + cookiePath;
}
Can somebody help please? :-) Thanks!
This is working for me
<?php
// Fetch a Steam store page with the age-check cookies supplied up front,
// then dump the returned HTML.
$url = 'https://store.steampowered.com/bundle/5699/Grand_Theft_Auto_V_Premium_Edition/';

// Request header carrying the birth time and last age-check cookies.
$cookieHeader = 'Cookie: birthtime=470682001;lastagecheckage=1-0-1985;';

$request = curl_init($url);
// An empty encoding string lets cURL advertise every encoding it supports.
curl_setopt($request, CURLOPT_ENCODING, '');
curl_setopt($request, CURLOPT_HTTPHEADER, [$cookieHeader]);
curl_setopt($request, CURLOPT_RETURNTRANSFER, true);

$html = curl_exec($request);
curl_close($request);

var_dump($html);
Ok, got it working.
There are actually 3 cookies: "wants_mature_content", "lastagecheckage" and "birthtime"
Open a page with age check in e.g. Chrome, click on "View Page" and then look for the 3 cookies in chrome (and their content). Set all 3 cookies with PHP's curl and it's working ;-)
I want to get the paragraphs under this tag:
I tried to:
<?php
// Load the remote article and print the text of every child node of every <p> element.
$document = new DOMDocument();
$document->loadHTMLFile("https://sabq.org/xMQjz2");

$paragraphs = $document->getElementsByTagName('p');
if ($paragraphs !== null) {
    foreach ($paragraphs as $paragraph) {
        // Each child (text node or nested element) is printed on its own line.
        foreach ($paragraph->childNodes as $child) {
            echo $child->textContent, "\n";
        }
    }
}
?>
And I got the paragraphs I wanted along with unwanted ones, and they were duplicated.
EDIT:
I changed the URL, hope it works
The link that you provided throws an error when accessed, so instead of the DOMDocument class you were using, I used a function that fetches the contents of the webpage with cURL.
I used preg_match and regex to extract the specific element that you were looking for.
Here's the code:
<?php
//opened url
$content = get_fcontent("https://sabq.org/%D8%B4%D8%A7%D9%87%D8%AF-%D8%A3%D9%84%D9%81-%D8%B5%D9%81%D8%AD%D8%A9-%D8%AA%D8%B1%D9%88%D9%8A-%D9%82%D8%B5%D8%B5-%D8%A7%D9%84%D8%AD%D8%B1%D9%85%D9%8A%D9%86-%D9%85%D9%86%D8%B0-%D8%A7%D9%86%D8%B7%D9%84%D8%A7%D9%82-%D8%A7%D9%84%D8%B9%D9%87%D8%AF-%D8%A7%D9%84%D8%B3%D8%B9%D9%88%D8%AF%D9%8A");
// get_fcontent() returns array(body, transfer info); element 0 is the page HTML
// (false when the download failed).
$html = (is_array($content) && is_string($content[0])) ? $content[0] : '';
//extract specific html tag and its innerHTML
// The attribute may sit anywhere inside the opening <p ...> tag, and the "s"
// modifier lets the paragraph body span several lines.
$pattern = '/<p\b[^>]*\sng-bind-html="getContent\(material\.content\)"[^>]*>.*?<\/p>/s';
//display the wanted element
// Only print when the element was actually found, so a miss does not read an undefined index.
if (preg_match($pattern, $html, $matches)) {
    echo $matches[0];
}
//getting contents using curl because threw error: failed to open stream
/**
 * Download a page with cURL, following HTTP redirects and simple
 * JavaScript "window.location" redirects.
 *
 * @param string $url             Address to fetch; it is trimmed, URL-decoded, and
 *                                HTML-escaped "&amp;" is turned back into "&".
 * @param int    $javascript_loop Number of redirect hops this function has already
 *                                taken; it stops re-requesting after 5.
 * @param int    $timeout         Connect timeout and total timeout, in seconds.
 * @return array array($content, $response): the response body (false when the
 *               transfer failed) and the curl_getinfo() array for the request.
 */
function get_fcontent( $url, $javascript_loop = 0, $timeout = 5 ) {
    $max_hops = 5;
    $user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1";

    // Links copied out of HTML carry "&amp;"; convert it back to a plain ampersand.
    $url = str_replace( "&amp;", "&", urldecode(trim($url)) );

    $cookie = tempnam ("/tmp", "CURLCOOKIE");
    $ch = curl_init();
    curl_setopt( $ch, CURLOPT_USERAGENT, $user_agent );
    curl_setopt( $ch, CURLOPT_URL, $url );
    curl_setopt( $ch, CURLOPT_COOKIEJAR, $cookie );
    curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
    curl_setopt( $ch, CURLOPT_ENCODING, "" );
    curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
    curl_setopt( $ch, CURLOPT_AUTOREFERER, true );
    // NOTE(review): this disables TLS certificate verification; kept to preserve the
    // original behaviour, but it should be re-enabled wherever a CA bundle is available.
    curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
    curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, $timeout );
    curl_setopt( $ch, CURLOPT_TIMEOUT, $timeout );
    curl_setopt( $ch, CURLOPT_MAXREDIRS, 10 );
    $content = curl_exec( $ch );
    $response = curl_getinfo( $ch );
    curl_close ( $ch );

    // A redirect status that cURL did not follow: look the target up and retry by hand.
    if ( ( $response['http_code'] == 301 || $response['http_code'] == 302 ) && $javascript_loop < $max_hops ) {
        ini_set("user_agent", $user_agent);
        if ( $headers = get_headers($response['url']) ) {
            foreach( $headers as $value ) {
                if ( stripos( $value, "location:" ) === 0 ) {
                    // Recurse into this function (the previously called get_url() does not exist).
                    return get_fcontent( trim( substr( $value, 9 ) ), $javascript_loop + 1, $timeout );
                }
            }
        }
    }

    // Detect a JavaScript redirect in the body; a failed transfer has no body to scan.
    $body = is_string($content) ? $content : '';
    $is_js_redirect = preg_match("/>[[:space:]]+window\.location\.replace\('(.*)'\)/i", $body, $value)
        || preg_match("/>[[:space:]]+window\.location\=\"(.*)\"/i", $body, $value);
    if ( $is_js_redirect && $javascript_loop < $max_hops ) {
        return get_fcontent( $value[1], $javascript_loop + 1, $timeout );
    }

    return array( $content, $response );
}
?>
For testing, I created a local file called test.html:
<!DOCTYPE html>
<html>
<head>
<title></title>
</head>
<body>
<p>This should not be showing.</p>
<p ng-bind-html="getContent(material.content)" id="dev-content" class="details-text">This is a test.</p>
</body>
</html>
I used the local url http://localhost/example/test.html instead of the link you provided for testing purposes.
And from the local file I created for testing, I got the following result:
<p ng-bind-html="getContent(material.content)" id="dev-content" class="details-text">This is a test.</p>
Here's the result that I got from the original url:
<p ng-bind-html="getContent(material.content)" id="dev-content" class="details-text"></p>
I hope this helps!
I am new to OOP. I am just learning and I have to use it to find the actual/final URL of a link that redirects.
class ABC {
    /**
     * Fetch $url and follow 301/302 redirects manually, one request per hop.
     *
     * The return value is the raw response of the last request (headers plus
     * body, because CURLOPT_HEADER is enabled), not the final URL. To learn
     * the final URL, pass a callback: it is invoked as
     * $redirectcallback($newUrl, $previousUrl) on every hop, so the last
     * $newUrl it receives is the final address.
     *
     * @param string        $url              Address to request.
     * @param callable|null $redirectcallback Optional observer called for each redirect.
     * @param int           $maxRedirects     Remaining hops allowed before giving up.
     * @return string|false Raw response of the last request, or false when a transfer fails.
     */
    public function getWebPage($url, $redirectcallback = null, $maxRedirects = 10){
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // Redirects are handled below so each hop can be reported to the callback.
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($ch, CURLOPT_HEADER, true);
        curl_setopt($ch, CURLOPT_NOBODY, false);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1) Gecko/20061024 BonEcho/2.0");

        $html = curl_exec($ch);
        $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        // Absolute redirect target as resolved by libcurl (handles relative Location values).
        $nurl = (string) curl_getinfo($ch, CURLINFO_REDIRECT_URL);
        // Every hop opens its own handle, so release it before recursing.
        curl_close($ch);

        if ($html === false) {
            return false;
        }

        if (($http_code == 301 || $http_code == 302) && $maxRedirects > 0) {
            if ($nurl === '') {
                // Fall back to the raw header; header names are matched case-insensitively.
                list($httpheader) = explode("\r\n\r\n", $html, 2);
                $matches = array();
                if (preg_match('/^(?:Location|URI):\s*(\S.*?)\s*$/mi', $httpheader, $matches)) {
                    $nurl = $matches[1];
                }
            }
            if ($nurl !== '') {
                if ($redirectcallback) { // callback
                    $redirectcallback($nurl, $url);
                }
                return $this->getWebPage($nurl, $redirectcallback, $maxRedirects - 1);
            }
        }

        return $html;
    }
}
The method above calls itself recursively, following each redirect until it reaches the actual URL. However, I am already instantiating the class in another file:
// Ask an ABC instance to fetch the redirecting link.
$url = "http://www.anrdoezrs.net/asd/?ak=123";
$obj = new ABC();
$someurl = $obj->getWebPage($url);
But this does not work. Please suggest.
// Minimal illustration of a method invoking itself through $this.
Class ABC {
// NOTE(review): as written xyz() calls itself unconditionally and has no base
// case, so a real implementation needs a stopping condition before this call.
public function xyz($url){
$html = $this->xyz($url);
return $html;
}
}
In another class call it with
// Call the method on an ABC instance from outside the class.
$url = "Some value";
$obj = new ABC();
$someurl = $obj->xyz($url);
Here is the code
<?php
// Download a page and append a unique "[n]" marker to each link label in its HTML.
$url='http://isrc.ulster.ac.uk';
$var = fread_url($url);// function calling to get the page from curl
$linklabel = array();
$linklabelmod = array();
$link = array();
$dom = new DOMDocument();
if (is_string($var) && $var !== '') {
    // Real-world HTML is rarely valid: collect libxml's parse warnings
    // internally instead of printing them, then restore the previous setting.
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($var);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
}
$xpath = new DOMXPath($dom);
foreach ($xpath->query('//a') as $element) {
    $linklabel[] = $element->textContent;
    $link[] = $element->getAttribute("href");
}
$i = count($linklabel); // number of anchors found
foreach ($linklabel as $k => $label) {
    if ($label === '') {
        // Anchors without text (e.g. image links) have nothing to tag.
        $linklabelmod[$k] = '';
        continue;
    }
    $linklabelmod[$k] = $label."[$k]";
    // Plain substring replacement of the first occurrence: labels may contain "/",
    // "$" or "\" which a regex pattern or replacement string would misinterpret.
    // A label whose text is not found verbatim in the markup is left unchanged.
    $pos = strpos($var, $label);
    if ($pos !== false) {
        $var = substr_replace($var, $linklabelmod[$k], $pos, strlen($label));//modifying link labels
    }
}
print $var;
/**
 * Download the document at $url and return its contents.
 *
 * Uses the cURL extension when it is loaded (following redirects and writing
 * cookies to cookie.txt); otherwise falls back to reading the URL through fopen().
 *
 * @param string $url Address of the page to download.
 * @return string|false The downloaded text, or false when the transfer or the
 *                      stream open fails.
 */
function fread_url($url){
    if (function_exists("curl_init")) {
        $user_agent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
        curl_setopt($ch, CURLOPT_HTTPGET, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
        // With CURLOPT_RETURNTRANSFER set, curl_exec() yields the body or false.
        $html = curl_exec($ch);
        curl_close($ch);
        return $html;
    }

    // Fallback for installations without the cURL extension.
    $hfile = fopen($url, "r");
    if (!$hfile) {
        return false;
    }

    // Start from an empty string so the append below never touches an undefined variable.
    $html = '';
    while (!feof($hfile)) {
        $chunk = fgets($hfile, 1024);
        if ($chunk === false) {
            // Read error or end of data: stop instead of spinning on a dead stream.
            break;
        }
        $html .= $chunk;
    }
    fclose($hfile);

    return $html;
}
?>
Not all link labels are changing. I want each link label to be modified by appending a unique number. Please run the code so that you can see the error. Thanks in advance.
What about checking if a match was found before attempting to replace it? Using preg_match.
It is not my intention to ruin your question by asking this, but how would one reply to someone elses comment? I only see the 'add comment' on my own comments, thank you.
I have built a php script which receives values in $_POST and $_FILES
I'm catching those values, and then trying to use CURL to make posts to FogBugz.
I can get text fields to work, but not files.
// Build a FogBugz "new" case request from the submitted form and POST it with cURL.
// $token, $form and $uploads are not defined in this snippet; they come from code not shown here.
$request_url = "http://fogbugz.icarusstudios.com/fogbugz/api.php";
$newTicket = array();
$newTicket['cmd'] = 'new';
$newTicket['token'] = $token;
$newTicket['sPersonAssignedTo'] = 'autobugz';
// Assemble the event text, one line per form entry, as "<element 2>: <element 0>".
$text = "\n";
foreach( $form as $pair ) {
$text .= $pair[2] . ": " . $pair[0] . "\n";
}
$text = htmlentities( $text );
$newTicket['sEvent'] = $text;
// Index used to build the 'File<n>' field names.
// NOTE(review): $f is never incremented in the loop below, so every accepted
// upload is stored under the same 'File0' key and only the last one survives.
$f = 0;
foreach ($_FILES as $fk => $v) {
// Skip file inputs that were left empty.
if ($_FILES[$fk]['tmp_name'] != '') {
$extension = pathinfo( $_FILES[$fk]['name'], PATHINFO_EXTENSION);
//only take the files we have specified above
if (in_array( array( $fk, $extension ) , $uploads)) {
// NOTE(review): this stores the temporary path as a plain string, so cURL posts
// the path text as an ordinary field value rather than the file's contents.
$newTicket['File'.$f] = $_FILES[$fk]['tmp_name'];
//echo ( $_FILES[$fk]['name'] );
//echo ( $_FILES[$fk]['tmp_name'] );
//print $fk;
//print '<br/>';
//print_r( $v );
}
}
}
$ch = curl_init( $request_url );
$timeout = 5;
curl_setopt($ch, CURLOPT_POST,1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// Passing an array makes cURL send the fields as multipart/form-data.
curl_setopt($ch, CURLOPT_POSTFIELDS, $newTicket );
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$data = curl_exec($ch);
curl_close($ch);
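To upload a file with cURL, the path must be marked as a file upload: historically by prefixing it with @ (not #), and on PHP 5.5 and later by wrapping it in a CURLFile object. See this example: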
// Upload a file as a multipart/form-data POST and capture the server's response.
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_VERBOSE, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible;)");
curl_setopt($ch, CURLOPT_URL, _VIRUS_SCAN_URL);
curl_setopt($ch, CURLOPT_POST, true);
// same as <input type="file" name="file_box">
$path = "/path/to/myfile.jpg";
$post = array(
    // CURLFile (PHP 5.5+) is the supported way to attach a file. The legacy
    // "@" path prefix is kept only as a fallback for older PHP versions.
    "file_box" => class_exists('CURLFile') ? new CURLFile($path) : '@' . $path,
);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
$response = curl_exec($ch);
curl_close($ch);
Taken from http://dtbaker.com.au/random-bits/uploading-a-file-using-curl-in-php.html.
The other answer -- for FogBugz reasons only --
$f cannot be set to 0 initially. It must start at 1, so the files go through as File1, File2, etc.
The @ prefix on the file path (not #) is also key.