I am trying to get more than 10 results from Google using the search API. I know that the Google search API only returns 10 results per request and that you have to call it 10 times to get a hundred, but I can't seem to get it working. I tried a do-while loop as well as a for loop, but all they seem to do is give me the same results over and over.
<?php
// Fetches Google web-search results for the "input" query parameter and prints
// each result's title, URL and content snippet.
// NOTE(review): the HTTP request below is issued once, before the do/while loop,
// so every pass of the loop prints the same decoded $json. $i is only incremented
// after $url has already been built, so it never reaches the request.
if(isset($_GET['input']) && $_GET['input'] != "")
{
echo "<br />Your Search Results Google:<br /><br />";
$i=0;
// NOTE(review): the string literal below continues onto the next line, so the URL
// contains a line break (and a space before "key="). The statement also has no
// terminating semicolon, which is a parse error.
$url = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0& key=AIzaSyBacVRiPNo7uMqhtjXG4Zeq1DtSQA_UOD4&cx=014517126046550339258:qoem7fagpyk
&num=10&start=".$i."&"."q=".str_replace(' ', '%20', $_GET['input'])
// sendRequest
// note how referer is set manually
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, 'http://www.google.com');
$body = curl_exec($ch);
curl_close($ch);
// now, process the JSON string
$json = json_decode($body,true);
// Runs three times (while $i < 3); nothing inside the loop re-fetches or changes $json.
do
{
foreach ($json['responseData']['results'] as $data) {
echo '
<p>
', $data ['title']," ---> <u>Google SE </u>" ,'<br />
', '<a href ='.$data['url'].'>'.$data['url']."</a>" , '<br />
', $data['content'],'
</p>';
}
$i++;
}
while($i<3);
}
?>
Any input appreciated.
ok. just try the code below:
<?php
// Prints up to 100 Google web-search results for the "input" query parameter by
// requesting ten consecutive pages of ten results each.
if (isset($_GET['input']) && $_GET['input'] != "")
{
    echo "<br />Your Search Results Google:<br /><br />";

    $pageSize  = 10; // results requested per call ("num")
    $pageCount = 10; // number of calls, i.e. up to 100 results

    // Encode every reserved character in the query, not only spaces.
    $query = rawurlencode($_GET['input']);

    for ($page = 0; $page < $pageCount; $page++)
    {
        // "start" is the index of the first result to return, so it has to advance
        // by a whole page per request; advancing it by one repeats most results.
        $start = $page * $pageSize;

        // Built by concatenation so that no line break ends up inside the URL.
        $url = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0"
            . "&key=AIzaSyBacVRiPNo7uMqhtjXG4Zeq1DtSQA_UOD4"
            . "&cx=014517126046550339258:qoem7fagpyk"
            . "&num=" . $pageSize
            . "&start=" . $start
            . "&q=" . $query;

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_REFERER, 'http://www.google.com');
        $body = curl_exec($ch);
        curl_close($ch);

        // Stop paging on a failed transfer, an undecodable body, or an empty page.
        $json = ($body === false) ? null : json_decode($body, true);
        if (!isset($json['responseData']['results'])
            || !is_array($json['responseData']['results'])
            || count($json['responseData']['results']) === 0)
        {
            break;
        }

        foreach ($json['responseData']['results'] as $data) {
            // The URL goes into a quoted attribute, so escape it for HTML.
            $safeUrl = htmlspecialchars($data['url'], ENT_QUOTES, 'UTF-8');
            echo '
<p>
', $data['title'], " ---> <u>Google SE </u>", '<br />
', '<a href="' . $safeUrl . '">' . $safeUrl . '</a>', '<br />
', $data['content'], '
</p>';
        }
    }
}
?>
Related
I am using the attached script to output 100 results at a time from an XML file. The total record count (e.g. 1000) is held in $totalRecordCount = $oXML['total_record_count']; The user enters a search term in a form and the results are output on the same page. Each result is a link to a detail page. How do I integrate pagination (1|2|3...10) if there are 1000 results? I tried integrating something along the lines of "Simple pagination for foreach loop", but with no success. Any help appreciated. Thanks
<?php
// When the "submit2" parameter is present, requests an XML document over cURL and
// prints each <user> element's first_name as a link inside an ordered list.
if (isset($_GET['submit2'])) {
// Collapse whitespace runs in the search term into "+".
$search2 = preg_replace('/\s+/', '+', $_GET["dept-keywords"]);
// NOTE(review): $sanitizeSearch2 is not used anywhere in the visible code; the
// query parameters below are elided, so it may be consumed there.
$sanitizeSearch2 = filter_var($search2, FILTER_SANITIZE_STRING);
// NOTE(review): the raw request value is echoed into the page without escaping.
echo '<b>Results: ' . $_GET["dept-keywords"] . '</b>';
$ch = curl_init();
$baseUrl = 'https://example.com/';
// Template substitution: replaces "{user_id}" in $baseUrl. The literal shown above
// contains no such placeholder, so as written this leaves $baseUrl unchanged.
$templateParamNames = array('{user_id}');
$templateParamValues = array(urlencode('exl_impl'));
$baseUrl = str_replace($templateParamNames, $templateParamValues, $baseUrl);
$queryParams = array(
//info
);
$url = $baseUrl . "?" . http_build_query($queryParams);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// NOTE(review): both TLS host and peer verification are switched off here.
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
$response = curl_exec($ch);
// NOTE(review): $response is parsed without checking whether curl_exec() failed.
$oXML = new SimpleXMLElement($response);
echo '<ol>';
// Read from the root element's total_record_count attribute; not used again in
// the visible code.
$totalRecordCount = $oXML['total_record_count'];
// $count tracks how many <user> elements were printed.
$count = 0;
foreach ($oXML->user as $user) {
$first_name = $user->first_name;
$user_link = strtolower("https://example.com/" . $first_name);
echo '<li>';
echo "<a href='" . $user_link . "'> " . $first_name . " </a>" . "\r\n";
echo '</li>';
$count++;
}
if ($count == 0) {
echo '<label>Sorry, no results!</label>';
}
echo '</ol>';
curl_close($ch);
}
I'm trying to pull all the queries listed on this page using a web crawler (code below), but it seems I have missed something.
My Code goes as:
<?php
/**
 * Fetch a URL with cURL and return whatever curl_exec() yields.
 *
 * Redirects are followed, peer-certificate verification is turned off, and
 * cookies are read from and written to "cookie.txt".
 *
 * @param string $url Address to request.
 * @return string|bool Result of curl_exec(); the body is returned as a string
 *                     because CURLOPT_RETURNTRANSFER is enabled.
 */
function getSslPage($url){
    $handle = curl_init();
    // All options applied in one call; the original set CURLOPT_SSL_VERIFYPEER twice
    // with the same value.
    curl_setopt_array($handle, array(
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => 0,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => TRUE,
        CURLOPT_COOKIEJAR      => "cookie.txt",
        CURLOPT_COOKIEFILE     => "cookie.txt",
        CURLOPT_USERAGENT      => "Chrome/36.0.1985.125",
    ));
    return curl_exec($handle);
}
// Downloads the forum index page, pulls out every <td class="FootNotes2"> cell,
// and prints each cell's tag-stripped text, preceded by a heading derived from the
// host of the first link found in that cell.
$milesfeed = getSslPage('http://www.usmleforum.com/forum/index.php?forum=1');
// Only matches cells whose opening tag is exactly <td class="FootNotes2">.
preg_match_all('/<td class="FootNotes2">(.*?)<\/td>/s',$milesfeed,$links);
$milesfeed_links=[];
$milesfeed_text=[];
// NOTE(review): $fourth is not used anywhere in the visible code.
$fourth="abc";
// $third holds the previous cell's first host label; "abc" marks "nothing seen yet".
$third="abc";
//$third="https://onemileatatime";
foreach($links[1] as $miles){
$milesfeed_text[] = strip_tags($miles);
preg_match_all('/<a target="_top" class="Links2" href="(.*?)">/s', $miles, $link);
// NOTE(review): $link[1][0] is read without checking that the pattern matched.
$milesfeed_links[] = strip_tags($link[1][0]);
// Split "scheme://host/..." and then the host part on dots.
$first=explode("://",$link[1][0]);
$second=explode(".",$first[1]);
//print_r($second);
// Print a heading when the first host label differs from the previous cell's,
// or while $third still holds its initial "abc" value.
if($second[0]!=$third || $third=="abc"){
if($second[0]=="www"){
echo "<h3>".ucfirst($second[1])."</h3>";
}else{
echo "<h3>".ucfirst($second[0])."</h3>";
}
}
echo ''.wordwrap(strip_tags($miles),30).'<br><br>';
$third=$second[0];
}
?>
I spent 4 hours straight trying to figure it out by myself. Any help is greatly appreciated...
The class="FootNotes2" attribute is not on the tr but on the td; maybe that will change your result.
Edit:
Your regex is not right for this situation. You search for <td class="FootNotes2", but between <td and class="FootNotes2" there are other attributes as well.
Changing your regex to <td .*? class="FootNotes2">(.*?)<\/td> may help.
You can use https://regex101.com/ to test. I dropped the curl response in it and changed your regex to test it there
EDIT again:
I took a closer look at your code and at the website you are trying to scrape.
You have far too many errors in your code. In your loop you assume values exist without checking them, and you try to match values and patterns that do not exist in the page you are scraping. Copy the HTML of that page, study it and test your patterns in a regex tester. Also var_dump the results you get back from your functions, and write checks to make sure the data you want to work with exists. Do this step by step and you will get your result.
<?php
/**
 * Return the connection to the local "usmle" database.
 *
 * The link is opened on first use and then reused, so helpers that call con()
 * several times per statement no longer open a new connection on every call.
 *
 * @return mysqli|false The connection, or false if connecting failed.
 */
function con(){
    static $link = null;
    // A failed attempt is not cached, so a later call can try again.
    if ($link === null || $link === false) {
        $link = mysqli_connect('localhost','root','','usmle');
    }
    return $link;
}
/**
 * Insert one row into the `query` table.
 *
 * Uses a prepared statement on a single connection instead of escaping each value
 * on its own connection and interpolating it into the SQL text.
 *
 * @param string $a Query text (column Query).
 * @param string $b Author (column QueryBy).
 * @param string $c Thread URL (column QueryLink).
 * @param string $d Date text (column Date).
 * @return void
 */
function addquery($a,$b,$c,$d){
    $db = con();
    $stmt = mysqli_prepare($db, "insert into query(Query,QueryBy,QueryLink,Date)values(?,?,?,?)");
    if ($stmt === false) {
        // Same outcome as the original on failure: nothing is inserted.
        return;
    }
    // Cast to string so a missing value is stored as '' (as before) rather than NULL.
    $a = (string) $a;
    $b = (string) $b;
    $c = (string) $c;
    $d = (string) $d;
    mysqli_stmt_bind_param($stmt, 'ssss', $a, $b, $c, $d);
    mysqli_stmt_execute($stmt);
    mysqli_stmt_close($stmt);
}
/**
 * Insert one row into the `replies` table.
 *
 * The original placed $a into the SQL unquoted, so escaping did not protect it;
 * both values are now bound through a prepared statement.
 *
 * @param int|string $a Id of the query the reply belongs to (column QueryID).
 * @param string     $b Reply text (column Reply).
 * @return void
 */
function addreply($a,$b){
    // A non-numeric id produced invalid SQL before (no row inserted); keep that
    // outcome instead of silently storing the reply under id 0.
    if (!is_numeric($a)) {
        return;
    }
    $db = con();
    $stmt = mysqli_prepare($db, "insert into replies(QueryID,Reply)values(?,?)");
    if ($stmt === false) {
        return;
    }
    $a = (int) $a;
    $b = (string) $b;
    mysqli_stmt_bind_param($stmt, 'is', $a, $b);
    mysqli_stmt_execute($stmt);
    mysqli_stmt_close($stmt);
}
/**
 * Run the query that selects the largest QueryID in the `query` table.
 *
 * @return mysqli_result|bool Whatever mysqli_query() returns; on success the
 *                            result exposes the value under the key "LastID".
 */
function lastID(){
    return mysqli_query(con(),"select MAX(QueryID) as LastID from query LIMIT 1");
}
/**
 * Fetch a URL with cURL and return whatever curl_exec() yields.
 *
 * Redirects are followed, peer-certificate verification is turned off, and
 * cookies are read from and written to "cookie.txt".
 *
 * @param string $url Address to request.
 * @return string|bool Result of curl_exec(); the body is returned as a string
 *                     because CURLOPT_RETURNTRANSFER is enabled.
 */
function getSslPage($url){
    $handle = curl_init();
    // All options applied in one call; the original set CURLOPT_SSL_VERIFYPEER twice
    // with the same value.
    curl_setopt_array($handle, array(
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => 0,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => TRUE,
        CURLOPT_COOKIEJAR      => "cookie.txt",
        CURLOPT_COOKIEFILE     => "cookie.txt",
        CURLOPT_USERAGENT      => "Chrome/36.0.1985.125",
    ));
    return curl_exec($handle);
}
/**
 * Flatten a scraped HTML fragment into single-line plain text.
 *
 * Line breaks are first turned into commas, then tags are stripped and the ends
 * trimmed, and finally every comma (including ones already present in the input)
 * becomes a space. Because trimming happens while the breaks are still commas,
 * a leading or trailing line break ends up as a leading or trailing space.
 *
 * @param string $str Raw fragment.
 * @return string Tag-free text with line breaks and commas replaced by spaces.
 */
function cleannr($str)
{
    // Order matters: the two-character sequences are replaced before the single ones.
    $lineBreaks = array("\r\n", "\n\r", "\n", "\r");
    $flattened = str_replace($lineBreaks, ',', $str);
    $plainText = trim(strip_tags($flattened));
    return str_replace(',', ' ', $plainText);
}
// Scrape page 1 of forum 1: for every thread linked from the index, print the
// opening post with its author, link and date, store it with addquery(), then
// print and store each reply with addreply().
$set=0;
$athomepage = getSslPage('http://www.usmleforum.com/forum/index.php?forum=1&Page=1');
preg_match_all('/<td width="64%" height="25" class="FootNotes2"><a href="(.*?)" target="_top" class="Links2">(.*?)<\/a>(.*?)<\/td>/s',$athomepage,$pages);
foreach($pages[1] as $links){
    // Push progress to the browser; ob_flush() raises a notice when no output
    // buffer is active, so only call it when one exists.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
    $set++;
    echo 'Query No.'.$set.'<br />';
    $link="http://www.usmleforum.com".$links;
    $ipage=getSslPage($link);
    preg_match_all('/<td width="95%" valign="top" colspan="2" class="FormText2">(.*?)<\/td>/s',$ipage,$query);
    preg_match_all('/<td width="97%" colspan="2" valign="top" class="FootNotes2">(.*?)<\/td>/s',$ipage,$by);
    // A failed download or a page with different markup yields no matches; skip it
    // instead of reading undefined offsets and storing an empty record.
    if (!isset($query[1][0], $by[1][0])) {
        echo 'Skipped (unexpected page markup): '.$link.'<br />';
        continue;
    }
    // The byline is split on "-": the first part is the name, the second the date.
    $explodation=explode("-",$by[1][0]);
    $name=$explodation[0];
    $date=isset($explodation[1]) ? $explodation[1] : '';
    $actualquery=cleannr($query[1][0]);
    echo '<h2>Query : </h2><br />';
    echo 'Query : '.$actualquery.'<br />';
    echo 'Query By : '.$name.'<br />';
    echo 'Link : '.$link.'<br />';
    echo 'Date : '.$date.'<br />';
    addquery($actualquery,$name,$link,$date);
    // Look up the id of the row just inserted. It is reset for every thread so a
    // failed lookup cannot attach replies to the previous thread's id.
    $lastID=null;
    $id=lastID();
    if ($id) {
        foreach($id as $ids){
            $lastID=$ids["LastID"];
        }
    }
    if ($lastID === null) {
        continue;
    }
    echo '<h2>Replies : </h2><br />';
    // Index 0 is the opening post; every later match is a reply.
    $postCount=count($query[1]);
    for($i=1;$i<$postCount;$i++){
        if($query[1][$i]!=""){
            $replyquery=cleannr($query[1][$i]);
            echo 'Reply : '.$replyquery.'<br />';
            addreply($lastID,$replyquery);
        }
    }
}
echo "<center><h2>Scraping Done</h2></center>";
?>
Nailed it.
I have a question, please go to this site https://bri.co.id/web/guest/deposit-interest-rate . On the page you will find this table:
How can I get only the data in that circle on that picture using cURL PHP ?
In my code I'm using $dom->getElementsByTagName('tbody'); but it's showing all the data in the table. Sorry for my bad English, I'm Korean.
Check this code
<?php
// Download the BRI deposit-interest-rate page and print the three captured cells
// of the first table row that matches the pattern below.
$url = "https://bri.co.id/web/guest/deposit-interest-rate";
$timeout = 5;

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => $url,
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_CONNECTTIMEOUT => $timeout,
));
$html = curl_exec($ch);
curl_close($ch);

// One bold, centred cell followed by two centred cells; case-insensitive, and "."
// also matches line breaks.
$rowPattern = "'<td style=\"font-weight: bold; text-align: center;\">(.*?)</td> <td style=\"text-align: center;\">(.*?)</td> <td style=\"text-align: center;\">(.*?)</td>'si";

if (preg_match($rowPattern, $html, $match) === 1) {
    foreach (array(1, 2, 3) as $group) {
        echo $match[$group] . '<br />';
    }
}
And the result is:
< 100 Juta
1
4.75%
Hey I'm sure this is an easy fix but it's driving me nuts.
I'm working with the youtube api and I'm trying to post a user generated search term into the url like so:
<form action="pagination.php" method="post">
<input style="width:50%" type="text" name="search_term">
<input type="submit" value="Submit">
</form>
<?
// Takes the posted search term, queries the YouTube feed as JSON and prints a
// thumbnail and title for every returned entry.
// NOTE(review): $_POST['search_term'] is read unconditionally, so this also runs
// when the page is first loaded without a submitted form.
$search_term = $_POST['search_term'];
$ch = curl_init();
// NOTE(review): the URL literal below spans three source lines, so the line breaks
// become part of the URL; $search_term is also concatenated without URL-encoding.
curl_setopt($ch, CURLOPT_URL, 'http://gdata.youtube.com/feeds
/api/videos?q='.$search_term.'&safeSearch=none&orderby=viewCount&v=2&alt=json&start-
index=75&max-results=50');
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$output = curl_exec($ch);
curl_close($ch);
$data = json_decode($output,true);
$info = $data["feed"];
$video = $info["entry"];
// NOTE(review): $nVideo is not used in the visible part of the snippet.
$nVideo = count($video);
echo "<ul style='float:right'>";
// NOTE(review): the loop variable reuses the name of the array being iterated, and
// the loop body is not closed in this excerpt.
foreach($video as $video) {
echo '<img src="'.$video['media$group']['media$thumbnail'][0]['url'].'"><br><br>';
$title = $video['title']['$t'];
$video_id = $video['media$group']['yt$videoid']['$t'];
echo ''.$title.'';
echo '<br>';
When I run this code nothing happens, however if I manually assign a value to $search_term like this:
$search_term = 'baseball';
everything works perfectly.
Any help would be greatly appreciated!
I found some syntax errors.
The following code works for me.
NOTE: The change of the filename so you can test this file and update the code in your own file.
Maybe this is on purpose, but note that on the first page load the code always performs a request with no search terms, because it is executed before the form has been submitted.
ENTER terms and press Submit button
<form action="ytcurltest1.php" method="post">
<input style="width:50%" type="text" name="search_term">
<input type="submit" value="Submit">
</form>
<?php
// Searches the YouTube feed for the submitted term and prints a thumbnail and
// title for each returned video.

// Do nothing until the form has been submitted with a non-empty term; otherwise
// the first page load would query the API with no search terms.
$search_term = isset($_POST['search_term']) ? trim($_POST['search_term']) : '';
if ($search_term === '')
{
    return;
}

$startIndex = 1;
$maxResults = 25;

// The term is user input: URL-encode it so spaces, "&", "#" etc. cannot break or
// alter the query string.
$url = 'http://gdata.youtube.com/feeds/api/videos?q='
    . urlencode($search_term)
    . '&safeSearch=none&orderby=viewCount&v=2&alt=json'
    . '&start-index=' . $startIndex
    . '&max-results=' . $maxResults;

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$output = curl_exec($ch);
curl_close($ch);

// json_decode() returns null for a non-JSON body, and a feed without hits has no
// "entry" key, so verify the shape before iterating instead of calling count() on
// a possibly-null value.
$data = ($output === false) ? null : json_decode($output, true);
if (!is_array($data) || !isset($data['feed']['entry']) || !is_array($data['feed']['entry']))
{
    echo 'NO RESULTS FOUND. ENTER other terms';
    return;
}

echo "<ul style='float:right'>";
// A distinct loop variable keeps the entry list from being overwritten.
foreach ($data['feed']['entry'] as $entry)
{
    // Titles and URLs come from a remote service; escape them for HTML output.
    $thumbnail = htmlspecialchars($entry['media$group']['media$thumbnail'][0]['url'], ENT_QUOTES, 'UTF-8');
    $title = htmlspecialchars($entry['title']['$t'], ENT_QUOTES, 'UTF-8');
    echo '<img src="' . $thumbnail . '"><br><br>';
    echo $title;
    echo '<br>';
}
echo '</ul>';
?>
A freelance coder wrote some custom PHP cURL for me a couple of months ago to log into my business PayPal account and print shipping labels via Canada Post.
As of roughly one month ago, the login process started failing. See code below. It fails the sanity check every single time. One day this code worked, the next day no dice. The coder cannot find the source of the problem. Can anyone here see anything wrong with this code?
////
// 1. INITIALIZE
// Sets up one cURL handle for the whole session. Any existing cookie file is
// deleted first, and the handle both reads cookies from and writes them to
// PAYPAL_COOKIE_FILE.
cp_progress(1, 'Initializing');
if (file_exists(PAYPAL_COOKIE_FILE)) {
unlink(PAYPAL_COOKIE_FILE); // delete old cookie file
}
$ch = curl_init();
curl_setopt($ch, CURLOPT_VERBOSE, 0);
curl_setopt($ch, CURLOPT_POST, 0);
curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
curl_setopt($ch, CURLOPT_REFERER, '');
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); //Windows 2003 Compatibility
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
// curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.10) Gecko/2009042523 Ubuntu/9.04 (jaunty) Firefox/3.0.10');
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; rv:19.0) Gecko/20100101 Firefox/19.0');
// Response headers are included in the returned text.
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_COOKIEFILE, PAYPAL_COOKIE_FILE);
curl_setopt($ch, CURLOPT_COOKIEJAR, PAYPAL_COOKIE_FILE);
////
// 2. LOG IN
// Loads the ship-now page, scrapes the login form's action URL and hidden CONTEXT
// value, posts the credentials, then scrapes the shipping form's action, CONTEXT
// and auth values from the response.
cp_progress(2, 'Logging in');
$response = cp_get_page($ch, 'https://www.paypal.com/ca/cgi-bin/webscr?cmd=_ship-now');
// echo '<br /><br /><br /><br /> Response = ',$response,'<br /><br /><br /><br />'; // exit;
// NOTE(review): cp_sanity_check() is defined elsewhere; presumably it verifies that
// $response contains the given <title> - confirm against its definition.
cp_sanity_check($response, '<title>Login - PayPal</title>');
$matches = '';
// preg_match("/<form method=\"post\" name=\"login_form\" target=\"paypal\" action=\"([^\"]*)\"/siU", $response, $matches);
// NOTE(review): here and below, $matches[1] is read without checking whether
// preg_match() found anything; the patterns depend on the exact attribute order.
preg_match("/<form method=\"post\" name=\"login_form\" action=\"([^\"]*)\"/siU", $response, $matches);
$form_action = $matches[1];
$matches = '';
preg_match("/<input type=\"hidden\" id=\"CONTEXT_CGI_VAR\" name=\"CONTEXT\" value=\"([^\"]*)\"/siU", $response, $matches);
$form_context = $matches[1];
// echo "form_action = $form_action<br>";
// echo "form_context = $form_context<br>";
/*$query_string = "CONTEXT=$form_context&login_email=" . PAYPAL_EMAIL . "&login_password=" . PAYPAL_PASSWORD
. "&login_cmd=&login_params=&submit.x=Log%20In&operating_system=Linux&form_charset=UTF-8&browser_name=Firefox&browser_version=3";*/
// NOTE(review): "Windows%2NT" below is not a valid percent-escape ("%2N"); the
// scraped context and the credentials are also inserted without URL-encoding.
$query_string = "CONTEXT=$form_context&login_email=" . PAYPAL_EMAIL . "&login_password=" . PAYPAL_PASSWORD
. "&login_cmd=&login_params=&submit.x=Log%20In&operating_system=Windows%2NT&form_charset=UTF-8&browser_name=Firefox&browser_version=19";
// echo '$form_action = ',$form_action;
// echo '$query_string = ',$query_string; exit;
$response = cp_post_page($ch, $form_action, $query_string);
// echo '<br /><br /><br /><br /> Response = ',$response,'<br /><br /><br /><br />'; // exit;
cp_sanity_check($response, '<title>Canada Post - Create Your Shipping Label - PayPal</title>');
$matches = '';
preg_match("/<form method=\"post\" name=\"shippingForm\" action=\"([^\"]*)\"/siU", $response, $matches);
$form_action = $matches[1];
$matches = '';
preg_match("/<input type=\"hidden\" id=\"CONTEXT_CGI_VAR\" name=\"CONTEXT\" value=\"([^\"]*)\"/siU", $response, $matches);
$form_context = $matches[1];
$matches = '';
preg_match("/<input name=\"auth\" type=\"hidden\" value=\"([^\"]*)\"/siU", $response, $matches);
$form_auth = $matches[1];
// echo $form_auth; exit;
////
Paypal has updated the fields in this login page. Replace code:
// Excerpt of the existing login step: scrapes the hidden CONTEXT value from
// $response and builds the login POST body from it and the account credentials.
$matches = '';
preg_match("/<input type=\"hidden\" id=\"CONTEXT_CGI_VAR\" name=\"CONTEXT\" value=\"([^\"]*)\"/siU", $response, $matches);
$form_context = $matches[1];
// echo "form_action = $form_action<br>";
// echo "form_context = $form_context<br>";
/*$query_string = "CONTEXT=$form_context&login_email=" . PAYPAL_EMAIL . "&login_password=" . PAYPAL_PASSWORD
. "&login_cmd=&login_params=&submit.x=Log%20In&operating_system=Linux&form_charset=UTF-8&browser_name=Firefox&browser_version=3";*/
$query_string = "CONTEXT=$form_context&login_email=" . PAYPAL_EMAIL . "&login_password=" . PAYPAL_PASSWORD
. "&login_cmd=&login_params=&submit.x=Log%20In&operating_system=Windows%2NT&form_charset=UTF-8&browser_name=Firefox&browser_version=19";
with:
//changed
// Scrape the hidden CONTEXT and auth fields from the login page in $response and
// include both in the login POST body.
$matches = array();
preg_match("/<input type=\"hidden\" id=\"CONTEXT_CGI_VAR\" name=\"CONTEXT\" value=\"([^\"]*)\"/siU", $response, $matches);
// The attribute value is HTML-escaped in the page; decode it before sending it back.
$form_context = isset($matches[1]) ? html_entity_decode($matches[1]) : '';
$matches = array();
// Must read from $response: the snippet previously used the misspelled, undefined
// $respone, so the pattern never matched and the auth value was always empty.
preg_match("/<input name=\"auth\" type=\"hidden\" value=\"([^\"]*)\"/siU", $response, $matches);
$form_auth = isset($matches[1]) ? urlencode($matches[1]) : '';
//echo "form_action = $form_action<br>";
//echo "form_context = $form_context<br>";
$query_string = "CONTEXT=$form_context&login_email=" . PAYPAL_EMAIL . "&login_password=" . PAYPAL_PASSWORD
. "&login_cmd=&login_params=&submit.x=Log%20In&operating_system=Linux&form_charset=UTF-8&browser_name=Firefox&browser_version=3&auth=$form_auth";
//end changed
You are adding a new preg_match line and $form_auth