I am working with the NCBI BLAST API. I wrote code to fetch the result in XML format, but the result comes back as HTML, and that page then redirects again before the XML is displayed. When I open the original URL in a browser it shows the XML data directly. Can anyone suggest a solution?
Visit this url you can find my problem peerscientist.net. This is the code.
/**
 * Small client for the NCBI BLAST URL API: submits a query (CMD=Put), reads
 * the request ID (RID) from the response page and downloads the result
 * (CMD=Get) as XML.
 */
class BlastAPI
{
    /**
     * Submits the hard-coded blastp query, then echoes the RID, the result URL
     * and a dump of whatever the result URL returned.
     *
     * NOTE(review): the Get request is issued immediately after the Put. If the
     * search has not finished yet the service presumably answers with an HTML
     * status page instead of XML -- confirm against the BLAST URL API docs and
     * poll until the search is ready if that is the case.
     */
    public function blastdata()
    {
        $url = "https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Put&QUERY=SSWWAHVEMGPPDPILGVTEAYKRDTNSKK&PROGRAM=blastp&FILTER=L&DATABASE=nr&FORMAT_TYPE=XML";
        $ncontent = $this->rid($url);
        echo '<br/>RID :' . $ncontent . '<br/>';
        $geturl = $this->geturl($ncontent);
        echo $geturl;
        $fulldata = $this->getfullData($geturl);
        // getfullData() returns false on a transport error; false != '' is
        // false, so failures fall through to the else branch.
        if ($fulldata != '') {
            echo '<pre>';
            var_dump($fulldata);
            echo '</pre>';
        } else {
            echo "Code Once";
        }
    }

    /**
     * Fetches the submission page and extracts the request ID from the
     * element whose id is "rid".
     *
     * @param string $url Full CMD=Put URL.
     * @return string Value attribute of the "rid" element.
     * @throws RuntimeException When the page cannot be fetched or contains no
     *                          "rid" element (previously a fatal error on null).
     */
    public function rid($url)
    {
        echo $url;
        $content = file_get_contents($url);
        if ($content === false || $content === '') {
            throw new RuntimeException('BLAST submission request failed: ' . $url);
        }

        $doc = new DOMDocument();
        // Collect parser warnings internally instead of printing them, then
        // restore whatever setting was active before.
        $previous = libxml_use_internal_errors(true);
        $doc->loadHTML($content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $data = $doc->getElementById("rid");
        if ($data === null) {
            throw new RuntimeException('No element with id "rid" found in the BLAST response');
        }

        return $data->getAttribute('value');
    }

    /**
     * Builds the CMD=Get URL for a request ID.
     *
     * @param string $rid Request ID returned by rid().
     * @return string Result URL asking for XML output.
     */
    public function geturl($rid)
    {
        // Encode the RID so an unexpected character cannot break the query string.
        $srid = rawurlencode($rid);
        return "https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&RID=$srid&FORMAT_TYPE=XML&DESCRIPTIONS=200&ALIGNMENTS=200&NOHEADER=true";
    }

    /**
     * Downloads a URL with cURL.
     *
     * @param string $url URL to fetch.
     * @return string|false Response body, or false when the transfer failed
     *                      (the cURL error is echoed in that case).
     */
    public function getfullData($url)
    {
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        $str = curl_exec($curl);
        // Read the error BEFORE closing: on PHP < 8 the handle is invalid
        // after curl_close() and curl_error() can no longer be queried.
        if ($str === FALSE) {
            echo "cURL Error: " . curl_error($curl);
        }
        curl_close($curl);
        return $str;
    }
}
// Run the demo: submit the query and print the RID, result URL and response.
$nblast = new BlastAPI;
$nblast->blastdata();
Related
I am a beginner in PHP OOP.
I was wondering what the best practice is in PHP OOP when working with classes: keeping values in local variables ($var) or keeping them on the object ($this->var).
As you can see in the code below, I set object properties ($this->var) inside the methods and therefore do not need to return anything from those methods.
Is this a correct way of doing it, or should I return something from each method and pass that value to the next method?
Below is a simple curl class for API that extracts a clients email address by using a code.
Pay specific attention to extract_ib_email() Method, no variable is passed to functions (Methods). Is this correct and is there a better way of doing it please?
//Parent Class
/**
 * Base class that performs a single cURL GET using options stored on the
 * object. Subclasses are expected to fill in the option properties before
 * calling curl_get_data().
 */
class CurlRequest
{
    // Declared explicitly: the original created these on the fly, which is
    // deprecated as "dynamic properties" since PHP 8.2. They stay public so
    // existing code that reads or writes them keeps working.
    public $curloptVERBOSE;        // value passed to CURLOPT_VERBOSE
    public $curloptSSLVERIFYPEER;  // value passed to CURLOPT_SSL_VERIFYPEER
    public $curloptRETURNTRANSFER; // value passed to CURLOPT_RETURNTRANSFER
    public $curlURL;               // URL to request
    public $timeout;               // value passed to CURLOPT_CONNECTTIMEOUT
    public $data;                  // result of the last curl_exec() call

    /**
     * Requests $this->curlURL and stores the result in $this->data.
     *
     * On failure the cURL error number and message are printed as HTML.
     *
     * @return mixed Whatever curl_exec() returned (false on failure).
     */
    protected function curl_get_data()
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_VERBOSE, $this->curloptVERBOSE);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $this->curloptSSLVERIFYPEER);
        curl_setopt($ch, CURLOPT_URL, $this->curlURL);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, $this->curloptRETURNTRANSFER);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout);
        $data = curl_exec($ch);
        if ($data === FALSE) {
            // Must be read before the handle is closed below.
            printf("cUrl error (#%d): %s<br>\n", curl_errno($ch),
                htmlspecialchars(curl_error($ch)));
        }
        curl_close($ch);
        $this->data = $data;
        return $data;
    }
}
/**
 * Looks up the e-mail address belonging to an IB reference (form "ib_xxx")
 * by requesting baseURL + code and reading the "email" field of the first
 * element of the JSON response.
 */
class GetIBEmail extends CurlRequest
{
    private $ibRef;

    // Declared explicitly instead of being created dynamically (deprecated
    // since PHP 8.2). Public to match the visibility they had before.
    public $baseURL;
    public $ibCode;
    public $ib_email;

    /**
     * @param string $ibRef IB reference such as "ib_xxx".
     */
    public function __construct($ibRef)
    {
        $this->baseURL = 'https://www.example.com/api/iel/';
        $this->ibRef = $ibRef;

        // cURL options for this request specifically.
        $this->timeout = 5;
        $this->curloptVERBOSE = true;
        // NOTE(review): certificate verification is switched off here.
        $this->curloptSSLVERIFYPEER = false;
        $this->curloptRETURNTRANSFER = 1;
    }

    /**
     * Stores the part of the reference after the first underscore in
     * $this->ibCode (null when the reference contains no underscore).
     */
    private function getIBCode()
    {
        $ibRefBits = explode('_', $this->ibRef);
        $this->ibCode = isset($ibRefBits[1]) ? $ibRefBits[1] : null;
    }

    /**
     * Builds the request URL from the base URL and the IB code.
     */
    private function construct_ib_curl_url()
    {
        $this->curlURL = $this->baseURL . $this->ibCode;
    }

    /**
     * Performs the request and extracts the e-mail address.
     *
     * The decoded response is printed inside a <pre> block (debug output kept
     * from the original).
     *
     * @return mixed The "email" value of the first response element, or null
     *               when the response is not a JSON list with that field.
     */
    public function extract_ib_email()
    {
        $this->getIBCode();
        $this->construct_ib_curl_url();
        $this->curl_get_data();

        $resArr = json_decode($this->data);
        echo "<pre>"; print_r($resArr); echo "</pre>";

        // Guard against failed requests and unexpected payloads instead of
        // dereferencing a null/false result.
        $ib_email = null;
        if (is_array($resArr) && isset($resArr[0]->email)) {
            $ib_email = $resArr[0]->email;
        }

        $this->ib_email = $ib_email;
        return $ib_email;
    }
}
// IB reference to look up (would normally come from $_SESSION['ib_code']).
$ibCode = 'ib_xxx';
// The reference is handed to the constructor.
$ibEmail = new GetIBEmail($ibCode);
// printf() evaluates its arguments first, so the lookup (and its debug
// output) still happens before the surrounding <br> tags are written.
printf('<br>%s<br>', $ibEmail->extract_ib_email());
I want to get all the information (date, location, price, etc.) from a remote page and parse it to HTML. I tried this script below but nothing appears to be happening:
<?php
// Fetch a search-result page and print the text of every <div> whose class
// attribute is exactly 'col-sm-12 row-space-2 col-md-6'.
$path = 'https://www.airbnb.com/s/Fukuoka-Prefecture--Japan?checkin=10%2F26%2F2015&checkout=11%2F03%2F2015&guests=&ss_id=xyn63dgs&page=1';
$html = file_get_contents($path);

if ($html === false || $html === '') {
    // file_get_contents() returns false when the request fails (for example
    // on an HTTP error status); report it instead of parsing nothing.
    trigger_error('Could not fetch ' . $path, E_USER_WARNING);
} else {
    $dom = new DOMDocument;
    // Real-world HTML is rarely valid; keep libxml's parse warnings internal
    // and restore the previous setting afterwards.
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    foreach ($dom->getElementsByTagName('div') as $tag) {
        if ($tag->getAttribute('class') === 'col-sm-12 row-space-2 col-md-6') {
            echo $tag->nodeValue;
        }
    }
}
If you look at the error returned from the file_get_contents() it's giving you an error:
HTTP request failed! HTTP/1.0 403 Forbidden in yada/yada/yada.php
Try using a cURL library (or script) that will emulate a browser, similar to below (this is what I use when file_get_contents() fails. If it fails, remember that cURL is basically stateless, so if the destination site uses something based on sessions or cookies, you might be S.O.L.):
<?php
/**
 * Thin fluent wrapper around the cURL extension with optional POST payload
 * and an optional browser-like User-Agent header.
 */
class cURL
{
    public $response;
    protected $sendHeader;
    protected $PostFields;
    private $query;

    /**
     * @param string|array $query A URL (requested immediately, result stored
     *                            in $response) or an array of query parameters.
     */
    public function __construct($query = '')
    {
        $this->sendHeader = false;
        $this->query = $query;
        if (!empty($this->query)) {
            if (!is_array($this->query)) {
                $this->response = $this->Connect($this->query);
            } else {
                // NOTE(review): the encoded string is discarded here, exactly
                // as in the original; callers must call encode() themselves.
                $this->encode();
            }
        }
    }

    /**
     * Registers fields to be sent as a POST body by the next Connect() call.
     *
     * @param array $array Field name => value.
     * @return $this
     */
    public function SendPost($array = array())
    {
        $this->PostFields['payload'] = $array;
        $this->PostFields['query'] = http_build_query($array);
        return $this;
    }

    /**
     * Performs the request.
     *
     * @param string $_url   URL to request.
     * @param bool   $deJSON Decode the body as JSON (associative) when true.
     * @return mixed The (optionally decoded) body, or the cURL error message
     *               string when the transfer reported an error.
     */
    public function Connect($_url, $deJSON = true)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $_url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        if (strpos($_url, "https://") !== false) {
            // VERIFYPEER is a boolean option; VERIFYHOST takes 2 for a full check.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        }
        if (!empty($this->PostFields['payload'])) {
            // CURLOPT_POST is a boolean switch, not a field count.
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $this->PostFields['query']);
        }
        if (!empty($this->sendHeader)) {
            curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11) AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 Safari/601.1.56');
        }
        $decode = curl_exec($ch);
        $_response = ($deJSON) ? json_decode($decode, true) : $decode;
        $error = curl_error($ch);
        curl_close($ch);
        return (empty($error)) ? $_response : $error;
    }

    /**
     * Makes subsequent requests send a desktop-browser User-Agent.
     *
     * @return $this
     */
    public function emulateBrowser()
    {
        $this->sendHeader = true;
        return $this;
    }

    /**
     * URL-encodes the array query given to the constructor.
     *
     * @param mixed $_filter When loosely true, the pairs are passed through
     *                       array_filter(). NOTE(review): every pair contains
     *                       "=", so this currently removes nothing.
     * @return string "key=value" pairs joined with "&"; an empty string when
     *                the query is not an array or is empty.
     */
    public function encode($_filter = 0)
    {
        // Start from an empty list so implode() always receives an array;
        // previously an empty or non-array query left $string undefined.
        $string = array();
        if (is_array($this->query)) {
            foreach ($this->query as $key => $value) {
                $string[] = urlencode($key) . '=' . urlencode($value);
            }
        }
        if ($_filter == true) {
            $string = array_filter($string);
        }
        return implode("&", $string);
    }
}
To use:
// Same scrape as before, but fetched through the cURL wrapper with a
// browser-like User-Agent.
$path = 'https://www.airbnb.com/s/Fukuoka-Prefecture--Japan?checkin=10%2F26%2F2015&checkout=11%2F03%2F2015&guests=&ss_id=xyn63dgs&page=1';
$cURL = new cURL();
// Second argument false: return the raw body instead of JSON-decoding it.
$html = $cURL->emulateBrowser()->connect($path, false);

// DOMDocument::loadHTML() rejects an empty document, so only parse when a
// non-empty string came back.
if (is_string($html) && $html !== '') {
    $dom = new DOMDocument;
    // Keep libxml's warnings about invalid markup internal, then restore
    // the previous setting.
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    foreach ($dom->getElementsByTagName('div') as $tag) {
        if ($tag->getAttribute('class') === 'col-sm-12 row-space-2 col-md-6') {
            echo $tag->nodeValue;
        }
    }
}
I am searching products on an e-commerce website using their API. The API returns all the information in JSON format as a response. Each JSON response has 50 items and a URL that has 50 other items and so on. So to match the searched string with product titles, I need to parse all the JSON files sequentially. But if the item to be searched is on the last page, it is taking about 40 minutes to reach there. Can you please help on how I can reduce that time?
My website is currently hosted on localhost(XAMPP).
index.php
// Load the category feed, then search each category listing in turn until
// enough matches were collected (flag == 1) or the listings run out.
$fk = new Flipkart();
$fk->getProductFeedJason($fk->result);
// NOTE(review): $sstring (the search string) is not defined in this snippet;
// presumably it is set earlier in index.php -- confirm.
// The isset() guard stops at the last available listing instead of reading
// undefined offsets when the feed has fewer than 51 entries.
for ($i = 0; $i < 51 && isset($fk->links[$i]); $i++) {
    if ($fk->flag == 1) {
        break;
    }
    $fk->curl($fk->links[$i]);
    $fk->getProducts($fk->result, $sstring);
}
flipkart.php
<?php
/**
 * Client for the Flipkart affiliate product feed: loads the category
 * listing, then walks paginated product pages collecting up to ten products
 * whose title is more than 70% similar to a search string.
 */
class Flipkart
{
    private $baseUrl = "https://affiliate-api.flipkart.net/affiliate/api/psblesson.json";
    private $headers = array(
        'Fk-Affiliate-Id: id',
        'Fk-Affiliate-Token: token'
    );
    public $result;            // raw body of the last request (false on failure)
    public $links;             // listings; numeric keys hold the listing URLs
    public $mainLinksCount=0;
    public $pc=0;              // number of matches collected so far
    public $id;
    public $title;
    public $image;
    public $sellingPrice;
    public $maximumRetailPrice;
    public $productURL;
    // The original code wrote matches to "productUrl" (different case, hence a
    // different, undeclared property) and left $productURL empty. Both are
    // filled now so existing readers of either name keep working.
    public $productUrl;
    public $flag=0;            // set to 1 once ten matches were collected

    /**
     * Lifts the execution-time limit, raises the memory limit and requests
     * the base feed URL.
     */
    function __construct()
    {
        ini_set('max_execution_time', 0);
        ini_set('memory_limit', '1024M');
        $this->curl($this->baseUrl);
    }

    /**
     * Requests a URL with the affiliate headers and stores the body in
     * $this->result.
     *
     * @param string $url URL to request.
     */
    public function curl($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $this->headers);
        curl_setopt($ch, CURLOPT_TIMEOUT, 60);
        // NOTE(review): certificate verification is disabled here.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
        $this->result = curl_exec($ch);
        curl_close($ch);
    }

    /**
     * Reads the category listing from a feed response and stores each
     * listing's 'v0.1.0' GET URL in $this->links under keys 0, 1, 2, ...
     * (the original listing entries remain under their own keys).
     *
     * @param string $res JSON body of the base feed.
     */
    public function getProductFeedJason($res)
    {
        $productFeeds = json_decode($res, TRUE);
        if (!isset($productFeeds['apiGroups']['affiliate']['apiListings'])
            || !is_array($productFeeds['apiGroups']['affiliate']['apiListings'])) {
            // Failed request or unexpected payload: nothing to iterate.
            $this->links = array();
            return;
        }

        $listings = $productFeeds['apiGroups']['affiliate']['apiListings'];
        $this->links = $listings;
        $i = 0;
        foreach ($listings as $value) {
            $this->links[$i] = $value['availableVariants']['v0.1.0']['get'];
            $i = $i + 1;
        }
    }

    /**
     * Scans a product page and every following page ('nextUrl') for titles
     * more than 70% similar to $str, recording each match. Stops after ten
     * matches (flag = 1) or when there is no next page.
     *
     * Pages are followed in a loop rather than by recursion, so a long feed
     * does not grow the call stack by one frame per page.
     *
     * @param string $res JSON body of the first product page.
     * @param string $str Search string compared against product titles.
     */
    public function getProducts($res, $str)
    {
        while (true) {
            $products = json_decode($res, TRUE);
            if (!is_array($products)) {
                // Failed request or invalid JSON: stop instead of iterating null.
                return;
            }

            $outerPart = (isset($products['productInfoList']) && is_array($products['productInfoList']))
                ? $products['productInfoList']
                : array();
            $nextUrl = isset($products['nextUrl']) ? $products['nextUrl'] : null;

            foreach ($outerPart as $data) {
                $attributes = $data['productBaseInfo']['productAttributes'];
                similar_text($str, $attributes['title'], $percent);
                if ($percent > 70) {
                    $n = $this->pc;
                    $this->id[$n] = $data['productBaseInfo']['productIdentifier']['productId'];
                    $this->title[$n] = $attributes['title'];
                    $this->image[$n] = $attributes['imageUrls']['400x400'];
                    $this->sellingPrice[$n] = $attributes['sellingPrice']['amount'];
                    $this->maximumRetailPrice[$n] = $attributes['maximumRetailPrice']['amount'];
                    $this->productURL[$n] = $attributes['productUrl'];
                    $this->productUrl[$n] = $attributes['productUrl'];
                    $this->pc = $n + 1;
                    if ($this->pc >= 10) {
                        $this->flag = 1;
                        break;
                    }
                }
            }

            if (!$nextUrl || $this->flag != 0) {
                return;
            }
            $this->curl($nextUrl);
            $res = $this->result;
        }
    }
}
?>
So I downloaded a wrapper class from this github link:
https://github.com/ignaciovazquez/Highrise-PHP-Api
and I'm just trying to get any response whatsoever. So far, I can't even authenticate with my credentials, so I was wondering if anyone who has used the API could help me.
I tried running one of the test files on Terminal with no arguments and this is what it told me:
Usage: php users.test.php [account-name] [access-token]
Alright, so then decided to get my credentials. So this is what I understand, and, please, correct if I'm wrong:
the account-name is that part that goes in the url to your highrise account. So if your url is:
https://exampleaccount.highrisehq.com/
then your account name is: "exampleaccount"
and your access token is your authentication token that you can find by going clicking on My info > API token inside your Highrise account.
Is that right?
Well anyways, I enter this info and script terminates with a fatal error and this message:
Fatal error: Uncaught exception 'Exception' with message 'API for User returned Status Code: 0 Expected Code: 200' in /Users/me/Sites/sandbox/PHP/highrise_api_class/lib/HighriseAPI.class.php:137
Stack trace:
#0 /Users/me/Sites/sandbox/PHP/highrise_api_class/lib/HighriseAPI.class.php(166): HighriseAPI->checkForErrors('User')
#1 /Users/me/Sites/sandbox/PHP/highrise_api_class/test/users.test.php(13): HighriseAPI->findMe()
#2 {main}
thrown in /Users/me/Sites/sandbox/PHP/highrise_api_class/lib/HighriseAPI.class.php on line 137
I'm complete n00b and I don't really understand what it's saying so I was wondering if any could help. It would be greatly appreciated.
The source of the test script (users.test.php) is:
<?php
// Command-line smoke test: looks up the authenticated user and then lists
// all users of the given Highrise account.
require_once("../lib/HighriseAPI.class.php");

// Expect exactly two arguments after the script name.
if (count($argv) !== 3) {
    exit("Usage: php users.test.php [account-name] [access-token]\n");
}

$hr = new HighriseAPI();
$hr->debug = false;
$hr->setAccount($argv[1]);
$hr->setToken($argv[2]);

echo "Finding my user...\n";
$user = $hr->findMe();
print_r($user);

echo "Finding all users...\n";
$users = $hr->findAllUsers();
print_r($users);
?>
and the source to the Highrise API wrapper file (Highrise.API.class) is:
<?php
/*
* http://developer.37signals.com/highrise/people
*
* TODO LIST:
* Add Tasks support
* Get comments for Notes / Emails
* findPeopleByTagName
* Get Company Name, etc proxy
* Convenience methods for saving Notes $person->saveNotes() to check if notes were modified, etc.
* Add Tags to Person
*/
/**
 * Wrapper around the Highrise XML API. Holds one cURL handle, an account
 * name (the sub-domain of highrisehq.com) and an API token used as the
 * HTTP basic-auth user name.
 */
class HighriseAPI
{
    public $account;
    public $token;
    protected $curl;
    public $debug;

    /**
     * Creates the cURL handle with XML headers.
     * NOTE(review): certificate and host verification are disabled throughout
     * this class.
     */
    public function __construct()
    {
        $this->curl = curl_init();
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($this->curl, CURLOPT_HTTPHEADER, array('Accept: application/xml', 'Content-Type: application/xml'));
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, 0);
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYHOST, 0);
    }

    /**
     * @param string $account Account name only (e.g. "exampleaccount"); it is
     *                        inserted into "https://<account>.highrisehq.com".
     */
    public function setAccount($account)
    {
        $this->account = $account;
    }

    /**
     * @param string $token API token, sent as the basic-auth user name.
     */
    public function setToken($token)
    {
        $this->token = $token;
        curl_setopt($this->curl, CURLOPT_USERPWD, $this->token . ':x');
    }

    /**
     * Sends a request body to $path using the given HTTP verb.
     *
     * A fresh cURL handle replaces the current one for this request.
     *
     * @param string $path         Path below the account host.
     * @param string $request_body Body to send.
     * @param string $verb         "POST" or a custom verb such as "PUT"/"DELETE".
     * @return mixed Result of curl_exec().
     */
    protected function postDataWithVerb($path, $request_body, $verb = "POST")
    {
        $this->curl = curl_init();
        $url = "https://" . $this->account . ".highrisehq.com" . $path;
        if ($this->debug)
            print "postDataWithVerb $verb $url ============================\n";
        curl_setopt($this->curl, CURLOPT_URL, $url);
        curl_setopt($this->curl, CURLOPT_POSTFIELDS, $request_body);
        if ($this->debug == true)
            curl_setopt($this->curl, CURLOPT_VERBOSE, true);
        curl_setopt($this->curl, CURLOPT_HTTPHEADER, array('Accept: application/xml', 'Content-Type: application/xml'));
        curl_setopt($this->curl, CURLOPT_USERPWD, $this->token . ':x');
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, 0);
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYHOST, 0);
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true);
        if ($verb != "POST")
            curl_setopt($this->curl, CURLOPT_CUSTOMREQUEST, $verb);
        else
            curl_setopt($this->curl, CURLOPT_POST, true);
        $ret = curl_exec($this->curl);
        if ($this->debug == true)
            print "Begin Request Body ============================\n" . $request_body . "End Request Body ==============================\n";
        // The handle is reused by getURL(). A custom verb stays set on the
        // handle until it is explicitly reset, so clear it before switching
        // back to GET; otherwise later reads would be sent as PUT/DELETE.
        if ($verb != "POST")
            curl_setopt($this->curl, CURLOPT_CUSTOMREQUEST, null);
        curl_setopt($this->curl, CURLOPT_HTTPGET, true);
        return $ret;
    }

    /**
     * Requests $path on the account host with the current handle.
     *
     * @param string $path Path below the account host, e.g. "/me.xml".
     * @return mixed Result of curl_exec() (false on transport failure).
     */
    protected function getURL($path)
    {
        curl_setopt($this->curl, CURLOPT_HTTPHEADER, array('Accept: application/xml', 'Content-Type: application/xml'));
        curl_setopt($this->curl, CURLOPT_USERPWD, $this->token . ':x');
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, 0);
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYHOST, 0);
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true);
        $url = "https://" . $this->account . ".highrisehq.com" . $path;
        if ($this->debug == true)
            curl_setopt($this->curl, CURLOPT_VERBOSE, true);
        curl_setopt($this->curl, CURLOPT_URL, $url);
        $response = curl_exec($this->curl);
        if ($this->debug == true)
            print "Response: =============\n" . $response . "============\n";
        return $response;
    }

    /**
     * @return int HTTP status code of the last transfer on the handle
     *             (0 when no response was received).
     */
    protected function getLastReturnStatus()
    {
        return curl_getinfo($this->curl, CURLINFO_HTTP_CODE);
    }

    /**
     * Requests a path and parses the response as XML.
     *
     * @param string $url Path below the account host.
     * @return SimpleXMLElement|false
     */
    protected function getXMLObjectForUrl($url)
    {
        $xml = $this->getURL($url);
        $xml_object = simplexml_load_string($xml);
        return $xml_object;
    }

    /**
     * Throws when the last HTTP status is not one of the expected codes.
     *
     * @param string    $type                  Resource name used in the message.
     * @param int|array $expected_status_codes One code or a list of codes.
     * @throws Exception On any unexpected status. When the status is 0 and
     *                   cURL recorded an error, that error is appended so that
     *                   connection problems (DNS, TLS, wrong account host) are
     *                   visible instead of a bare "Status Code: 0".
     */
    protected function checkForErrors($type, $expected_status_codes = 200)
    {
        if (!is_array($expected_status_codes))
            $expected_status_codes = array($expected_status_codes);

        $status = $this->getLastReturnStatus();
        if (in_array($status, $expected_status_codes))
            return;

        switch ($status)
        {
            case 404:
                throw new Exception("$type not found");
            case 403:
                throw new Exception("Access denied to $type resource");
            case 507:
                throw new Exception("Cannot create $type: Insufficient storage in your Highrise Account");
            default:
                $message = "API for $type returned Status Code: " . $status . " Expected Code: " . implode(",", $expected_status_codes);
                if ($status == 0 && curl_errno($this->curl) != 0)
                    $message .= " (cURL error " . curl_errno($this->curl) . ": " . curl_error($this->curl) . ")";
                throw new Exception($message);
        }
    }

    /* Users */

    /**
     * @return array List of HighriseUser objects for the account.
     * @throws Exception On an unexpected HTTP status.
     */
    public function findAllUsers()
    {
        $xml = $this->getURL("/users.xml");
        $this->checkForErrors("User");
        $xml_object = simplexml_load_string($xml);
        $ret = array();
        foreach ($xml_object->user as $xml_user)
        {
            $user = new HighriseUser();
            $user->loadFromXMLObject($xml_user);
            $ret[] = $user;
        }
        return $ret;
    }

    /**
     * @return HighriseUser The user the token belongs to ("/me.xml").
     * @throws Exception On an unexpected HTTP status.
     */
    public function findMe()
    {
        $xml = $this->getURL("/me.xml");
        $this->checkForErrors("User");
        $xml_obj = simplexml_load_string($xml);
        $user = new HighriseUser();
        $user->loadFromXMLObject($xml_obj);
        return $user;
    }

    /* Tasks */

    /** @return array HighriseTask objects from "/tasks/completed.xml". */
    public function findCompletedTasks()
    {
        $xml = $this->getURL("/tasks/completed.xml");
        $this->checkForErrors("Tasks");
        return $this->parseTasks($xml);
    }

    /** @return array HighriseTask objects from "/tasks/assigned.xml". */
    public function findAssignedTasks()
    {
        $xml = $this->getURL("/tasks/assigned.xml");
        $this->checkForErrors("Tasks");
        return $this->parseTasks($xml);
    }

    /** @return array HighriseTask objects from "/tasks/upcoming.xml". */
    public function findUpcomingTasks()
    {
        $xml = $this->getURL("/tasks/upcoming.xml");
        $this->checkForErrors("Tasks");
        return $this->parseTasks($xml);
    }

    /**
     * Converts a task-list XML document into HighriseTask objects.
     *
     * @param string $xml XML document containing <task> elements.
     * @return array
     */
    private function parseTasks($xml)
    {
        $xml_object = simplexml_load_string($xml);
        $ret = array();
        foreach ($xml_object->task as $xml_task)
        {
            $task = new HighriseTask($this);
            $task->loadFromXMLObject($xml_task);
            $ret[] = $task;
        }
        return $ret;
    }

    /** @return HighriseTask The task with the given id. */
    public function findTaskById($id)
    {
        $xml = $this->getURL("/tasks/$id.xml");
        $this->checkForErrors("Task");
        $task_xml = simplexml_load_string($xml);
        $task = new HighriseTask($this);
        $task->loadFromXMLObject($task_xml);
        return $task;
    }

    /* Notes & Emails */

    /** @return HighriseEmail The e-mail with the given id. */
    public function findEmailById($id)
    {
        $xml = $this->getURL("/emails/$id.xml");
        $this->checkForErrors("Email");
        $email_xml = simplexml_load_string($xml);
        $email = new HighriseEmail($this);
        $email->loadFromXMLObject($email_xml);
        return $email;
    }

    /** @return HighriseNote The note with the given id. */
    public function findNoteById($id)
    {
        $xml = $this->getURL("/notes/$id.xml");
        $this->checkForErrors("Note");
        $note_xml = simplexml_load_string($xml);
        $note = new HighriseNote($this);
        $note->loadFromXMLObject($note_xml);
        return $note;
    }

    /** @return HighrisePerson The person with the given id. */
    public function findPersonById($id)
    {
        $xml = $this->getURL("/people/$id.xml");
        $this->checkForErrors("Person");
        $xml_object = simplexml_load_string($xml);
        $person = new HighrisePerson($this);
        $person->loadFromXMLObject($xml_object);
        return $person;
    }

    /**
     * @return array HighriseTag objects keyed by tag name.
     */
    public function findAllTags()
    {
        $xml = $this->getURL("/tags.xml");
        $this->checkForErrors("Tags");
        $xml_object = simplexml_load_string($xml);
        $ret = array();
        foreach ($xml_object->tag as $tag)
        {
            $ret[(string)$tag->name] = new HighriseTag((string)$tag->id, (string)$tag->name);
        }
        return $ret;
    }

    /** @return array All people of the account. */
    public function findAllPeople()
    {
        return $this->parsePeopleListing("/people.xml");
    }

    /**
     * Finds people carrying the tag with the given name.
     *
     * @param string $tag_name Tag name to look up.
     * @return array HighrisePerson objects.
     * @throws Exception When no tag has that name (the original referenced a
     *                   misspelled, non-existent class here, which caused a
     *                   fatal "class not found" error instead).
     */
    public function findPeopleByTagName($tag_name)
    {
        $tags = $this->findAllTags();
        foreach ($tags as $tag)
        {
            if ($tag->name == $tag_name)
                $tag_id = $tag->id;
        }
        if (!isset($tag_id))
            throw new Exception("Tag $tag_name not found");
        return $this->findPeopleByTagId($tag_id);
    }

    /** @return array People carrying the tag with the given id. */
    public function findPeopleByTagId($tag_id)
    {
        $url = "/people.xml?tag_id=" . $tag_id;
        $people = $this->parsePeopleListing($url);
        return $people;
    }

    /** @return array People matching the e-mail address (criteria search). */
    public function findPeopleByEmail($email)
    {
        return $this->findPeopleBySearchCriteria(array("email"=>$email));
    }

    /** @return array People with the given title. */
    public function findPeopleByTitle($title)
    {
        $url = "/people.xml?title=" . urlencode($title);
        $people = $this->parsePeopleListing($url);
        return $people;
    }

    /** @return array People belonging to the given company. */
    public function findPeopleByCompanyId($company_id)
    {
        $url = "/companies/" . urlencode($company_id) . "/people.xml";
        $people = $this->parsePeopleListing($url);
        return $people;
    }

    /** @return array People matching a free-text term (pages of 25). */
    public function findPeopleBySearchTerm($search_term)
    {
        $url = "/people/search.xml?term=" . urlencode($search_term);
        $people = $this->parsePeopleListing($url, 25);
        return $people;
    }

    /**
     * @param array $search_criteria Field name => value, sent as criteria[field]=value.
     * @return array Matching people (pages of 25).
     */
    public function findPeopleBySearchCriteria($search_criteria)
    {
        $url = "/people/search.xml";
        $sep = "?";
        foreach ($search_criteria as $criteria=>$value)
        {
            $url .= $sep . "criteria[" . urlencode($criteria) . "]=" . urlencode($value);
            $sep = "&";
        }
        $people = $this->parsePeopleListing($url, 25);
        return $people;
    }

    /** @return array People returned by the search endpoint for "since=$time". */
    public function findPeopleSinceTime($time)
    {
        $url = "/people/search.xml?since=" . urlencode($time);
        $people = $this->parsePeopleListing($url);
        return $people;
    }

    /**
     * Fetches every page of a people listing.
     *
     * Requests $url with an increasing "n" offset until a page contains a
     * number of elements different from $paging_results.
     *
     * @param string $url            Listing path, with or without a query string.
     * @param int    $paging_results Page size used to detect the last page.
     * @return array HighrisePerson objects from all pages.
     * @throws Exception On an unexpected HTTP status.
     */
    public function parsePeopleListing($url, $paging_results = 500)
    {
        if (strstr($url, "?"))
            $sep = "&";
        else
            $sep = "?";
        $offset = 0;
        $return = array();
        while (true) // pagination
        {
            $xml_url = $url . $sep . "n=$offset";
            $xml = $this->getURL($xml_url);
            $this->checkForErrors("People");
            $xml_object = simplexml_load_string($xml);
            foreach ($xml_object->person as $xml_person)
            {
                $person = new HighrisePerson($this);
                $person->loadFromXMLObject($xml_person);
                $return[] = $person;
            }
            if (count($xml_object) != $paging_results)
                break;
            $offset += $paging_results;
        }
        return $return;
    }
}
Sorry it's such a long file but if it helps, then so be it.
EDIT: So I guess I got it to work. I should've said that I was trying to test this library out on my local server and for some reason it would keep failing but when I moved the script to my development server on Rackspace cloud then it would work. This just puzzles me. Both servers have support for PHP curl so I can't really understand where the problem is.
EDIT: I'm not sure what the difference between the two server configurations could be but anyways here's a couple of screenshots from my phpinfo function output from both servers of my curl configuration:
Localhost server:
and the rackspace cloud server:
The fork of the API at...
https://github.com/AppSaloon/Highrise-PHP-Api
...seems more developed and better maintained.
Not so much as to provide an answer, but more a better starting point.
Ah, since there is really no HTTP error code 0 I expect that your request isn't being made to Highrise's website, or you are not correctly passing in the account name and token to the class. Can you include the source of your users.test.php class?
EDIT: tested the class and your code, and it works for me. You probably either copied the library file wrong or have your token copied wrong.
I had the same issue. I definitely had the wrong account. I had https://foo.highrisehq.com instead of just foo.
I'm trying to make a script that will load a desired URL (as entered by user) and check if that page links back to my domain before their domain is published on my site. I'm not very experienced with regular expressions and this is what I have so far:
// Load the page submitted by the user and print the href of every link that
// points at our own domain and is not marked rel="nofollow".
$loaded = file_get_contents('http://localhost/small_script/page.php');
// $loaded will be equal to the users site they have submitted
$current_site = 'site2.com';
// $current_site is the domain of my site, this the the URL that must be found in target site
$matches = Array();
// A failed download yields false; treat it as an empty page.
$find = preg_match_all('/<a(.*?)href=[\'"](.*?)[\'"](.*?)\b[^>]*>(.*?)<\/a>/i', $loaded === false ? '' : $loaded, $matches);

foreach ($matches[0] as $z => $full_link) {
    $href = $matches[2][$z];
    if (strpos($href, $current_site) === false) {
        // Link does not point at our site.
        continue;
    }

    // Only inspect the opening <a ...> tag, not the link text, and look for
    // the real attribute value "nofollow" (one word). The original searched
    // for 'no follow' with a space, which never matches rel="nofollow".
    $tag_end = strpos($full_link, '>');
    $opening_tag = ($tag_end === false) ? $full_link : substr($full_link, 0, $tag_end + 1);
    if (stripos($opening_tag, 'nofollow') === false) {
        echo $href;
    } else {
        //echo "rel=no follow FOUND";
    }
}
As you can see, it's pretty messy and I'm not entirely sure where it's headed. I was hoping someone could give me a small, fast and concise script that would do exactly what I've attempted:
Load specified URL as entered by user
Check if specified URL links back to my site (if not, return error code #1)
If link is there, check for 'no follow', if found return error code #2
If everything is OK, set a variable to true, so I can continue with other functions (like displaying their link on my page)
this is the code :)
helped by http://www.merchantos.com/makebeta/php/scraping-links-with-php/
<?php
$my_url = 'http://online.bulsam.net';
$target_url = 'http://www.bulsam.net';
$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';

// make the cURL request to $target_url
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch, CURLOPT_URL, $target_url);
curl_setopt($ch, CURLOPT_FAILONERROR, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$html = curl_exec($ch);
if (!$html) {
    echo "<br />cURL error number:" . curl_errno($ch);
    echo "<br />cURL error:" . curl_error($ch);
    exit;
}

// parse the html into a DOMDocument
$dom = new DOMDocument();
// The load call was commented out in the original, so the document stayed
// empty and the check always reported "no link". Load it for real, keeping
// libxml's warnings about invalid markup internal.
$previous = libxml_use_internal_errors(true);
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previous);

// grab all the anchors on the page
$xpath = new DOMXPath($dom);
$hrefs = $xpath->evaluate("/html/body//a");

// find result: 1 = no link, 2 = link found but nofollow, 3 = link found
$result = is_my_link_there($hrefs, $my_url);
if ($result == 1) {
    echo 'There is no link!!!';
} elseif ($result == 2) {
    echo 'There is, but it is NO FOLLOW !!!';
} else {
    // blah blah blah
}
// used functions
/**
 * Checks whether a list of anchor elements contains a link to $my_url.
 *
 * URLs are compared after trimming surrounding whitespace and trailing
 * slashes, so "http://example.com" and "http://example.com/" match. The rel
 * attribute is treated as a whitespace-separated, case-insensitive token
 * list, so values such as "external nofollow" are recognised.
 *
 * @param DOMNodeList $hrefs  Anchor elements to inspect.
 * @param string      $my_url URL that must be linked.
 * @return int 1 when no anchor links to $my_url; 2 when the first matching
 *             anchor carries rel "nofollow"; 3 when it does not.
 */
function is_my_link_there($hrefs, $my_url) {
    $wanted = rtrim(trim((string) $my_url), '/');

    for ($i = 0; $i < $hrefs->length; $i++) {
        $anchor = $hrefs->item($i);
        $url = rtrim(trim($anchor->getAttribute('href')), '/');
        if ($url !== $wanted) {
            continue;
        }

        // The first matching anchor decides the result.
        $rel_tokens = preg_split('/\s+/', strtolower(trim($anchor->getAttribute('rel'))));
        if (in_array('nofollow', $rel_tokens, true)) {
            return 2;
        }
        return 3;
    }

    return 1;
}