We have a search engine that was programmed in ASP.NET.
I can fetch individual results with cURL because each one has a more or less direct link that I can predict.
However, I cannot fetch the result list with cURL. Here is how the search works:
On the search page we have to select the database we want to search via a checkbox menu.
Once I check the db I want to search, I click on the "Search" button, which forwards me to the search page and takes the chosen db into account.
If I try to go to the search page with a direct link, it doesn't work because it does not know which db to search.
I looked at the POST parameters with Firebug and got the following:
Checkbox_db1 on
__EVENTARGUMENT
__EVENTTARGET LinkButtonCategory
__VIEWSTATE zeyhbf5vg41g6a4f1ezragf136er46ga4gfv658a4r6g4 (something looking like that but longer)
Here is what I try in curl :
// Two POSTs on one cURL handle: first the database choice (choosedb.aspx),
// then the search terms (search.aspx).
$ch = curl_init();
// Make curl_exec() return the response body instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

// Fields seen in Firebug for the "choose database" postback.
// NOTE(review): __VIEWSTATE is still sent empty here; the page presumably
// expects the value it rendered into its own form - confirm against the site.
$fields = array(
    'Checkbox_db1'    => 'on',
    '__EVENTARGUMENT' => '',
    '__EVENTTARGET'   => 'LinkButtonCategory',
    '__VIEWSTATE'     => '',
);
// http_build_query() URL-encodes keys and values and joins the pairs with '&'
// without a trailing separator (the earlier rtrim() result was thrown away).
$postvars = http_build_query($fields, '', '&');
curl_setopt($ch, CURLOPT_URL, "monsite.com/choosedb.aspx");
// CURLOPT_POST is an on/off switch, not a field count.
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postvars);
$output1 = curl_exec($ch);

// Fields for the actual search request.
$fields2 = array(
    'TxtBox1'         => 'value1',
    'Txtbox2'         => 'value2',
    '__EVENTARGUMENT' => '',
    '__EVENTTARGET'   => '',
    '__VIEWSTATE'     => '',
);
$postvars = http_build_query($fields2, '', '&');
curl_setopt($ch, CURLOPT_URL, "monsite.com/search.aspx");
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postvars);
$output2 = curl_exec($ch);
But of course this doesn't work, and the problem is that I am not familiar with ASP.NET at all :/
Could anyone help? Thanks in advance.
First, fetch the initial page with a regular cURL GET request.
Then extract the __VIEWSTATE value from the returned HTML:
// Captures the value attribute that directly follows __VIEWSTATE". [^"]* stops
// at the attribute's closing quote; a greedy .* would run on to the last quote
// on the same line and swallow any markup that follows.
$regexViewstate = '/__VIEWSTATE" value="([^"]*)"/i';
/**
 * Returns one capture group from the first match of $regex in $text.
 *
 * @param string $text     Subject to search.
 * @param string $regex    PCRE pattern, delimiters included.
 * @param mixed  $regs     Ignored on input; kept so existing four-argument calls keep working.
 * @param int    $nthValue Index of the capture group to return (0 is the whole match).
 * @return string The captured text, or "" when the pattern does not match or the
 *                requested group is not present in the match.
 */
function regexExtract($text, $regex, $regs, $nthValue)
{
    $matches = array();
    // isset() guards against asking for a group the pattern did not produce,
    // which previously raised an undefined-index warning and returned null.
    if (preg_match($regex, $text, $matches) === 1 && isset($matches[$nthValue])) {
        return $matches[$nthValue];
    }
    return "";
}
$viewstate = regexExtract($data,$regexViewstate,$regs,1);
And you make up your new post :
// Postback body: the event fields, the URL-encoded view state taken from the
// previous page, then the two search text boxes - joined with '&'.
$postData = implode('&', array(
    '__EVENTARGUMENT=',
    '__EVENTTARGET=LinkButtonCategory',
    '__VIEWSTATE=' . rawurlencode($viewstate),
    'TxtBox1=value1',
    'TxtBox2=value2',
));
// Same options, same order, applied in one call.
curl_setopt_array($ch, array(
    CURLOPT_POST       => TRUE,
    CURLOPT_POSTFIELDS => $postData,
    CURLOPT_URL        => $urlLogin,
    CURLOPT_COOKIEJAR  => $cookieFile,
));
$output = curl_exec($ch);
Related
I'm not able to post values to the form fields and proceed with the further steps needed to download a file. Could anyone suggest where I'm going wrong?
// extract($_POST) was dropped: it lets the request create or overwrite arbitrary
// variables, and nothing below reads an extracted value.

// Target page as a plain string. header("Location: ...") sends a redirect header
// and returns nothing, so it cannot supply a URL for cURL; the stray space
// after '?' is removed as well.
$url = "http://gis.lntecc.com/bwssblnt/Scada.aspx?field1=Kathriguppe%2cSW2DM0402%2c235505H073%2c450";

// Date range to submit (raw values; encoding happens in http_build_query()).
$fields = array(
    "TextBoxFromDate" => "2014-10-01",
    "TextBoxToDate"   => "2014-10-09",
);
// URL-encodes keys and values and joins them with '&' without a trailing
// separator. The original appended to an undefined $fields_string and called
// rtrim() on the $fields array, discarding the result.
$fields_string = http_build_query($fields, '', '&');

// NOTE(review): Scada.aspx is an ASP.NET page; such forms usually also expect
// their hidden __VIEWSTATE / __EVENTVALIDATION fields - confirm against the page.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
// On/off switch, not a field count.
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $fields_string);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$result = curl_exec($ch);
echo $result;
curl_close($ch);
Where did I make a mistake since I'm not able to post values to the field?
Simply remove that huge space in your URL:
$url=header("Location: http://gis.lntecc.com/bwssblnt/Scada.aspx? field1=Kathriguppe%2cSW2DM0402%2c235505H073%2c450");
Becomes:
// Assign the URL itself (without the space after '?'); header() only emits a
// redirect header and returns nothing, so $url would otherwise be empty.
$url = "http://gis.lntecc.com/bwssblnt/Scada.aspx?field1=Kathriguppe%2cSW2DM0402%2c235505H073%2c450";
I have the following php Curl code, to submit a form and get the tables of result
<?php
/**
 * Sends $params to $url as a URL-encoded form POST and returns the response.
 *
 * @param string $url    Address to post to.
 * @param array  $params Field name => scalar value pairs; null is sent as an empty value.
 * @return string|bool   Response body, or false when the transfer fails.
 */
function httpPost($url,$params)
{
    // Build "name=value" pairs with both sides URL-encoded. The (string) cast
    // keeps the original conversions: null becomes '', true becomes '1'.
    $pairs = array();
    foreach ($params as $name => $value) {
        $pairs[] = urlencode((string) $name) . '=' . urlencode((string) $value);
    }
    // implode() leaves no trailing '&' (the earlier rtrim() result was discarded).
    $postData = implode('&', $pairs);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_HEADER, false);
    // On/off switch. The original passed count() of a string here, which is a
    // TypeError on PHP 8.
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_VERBOSE, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    $output = curl_exec($ch);
    curl_close($ch);
    return $output;
}
$params = array(
"form_hf_0"=>null,
"searchMode:edit"=>"Births",
"searchSwitch:birthContainer:regNumber:regNumber"=>null,
"searchSwitch:birthContainer:regNumber:regYear"=>null,
"searchSwitch:birthContainer:subjectName:familyName:edit"=>"smith",
"searchSwitch:birthContainer:subjectName:givenName:edit"=>null,
"searchSwitch:birthContainer:subjectName:otherNames:edit"=>null,
"searchSwitch:birthContainer:fatherGivenName:edit"=>null,
"searchSwitch:birthContainer:fatherOtherNames:edit"=>null,
"searchSwitch:birthContainer:motherGivenName:edit"=>null,
"searchSwitch:birthContainer:motherOtherNames:edit"=>null,
"searchSwitch:birthContainer:dateOfEvent:range:edit"=>true,
"searchSwitch:birthContainer:dateOfEvent:switchGroup:range:dateFrom:day"=>01,
"searchSwitch:birthContainer:dateOfEvent:switchGroup:range:dateFrom:month"=>01,
"searchSwitch:birthContainer:dateOfEvent:switchGroup:range:dateFrom:year"=>1788,
"searchSwitch:birthContainer:dateOfEvent:switchGroup:range:dateTo:day"=>31,
"searchSwitch:birthContainer:dateOfEvent:switchGroup:range:dateTo:month"=>12,
"searchSwitch:birthContainer:dateOfEvent:switchGroup:range:dateTo:year"=>1913,
"searchSwitch:birthContainer:district:edit"=>null,
"search-button"=>"Search"
);
$param1 = array("username"=>"sa","password"=>"1");
echo httpPost("https://lifelink.bdm.nsw.gov.au/lifelink/familyhistory/search?0-2.IFormSubmitListener-mainContent-form",$params);
?>
The form is here: https://lifelink.bdm.nsw.gov.au/lifelink/familyhistory/search?0
Nothing is printed.
Can anyone point out what is wrong?
The result is here: http://ec2-54-213-181-25.us-west-2.compute.amazonaws.com/htdocs/lib/CURL/curl.php
The table contains nothing, unlike a normal search with family name "smith" and date range 1788 to 1914.
I am trying to figure out how to randomly select a proxy IP from a list, perform a cURL request through it, and switch to a new proxy IP if the request fails. Here is my working code without the randomization:
// Target and proxy settings.
$url = "www.example.com";
$loginpassw = 'myproxypw';
$proxy_ip = '23.27.37.128';
$proxy_port = '29842';

// Configure the transfer in one call; options are applied in the listed order.
$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => $url,
    CURLOPT_PROXYPORT      => $proxy_port,
    CURLOPT_PROXY          => $proxy_ip,
    CURLOPT_PROXYUSERPWD   => $loginpassw,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_AUTOREFERER    => true,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_TIMEOUT        => 600,
));
$html = curl_exec($ch);

// The challenge text in the response means this proxy was rejected.
$blocked = strpos($html, 'To continue, please type the characters below') !== false;
if ($blocked) {
    echo "now an error has occurred, let's try a new proxy";
}
curl_close($ch);
Ideally, each proxy_ip should stay paired with its proxy_port in a list such as:
$proxylist = array (
array("ip" => "23.27.37.128", "port" => "29842"),
array("ip" => "23.27.37.111", "port" => "29852"),
array("ip" => "23.27.37.112", "port" => "29742"),
array("ip" => "23.27.37.151", "port" => "29242")
);
I was wondering if I could possibly use shuffle:
shuffle($proxylist);
while($element = array_pop($proxylist)){
return $element;
}
My second question is about the best way of doing this. My PHP is not perfect, so I am wondering: rather than rewriting the cURL code above over and over, should I put it inside a function?
Any help appreciated.
Thanks,
Simon
Edit:
The following code seems to be working where I have split my code into two functions:
/**
 * Fetches $url through an authenticated proxy.
 *
 * @param string     $url        Address to fetch.
 * @param string     $proxy_ip   Proxy host.
 * @param int|string $proxy_port Proxy port.
 * @param string     $loginpassw Proxy credentials, passed to CURLOPT_PROXYUSERPWD.
 * @return string|bool Response body, or false when the transfer fails.
 */
function curltime($url, $proxy_ip, $proxy_port, $loginpassw){
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_PROXY, $proxy_ip);
    // The port argument was accepted but never applied before.
    curl_setopt($ch, CURLOPT_PROXYPORT, (int) $proxy_port);
    curl_setopt($ch, CURLOPT_PROXYUSERPWD, $loginpassw);
    curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 0);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_AUTOREFERER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 600);
    // Keep the result so the handle can be closed before returning; a
    // curl_close() placed after `return` never runs.
    $html = curl_exec($ch);
    curl_close($ch);
    return $html;
}
//now let's do the curl
$url = "www.example.com";
$proxylist = array (
    array("proxyip" => "23.27.37.128", "proxyport" => "29842"),
    array("proxyip" => "23.27.37.111", "proxyport" => "29852"),
    array("proxyip" => "23.27.37.112", "proxyport" => "29742"),
    array("proxyip" => "23.27.37.151", "proxyport" => "29242")
);
// Randomise the order once, then walk the list: every proxy is tried at most
// once, so a proxy that just failed is never picked again.
shuffle($proxylist);
$html = false;
foreach ($proxylist as $proxy) {
    $proxyip = $proxy['proxyip'];
    $proxyport = $proxy['proxyport'];
    // NOTE(review): $this->curltime() is kept from the original call; it only
    // resolves inside a method of a class that defines curltime() - confirm.
    $html = $this->curltime($url, $proxyip, $proxyport, 'somepassword');
    // Success: the transfer worked and the response is not the challenge page.
    if ($html !== false && strpos($html, 'To continue, please type the characters below') === false) {
        break;
    }
    echo "now we have errors so let's try again";
}
// If every proxy failed, $html holds the last response (or false).
$cache .= $html;
Anyone know of a better way for me to do the looping?
To get a random proxy from the list you could use this:
$proxylist[mt_rand(0,count($proxylist)-1)]
Explained:
count($array) Get length of array
mt_rand($x,$y) Get a random number between $x and $y
Edit:
It is totally possible to do it the way you did, too. In that case, just always take the first element of the shuffled array.
shuffle($array);
$array[0]
I can't really say which of these two options gives better randomness, though.
These are the steps I want to do:
Get the HTML code of http://www.skyscanner.es/ , a search of flights.
Get only some part of that HTML: a specific "span" which has the price.
Operate with it.
This is the PHP code what I do:
<?php
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.skyscanner.es/");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt ($ch, CURLOPT_POST, 1);
curl_setopt ($ch, CURLOPT_POSTFIELDS, "from=Bilbao (BIO)&to=Barcelona (BCN)&depdatetext=25/03/2013&sc_returnOrOneWay=2");
$output = curl_exec($ch);
curl_close($ch);
echo $output;
?>
But I get a strange string like this:
‹¥TkoÚ0ý^‰ÿpTi“ê< t%<¤RuR»U+{}4ñ…X5qf›×Pÿûì$ZõÛ‚Äu¬sî=çú:ýÓ믣Éï‡1¤f!àáû§»Ï#ðHül‚àzr ¿n'÷wù!<Åã/x©1yëõÚ_·}©æÁä[°qY"G«–DŸæ 'ý¢Êf!2=x#CÔívKb FÊ\\ ¡àÐÿ,ùjàdf03d²Íу¤|x7&pì$)UÍ€kI®®:]yKe¸8¼;#àv2y€ª),520h’ Ö`R®!§s3i€ !×Èü~Pòm"m¶ÁXUÝDëBô)!“©dÛÝ‚ª9Ïâ°7³‰æ1ö?à¢|ÑÛø*F3z§ânQ¬ÐðÄîhši¢QñYoJ“§¹’ËŒÅÍqñôž'3Ž‚Y“»œ2ƳyBÔÉ7…îÏ®zÏÐ8I£Ý¡~Ë¿°ja‰RÅÍ››—/m!£BêkähÚ§ÌÛ~nÐEýÐýö´0¬iMw¨¨vkÎLw/ÏêeoæÒ&iA^ôÌ3 §Ë$E÷Þ9Ô=<êØ‘3{uûHµß)gºYMÏî…[1—š.³X¡ †¯Ð¡ý M\¤<³FŽÏÆ•{mŒ™ÇWö0öÆ\{ÞÎNˆ bµ¿nœ\d|œÙ›SôÐöÓhøˆÊÎ0Œ•’Ê2¢a?°°ct¥ÙM'›‰ Z×û/6á~¦úië?®Š%—IÚÃIŠ%h+—#‚òÉöfRAB3Gœ"0®sA·¶Àj+Í€g+*8ûH%ƒwµ”÷°¦ú Ç\ä¦ÒåÊ·¿Aí¨îK÷m-¾vñà-ú¡
So, I have not even got past the first step!
I tried to fix it in several ways, but I still don't know what I am doing wrong. I imagine the cause could be one of the following:
The request is incomplete, because I don't specify how many adults, children...
The CURLOPT_URL has to be www.skyscanner.es/search.html, as in the form's action attribute.
I should not make a POST request, but instead fetch a URL directly with cURL, like http://www.skyscanner.es/flights/bio/bcn/130325/airfares-from-bilbao-to-barcelona-in-march-2013.html?flt=1
Could anyone please help me?
Thanks in advance!
Edited: I've changed the title, is closer to the problem I have now.
It doesn't matter what message is encoded in the body since you're receiving:
HTTP/1.1 405 Method Not Allowed
which means you can't use POST.
If you read all the headers of the response, you'll see that one of them says:
Allow: GET, HEAD, OPTIONS, TRACE
If you remove the two lines:
curl_setopt ($ch, CURLOPT_POST, 1);
curl_setopt ($ch, CURLOPT_POSTFIELDS, "from=Bilbao (BIO)&to=Barcelona (BCN)");
and change:
curl_setopt($ch, CURLOPT_URL, "http://www.skyscanner.es/");
into:
curl_setopt($ch, CURLOPT_URL, "http://www.skyscanner.es/vuelos/bio/bcn/130325/tarifas-de-bilbao-a-barcelona-en-marzo-2013.html");
It'll work.
Checkout the following code:
<?php
$accept = array(
'type' => array('application/rss+xml', 'application/xml', 'application/rdf+xml', 'text/xml'),
'charset' => array_diff(mb_list_encodings(), array('pass', 'auto', 'wchar', 'byte2be', 'byte2le', 'byte4be', 'byte4le', 'BASE64', 'UUENCODE', 'HTML-ENTITIES', 'Quoted-Printable', '7bit', '8bit'))
);
$header = array(
'Accept: '.implode(', ', $accept['type']),
'Accept-Charset: '.implode(', ', $accept['charset']),
);
$encoding = null;
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.skyscanner.es/vuelos/bio/bcn/130325/tarifas-de-bilbao-a-barcelona-en-marzo-2013.html?flt=1");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// curl_setopt ($ch, CURLOPT_POST, 1);
// curl_setopt ($ch, CURLOPT_POSTFIELDS, "from=Bilbao (BIO)&to=Barcelona (BCN)");
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
$response = curl_exec($ch);
curl_close($ch);
if (!$response) {
// error fetching the response
} else {
echo $response;
}
?>
I thought that it was using the POST method because I was getting a page without prices.
Now I realize that the URLs were relative, so the scripts were not loaded. I've added a base tag.
[code before]
$result = str_replace("<head>", "<head><base href=\"$skyScannerURL\" />", $response);
Now it has styles and tries to load something, but it enters a loop: the page keeps reloading and the URL carries a parameter that keeps increasing, e.g. ?crty=107
The full code:
$accept = array(
'type' => array('application/rss+xml', 'application/xml', 'application/rdf+xml', 'text/xml'),
'charset' => array_diff(mb_list_encodings(), array('pass', 'auto', 'wchar', 'byte2be', 'byte2le', 'byte4be', 'byte4le', 'BASE64', 'UUENCODE', 'HTML-ENTITIES', 'Quoted-Printable', '7bit', '8bit'))
);
$header = array(
'Accept: '.implode(', ', $accept['type']),
'Accept-Charset: '.implode(', ', $accept['charset']),
);
$encoding = null;
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.skyscanner.es/vuelos/bio/bcn/130325/tarifas-de-bilbao-a-barcelona-en-marzo-2013.html?flt=1");
//curl_setopt($ch, CURLOPT_URL, "http://www.skyscanner.es/flights/bio/bcn/130325/airfares-from-bilbao-to-barcelona-in-march-2013.html?flt=1");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
$response = curl_exec($ch);
curl_close($ch);
if (!$response) {
// error fetching the response
} else {
$skyScannerURL = 'http://www.skyscanner.es/';
$result = str_replace("<head>", "<head><base href=\"$skyScannerURL\" />", $response);
echo $result;
}
You can see online here: codepad.viper-7.com
Obviously something is not working well.
Thanks again everyone.
I have some working cURL scripts but have been relying on the remote server for server-side (SS) validation. I want to add some validation to our cURL scripts so that if the forms aren't filled out correctly the request won't get sent. I have client-side JS validation, but want to duplicate it with SS validation.
Here is an example of my cURL script:
<?php
$url = 'https://remoteserver.com/POST.svc/Foo';
// Fall back to '' when the field was not submitted, so reading it raises no
// undefined-index notice.
$Field1 = isset($_POST["Field1"]) ? $_POST["Field1"] : '';
//other input data
// Raw values only: http_build_query() below does the URL-encoding.
$fields = array(
    'Field1' => $Field1,
    //other input data
);
// Builds "key=value&key=value" with encoded keys and values and no trailing
// separator; the original appended to a variable that was never initialised.
$fields_string = http_build_query($fields, '', '&');
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $fields_string);
// NOTE(review): both certificate checks are switched off, so the remote
// server's identity is not verified - re-enable once its certificate validates.
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$output = curl_exec($ch);
echo $output;
curl_close($ch);
?>
Can I just create an if/else statement like:
<?php
// Server-side validation: stop before contacting the remote service when
// Field1 is missing or empty. (The original condition had one ')' too many,
// which is a parse error.)
// Note: empty() also treats the string "0" as empty.
if (empty($_POST['Field1'])) {
    echo "Error";
    die();
} else {
    $url = 'https://remoteserver.com/POST.svc/Foo';
    $Field1 = $_POST["Field1"];
    //other input data
    // Raw values only: http_build_query() below does the URL-encoding.
    $fields = array(
        'Field1' => $Field1,
        //other input data
    );
    // Builds "key=value&key=value" with encoded keys and values and no trailing
    // separator; the original appended to a variable that was never initialised.
    $fields_string = http_build_query($fields, '', '&');
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $fields_string);
    // NOTE(review): both certificate checks are switched off, so the remote
    // server's identity is not verified - re-enable once its certificate validates.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $output = curl_exec($ch);
    echo $output;
    curl_close($ch);
}
I have ajax stuff that is displayed if the return is "Error". It can't be that easy, can it?
thx
Instead of just returning 'Error', why not return some useful stuff? A PHP structure such as
// Structured error report for the client: a flag, a summary message and one
// entry per failed check, sent back as JSON.
$request_status = array(
    'error' => 1,
    'error_msg' => 'Form was not completed properly',
    'error_details' => array(
        0 => 'Name field not completed',
        1 => 'Invalid state specified',
        2 => 'Password1 and Password 2 do not match'
    )
);
echo json_encode($request_status);
would be of far more use. Your client-side ajax code can simply look for the 'error' parameter in there and either go "Hey, it worked!" or do further processing based on the error_details you supplied, such as highlighting the form fields that weren't completed correctly.
This also allows you to send back other types of error messages as well, such as saying "hey, the place we're CURLing your form to isn't responding" and the like.