I'm having trouble scraping an ASPX.NET website. Even after searching through stackoverflow I haven't been able to find a fix.
First off, this is a public accessible website that I'm trying to scrape so that I can email myself a copy occasionally in order to not bother having to go there with a browser.
My code goes so far as to capture the __VIEWSTATE and __EVENTVALIDATION but when I submit the form I get the "Validation of viewstate MAC failed" error.
Any ideas?
<?php
require_once ("simple_html_dom.php");
ini_set('display_errors', 'On');
error_reporting(E_ALL);
// Create curl connection
$url = 'http://www.ariautodirect.com/ui/index.aspx';
$cookieFile = 'cookie.txt';
$ch = curl_init();
// We must request the login page and get the ViewState and EventValidation hidden values
// and pass those along in the post request.
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setOpt($ch, CURLOPT_REFERER, 'http://www.ariautodirect.com/ui/index.aspx');
curl_setopt($ch, CURLOPT_HTTPHEADER,array('Origin: http://www.ariautodirect.com', 'Host: www.ariautodirect.com'));
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);
$curl_scraped_page = curl_exec($ch);
// Grab ViewState and EventValidation data
$html = str_get_html($curl_scraped_page);
$viewState = $html->find("#__VIEWSTATE", 0);
$eventValidation = $html->find("#__EVENTVALIDATION", 0);
$previousPage = $html->find("#__PREVIOUSPAGE", 0);
//create array of data to be posted
// This matches exactly what I am seeing being posted when looking at Fiddler
$post_data['__EVENTTARGET'] = '';
$post_data['__EVENTARGUMENT'] = '';
$post_data['__VIEWSTATE'] = $viewState->value;
$post_data['__EVENTVALIDATION'] = $eventValidation->value;
$post_data['__PREVIOUSPAGE'] = 'Kn9nmdEGkbDIIX-v7Z_vg0t3njAt1lzkUm-uXH6djRcAGjPo6-w6RWKF3BBQz1ijEoJZGJVqOvHTqB5ghEU40C8xhnw1';//hard-coded because $previousPage->value throughs error
$post_data['ctl00$HeaderCtl1$LoginControl1$txtUserName'] = '';
$post_data['ctl00$HeaderCtl1$LoginControl1$txtPassword'] = '';
$post_data['ctl00$HeaderCtl1$LoginControl1$txtClientCode'] = '';
$post_data['ctl00$SearchCriteria1$drpDownMiles'] = '50';
$post_data['ctl00$SearchCriteria1$txtZipCode'] = '30301';
$post_data['ctl00$SearchCriteria1$ddlMake'] = 'All Makes ';
$post_data['ctl00$SearchCriteria1$CollapsiblePanelExtender1_ClientState'] = 'true';
$post_data['ctl00$SearchCriteria1$CollapsiblePanelExtender2_ClientState'] = 'true';
$post_data['ctl00$SearchCriteria1$CollapsiblePanelExtender3_ClientState'] = 'true';
$post_data['ctl00$SearchCriteria1$CollapsiblePanelExtender4_ClientState'] = 'true';
$post_data['ctl00$SearchCriteria1$CollapsiblePanelExtender5_ClientState'] = 'true';
$post_data['ctl00$SearchCriteria1$CollapsiblePanelExtender6_ClientState'] = 'true';
$post_data['ctl00$SearchCriteria1$CollapsiblePanelExtender7_ClientState'] = 'true';
$post_data['ctl00$SearchCriteria1$CollapsiblePanelExtender8_ClientState'] = 'true';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$drpDownMiles'] = '50';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$txtZipCode'] = '30301';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$btnSearch'] = 'Search';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$drpDownPageList'] = '100';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$txtVinSearch'] = '';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$SrtxtClientId'] = '';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$SrtxtVehicleNo'] = '';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$hdnSortYear'] = '';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$hdnSortModel'] = '';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$hdnSortMileage'] = '';
$post_data['ctl00$ContentPlaceHolder1$DataGridResults1$hdnSortPrice'] = '';
//traverse array and prepare data for posting (key1=value1)
foreach ( $post_data as $key => $value) {
$post_items[] = rawurlencode($key) . '=' . rawurlencode($value);
}
//create the final string to be posted using implode()
$post_string = implode ('&', $post_items);
$post_string = http_build_query($post_data);
//Set options for post
$urlAcctSummary = "http://www.ariautodirect.com/ui/DisplayResults.aspx";
curl_setopt($ch, CURLOPT_URL, $urlAcctSummary);
curl_setOpt($ch, CURLOPT_POST, TRUE);
curl_setopt($ch, CURLOPT_HTTPHEADER,array('Origin: http://www.ariautodirect.com', 'Host: www.ariautodirect.com', 'Content-Type: application/x-www-form-urlencoded'));
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setOpt($ch, CURLOPT_REFERER, 'http://www.ariautodirect.com/ui/index.aspx');
// Perform our post request
$curl_scraped_page = curl_exec($ch);
echo $curl_scraped_page;
curl_close($ch);
?>
Related
I have a simple code that I want when the name of a bank is empty and a site binlist has not obtained it from a database that writes something else in its place, for example : N/A
$BIN = str_replace(' ', '',$PCT);
$BIN = substr($BIN, 0, 6);
$url = "https://lookup.binlist.net/" . $BIN;
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
$resp = curl_exec($ch);
curl_close($ch);
$details = json_decode($resp, true);
$countryname = strtoupper($details['country']['name']);
$cardtype = ucwords($details['type']);
$cardbrand = ucwords($details['brand']);
$bankname = strtoupper($details['bank']['name']);
$_SESSION['bankname'] = $bankname;
Why not simply check if it empty and assign the value in this case?
For example:
$bankname = empty($details['bank']['name']) ? 'N/A' : strtoupper($details['bank']['name']);
When i am Decoding using commented "$jsonString" String it is working very well.
But after using curl it is not working, showing Null.
Please Help Me in this.
if (isset($_POST['dkno'])) {
$dcktNo = $_POST['dkno'];
$url = 'http://ExampleStatus.php?dkno=' . $dcktNo;
$myvars = '';
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $myvars);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$jsonString = curl_exec($ch);
// $jsonString = '[{"branchname":"BHUBNESHWAR","consignee":"ICICI BANK LTD","currentstatus":"Delivered by : BHUBNESHWAR On - 25/07/2015 01:00","dlyflag":"Y","PODuploaded":"Not Uploaded"}]';
if ($jsonString != '') {
$json = str_replace(array('[', ']'), '', $jsonString);
echo $json;
$obj = json_decode($json);
if (is_null($obj)) {
die("<br/>Invalid JSON, don't need to keep on working on it");
} else {
$podStatus = $obj->PODuploaded;
}
}
}
}
After curl I used following concept to get only JSON data from HTML Page.
1) fetchData.php
$url = 'http://DocketStatusApp.aspx?dkno=' . $dcktNo;
$myvars = '';
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $myvars);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$jsonString = curl_exec($ch);
// now get only value
$dom = new DOMDocument();
$dom->loadHTML($jsonString);
$thediv = $dom->getElementById('Label1');
echo $thediv->textContent;
2) JSONprocess.php
if (isset($_POST['dkno'])) {
$dcktNo = $_POST['dkno'];
ob_start(); // begin collecting output
include_once 'fetchData.php';
$result = ob_get_clean(); // Completed collecting output
// Now it will show & take only JSON Data from Div Tag
$json = str_replace(array('[', ']'), '', $result);
$obj = json_decode($json);
if (is_null($obj)) {
die("<br/>Invalid JSON, don't need to keep on working on it");
} else {
$podStatus = $obj->PODuploaded;
}
}
Is there any way in which i can use the WHMCS API without displaying WHMCS to the clients and users.
I want my PHP scripts to first create a WHMCS client, add an order for the client and then copy some files to the client's directory.
But i don't want my clients to be able to login to their WHMCS panel or even be able to see the WHMCS
WHMCS has something called External API that will help you.
Here is the documentation. But for what you need yo should do this:
Connect to the API
$url = "http://www.yourdomain.com/includes/api.php"; # URL to WHMCS API file goes here
$username = "Admin"; # Admin username goes here
$password = "demoxyz"; # Admin password goes here
Add the Client
$postfields = array();
$postfields["username"] = $username;
$postfields["password"] = md5($password);
$postfields["action"] = "addclient";
$postfields["firstname"] = "Test";
$postfields["lastname"] = "User";
$postfields["companyname"] = "WHMCS";
$postfields["email"] = "demo#whmcs.com";
$postfields["address1"] = "123 Demo Street";
$postfields["city"] = "Demo";
$postfields["state"] = "Florida";
$postfields["postcode"] = "AB123";
$postfields["country"] = "US";
$postfields["phonenumber"] = "123456789";
$postfields["password2"] = "demo";
$postfields["customfields"] = base64_encode(serialize(array("1"=>"Google")));
$postfields["currency"] = "1";
$query_string = "";
foreach ($postfields AS $k=>$v) $query_string .= "$k=".urlencode($v)."&";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $query_string);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
$jsondata = curl_exec($ch);
if (curl_error($ch)) die("Connection Error: ".curl_errno($ch).' - '.curl_error($ch));
curl_close($ch);
$arr = json_decode($jsondata); # Decode JSON String
print_r($arr); # Output XML Response as Array
Add the Order
$postfields = array();
$postfields["username"] = $username;
$postfields["password"] = md5($password);
$postfields["action"] = "addorder";
$postfields["clientid"] = "1";
$postfields["pid"] = "1";
$postfields["domain"] = "whmcs.com";
$postfields["billingcycle"] = "monthly";
$postfields["addons"] = "1,3,9";
$postfields["customfields"] = base64_encode(serialize(array("1"=>"Google")));
$postfields["domaintype"] = "register";
$postfields["regperiod"] = "1";
$postfields["paymentmethod"] = "mailin";
$query_string = "";
foreach ($postfields AS $k=>$v) $query_string .= "$k=".urlencode($v)."&";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $query_string);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
$jsondata = curl_exec($ch);
if (curl_error($ch)) die("Connection Error: ".curl_errno($ch).' - '.curl_error($ch));
curl_close($ch);
$arr = json_decode($jsondata); # Decode JSON String
print_r($arr); # Output XML Response as Array
Then you can copy the files to the client's directory. Hope it helps!
I'm trying to use Spreadshirt API to create a product on their platform, but i'm stuck with this weird error:
Fatal error: Uncaught exception 'Exception' with message 'String could not be parsed as XML' in /home/anarchoi/public_html/test.php:102 Stack trace: #0 /home/anarchoi/public_html/test.php(102): SimpleXMLElement->__construct('') #1 {main} thrown in /home/anarchoi/public_html/test.php on line 102
Most of the code is just copied from their wiki so i really don't understand why it doesn't work.
I'm looking for help to understand where the error is coming from and why it is happening.
$productTypeId = "210";
$printTypeId = "17";
$printColorIds = "13,20";
$productTypeAppearanceId = "1";
$productTypePrintAreaId = "4";
$designId = "10438193";
// 1. Get shop data
$shopUrl = "http://api.spreadshirt.com/api/v1/shops/266497";
$ch = curl_init($shopUrl);
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, false);
$result = curl_exec($ch);
// Close the handle
curl_close($ch);
$shop = new SimpleXMLElement($result);
$namespaces = $shop->getNamespaces(true);
// 2. Get product type data
$attributes = $shop->productTypes->attributes($namespaces['xlink']);
$productTypeUrl = $attributes->href . "/" . $productTypeId;
$ch = curl_init($productTypeUrl);
// echo "<br>$productTypeUrl<br>";
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, false);
$result = curl_exec($ch);
// Close the handle
curl_close($ch);
$productType = new SimpleXMLElement($result);
// 3. Get design data
$attributes = $shop->designs->attributes($namespaces['xlink']);
$designUrl = $attributes->href . "/" . $designId;
$ch = curl_init($designUrl);
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, false);
$result = curl_exec($ch);
// Close the handle
curl_close($ch);
$design = new SimpleXMLElement($result);
// 4. Prepare product
// get positioning data for selected product type
$printArea = null;
foreach ($productType->printAreas->printArea as $current) {
if ($current['id'] == $productTypePrintAreaId) {
$printArea = $current;
}
}
$product = new SimpleXMLElement(getFileData("product.xml"));
$product->productType['id'] = $productTypeId;
$product->appearance['id'] = $productTypeAppearanceId;
$configuration = $product->configurations->configuration;
$configuration->printArea['id'] = $productTypePrintAreaId;
$configuration->printType['id'] = $printTypeId;
$configuration->offset->x =
((doubleval($printArea->boundary->size->width) - doubleval($design->size->width)) / 2);
$configuration->offset->y =
((doubleval($printArea->boundary->size->height) - doubleval($design->size->height)) / 4);
$image = $product->configurations->configuration->content->svg->image;
$image['width'] = $design->size->width;
$image['height'] = $design->size->height;
$image['designId'] = $designId;
$image['printColorIds'] = $printColorIds;
// 5. Create product
$attributes = $shop->products->attributes($namespaces['xlink']);
$productsUrl = $attributes->href;
$header = array();
$header[] = createSprdAuthHeader("POST", $productsUrl);
$header[] = "Content-Type: application/xml";
$ch = curl_init("$productsUrl"."?fullData=true");
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_POSTFIELDS, $product->asXML());
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
$result = curl_exec($ch);
// Close the handle
curl_close($ch);
$productUrl = parseHttpHeaders($result, "Location");
$ch = curl_init($productUrl);
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, false);
$result = curl_exec($ch);
// Close the handle
curl_close($ch);
$product = new SimpleXMLElement($result);
$resource = $product->resources->resource[0];
$attributes = $resource->attributes($namespaces['xlink']);
echo '<html><body>';
echo 'Product available at: ' . $productUrl . '</br>';
echo 'Product image is available at: ' . $attributes->href . '</br>';
echo '<img src="' . $attributes->href . '?width=1000"/>';
echo '</body></html>';
function createSprdAuthHeader($method, $url) {
$apiKey = "***";
$secret = "***";
$time = time()*1000;
$data = "$method $url $time";
$sig = sha1("$data $secret");
return "Authorization: SprdAuth apiKey=\"$apiKey\", data=\"$data\", sig=\"$sig\"";
}
function parseHttpHeaders( $header, $headername ) {
$retVal = array();
$fields = explode("\r\n", preg_replace('/\x0D\x0A[\x09\x20]+/', ' ', $header));
foreach( $fields as $field ) {
if( preg_match('/('.$headername.'): (.+)/m', $field, $match) ) {
return $match[2];
}
}
return $retVal;
}
function getFileData($file) {
$fp = fopen($file, "r");
$data = "";
while(!feof($fp)) {
$data .= fgets($fp, 1024);
}
fclose($fp);
return $data;
}
product.xml = https://www.ni-dieu-ni-maitre.com/product.xml
I stumbled upon similar issue when implementing Spreadshirt, looks like their API server is sending (at least some) content gzipped regardless of any Accept-Encoding headers. You can tell that it's your case by var_dumping the string before it's passed to SimpleXMLElement (as suggested by others) – if it's gibberish, it's very possible you have the same issue as I did.
Setting the curl option of CURLOPT_ENCODING to an empty string ('') fixed that for me – it "magically" turned on ungzipping the response (see man page for curl_setopt() for more information).
I am fairly new to php/cURL and i am trying to login to Barnes and noble website through php and access the hot books section i.e my php code when run should login to my account and display "Hot-selling books" section of the Barnes and nobles site. I have attached the code and the loging works fine but i do not know how to access the HOTseller book page after that, Any help would be great..
Thanks
<?php
// options
$EMAIL = 'xxxx';
$PASSWORD = 'yyyy';
$cookie_file_path = "/tmp/cookies.txt";
$LOGINURL = "https://cart2.barnesandnoble.com/mobileacct/op.asp?stage=signIn";
$agent = "Nokia-Communicator-WWW-Browser/2.0 (Geos 3.0 Nokia-9000i)";
// begin script
$ch = curl_init();
// extra headers
$headers[] = "Accept: */*";
$headers[] = "Connection: Keep-Alive";
// basic curl options for all requests
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file_path);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path);
// set first URL
curl_setopt($ch, CURLOPT_URL, $LOGINURL);
// execute session to get cookies and required form inputs
$content = curl_exec($ch);
// grab the hidden inputs from the form required to login
$fields = getFormFields($content);
$fields['emailAddress'] = $EMAIL;
$fields['acctPassword'] = $PASSWORD;
// get x value that is used in the login url
$x = '';
if (preg_match('/op\.asp\?x=(\d+)/i', $content, $match)) {
$x = $match[1];
}
//$LOGINURL = "https://cart2.barnesandnoble.com/mobileacct/op.asp?stage=signIn";
$LOGINURL = "https://cart2.barnesandnoble.com/mobileacct/op.asp?stage=signIn";
// set postfields using what we extracted from the form
$POSTFIELDS = http_build_query($fields);
// change URL to login URL
curl_setopt($ch, CURLOPT_URL, $LOGINURL);
// set post options
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $POSTFIELDS);
// perform login
$result = curl_exec($ch);
print $result;
function getFormFields($data)
{
if (preg_match('/(<form action="op.*?<\/form>)/is', $data, $matches)) {
$inputs = getInputs($matches[1]);
return $inputs;
} else {
die('didnt find login form');
}
}
function getInputs($form)
{
$inputs = array();
$elements = preg_match_all('/(<input[^>]+>)/is', $form, $matches);
if ($elements > 0) {
for($i = 0; $i < $elements; $i++) {
$el = preg_replace('/\s{2,}/', ' ', $matches[1][$i]);
if (preg_match('/name=(?:["\'])?([^"\'\s]*)/i', $el, $name)) {
$name = $name[1];
$value = '';
if (preg_match('/value=(?:["\'])?([^"\'\s]*)/i', $el, $value)) {
$value = $value[1];
}
$inputs[$name] = $value;
}
}
}
return $inputs;
}