My entire application is written in PHP, except for one Perl script that creates an MD5 hash which is later used by the PHP scripts. The problem is that the two hashes don't match.
#$linkTrue = ''
And for testing purposes i did this in PHP:
echo md5("");
both return different values. Does anyone know why this is?
echo md5("");
WHOLE PERL SCRIPT (sorry its long)
use strict;
use warnings;
use LWP::Simple;
require LWP::UserAgent;
sub trim($);
use DBI;
use Net::FTP;
use Digest::MD5 qw(md5 md5_hex md5_base64);
print "Content-type: text/html\n\n";
print "<html>\n<head>\n</head><body>\n";
my $ua = LWP::UserAgent->new;
#my %get = ();
#for (split /\&/, $ENV{'QUERY_STRING'}) { my ($key, $val) = split /=/; $val =~ s/\+/ /g; $val =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/ge; $get{$key} = $val; }
#my %post = ();
#for (split /\&/, <STDIN>) { my ($key, $val) = split /=/; $val =~ s/\+/ /g; $val =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/ge; $post{$key} = $val; }
my %get = ('findAllPages' => 'true' );
my %post = ('ki' => '############################' );
# Strip leading and trailing whitespace from a string and return the result.
#
# The ($) prototype is kept because the file forward-declares `sub trim($);`;
# dropping it here would trigger a "Prototype mismatch" warning.
#
# An undefined argument is returned unchanged (undef) without touching it, so
# callers that pass undef no longer trigger "uninitialized value" warnings.
sub trim($) {
    my $string = shift;
    return $string unless defined $string;

    for ($string) {
        s/^\s+//;    # leading whitespace
        s/\s+$//;    # trailing whitespace (including a final newline)
    }
    return $string;
}
# Resolve a directory-style URL (one ending in "/") to a concrete index page.
#
# $data is a host/path string without the "http://" scheme. When it ends in
# "/", each known index extension is probed with an HTTP GET through the
# file-level $ua user agent, and the first "<data>index<ext>" candidate that
# answers 200, 301 or 302 is returned (trimmed). If $data does not end in "/"
# or no candidate responds with one of those codes, $data is returned as given.
sub extention {
    my ($data) = @_;

    if ( substr( trim($data), -1 ) eq "/" ) {
        my @extArray = ('.html', '.php', '.htm', '.asp', '.shtml', '.aspx');

        # A named loop variable is used instead of $_ so that nested calls
        # cannot clobber the extension currently being tested.
        foreach my $ext (@extArray) {
            my $candidate        = trim( $data . "index" . $ext );
            my $testResponse     = $ua->get( 'http://' . $candidate );
            my $testResponseCode = $testResponse->code;

            if (   $testResponseCode == 200
                || $testResponseCode == 301
                || $testResponseCode == 302 )
            {
                return $candidate;
            }
        }
    }

    return $data;
}
if( defined( $get{findAllPages} ) && defined( $post{ki} ) ){
my ($database, $hostname, $port, $password, $user );
$database = "##########";
$hostname = "############";
$password = "##########";
$user = "#########";
my $KI = $post{ki};
# connect to the database
my $dsn = "DBI:mysql:database=$database;host=$hostname;";
my $dbh = DBI->connect($dsn, $user, $password);
my $sth = $dbh->prepare("SELECT * FROM accounts WHERE KI = '$KI' ") or die "Could not select from table" . $DBI::errstr;
if( $sth->rows != 0 ) {
my $ref = $sth->fetchrow_hashref();
my $domain = $ref->{website};
my $DB_username = $ref->{db_name};
my $DB_password = $ref->{db_pass};
my $DB_ftpuser = $ref->{ftpuser};
my $DB_ftppass = $ref->{ftppass};
my $DB_ftpserver = $ref->{ftpserver};
chomp(my $url = trim($domain));
# try and find full path
sub findFullPath {
my($link, $landingPage) = #_;
# strip ./ and / from beggining of string
$link =~ s/^(?:(?:\/)|(?:\.\/))//g;
# find out whether link is backtracing to previous folder
if( $link =~ m/^\.\.\// ) { # link desination is back tracing
if( $landingPage =~ m/(?:(?:\.html)|(?:\.php)|(?:\.htm)|(?:\.asp)|(?:\.shtml)|(?:\.aspx))$/g ) {
# find destination folder from landing page
my #folders = split( "/", $landingPage );
#find size of array
my $foldersSize = scalar #folders;
delete $folders[$foldersSize - 1];
$foldersSize = scalar #folders;
my #backFolders = ( $link =~ m/\.\.\//g ); # get rid of ../
my $amountOfBackFolders = scalar #backFolders; # find how many folders back
for( my $x=0; $x < $amountOfBackFolders; $x++ ) {
my $numberToDelete = ($foldersSize - 1) - $x;
delete $folders[$numberToDelete];
$landingPage = join( "/", #folders );
$link =~ s/\.\.\///g;
return $landingPage . "/" . $link . "\n";
} elsif( $landingPage =~ m/(?:\/)$/g ) {
my #folders = split( "/", $landingPage );
#find size of array
my $foldersSize = scalar #folders;
delete $folders[$foldersSize - 1];
$foldersSize = scalar #folders;
my #backFolders = ( $link =~ m/\.\.\//g ); # get rid of ../
my $amountOfBackFolders = scalar #backFolders; # find how many folders back
for( my $x=0; $x < $amountOfBackFolders; $x++ ) {
my $numberToDelete = ($foldersSize) - $x;
delete $folders[$numberToDelete];
$landingPage = join( "/", #folders );
$link =~ s/\.\.\///g;
return $landingPage . "/" . $link . "\n";
} else {
if( substr( $landingPage, -1) eq "/" ){
return $landingPage . $link;
my #splitLandingPage = split( "/", $landingPage );
my $amountSplit = scalar #splitLandingPage;
my $toDelete = $amountSplit - 1;
my $lastEntry = $splitLandingPage[$toDelete];
if( $lastEntry =~ m/(?:(?:com)|(?:co\.uk)|(?:net)|(?:org)|(?:cc)|(?:tv)|(?:info)|(?:org\.uk)|(?:me\.uk)|(?:biz)|(?:name)|(?:eu)|(?:uk\.com)|(?:eu\.com)|(?:gb\.com)|(?:gb\.net)|(?:uk\.net)|(?:me)|(?:mobi))$/g ) {
return join( "/", #splitLandingPage ) . "/" . $link . "\n";
delete $splitLandingPage[$toDelete];
return join( "/", #splitLandingPage ) . "/" . $link . "\n";
# get HTTP details
my $response = $ua->get('http://' . trim($url));
my $responseCode = $response->code;
my $responseLocation = $response->header( 'Location' );
# contintue only if status code is 200 or 301
if( $responseCode != 200 && $responseCode != 301 && $responseCode != 302 ){
print "<span class=\"red\"> error: http://" . trim($url) . "Domain name invalid, please use differnet domain name: http status - " . $responseCode . "</span><br />\n";
# change url if domain status eq 301
if( $responseCode == 301 || $responseCode == 302 ){
if($response->header( 'Location' ) =~ m/^http:\/\/www\./g ) {
$url = substr( $response->header( 'Location' ), 11 );
}elsif($response->header( 'Location' ) =~ m/^http:\/\//g ) {
$url = substr( $response->header( 'Location' ), 7 );
$url = findFullPath($response->header( 'Location' ), $url);
my #pagesArray = ($url);
my #pagesScannedArray;
my #mainPagesArray;
my #pagesNotScanned;
my $z = 0;
#print "\nGethering all valid links from " . $domain . "...\n\n";
while ( #pagesArray && $z < 100 ) {
# get the next in queue for proccessing
my $page = trim(shift #pagesArray);
if( ! grep {$_ eq trim($page)} #pagesNotScanned ) {
# check page http status
$response = $ua->get("http://" . trim($page));
$responseCode = $response->code;
if( $responseCode == 200 || $responseCode == 301 || $responseCode == 302 ){
# change page url if 301 redirect
if( $responseCode == 301 || $responseCode == 302 ){
if($response->header( 'Location' ) =~ m/^http:\/\/www\./g ) {
$page = substr( $response->header( 'Location' ), 11 );
}elsif($response->header( 'Location' ) =~ m/^http:\/\//g ) {
$page = substr( $response->header( 'Location' ), 7 );
$page = findFullPath($response->header( 'Location' ), $url);
# connect to page and get contents
if( my $pageData = get "http://" . trim($page) ) {
# get all links on page
my #pageLinksArray = ( $pageData =~ m/href=["']([^"']*)["']/g );
# foreach link on the page
foreach( #pageLinksArray ) {
my $link = trim($_);
# remove url if located on same domain
$link =~ s/(?:http:\/\/)?(?:www\.)?$url//g;
# if link is format we are looking for
if( $link =~ m/(?:(?:\.html)|(?:\.php)|(?:\.htm)|(?:\.asp)|(?:\.shtml)|(?:\.aspx)|(?:\/))$/ ) {
# if link is outbound
if( $link =~ m/^http:\/\//g ) {
if( ! grep {$_ eq trim($link)} #pagesNotScanned ) {
if( ! grep {$_ eq trim($page)} #mainPagesArray ) {
push ( #pagesNotScanned, trim($link) );
# find full path for link
my $newUrl = &findFullPath(trim($link), trim($page));
# if link has not already been claimed to be a main page
if( ! grep {$_ eq trim($newUrl)} #mainPagesArray ) {
# if link is not already in queue
if( ! grep {$_ eq trim($newUrl)} #pagesArray ) {
push ( #pagesArray, trim($newUrl) );
if( ! grep {$_ eq trim($page)} #mainPagesArray ) {
push ( #mainPagesArray, trim($page) );
if( ! grep {$_ eq trim($page)} #pagesNotScanned ) {
if( ! grep {$_ eq trim($page)} #mainPagesArray ) {
push ( #pagesNotScanned, trim($page) );
if( scalar #mainPagesArray != 0 ) {
my ($database, $hostname, $port, $password, $user );
$database = $DB_username;
$hostname = "###########";
$password = $DB_password;
$user = $DB_username;
# connect to the database
my $dsn = "DBI:mysql:database=$database;host=$hostname;";
my $dbh = DBI->connect($dsn, $user, $password) or die " error: Couldn't connect to database: " . DBI->errstr;
print "\nTesting links' extentions from " . $domain . "...\n\n";
my $root;
my $ftp = Net::FTP->new($DB_ftpserver, Debug => 0) or die "Cannot connect to $#";
$ftp->login($DB_ftpuser, $DB_ftppass) or die "Cannot login ", $ftp->message;
my #list = $ftp->dir;
if( scalar #list != 0 ) {
foreach( #list ){
if( $_ =~ m/((?:www)|(?:public_html)|(?:htdocs))$/g ){
$root = $1;
if( $root eq "" ) {
print "error: could not identify root directory.<br />\n";
foreach( #mainPagesArray ) {
my $webpage = &extention(trim($_));
if( trim($webpage) ne trim($domain) ){
my $webpageQuote = $dbh->quote("http://www." . $webpage);
my $sth = $dbh->prepare("SELECT * FROM page_names WHERE linkTrue = $webpageQuote ") or die "Could not select from table" . $DBI::errstr;
if( $sth->rows == 0 ) {
print "http://www." . $webpage . "<br />\n";
my $linkTrue = $dbh->quote("http://www." . $webpage);
my $string = ($webpage =~ s/^$domain//g);
my $linkFromRoot = $dbh->quote($root . $webpage);
my $page_name = $dbh->quote("");
my $table_name = $dbh->quote(md5_hex(trim($linkTrue)));
my $navigation = $dbh->quote("");
my $location = $dbh->quote("");
$dbh->do("INSERT INTO page_names (linkFromRoot, linkTrue, page_name, table_name, navigation, location) VALUES ( $linkFromRoot, $linkTrue, $page_name, $table_name, $navigation, $location )") or die " error: Couldn't connect to database: " . DBI->errstr;
print "<span class=\"red\"> error: No pages where found. This CMS is designed for pre-existing sites. Please contact support for more information.</span><br />\n";
print "<span class=\"red\"> error: input key incorrerct.</span><br />\n";
print "<span class=\"red\"> error: This area is forbidden please locate back to</span><br />\n";
print "</body>\n</html>";
I believe it's on line 274. The code might be messy, but it's my first Perl script; I've only been at it for a week.
I think I've got it: $dbh->quote() adds single quotes around the value.
When I try it, both programs return a4cbeef10b3c6d44ca30d96370619eef
I have the feeling you're not giving us the whole picture. Show us the code leading up to this. In particular, check for newlines. Have you used chomp in the perl script?
Try for yourself. Here is the complete php script I used:
echo md5("");
And here is a complete perl script I used:
use Digest::Perl::MD5 'md5_hex';
$linkTrue = '';
print md5_hex($linkTrue);
Which of the two scripts is not returning that value for md5? That's the one that has a bug. Log the value that you're passing to md5 (with '[' before and ']' after, to detect extra whitespace). Does that value match what you expect?
edit 2:
It looks like you found it, right? It's the single quotes. This:
print md5_hex("''");
Notice the extra quotes. The above line gives me: 486c944b10ef539aa7ba4bfe607861f2
I am using a WordPress plugin, and I noticed that it raises an error on this line:
$alias = (string)end(array_keys($settings));
The error raised by the line above is:
PHP Strict Standards: Only variables should be passed by reference in on wordpress function
I have added that function below. Does anyone know how to solve this error, please? The plugin's admin dashboard does not load because of it.
* GET modules lists
function load_modules ()
$folder_path = $this->cfg['paths']['plugin_dir_path'] . 'modules/';
$cfgFileName = 'config.php';
// static usage, modules menu order
$menu_order = array();
foreach(glob($folder_path . '*/' . $cfgFileName) as $module_config ){
$module_folder = str_replace($cfgFileName, '', $module_config);
// Turn on output buffering
if( is_file( $module_config ) ) {
require_once( $module_config );
$settings = ob_get_clean(); //copy current buffer contents into $message variable and delete current output buffer
if(trim($settings) != "") {
$settings = json_decode($settings, true);
$alias = (string) end(array_keys($settings));
// create the module folder URI
// fix for windows server
$module_folder = str_replace( DIRECTORY_SEPARATOR, '/', $module_folder );
$__tmpUrlSplit = explode("/", $module_folder);
$__tmpUrl = '';
$nrChunk = count($__tmpUrlSplit);
if($nrChunk > 0) {
foreach ($__tmpUrlSplit as $key => $value){
if( $key > ( $nrChunk - 4) && trim($value) != ""){
$__tmpUrl .= $value . "/";
// get the module status. Check if it's activate or not
$status = false;
// default activate all core modules
if(in_array( $alias, $this->cfg['core-modules'] )) {
$status = true;
// activate the modules from DB status
$db_alias = $this->alias . '_module_' . $alias;
if(get_option($db_alias) == 'true'){
$status = true;
// push to modules array
$this->cfg['modules'][$alias] = array_merge(array(
'folder_path' => $module_folder,
'folder_uri' => $this->cfg['paths']['plugin_dir_url'] . $__tmpUrl,
'db_alias' => $this->alias . '_' . $alias,
'status' => $status
), $settings );
// add to menu order array§ion=Social_Stats
$this->cfg['menu_order'][(int)$settings[$alias]['menu']['order']] = $alias;
// add the menu to next free key
$this->cfg['menu_order'][] = $alias;
// add module to activate modules array
if($status == true){
$this->cfg['activate_modules'][$alias] = true;
// load the init of current loop module
if( $status == true && isset( $settings[$alias]['module_init'] ) ){
if( is_file($module_folder . $settings[$alias]['module_init']) ){
//if( is_admin() ) {
$current_module = array($alias => $this->cfg['modules'][$alias]);
require_once( $module_folder . $settings[$alias]['module_init'] );
// order menu_order ascendent
end() receives its argument by reference, but the result of a function call is not a variable.
You could rewrite your code like this:
$array_keys = array_keys($settings);
$alias = (string)end($array_keys);
I would like to be able to set a global username like <anythinghere> as a username value in the $usernames array (in code below). This is so that I can then go and redirect users based on domain, having already been "authenticated".
I will put example in code below.
Can i do something like $usernames = array("", $X) where $X = <anything-so-long-as-not-blank>
Full Code Below:
$usernames = array("", "", "", "", "", "");
$passwords = array("password1", "password2", "password3", "password4", "password5", "password6");
function get_page($username) {
$username = strtolower($username);
switch ($username) {
case "" : return "";
case "" : return "";
case "" : return "";
case "" : return "";
return preg_match('/#domain3\.com$/',$username) ?
"" : "DefaultBackupPage.php";
$page = get_page($_POST['username']);
$found = 0;
if ($usernames[$i] == $_POST["username"])
$found = 1;
if ($found == 0)
header('Location: login.php?login_error=1');
header('Location: '.$page);
header('Location: login.php?login_error=1');
@inhan has already helped me like a champ. I am wondering if anyone can get me over the line? Cheers!
Your code needed a clean-up first. There's a bunch of errors in it if you do a test run. It's also a bit hard to read IMO.
I've attached a working code sample below.
// Get users
$input_pwd = ( isset( $_POST["password"] ) ? $_POST["password"] : '' );
$input_user = ( isset( $_POST["username"] ) ? $_POST["username"] : '' );
// Your pseudo database here ;)
$usernames = array(
"/[a-z][A-Z][0-9]#domain2\.com/", // use an emtpy password string for each of these
"/[^#]+#domain3\.com/" // entries if they don't need to authenticate
$passwords = array( "password1", "password2", "password3", "password4", "", "" );
// Create an array of username literals or patterns and corresponding redirection targets
$targets = array(
"" => "",
"" => "",
"" => "",
"" => "",
"/[a-z][A-Z][0-9]#domain2\.com/" => "",
"/[^#]+#domain3\.com/" => "",
"/.+/" => "",
$logindata = array_combine( $usernames, $passwords );
if ( get_user_data( $input_user, $logindata ) === $input_pwd ) {
$_SESSION["username"] = $input_user;
header('Location: ' . get_user_data( $input_user, $targets ) );
} else {
// Supplied username is invalid, or the corresponding password doesn't match
header('Location: login.php?login_error=1');
/**
 * Look up the value associated with a user name in a pattern-keyed array.
 *
 * Each key of $data is either a literal user name or, when it starts with
 * "/", a PCRE pattern. Entries are tested in array order and the value of the
 * FIRST matching entry is returned, so specific entries must be listed before
 * catch-all patterns such as "/.+/". (Previously the loop kept going and the
 * last match won, which let a trailing catch-all override every other entry.)
 *
 * @param string $user The user name to look up.
 * @param array  $data Map of literal-name-or-pattern => value.
 * @return mixed The value of the first matching entry, or null if none match.
 */
function get_user_data ( $user, array $data ) {
    foreach ( $data as $user_pattern => $value ) {
        // PHP turns numeric-looking array keys into integers; compare as strings.
        $user_pattern = (string) $user_pattern;

        // An empty key has no first character to inspect; it can only be a
        // literal match for an empty user name.
        if ( $user_pattern === '' ) {
            if ( $user === '' ) {
                return $value;
            }
            continue;
        }

        if ( $user_pattern[0] === '/' ) {
            if ( preg_match( $user_pattern, $user ) ) {
                return $value;
            }
        } elseif ( $user_pattern === $user ) {
            return $value;
        }
    }

    return null;
}
Using Centos 5.5 final, Apache Tomcat 6, Java 1.6.0_24, PHP/Java Bridge 6.2.1, and JasperReports 4.0.1.
PHP fails to add a text value to a java.util.HashMap.
Does any one have any suggestions on how to accomplish this task?
If I comment out the line $map->put("text", "This is a test string");, the report compiles and runs.
* see if the java extension was loaded.
function checkJavaExtension()
$sapi_type = php_sapi_name();
$port = (isset($_SERVER['SERVER_PORT']) && (($_SERVER['SERVER_PORT'])>1024)) ? $_SERVER['SERVER_PORT'] : '8080';
if ($sapi_type == "cgi" || $sapi_type == "cgi-fcgi" || $sapi_type == "cli")
if(!(PHP_SHLIB_SUFFIX=="so" && #dl(''))&&!(PHP_SHLIB_SUFFIX=="dll" && #dl('php_java.dll'))&&!(#include_once("java/"))&&!(require_once("$port/java/")))
return "java extension not installed.";
return "The loaded java extension is not the PHP/Java Bridge";
return true;
* convert a php value to a java one...
* #param string $value
* #param string $className
* #returns boolean success
function convertValue($value, $className)
// if we are a string, just use the normal conversion
// methods from the java extension...
if ($className == 'java.lang.String')
$temp = new Java('java.lang.String', $value);
return $temp;
else if ($className == 'java.lang.Boolean' ||
$className == 'java.lang.Integer' ||
$className == 'java.lang.Long' ||
$className == 'java.lang.Short' ||
$className == 'java.lang.Double' ||
$className == 'java.math.BigDecimal')
$temp = new Java($className, $value);
return $temp;
else if ($className == 'java.sql.Timestamp' ||
$className == 'java.sql.Time')
$temp = new Java($className);
$javaObject = $temp->valueOf($value);
return $javaObject;
catch (Exception $err)
echo ( 'unable to convert value, ' . $value .
' could not be converted to ' . $className);
return false;
echo ( 'unable to convert value, class name '.$className.
' not recognised');
return false;
$map = new Java("java.util.HashMap");
$map->put("text", "This is a test string");
$compileManager = new JavaClass("net.sf.jasperreports.engine.JasperCompileManager");
$report = $compileManager->compileReport(realpath("test.jrxml"));
$fillManager = new JavaClass("net.sf.jasperreports.engine.JasperFillManager");
$emptyDataSource = new Java("net.sf.jasperreports.engine.JREmptyDataSource");
$jasperPrint = $fillManager->fillReport($report, $map, $emptyDataSource);
$outputPath = realpath(".")."/"."output.pdf";
$exportManager = new JavaClass("net.sf.jasperreports.engine.JasperExportManager");
$exportManager->exportReportToPdfFile($jasperPrint, $outputPath);
header("Content-type: application/pdf");
Here is some code to pass parameters from PHP to JasperReports automatically, based on a naming convention for user input fields.
include_once( 'db-account.php' );
function report_parse_post_parameters() {
# Automatically extract report parameters (data types converted in report).
$params = new java('java.util.HashMap');
# Pass the remaining POST "report_TYP" variables as report parameters.
foreach( $_POST as $name => $value ) {
if( strpos( $name, 'report_' ) === 0 ) {
$length = strlen( 'report_' );
if( strpos( $name, 'report_int_' ) === 0 ) {
$value = intval( $value );
$length = strlen( 'report_int_' );
$value = convertValue( $value, 'java.lang.Integer' );
$params->put( substr( $name, $length ), $value );
else if( strpos( $name, 'report_arr_' ) === 0 ) {
$length = strlen( 'report_arr_' );
$arrays = array_filter( explode( ',', $_POST[ $name ] ) );
# Map the values of the array form parameter to a java.util.ArrayList.
$arrayList = new java( 'java.util.ArrayList' );
foreach( $arrays as $value ) {
$arrayList->add( $value );
# Pass values into the report (without the "report_arr_" prefix).
$params->put( substr( $name, $length ), $arrayList );
else {
$params->put( substr( $name, $length ), $value );
return $params;
/**
 * Fill the compiled JasperReports report with the POSTed parameters and send
 * the resulting PDF to the client as a file download.
 *
 * Database credentials are read from the globals $dbhost, $dbname, $dbuser
 * and $dbpass. Report parameters come from report_parse_post_parameters().
 *
 * @param string $filename Download file name, without the ".pdf" extension.
 * @throws Exception Any exception raised while connecting, filling or
 *                   exporting is re-thrown after the connection is closed.
 */
function report_execute( $filename = 'FILENAME' ) {
  global $dbhost;
  global $dbname;
  global $dbuser;
  global $dbpass;

  $conn = null;
  $report = realpath( '/PATH/TO/REPORT.jasper' );

  try {
    $params = report_parse_post_parameters();

    # Load the PostgreSQL database driver.
    java( 'java.lang.Class' )->forName( 'org.postgresql.Driver' );

    # Attempt a database connection.
    $conn = java( 'java.sql.DriverManager' )->getConnection(
      "jdbc:postgresql://$dbhost/$dbname?user=$dbuser&password=$dbpass" );

    # Use the fill manager to produce the report.
    $fm = java('net.sf.jasperreports.engine.JasperFillManager');
    $pm = $fm->fillReport($report, $params, $conn);

    header('Cache-Control: private');
    header('Content-Description: File Transfer');
    # Disposition parameters are separated by ";" -- a "," makes some browsers
    # treat the value as several Content-Disposition headers and refuse it.
    header("Content-Disposition: attachment; filename=\"$filename.pdf\"");
    header('Content-Type: application/pdf');
    header('Content-Transfer-Encoding: binary');

    $em = java('net.sf.jasperreports.engine.JasperExportManager');
    $result = $em->exportReportToPdf($pm);

    header('Content-Length: ' . strlen( $result ) );
    echo $result;

    # Release the JDBC connection once the report has been sent.
    $conn->close();
  }
  catch( Exception $ex ) {
    # Do not leak the JDBC connection when report generation fails.
    if( $conn != null ) {
      $conn->close();
    }

    throw $ex;
  }
}
Change this line:
function report_execute( $filename = 'FILENAME' ) {
Change this line:
$report = realpath( '/PATH/TO/REPORT.jasper' );
Change these lines to match your database software:
java( 'java.lang.Class' )->forName( 'org.postgresql.Driver' );
$conn = java( 'java.sql.DriverManager' )->getConnection(
"jdbc:postgresql://$dbhost/$dbname?user=$dbuser&password=$dbpass" );
Edit db-account.php:
$dbhost = 'HOSTNAME';
$dbname = 'DATABASE';
$dbuser = 'USERNAME';
$dbpass = 'PASSWORD';
Example Form Input
Use a report_ prefix for user inputs, such as:
<form method="post" action='report.dhtml' class="climate" id="report-form">
<input type="hidden" name="report_int_ReportId" value="1" />
<input type="hidden" name="report_int_Radius" value="35" />
<input type="hidden" name="report_int_Trend" value="0" />
<input type="hidden" name="report_int_Relevance" value="1" />
<select id="category" name="report_int_CategoryId" value="2"></select>
<input type="text" id="city" name="report_int_CityId" />
<input class="submit-button" id="submit" type="submit" name="submit" value="Report" />
These values are then passed automatically into the report.
You should not have to change the code for checkjavaExtension().
How would one go about deleting all of the directories in a directory tree that have a certain name if the only access to the server available is via FTP?
To clarify, I would like to iterate over a directory tree and delete every directory whose name matches a certain string via FTP. A way to implement this in PHP would be nice - where should I start? Also, if anyone knows of any utilities that would already do this, that would be great as well.
Here is a starting point- a function that will scan through an FTP directory and print the name of any directory in the tree which matches the pattern. I have tested it briefly.
/**
 * Recursively walk an FTP directory tree and print the path of every
 * directory whose own name matches $pattern.
 *
 * A matching directory is reported but not descended into; non-matching
 * directories are scanned recursively. An entry is treated as a directory
 * when ftp_size() reports -1 for it.
 *
 * @param resource $conn    FTP connection as passed to the ftp_* functions.
 * @param string   $dir     Directory to scan.
 * @param string   $pattern PCRE pattern tested against each directory name.
 * @return void
 */
function scan_ftp_dir($conn, $dir, $pattern) {
    $files = ftp_nlist($conn, $dir);
    if (!$files) {
        // listing failed or the directory is empty: nothing to scan
        return;
    }

    foreach ($files as $file) {
        //get just the directory name (works with or without a '/' in $file)
        $dirName = basename($file);

        // never follow the self/parent entries, or the recursion would not end
        if ($dirName === '.' || $dirName === '..') {
            continue;
        }

        // some servers list bare names instead of paths; anchor those to $dir
        $path = (strpos($file, '/') === false)
            ? rtrim($dir, '/') . '/' . $file
            : $file;

        //the quickest way i can think of to check if is a directory
        if (ftp_size($conn, $path) == -1) {
            if (preg_match($pattern, $dirName)) {
                echo $path . ' matched pattern';
            } else {
                //directory didn't match pattern, recurse
                scan_ftp_dir($conn, $path, $pattern);
            }
        }
    }
}
Then do something like this
$host = 'localhost';
$user = 'user';
$pass = 'pass';
if (false === ($conn = ftp_connect($host))) {
die ('cannot connect');
if (!ftp_login($conn, $user, $pass)) die ('cannot authenticate');
scan_ftp_dir($conn, '.', '/^beginswith/');
Unfortunately you can only delete an empty directory with ftp_rmdir(), but if you look here there is a function called ftp_rmAll() which you could use to remove whole directory structures which you find.
Also I have only tested on Unix the trick of using the fail status returned from ftp_size() as a method of checking if an item returned by ftp_nlist() is a directory.
Presumably there's more to this question than it first appears.
FTP supports DIR to list directory contents, RMDIR to remove directories and DEL to delete files, so it supports the operations you need.
Or are you asking how to iterate over an FTP directory tree?
Do you have a preferred/required implementation language for this?
Well, this is the script I ended up using. It's a rather specific instance where one would have to use this, but if you are in the same predicament I was in, simply put in your ftp server address, username, password, the name of the folders you want deleted, and the path of the folder to start in and this will iterate through the directory, deleting all folders that match the name. There is a bug with reconnecting if the connection to the server is broken so you might need to run the script again if it disconnects.
if( $argc == 2 ) {
$directoryToSearch = $argv[1];
$host = '';
$username = '';
$password = '';
$connection = connect( $host, $username, $password );
deleteDirectoriesWithName( $connection, 'directoryToDelete', $directoryToSearch );
ftp_close( $connection );
exit( 0 );
else {
cliPrint( "This script currently only supports 1 argument.\n");
cliPrint( "Usage: php deleteDirectories.php directoryNameToSearch\n");
exit( 1 );
* Recursively traverse directories and files starting with the path
* passed in and then delete all directories that match the name
* passed in
* #param $connection the connection resource to the database.
* #param $name the name of the directories that should be * deleted.
* #param $path the path to start searching from
function deleteDirectoriesWithName( &$connection, $name, $path ) {
global $host, $username, $password;
cliPrint( "At path: $path\n" );
//Get a list of files in the directory
$list = ftp_nlist( $connection, $path );
if ( empty( $list ) ) {
$rawList = ftp_rawlist( $connection, $path );
if( empty( $rawList ) ) {
cliPrint( "Reconnecting\n");
ftp_close( $connection );
$connection = connect( $host, $username, $password );
cliPrint( "Reconnected\n" );
deleteDirectoriesWithName( $connection, $name, $path );
return true;
$pathToPass = addSlashToEnd( $path );
$list = RawlistToNlist( $rawList, $pathToPass );
//If we have selected a directory, then 'visit' the files (or directories) in the dir
if ( $list[0] != $path ) {
$path = addSlashToEnd( $path );
//iterate through all of the items listed in the directory
foreach ( $list as $item ) {
//if the directory matches the name to be deleted, delete it recursively
if ( $item == $name ) {
DeleteDirRecursive( $connection, $path . $item );
//otherwise continue traversing
else if ( $item != '..' && $item != '.' ) {
deleteDirectoriesWithName( $connection, $name, $path . $item );
return true;
*Put output to STDOUT
function cliPrint( $string ) {
fwrite( STDOUT, $string );
*Connect to the ftp server
function connect( $host, $username, $password ) {
$connection = ftp_connect( $host );
if ( !$connection ) {
die('Could not connect to server: ' . $host );
$loginSuccessful = ftp_login( $connection, $username, $password );
if ( !$loginSuccessful ) {
die( 'Could not login as: ' . $username . '#' . $host );
cliPrint( "Connection successful\n" );
return $connection;
* Delete the provided directory and all its contents from the FTP-server.
* #param string $path Path to the directory on the FTP-server relative to
* the current working directory
function DeleteDirRecursive(&$resource, $path) {
global $host, $username, $password;
cliPrint( $path . "\n" );
$result_message = "";
//Get a list of files and directories in the current directory
$list = ftp_nlist($resource, $path);
if ( empty($list) ) {
$listToPass = ftp_rawlist( $resource, $path );
if ( empty( $listToPass ) ) {
cliPrint( "Reconnecting\n" );
ftp_close( $resource );
$resource = connect( $host, $username, $password );
$result_message = "Reconnected\n";
cliPrint( "Reconnected\n" );
$result_message .= DeleteDirRecursive( $resource, $path );
return $result_message;
$list = RawlistToNlist( $listToPass, addSlashToEnd( $path ) );
//if the current path is a directory, recursively delete the file within and then
//delete the empty directory
if ($list[0] != $path) {
$path = addSlashToEnd( $path );
foreach ($list as $item) {
if ($item != ".." && $item != ".") {
$result_message .= DeleteDirRecursive($resource, $path . $item);
cliPrint( 'Delete: ' . $path . "\n" );
if (ftp_rmdir ($resource, $path)) {
cliPrint( "Successfully deleted $path\n" );
} else {
cliPrint( "There was a problem deleting $path\n" );
//otherwise delete the file
else {
cliPrint( 'Delete file: ' . $path . "\n" );
if (ftp_delete ($resource, $path)) {
cliPrint( "Successfully deleted $path\n" );
} else {
cliPrint( "There was a problem deleting $path\n" );
return $result_message;
/**
 * Convert a result from ftp_rawlist() to a result of ftp_nlist()
 *
 * The entry name is taken from a fixed column: everything from character 55
 * of each raw line onward, trimmed. The "." and ".." entries are dropped, as
 * are lines too short to contain a name, so the returned array may be empty
 * and callers must not assume that index 0 exists.
 *
 * @param array  $rawlist Result from ftp_rawlist();
 * @param string $path    Path to the directory on the FTP-server relative
 *                        to the current working directory (not used when
 *                        building the result; kept for existing callers)
 * @return array An array with the names of the entries in the directory
 */
function RawlistToNlist($rawlist, $path) {
    $array = array();
    foreach ($rawlist as $item) {
        $filename = trim(substr($item, 55));
        // Both comparisons must hold; with "||" the test was always true and
        // the "." / ".." entries were never filtered out.
        if ($filename !== '' && $filename !== '.' && $filename !== '..') {
            $array[] = $filename;
        }
    }
    return $array;
}
/**
 * Adds a '/' to the end of the path if it is not already present.
 *
 * @param string $path The path to normalise; an empty path becomes '/'.
 * @return string $path, guaranteed to end in '/'.
 */
function addSlashToEnd( $path ) {
    return substr( $path, -1 ) === '/' ? $path : $path . '/';
}
I have a script that parses the page for the link tag, but as there are multiple ways to download a page (wget, file_get_contents, curl, etc.) and multiple ways to include a favicon, the script is getting too big.
Is there a concise way to do this? Maybe an API that could be used?
Below is the growing script:
// Use a direct GET request for debugging, just pass in the domain ( ? )
$obj = new FaviconFinder();
class FaviconFinder
// domain before and after redirects
private $domain;
private $real_domain;
// the file and how it was obtained
private $file_code = '0';
private $file_page;
// the favicon and how it was obtained
private $favicon_code = 'z';
private $file_favicon;
private $ext;
// paths local to server and on the internet (URL)
private $path_local1 = "../../favicons/";
private $path_local;
private $path_internet;
public function invokeTest($pipe)
exec('wget ' . $pipe['domain'] . ' -O ../sites/temp.html 2>&1', $output);
public function invokeDebug($pipe)
echo "<br><br> domain: " . $pipe['domain'] . "";
$pipe = $this->invoke($pipe);
echo "<br><br> real_domain: " . $this->real_domain . "";
echo "<br><br> file_code | " . $this->file_code;
echo "<br><br> favicon_code | " . $this->favicon_code;
echo "<br><br> favicon_path | " . $this->path_internet;
echo "<br><br> favicon_file | " . $this->file_favicon;
echo "<br><br> favicon_file type | " . gettype($this->file_favicon);
echo "<br><br> favicon_file length | " . strlen($this->file_favicon);
echo "<br><br> IMAGE: ";
if ($this->file_favicon)
echo "<br><br> path_local | " . $this->path_local . "<br><br>";
$file64 = base64_encode($this->file_favicon);
echo "<img src= 'data:image/" . $this->ext . ";base64," . $file64 . "'></img>";
echo "<br><br>";
public function invoke( $pipe )
$domain = $pipe['domain'];
if ( $this->pageFound($domain) && $this->linkFound() && $this->faviconFoundFromLink() )
$pipe = $this->saveFavicon($pipe);
$pipe['favicon'] = $this->path_internet;
$pipe['favicon_local'] = $this->path_local;
} else {
$pipe['favicon'] = 'NULL';
$pipe['favicon_local'] = 'image_generic.png';
$pipe['method'] = $this->file_code . $this->favicon_code;
return $pipe;
pageFound - uses the facade pattern to find a page and record how it was found
private function pageFound ($domain)
return $this->pageFoundCurl($domain) || $this->pageFoundGet($domain);
// wget is another way to get past login page
// uses curl_exec to retreive a page
/**
 * Fetches the page for $domain through curlExec(), trying several URL
 * variants in order. The first variant that returns content wins: the page
 * is kept in $this->file_page and the variant's label in $this->file_code.
 *
 * @param string $domain
 * @return bool true when one of the variants returned a page
 */
private function pageFoundCurl ($domain)
{
    $types = array(
        "curl - 4"=>'https://www.' . $domain,
        "curl - 3"=>'http://www.' . $domain,
        "curl - 6"=>'https://' . $domain,
        "curl - 5"=>'http://' . $domain,
        // returned 302 errors for
        "curl - 1"=>$domain,
        "curl - 2"=>'www.' . $domain
    );
    foreach ($types as $key => $value) {
        // Passing true makes curlExec() also record the real domain.
        $this->file_page = $this->curlExec($value, true);
        if ($this->file_page) {
            $this->file_code = $key;
            return true;
        }
    }
    return false;
}
// uses file_get_contents to retreive a page
/**
 * Fetches the page for $domain through fileGetContents(), trying several URL
 * variants in order. The first variant that returns content wins: the page
 * is kept in $this->file_page and the variant's label in $this->file_code.
 *
 * @param string $domain
 * @return bool true when one of the variants returned a page
 */
private function pageFoundGet( $domain )
{
    $types = array(
        "file_get - 3"=>'http://www.' . $domain,
        "file_get - 4"=>'https://www.' . $domain,
        "file_get - 5"=>'http://' . $domain,
        "file_get - 6"=>'https://' . $domain,
        "file_get - 1"=>$domain,
        "file_get - 2"=>'www.' . $domain
    );
    foreach ($types as $key => $value) {
        // Assignment inside the condition is intentional: keep the page and
        // test it for truthiness in one step.
        if ($this->file_page = $this->fileGetContents( $value )) {
            $this->file_code = $key;
            return true;
        }
    }
    return false;
}
/**
 * Determines the favicon URL for the page held in $this->file_page.
 *
 * If the page has a <link rel="icon"> / <link rel="shortcut icon"> tag, its
 * href is turned into an absolute URL and $this->favicon_code records which
 * form the href had:
 *   'a' protocol-relative (//host/path), 'b' root-relative (/path),
 *   'c' absolute (http...), 'd' any other relative path.
 * Otherwise http://www.<domain>/favicon.ico is probed: code 'e' when it can
 * be fetched, code 'g' (and a null URL) when it cannot.
 *
 * @return bool false only when neither a link tag nor the default icon exists
 */
private function linkFound()
{
    $domain = $this->real_domain;
    $regex = '#<link\s+(?=[^>]*rel=(?:\'|")(?:shortcut\s)?icon(?:\'|")\s*)(?:[^>]*href=(?:\'|")(.+?)(?:\'|")).*>#i';
    $link_found = preg_match( $regex , $this->file_page, $matches );
    if ($link_found === 1) {
        $path = $matches[1];
        // substr() is used instead of $path[1] so that a one-character href
        // ("/") does not read past the end of the string.
        if ( substr($path, 0, 2) === '//' ) {
            // handles ( // )
            $this->favicon_code = 'a';
            $this->path_internet = 'http:' . $path;
        } else if ( substr($path, 0, 1) === '/' ) {
            // handles ( / )
            $this->favicon_code = 'b';
            $this->path_internet = 'http://www.' . $domain . $path;
        } else if ( substr($path, 0, 4) === 'http' ) {
            // handles ( http:// || https:// )
            $this->favicon_code = 'c';
            $this->path_internet = $path;
        } else {
            // difference between b and d? d has no leading slash, so one is added
            $this->favicon_code = 'd';
            $this->path_internet = 'http://www.' . $domain . '/' . $path;
        }
    } else {
        // No link tag: fall back to the conventional /favicon.ico location.
        $default_location = 'http://www.' . $domain . '/favicon.ico';
        if ( $this->faviconFound($default_location) ) {
            $this->favicon_code = 'e';
            $this->path_internet = $default_location;
        } else {
            $this->path_internet = null;
            $this->favicon_code = 'g';
            return false;
        }
    }
    return true;
}
/**
 * Downloads the favicon from $this->path_internet into $this->file_favicon.
 *
 * @return bool true when non-empty content was retrieved
 */
private function faviconFoundFromLink ()
{
    $this->file_favicon = $this->faviconFoundFacade( $this->path_internet );
    return $this->file_favicon ? true : false;
}
/**
 * Downloads the favicon from $default_location into $this->file_favicon.
 *
 * @param string $default_location URL to probe
 * @return bool true when non-empty content was retrieved
 */
private function faviconFound ($default_location)
{
    $this->file_favicon = $this->faviconFoundFacade( $default_location );
    return $this->file_favicon ? true : false;
}
/**
 * Single entry point for fetching a favicon; currently delegates to the
 * cURL-based fetcher.
 *
 * @param string $url
 * @return string|false the favicon content, or false when it was not fetched
 */
private function faviconFoundFacade($url)
{
    return $this->faviconFoundCurl($url) ;
}
/**
 * Alternative fetcher: downloads $url with wget into ../sites/temp.html.
 * wget's combined stdout/stderr is collected in a local $output array and
 * discarded; nothing is returned.
 *
 * @param string $url
 */
private function faviconFoundExec($url)
{
    // The URL may come from a remote page's HTML, so quote it before it
    // reaches the shell.
    exec('wget ' . escapeshellarg($url) . ' -O ../sites/temp.html 2>&1', $output);
}
/**
 * Alternative fetcher: reads $url with file_get_contents.
 *
 * @param string $url
 * @return string|false the content, or false on failure
 */
private function faviconFoundGet($url)
{
    // '@' silences the warning file_get_contents raises on failure; the
    // false return value is the failure signal.
    return @file_get_contents( $url );
}
// make less than 10 characters equate to false so I don't save bogus files
// does this
// does similar
/**
 * Fetches a favicon through curlExec() and rejects implausibly small
 * responses so bogus files are not saved.
 *
 * @param string $url
 * @return string|false the content, or false when the request failed or
 *                      returned fewer than 20 bytes
 */
private function faviconFoundCurl($url)
{
    $temp = $this->curlExec( $url, false );
    if ($temp === false) {
        return false;
    }
    // Anything shorter than 20 bytes is treated as not being a real icon.
    if (strlen($temp) < 20) {
        return false;
    }
    return $temp;
}
/**
 * Writes the fetched favicon to disk under $this->path_local1.
 *
 * The file name is the real domain with dots replaced by underscores, plus
 * the extension found in the favicon URL's path (if any). The name is stored
 * in $this->path_local and the extension in $this->ext.
 *
 * @param array $pipe passed through unchanged
 * @return array $pipe
 */
public function saveFavicon( $pipe )
{
    // this will remove any query parameters on the favicon link
    // and create a valid file name from the real domain
    $arr = parse_url($this->path_internet);
    // parse_url() returns false for malformed URLs and omits 'path' for
    // URLs such as "http://host", so do not index it blindly.
    $url_path = (is_array($arr) && isset($arr['path'])) ? $arr['path'] : '';
    $this->ext = pathinfo($url_path, PATHINFO_EXTENSION);
    $name = str_replace('.', '_', $this->real_domain);
    // add the extension if it exists, verify you need to to do this
    if ($this->ext) {
        $name = $name . "." . $this->ext;
    }
    // finally save it
    file_put_contents($this->path_local1 . $name, $this->file_favicon);
    $this->path_local = $name;
    return $pipe;
}
// helper and wrapper functions
// curl_exec wrapper
/**
 * Performs a cURL request for $url and returns the response body.
 *
 * @param string $url
 * @param bool   $set when true, the URL cURL reports for the transfer is
 *                    used to update $this->real_domain
 * @return string|false the response body, or false on failure
 */
private function curlExec ($url, $set)
{
    $curl = curl_init();
    curl_setopt_array($curl, array(
        CURLOPT_URL => $url,
        // Callers treat the result as the response body (strlen, regex
        // matching, writing it to disk), so it must be returned as a string
        // rather than printed.
        CURLOPT_RETURNTRANSFER => true,
        // NOTE(review): further options (redirect following, timeouts, user
        // agent) may belong here — confirm against the deployed version.
    ));
    $temp = curl_exec($curl);
    if ($set) $this->setRealDomain($curl);
    // Release the handle once the transfer info has been read.
    curl_close($curl);
    return $temp;
}
/**
 * Stores in $this->real_domain the host of the URL that cURL reports for the
 * transfer, with a leading "www." removed when more of the host name follows.
 *
 * @param resource $curl cURL handle on which curl_exec() has already run
 */
private function setRealDomain ($curl)
{
    $url = parse_url((string) curl_getinfo($curl, CURLINFO_EFFECTIVE_URL));
    // parse_url() yields false or no 'host' for URLs it cannot split; fall
    // back to an empty host instead of raising an undefined-index notice.
    $host = (is_array($url) && isset($url['host'])) ? $url['host'] : '';
    $this->real_domain = preg_replace('#^www\.(.+\.)#i', '$1', $host);
}
// deprecated as curl can do everything I need, just in case though
// 6009284/how-do-i-ignore-a-moved-header-with-file-get-contents-in-php
/**
 * file_get_contents wrapper that follows up to 20 redirects.
 *
 * @param string $value URL to read
 * @return string|false the content, or false on failure
 */
private function fileGetContents($value)
{
    // Context options must be nested under the wrapper name; the 'http'
    // entry is also the one used for https:// URLs.
    $opts = array(
        'http' => array(
            'follow_location' => true,
            'max_redirects' => 20
        )
    );
    $context = stream_context_create($opts);
    // '@' silences the warning raised on failure; false is the failure signal.
    return @file_get_contents( $value, false, $context );
}
// Retired fallback: reduces a host name to its last two dot-separated labels
// and probes http://www.<that>/favicon.ico, recording favicon code 'f' on
// success.
// NOTE(review): $domain is not defined anywhere in this method as shown, and
// the method's block braces are absent in this copy of the source.
private function removed ()
// Groups 2-4 capture "<label>.<label>" at the end of the string.
$res = preg_match('#(.*?)([^\.]*)(\.)([^\.]*)$#', $domain, $matches);
$main = $matches[2] . $matches[3] . $matches[4];
$default_location = 'http://www.' . $main . '/favicon.ico';
// NOTE(review): the '#' before file_get_contents looks like a mangled '@'
// error-suppression operator — as written it comments out the call; confirm.
$this->file_favicon = #file_get_contents( $default_location );
if( $this->file_favicon )
$this->path_internet = $default_location;
$this->favicon_code = 'f';
return true;
Here is one API you can use on the front end.
To check for a favicon, use the Google API.
There is no strategy or API for favicons. Parse the HTML, look for:
<link rel="shortcut icon" href="...">
or just:
<link rel="icon" href="...">
and extract the value of the href attribute.
If no such tag exists (or the referenced icon is not there) then check for /favicon.ico (this is how everything started in 1999, on Internet Explorer 5).
Additionally, iOS (and some versions of Android) searches for extra <link> elements having rel="apple-touch-icon" or rel="apple-touch-icon-precomposed".
Everything else is just guesswork and speculation.
See also: