How to get the signature algorithm out of a certificate? - php

I want to use the PHP function openssl_verify() to verify the signatures of different X.509 certificates.
I have all it needs (certificate, $data, $signature, $pub_key_id) except of the signature algorithm but which is stored in the certificate.
My simple question is: How can I extract signature algorithm from certificates?

How about this?
$cer = file_get_contents('certificate.cer');
$res = openssl_x509_read($cer);
openssl_x509_export($res, $out, FALSE);
$signature_algorithm = null;
if(preg_match('/^\s+Signature Algorithm:\s*(.*)\s*$/m', $out, $match)) $signature_algorithm = $match[1];
var_dump($signature_algorithm);
It produces the output:
string(21) "sha1WithRSAEncryption"
Which you would have to map to OPENSSL_ALGO_SHA1 yourself.

Look at this question, you can do it similar, try this:
private function GetCertSignatureAlgorithm($certSignatureBinary, $pubKeyResourceId)
{
if(false === openssl_public_decrypt($certSignatureBinary, $sigString, $pubKeyResourceId))
{
return false;
}
if (empty($sigString) ||
strlen($sigString) < 5)
{
return false;
}
if (ord($sigString[0]) !== 0x30 ||
ord($sigString[2]) !== 0x30 ||
ord($sigString[4]) !== 0x06)
{
return false;
}
$sigString = substr($sigString, 4);
$len = ord($sigString[1]);
$bytes = 0;
if ($len & 0x80)
{
$bytes = ($len & 0x7f);
$len = 0;
for ($i = 0; $i < $bytes; $i++)
{
$len = ($len << 8) | ord($sigString[$i + 2]);
}
}
$oidData = substr($sigString, 2 + $bytes, $len);
$hashOid = floor(ord($oidData[0]) / 40) . '.' . ord($oidData[0]) % 40;
$value = 0;
for ($i = 1; $i < strlen($oidData); $i++)
{
$value = $value << 7;
$value = $value | (ord($oidData[$i]) & 0x7f);
if (!(ord($oidData[$i]) & 0x80))
{
$hashOid .= '.' . $value;
$value = 0;
}
}
//www.iana.org/assignments/hash-function-text-names/hash-function-text-names.xml
//www.php.net/manual/en/openssl.signature-algos.php
switch($hashOid)
{
case '1.2.840.113549.2.5': return 'md5';
case '1.3.14.3.2.26': return 'sha1';
case '2.16.840.1.101.3.4.2.1': return 'sha256';
case '2.16.840.1.101.3.4.2.2': return 'sha384';
case '2.16.840.1.101.3.4.2.3': return 'sha512';
//not secure = not accepted
//case '1.2.840.113549.2.2': //'md2';
//case '1.2.840.113549.2.4': //'md4';
//case '1.3.14.3.2.18': //'sha';
}
throw new Exception('CertSignatureAlgorithm not found');
}

One way might be openssl x509 -text -noout < $certfile | grep "Signature Algorithm"

Using phpseclib, a pure PHP X.509 parser...
<?php
include('File/X509.php');
$x509 = new File_X509();
$cert = $x509->loadX509(file_get_contents('sample.pem'));
echo $cert['signatureAlgorithm']['algorithm'];

Related

Unable to use phpseclib classes

I've installed the phpseclib library from github and I'm trying to encrypt a password (for steamcommunity) with PHP. I am able to do it with Javascript by using the javascript code Steam has on their website but I cannot encrypt the password using pure PHP.
The Javascript encryption code:
var RSAPublicKey = function($modulus_hex, $encryptionExponent_hex) {
this.modulus = new BigInteger( $modulus_hex, 16);
this.encryptionExponent = new BigInteger( $encryptionExponent_hex, 16);
};
var Base64 = {
base64: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",
encode: function($input) {
if (!$input) {
return false;
}
var $output = "";
var $chr1, $chr2, $chr3;
var $enc1, $enc2, $enc3, $enc4;
var $i = 0;
do {
$chr1 = $input.charCodeAt($i++);
$chr2 = $input.charCodeAt($i++);
$chr3 = $input.charCodeAt($i++);
$enc1 = $chr1 >> 2;
$enc2 = (($chr1 & 3) << 4) | ($chr2 >> 4);
$enc3 = (($chr2 & 15) << 2) | ($chr3 >> 6);
$enc4 = $chr3 & 63;
if (isNaN($chr2)) $enc3 = $enc4 = 64;
else if (isNaN($chr3)) $enc4 = 64;
$output += this.base64.charAt($enc1) + this.base64.charAt($enc2) + this.base64.charAt($enc3) + this.base64.charAt($enc4);
} while ($i < $input.length);
return $output;
},
decode: function($input) {
if(!$input) return false;
$input = $input.replace(/[^A-Za-z0-9\+\/\=]/g, "");
var $output = "";
var $enc1, $enc2, $enc3, $enc4;
var $i = 0;
do {
$enc1 = this.base64.indexOf($input.charAt($i++));
$enc2 = this.base64.indexOf($input.charAt($i++));
$enc3 = this.base64.indexOf($input.charAt($i++));
$enc4 = this.base64.indexOf($input.charAt($i++));
$output += String.fromCharCode(($enc1 << 2) | ($enc2 >> 4));
if ($enc3 != 64) $output += String.fromCharCode((($enc2 & 15) << 4) | ($enc3 >> 2));
if ($enc4 != 64) $output += String.fromCharCode((($enc3 & 3) << 6) | $enc4);
} while ($i < $input.length);
return $output;
}
};
var Hex = {
hex: "0123456789abcdef",
encode: function($input) {
if(!$input) return false;
var $output = "";
var $k;
var $i = 0;
do {
$k = $input.charCodeAt($i++);
$output += this.hex.charAt(($k >> 4) &0xf) + this.hex.charAt($k & 0xf);
} while ($i < $input.length);
return $output;
},
decode: function($input) {
if(!$input) return false;
$input = $input.replace(/[^0-9abcdef]/g, "");
var $output = "";
var $i = 0;
do {
$output += String.fromCharCode(((this.hex.indexOf($input.charAt($i++)) << 4) & 0xf0) | (this.hex.indexOf($input.charAt($i++)) & 0xf));
} while ($i < $input.length);
return $output;
}
};
var RSA = {
getPublicKey: function( $modulus_hex, $exponent_hex ) {
return new RSAPublicKey( $modulus_hex, $exponent_hex );
},
encrypt: function($data, $pubkey) {
if (!$pubkey) return false;
$data = this.pkcs1pad2($data,($pubkey.modulus.bitLength()+7)>>3);
if(!$data) return false;
$data = $data.modPowInt($pubkey.encryptionExponent, $pubkey.modulus);
if(!$data) return false;
$data = $data.toString(16);
if(($data.length & 1) == 1)
$data = "0" + $data;
return Base64.encode(Hex.decode($data));
},
pkcs1pad2: function($data, $keysize) {
if($keysize < $data.length + 11)
return null;
var $buffer = [];
var $i = $data.length - 1;
while($i >= 0 && $keysize > 0)
$buffer[--$keysize] = $data.charCodeAt($i--);
$buffer[--$keysize] = 0;
while($keysize > 2)
$buffer[--$keysize] = Math.floor(Math.random()*254) + 1;
$buffer[--$keysize] = 2;
$buffer[--$keysize] = 0;
return new BigInteger($buffer);
}
};
And to encrypt it I simply call the following:
var pubKey = RSA.getPublicKey('<?php echo $curl->response->publickey_mod; ?>', '<?php echo $curl->response->publickey_exp; ?>');
var encrypted_password = RSA.encrypt('<?php echo $inviter['password']; ?>', pubKey);
When I try to include a file from the phpseclib and use the class, the file includes successfully but the class displays an error saying it's not a valid class.
My PHP code:
<?php
// publickey_mod = C2242A41A84E56D9CD7952CCB985343A6DCE26AF7CA31036415B7B9DA190DF68BBEC93ED6A9AAE5CA9D6EB1A65244E0013DB38C9462076F8165EDB2A1ECB383A75FB0C71388AFD2C2DC15F9088BA252B4ED2F2C304F545C3704B6B7F5CEE01AF7DD0DC9038CBFBDE7B99689006C5272EBCEC221DB3D9CEB8514E246A571E49709AD94442A8611BE131E6F3FFA40AD632BBD6C5B5F85F7A5C87310E17632145A57BDF17A4F7305E959B4A6126AA8C64C794975F17051E6EAF5462F1189AAD0E7B5F2F254AEEB030D8ED8BAD0F8BD753482E652FFB2405D8947994E143032D5511AF63977A51C9013B5516FA51C5AC50E56588C09C761A473E8D18566B1D435687
// publickey_exp = 010001
require_once 'phpseclib/Crypt/RSA.php';
require_once 'phpseclib/Crypt/RSA/PKCS1.php';
$pkcs = new \phpseclib\Crypt\RSA\PKCS1();
$key = $pkcs->savePublicKey(new \phpseclib\Math\BigInteger(C2242A41A84E56D9CD7952CCB985343A6DCE26AF7CA31036415B7B9DA190DF68BBEC93ED6A9AAE5CA9D6EB1A65244E0013DB38C9462076F8165EDB2A1ECB383A75FB0C71388AFD2C2DC15F9088BA252B4ED2F2C304F545C3704B6B7F5CEE01AF7DD0DC9038CBFBDE7B99689006C5272EBCEC221DB3D9CEB8514E246A571E49709AD94442A8611BE131E6F3FFA40AD632BBD6C5B5F85F7A5C87310E17632145A57BDF17A4F7305E959B4A6126AA8C64C794975F17051E6EAF5462F1189AAD0E7B5F2F254AEEB030D8ED8BAD0F8BD753482E652FFB2405D8947994E143032D5511AF63977A51C9013B5516FA51C5AC50E56588C09C761A473E8D18566B1D435687, 16), new \phpseclib\Math\BigInteger(010001, 16));
$rsa = new phpseclib\Crypt\RSA();
$rsa->setPrivateKeyFormat(CRYPT_RSA_PRIVATE_FORMAT_PKCS1);
$rsa->setPublicKeyFormat(CRYPT_RSA_PUBLIC_FORMAT_PKCS1);
$rsa->setEncryptionMode(CRYPT_RSA_ENCRYPTION_PKCS1);
define('CRYPT_RSA_PKCS15_COMPAT', true);
$rsa->loadKey($key);
$password = $rsa->encrypt("mySteamPassword"); // encrypting password
var_dump($password);
?>
And the error message that this code displays:
Fatal error: Class 'phpseclib\Crypt\RSA\PKCS' not found in C:\xampp\htdocs\RSA-Encryption\phpseclib\Crypt\RSA\PKCS1.php on line 40
See this part?
require_once 'phpseclib/Crypt/RSA.php';
require_once 'phpseclib/Crypt/RSA/PKCS1.php';
You're explicitly including specific classes that depend on other classes, but are not including them directly. The correct solution is to, instead of typing in require_once statements directly, use an autoloader.
If you're loading this library with composer, this is easy.
Run composer require phpseclib/phpseclib
Add require_once 'vendor/autoload.php'; to your PHP scripts.
Otherwise, you need to use spl_autoload_register() instead:
/**
* Register a PSR autoloader for a given namespace and directory
*
* #param string $namespace
* #param string $dir
* #param string $type ('psr0' or 'psr4')
* #return boolean
* #throws Exception
* #ref http://stackoverflow.com/a/35015933/2224584
*/
function generic_autoload($namespace, $dir, $type = 'psr4')
{
switch ($type) {
case 'psr0':
$spl = '_';
break;
case 'psr4':
$spl = '\\';
break;
default:
throw new Exception('Invalid type; expected "psr0" or "psr4"');
}
$ns = trim($namespace, DIRECTORY_SEPARATOR.$spl);
return spl_autoload_register(
function($class) use ($ns, $dir, $spl)
{
// project-specific namespace prefix
$prefix = $ns.$spl;
// base directory for the namespace prefix
$base_dir = $dir . DIRECTORY_SEPARATOR;
// does the class use the namespace prefix?
$len = strlen($prefix);
if (strncmp($prefix, $class, $len) !== 0) {
// no, move to the next registered autoloader
return;
}
// get the relative class name
$relative_class = substr($class, $len);
// replace the namespace prefix with the base directory, replace
// namespace separators with directory separators in the relative
// class name, append with .php
$file = $base_dir .
str_replace($spl, DIRECTORY_SEPARATOR, $relative_class) .
'.php';
// if the file exists, require it
if (file_exists($file)) {
require $file;
}
}
);
}
Usage:
generic_autoload('phpseclib', '/path/to/phpseclib', 'psr4');
Word of caution: Make sure you aren't using PKCS1v1.5 padding for RSA encryption. If you're going to use RSA, you should use RSAES-OAEP with MGF1+SHA256 and e = 65537. I know phpseclib supports this; if Steam Community doesn't, raise hell until they do.

/dev/urandom error (permission denied by webhost)

I am using function:
private function random($len) {
if (#is_readable('/dev/urandom')) {
$f=fopen('/dev/urandom', 'r');
$urandom=fread($f, $len);
fclose($f);
}
$return='';
for ($i=0;$i<$len;++$i) {
if (!isset($urandom)) {
if ($i%2==0) mt_srand(time()%2147 * 1000000 + (double)microtime() * 1000000);
$rand=48+mt_rand()%64;
} else $rand=48+ord($urandom[$i])%64;
if ($rand>57)
$rand+=7;
if ($rand>90)
$rand+=6;
if ($rand==123) $rand=52;
if ($rand==124) $rand=53;
$return.=chr($rand);
}
return $return;
}
I have some forms which trigger this function and I get the error:
int(2) string(200) "is_readable(): open_basedir restriction in effect.
File(/dev/urandom) is not within the allowed path(s):
Is there a way to replace this function and not to use /dev/urandom ?
Thank you very much.
From the (previously accepted) answer:
Instead of urandom you can use "rand":
Nooooooooo!
Dealing with open_basedir is one of the things we handle gracefully in random_compat. Seriously consider importing that library then just using random_bytes() instead of reading from /dev/urandom.
Whatever you do, DON'T USE rand(). Even if you believe there's a use case for it, the security trade-offs are a lie.
Also, if you need a function to generate a random string (depends on PHP 7 or random_compat):
/**
* Note: See https://paragonie.com/b/JvICXzh_jhLyt4y3 for an alternative implementation
*/
function random_string($length = 26, $alphabet = 'abcdefghijklmnopqrstuvwxyz234567')
{
if ($length < 1) {
throw new InvalidArgumentException('Length must be a positive integer');
}
$str = '';
$alphamax = strlen($alphabet) - 1;
if ($alphamax < 1) {
throw new InvalidArgumentException('Invalid alphabet');
}
for ($i = 0; $i < $length; ++$i) {
$str .= $alphabet[random_int(0, $alphamax)];
}
return $str;
}
Demo code: https://3v4l.org/DOjNE
If your host doesn't support random_int() you can use a function which I made for myself.
function generateRandomString($length, $secureRand = false, $chars="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") {
if (!function_exists("random_int") && $secureRand) {
function random_int($min, $max) {
$range = $max - $min;
if ($range <= 0) return $min;
$log = ceil(log($range, 2));
$bytes = (int)($log / 8) + 1;
$filter = (int)(1 << ((int)($log + 1))) - 1;
do {
$rnd = hexdec(bin2hex(openssl_random_pseudo_bytes($bytes, $s)));
if (!$s) continue;
$rnd = $rnd & $filter;
} while ($rnd > $range);
return $min + $rnd;
}
}
$charsCount = strlen($chars) - 1;
$output = "";
for ($i=1; $i <= $length; $i++) {
if ($secureRand)
$output .= $chars[random_int(0, $charsCount)];
else
$output .= $chars[mt_rand(0, $charsCount)];
}
return $output;
}
If you need a secure random string (e.g. random passwords):
generateRandomString(8, true);
this will give you a 8 lenght string.

automatic number increase in php based web form [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 7 years ago.
Improve this question
So I have created an html form which then posts the results to a php file that overlays them on a PDF and then emails that PDF to myself and the email that was put in the form. All I want to do now is find a simple way to make it so that the PDF includes a sequential number.
For example: When the form is filled out for the first time the number 0001 is input automatically into the PDF and 0002 for the second time and so on.
Is there an easy PHP function to accomplish this?
Essentially I am creating an online invoicing form so when I do service calls I can create an invoice on the spot from a web browser which is then emailed to my office and the client.
Any help would be greatly appreciated.
For an incrementing number, you could keep a number in a database and then extract it, add 1 to it, use it, and then put it back in the DB for next time, but this seems complicated. Somebody in the comments mentioned using the timestamp, which would be done like so:
$invoicenumber = time(); //This number will always be unique
The time function works like so (copied from w3schools):
The time() function returns the current time in the number of seconds since the Unix Epoch (January 1 1970 00:00:00 GMT).
Since actual seconds can only go up (increment), this number will never be the same twice.
I hope this is helpful.
-Edit
You can also display this date/time in a readable format like so:
$time = time();
echo date("Y-m-d H:i:s",$time);
-Edit 2
If you want an incrementing number, you basically need a very simple database to save it, which might be as simple as a table called invoices, with a column called invoicenumber, which stores your invoice number in it. You could / probably should use this to store other invoice information in it too, so you'd have each invoice number saved (which means we want to only get the highest one)
Then your code would look like this, for each time you want to use it:
Firstly you'd have a database information file (settings.php or something similar) with your database definitions in it, which might look like this:
define('DB_HOST', 'localhost');
define('DB_USER', 'db_username');
define('DB_PASS', 'db_password');
define('DB_NAME', 'database_name');
Your code would look like this:
//Establish a mysql connection
$mysqli = new mysqli(DB_HOST, DB_USER, DB_PASS, DB_NAME);
//Set up a query to get the highest number
$query = "SELECT invoicenumber FROM invoices ORDER BY invoicenumber DESC LIMIT 1";
//Get the result
$result = $mysqli->query($query);
$row = $result->fetch_assoc();
//If we have a record
if($row){
//New invoice number
$invoicenumber = $row['invoicenumber']++;
//Else (database is empty, so start at the beginning)
}else{
$invoicenumber = 1;
}
//Now we have our invoice number, so do whatever you want with it
/**
* Code here to use the number
* */
//Now we wanna add the new invoice to the database, so
/**
* Add any other info to this statement if you want.
* If any of it is user submitted data, be sure to use prepared statements
* (just look at php.net's documentation on prepared statements)
* w3schools also has some nice tutorials on how to safely insert stuff
* in to a database, so check it all out :)
* */
$query = "INSERT INTO invoices(invoicenumber) VALUES($invoicenumber)";
//Execute the query
if($mysqli->query($query)){
//Show success
echo "Invoice $invoicenumber has been added to the database.";
}else{
//Show error
echo "Unfortunately we could not add invoice $invoicenumber to the database.";
}
//Now we can clear up our resources
$stmt->free_result(); $stmt->close(); $mysqli->close();
Please note: this is a very basic example. Yours will have additions and enhanced security if you are using user submitted data, so please do your homework and make sure that you fully understand each line of this code before you proceed to use it.
I do exactly the same with patient accession numbers on patient reports.
include('/home/user/php/class.pdf2text.php');
$p2t = new PDF2Text();
$p2t->setFilename($pdf);
$p2t->decodePDF();
$data = $p2t->output();
$len = strlen($data);
$pos = strpos($data,$accession);
if (pos){
$in .= "$accession,";
$checked++;
}
else{
$missingPDF += 1;echo "\n<p> <span class='bold red'>INCORRECT ACCESSION NUMBER c=$row[0] p=$row[1]</span>\n";
}
if ($checked > 0){
$in = substr($in,0,-1) . ')';
$sql = "UPDATE `Patient` SET `PDF`=1 WHERE $in";
}
pdf2text.php
class PDF2Text {
// Some settings
var $multibyte = 4; // Use setUnicode(TRUE|FALSE)
var $convertquotes = ENT_QUOTES; // ENT_COMPAT (double-quotes), ENT_QUOTES (Both), ENT_NOQUOTES (None)
var $showprogress = true; // TRUE if you have problems with time-out
// Variables
var $filename = '';
var $decodedtext = '';
function setFilename($filename) {
// Reset
$this->decodedtext = '';
$this->filename = $filename;
}
function output($echo = false) {
if($echo) echo $this->decodedtext;
else return $this->decodedtext;
}
function setUnicode($input) {
// 4 for unicode. But 2 should work in most cases just fine
if($input == true) $this->multibyte = 4;
else $this->multibyte = 2;
}
function decodePDF() {
// Read the data from pdf file
$infile = #file_get_contents($this->filename, FILE_BINARY);
if (empty($infile))
return "";
// Get all text data.
$transformations = array();
$texts = array();
// Get the list of all objects.
preg_match_all("#obj[\n|\r](.*)endobj[\n|\r]#ismU", $infile . "endobj\r", $objects);
$objects = #$objects[1];
// Select objects with streams.
for ($i = 0; $i < count($objects); $i++) {
$currentObject = $objects[$i];
// Prevent time-out
#set_time_limit ();
if($this->showprogress) {
// echo ". ";
flush(); ob_flush();
}
// Check if an object includes data stream.
if (preg_match("#stream[\n|\r](.*)endstream[\n|\r]#ismU", $currentObject . "endstream\r", $stream )) {
$stream = ltrim($stream[1]);
// Check object parameters and look for text data.
$options = $this->getObjectOptions($currentObject);
if (!(empty($options["Length1"]) && empty($options["Type"]) && empty($options["Subtype"])) )
// if ( $options["Image"] && $options["Subtype"] )
// if (!(empty($options["Length1"]) && empty($options["Subtype"])) )
continue;
// Hack, length doesnt always seem to be correct
unset($options["Length"]);
// So, we have text data. Decode it.
$data = $this->getDecodedStream($stream, $options);
if (strlen($data)) {
if (preg_match_all("#BT[\n|\r](.*)ET[\n|\r]#ismU", $data . "ET\r", $textContainers)) {
$textContainers = #$textContainers[1];
$this->getDirtyTexts($texts, $textContainers);
} else
$this->getCharTransformations($transformations, $data);
}
}
}
// Analyze text blocks taking into account character transformations and return results.
$this->decodedtext = $this->getTextUsingTransformations($texts, $transformations);
}
function decodeAsciiHex($input) {
$output = "";
$isOdd = true;
$isComment = false;
for($i = 0, $codeHigh = -1; $i < strlen($input) && $input[$i] != '>'; $i++) {
$c = $input[$i];
if($isComment) {
if ($c == '\r' || $c == '\n')
$isComment = false;
continue;
}
switch($c) {
case '\0': case '\t': case '\r': case '\f': case '\n': case ' ': break;
case '%':
$isComment = true;
break;
default:
$code = hexdec($c);
if($code === 0 && $c != '0')
return "";
if($isOdd)
$codeHigh = $code;
else
$output .= chr($codeHigh * 16 + $code);
$isOdd = !$isOdd;
break;
}
}
if($input[$i] != '>')
return "";
if($isOdd)
$output .= chr($codeHigh * 16);
return $output;
}
function decodeAscii85($input) {
$output = "";
$isComment = false;
$ords = array();
for($i = 0, $state = 0; $i < strlen($input) && $input[$i] != '~'; $i++) {
$c = $input[$i];
if($isComment) {
if ($c == '\r' || $c == '\n')
$isComment = false;
continue;
}
if ($c == '\0' || $c == '\t' || $c == '\r' || $c == '\f' || $c == '\n' || $c == ' ')
continue;
if ($c == '%') {
$isComment = true;
continue;
}
if ($c == 'z' && $state === 0) {
$output .= str_repeat(chr(0), 4);
continue;
}
if ($c < '!' || $c > 'u')
return "";
$code = ord($input[$i]) & 0xff;
$ords[$state++] = $code - ord('!');
if ($state == 5) {
$state = 0;
for ($sum = 0, $j = 0; $j < 5; $j++)
$sum = $sum * 85 + $ords[$j];
for ($j = 3; $j >= 0; $j--)
$output .= chr($sum >> ($j * 8));
}
}
if ($state === 1)
return "";
elseif ($state > 1) {
for ($i = 0, $sum = 0; $i < $state; $i++)
$sum += ($ords[$i] + ($i == $state - 1)) * pow(85, 4 - $i);
for ($i = 0; $i < $state - 1; $i++) {
try {
if(false == ($o = chr($sum >> ((3 - $i) * 8)))) {
throw new Exception('Error');
}
$output .= $o;
} catch (Exception $e) { /*Dont do anything*/ }
}
}
return $output;
}
function decodeFlate($data) {
return #gzuncompress($data);
}
function getObjectOptions($object) {
$options = array();
if (preg_match("#<<(.*)>>#ismU", $object, $options)) {
$options = explode("/", $options[1]);
#array_shift($options);
$o = array();
for ($j = 0; $j < #count($options); $j++) {
$options[$j] = preg_replace("#\s+#", " ", trim($options[$j]));
if (strpos($options[$j], " ") !== false) {
$parts = explode(" ", $options[$j]);
$o[$parts[0]] = $parts[1];
} else
$o[$options[$j]] = true;
}
$options = $o;
unset($o);
}
return $options;
}
function getDecodedStream($stream, $options) {
$data = "";
if (empty($options["Filter"]))
$data = $stream;
else {
$length = !empty($options["Length"]) ? $options["Length"] : strlen($stream);
$_stream = substr($stream, 0, $length);
foreach ($options as $key => $value) {
if ($key == "ASCIIHexDecode")
$_stream = $this->decodeAsciiHex($_stream);
elseif ($key == "ASCII85Decode")
$_stream = $this->decodeAscii85($_stream);
elseif ($key == "FlateDecode")
$_stream = $this->decodeFlate($_stream);
elseif ($key == "Crypt") { // TO DO
}
}
$data = $_stream;
}
return $data;
}
function getDirtyTexts(&$texts, $textContainers) {
for ($j = 0; $j < count($textContainers); $j++) {
if (preg_match_all("#\[(.*)\]\s*TJ[\n|\r]#ismU", $textContainers[$j], $parts))
$texts = array_merge($texts, array(#implode('', $parts[1])));
elseif (preg_match_all("#T[d|w|m|f]\s*(\(.*\))\s*Tj[\n|\r]#ismU", $textContainers[$j], $parts))
$texts = array_merge($texts, array(#implode('', $parts[1])));
elseif (preg_match_all("#T[d|w|m|f]\s*(\[.*\])\s*Tj[\n|\r]#ismU", $textContainers[$j], $parts))
$texts = array_merge($texts, array(#implode('', $parts[1])));
}
}
function getCharTransformations(&$transformations, $stream) {
preg_match_all("#([0-9]+)\s+beginbfchar(.*)endbfchar#ismU", $stream, $chars, PREG_SET_ORDER);
preg_match_all("#([0-9]+)\s+beginbfrange(.*)endbfrange#ismU", $stream, $ranges, PREG_SET_ORDER);
for ($j = 0; $j < count($chars); $j++) {
$count = $chars[$j][1];
$current = explode("\n", trim($chars[$j][2]));
for ($k = 0; $k < $count && $k < count($current); $k++) {
if (preg_match("#<([0-9a-f]{2,4})>\s+<([0-9a-f]{4,512})>#is", trim($current[$k]), $map))
$transformations[str_pad($map[1], 4, "0")] = $map[2];
}
}
for ($j = 0; $j < count($ranges); $j++) {
$count = $ranges[$j][1];
$current = explode("\n", trim($ranges[$j][2]));
for ($k = 0; $k < $count && $k < count($current); $k++) {
if (preg_match("#<([0-9a-f]{4})>\s+<([0-9a-f]{4})>\s+<([0-9a-f]{4})>#is", trim($current[$k]), $map)) {
$from = hexdec($map[1]);
$to = hexdec($map[2]);
$_from = hexdec($map[3]);
for ($m = $from, $n = 0; $m <= $to; $m++, $n++)
$transformations[sprintf("%04X", $m)] = sprintf("%04X", $_from + $n);
} elseif (preg_match("#<([0-9a-f]{4})>\s+<([0-9a-f]{4})>\s+\[(.*)\]#ismU", trim($current[$k]), $map)) {
$from = hexdec($map[1]);
$to = hexdec($map[2]);
$parts = preg_split("#\s+#", trim($map[3]));
for ($m = $from, $n = 0; $m <= $to && $n < count($parts); $m++, $n++)
$transformations[sprintf("%04X", $m)] = sprintf("%04X", hexdec($parts[$n]));
}
}
}
}
function getTextUsingTransformations($texts, $transformations) {
$document = "";
for ($i = 0; $i < count($texts); $i++) {
$isHex = false;
$isPlain = false;
$hex = "";
$plain = "";
for ($j = 0; $j < strlen($texts[$i]); $j++) {
$c = $texts[$i][$j];
switch($c) {
case "<":
$hex = "";
$isHex = true;
$isPlain = false;
break;
case ">":
$hexs = str_split($hex, $this->multibyte); // 2 or 4 (UTF8 or ISO)
for ($k = 0; $k < count($hexs); $k++) {
$chex = str_pad($hexs[$k], 4, "0"); // Add tailing zero
if (isset($transformations[$chex]))
$chex = $transformations[$chex];
$document .= html_entity_decode("&#x".$chex.";");
}
$isHex = false;
break;
case "(":
$plain = "";
$isPlain = true;
$isHex = false;
break;
case ")":
$document .= $plain;
$isPlain = false;
break;
case "\\":
$c2 = $texts[$i][$j + 1];
if (in_array($c2, array("\\", "(", ")"))) $plain .= $c2;
elseif ($c2 == "n") $plain .= '\n';
elseif ($c2 == "r") $plain .= '\r';
elseif ($c2 == "t") $plain .= '\t';
elseif ($c2 == "b") $plain .= '\b';
elseif ($c2 == "f") $plain .= '\f';
elseif ($c2 >= '0' && $c2 <= '9') {
$oct = preg_replace("#[^0-9]#", "", substr($texts[$i], $j + 1, 3));
$j += strlen($oct) - 1;
$plain .= html_entity_decode("&#".octdec($oct).";", $this->convertquotes);
}
$j++;
break;
default:
if ($isHex)
$hex .= $c;
elseif ($isPlain)
$plain .= $c;
break;
}
}
$document .= "\n";
}
return $document;
}
}

Restricting access to a site using IP address

I would like to know whether there is a way of restricting the users of a site such that they can only access the inner pages of a site if they are within a certain range of IP addresses or a certain network?
The current PHP scripts I am getting cant differentiate the real IPs from the Proxies?
Thanks
i wouldn’t restrict on ip addresses. as you said, you can’t know if it’s a proxy. furthermore, ip addresses can be easily spoofed.
Have you considered using apache .htaccess files for that?
IP restriction with htaccess
You can try out a script I created that allows very advanced IP rules. I coded it years ago so I apologize in advance for the current shape of it.
Edit:
If you're looking for an "&" operator in the syntax don't bother. I forgot to add it when I coded this and looking back at this script now makes me cringe at the thought of touching it again.
<?php
##############################################################
# IP Expression Class #
# Easy IP-based Access Restrictions #
# Change Log: #
# - Added Range and limited IPv6 support #
# - Changed name from IPAR to IPEX #
# #
##############################################################
# Example Rules: #
# 69.[10-20].[^50].* #
# 69.*.[1-5 | 10-20 |^30].* #
# 60.12.2.* #
# 127.* #
# 69.1.1.1-70.1.1.1 <-- This is a range #
# #
# Usage: #
# Ipex::IsMatch($rule, $ip); #
# #
# [range] - Defines a range for a section of the IP #
# | - OR token. IP can match this range/number #
# ^ - NOT token. IP can not match this range/number #
# x-y - Defines a range from x to y #
# x - Exactly match x (x = a hex or dec number) #
# * - Match any number #
# #
#----------===============================-------------------#
# [ Written by Chris Tarquini ] #
#----------===============================-------------------#
##############################################################
define('IPR_DENY', false);
define('IPR_ALLOW', true);
define('IPR_ERR_MISMATCH',-1);
define('IPR_ERR_RANGE_MISMATCH',-2);
define('IPR_ERR_RANGE_INVALID',-3);
define('IPR_ERR_INVALID_RULE',-4);
class IPEX
{
const TOKEN_RANGE_BEGIN = '[';
const TOKEN_RANGE_END = ']';
const TOKEN_WILDCARD = '*';
const TOKEN_RANGE_SPLIT = '-';
const TOKEN_OR = '|';
const TOKEN_NOT = '^';
const DEBUG_MODE = TRUE;
private static function trace($err){if(self::DEBUG_MODE) echo "$err\r\n";}
private static function FixRule($rule,$count = 4, $split='.')
{
$rule = explode($split,$rule);
$filler = 0;
$size = sizeof($rule);
for($i = 0; $i < $count; $i++)
{
if($i > $size) { $rule[] = $filler; $size++;}
else if(empty($rule[$i])) { $filler = self::TOKEN_WILDCARD; $rule[$i] = $filler;}
}
return $rule;
}
private static function FixIP($rule,$count = 4, $split='.')
{
$rule = explode($split,$rule);
$size = sizeof($rule);
for($i = 0; $i < $count; $i++)
{
if($i > $size) { $rule[] = 0; $size++;}
else if(empty($rule[$i])) { $rule[$i] = 0;}
}
return $rule;
}
private static function GetIpType(&$ip)
{
$mode = IPID::Identify($ip,$newip);
if($mode == IPID_IPv4_Embed) { $ip = $newip; return IPID_IPv4;}
return $mode;
}
private static function FixIPRange(&$start, &$stop)
{
$count = 4; $split = '.';
if(self::GetIpType($start) == IPID_IPv6) {$count = 8; $split = ':';}
$q = 0;
while($q < 2)
{
$filler = ($q == 0) ? 0 : 255;
$arr = explode($split,($q == 0) ? $start : $stop);
$size = sizeof($arr);
for($i = 0; $i < $count; $i++)
{
if($i > $size){ $arr[] = $filler; $size++;}
else if(empty($arr[$i])){ $arr[$i] = $filler; }
}
if($q == 0) $start = implode($split, $arr);
else $stop = implode($split,$arr);
$q++;
}
}
public static function IsInRange($start, $stop, $ip)
{
//Sorry guys we only support IPv4 for this ;(
self::FixIPRange($start,$stop);
self::trace("fixed: start = $start, stop = $stop");
$start = ip2long($start); $stop = ip2long($stop);
$ip = ip2long($ip);
self::trace("start = $start, stop = $stop, ip = $ip");
return ($ip >= $start && $ip <= $stop);
}
public static function IsAllowed($rule, $ip){return self::IsMatch($rule,$ip);}
public static function IsMatch($rule,$ip)
{
$mode = self::GetIpType($ip);
self::trace("ip type: $mode");
if(strpos($rule, self::TOKEN_RANGE_SPLIT) !== false && strpos($rule,self::TOKEN_RANGE_BEGIN) === false)
{
self::trace("ip range mode");
$test = explode(self::TOKEN_RANGE_SPLIT, $rule);
self::trace("range size: ".sizeof($test));
print_r($test);
if(sizeof($test) != 2) return IPR_ERR_RANGE_INVALID;
$start = $test[0]; $end = $test[1];
if(empty($start) || empty($end)) return IPR_ERR_RANGE_INVALID;
self::trace("range start: $start, range stop: $end");
$rm1 = (self::IsHex($start)) ? $mode : self::GetIpType($start);
$rm2 = (self::IsHex($end)) ? $mode : self::GetIpType($end);
self::trace("range types: $rm1, $rm2\r\nip type: $mode");
if($rm1 != $rm2 || $rm1 != $mode) return IPR_ERR_RANGE_MISMATCH;
if($mode == IPID_IPv6) { return IPR_ERR_IPv6_NOTSUPPORTED;}
return self::IsInRange($start,$end,$ip);
}
if(self::GetIpType($rule) != $mode) return IPR_ERR_MISMATCH;
//all is good so far
$count = 4;
$split = '.'; if($mode==IPID_IPv6){$count = 8; $split=':';}
$rule = self::FixRule($rule, $count,$split);
$ip = self::FixIp($ip,$count,$split);
self::trace("ip: ".implode($split,$ip));
self::trace('rule: '.implode($split,$rule));
for($i = 0; $i < $count; $i++)
{
$r = str_replace(' ', '', $rule[$i]);
$ri = false;
if($r == self::TOKEN_WILDCARD) continue;
if($mode == IPPID_IPv6 && self::IsHex($r)) { $ri = hexdec($r);}else if(is_numeric($r)) $ri = $r;
$x = $ip[$i];
if($mode == IPPID_IPv6) $x = hexdec($x);
//* Exact Match *//
self::trace("rule[$i]: $ri");
self::trace("ip[$i]: $x");
if($ri !== false && $ri != $x) return IPR_DENY;
$len = strlen($r);
for($y = 0; $y < $len; $y++)
{
self::trace("y = $y");
if(substr($r, $y,1) == self::TOKEN_RANGE_BEGIN)
{
++$y;
self::trace("found range, y = $y");
$negflag = false;
$start = false;
$stop = false;
$allows = 0;
$denys = 0;
$q = 0;
$c = substr($r,$y,1);
while($c !== false)
{
self::trace("in range, char: $c");
//* Flags *//
$break = false;
$exec = false;
$toggle = false;
$reset = false;
if($c === self::TOKEN_RANGE_END) {$skiphex = true;$break = true; $exec = true; self::trace("found end of range");}
if($c === self::TOKEN_NOT) {if($q > 0){ $toggle = true; $exec = true;} else $negflag = !$negflag; $skiphex =false; self::trace("found TOKEN_NOT");}
if($c === self::TOKEN_OR) { $exec = true; $reset = true;$skiphex=true;self::trace("found TOKEN_OR");}
if($c === self::TOKEN_RANGE_SPLIT){ $skiphex = false;++$q; self::trace("found range split");}
//* Read Hex Tokens *//
if(!$skiphex && self::IsHexChar($c))
{
$n = self::ReadNextHexToken($r,$y);
if($mode == IPID_IPv6) $n = hexdec($n);
if($q == 0) $start = $n;
else if($q == 1) $stop = $n;
--$y; //fixes error
self::trace("parsed number: $n, y = $y");
}
if($reset) {$negflag = false; $start = false; $stop = false; $q = 0;}
if($exec)
{
self::trace("executing: start = $start, stop = $stop, x = $x");
self::trace("negflag = $negflag");
if($stop !== false && $x >= $start && $x <= $stop)
{
if($negflag) { ++$denys; $allows = 0; break;}
else ++$allows;
}
else if($stop === false && $start == $x)
{
if($negflag) { ++$denys; $allows = 0; break;}
else ++$allows;
}
self::trace("exec complete: allows = $allows, denys = $denys");
$q = 0;
}
if($toggle) $negflag = !$negflag;
if($break) break;
++$y;
$c = substr($r,$y,1);
}
if(!$allows) return IPR_DENY;
}
}
}
return IPR_ALLOW;
}
private static function ReadNextHexToken($buff, &$offset, $max = -1)
{
$str = '';
if($max == -1) { $max = strlen($buff);}
for(; $offset < $max; $offset++)
{
$c = substr($buff,$offset, 1);
if(self::IsHexChar($c))
$str .= $c;
else
return $str;
}
return $str;
}
private static function IsHex($x){ $len = strlen($x); for($i = 0; $i < $len; $i++) if(!self::IsHexChar(substr($x,$i,1))) return false; return true;}
private static function IsHexChar($x){self::trace("isHex($x);"); return (in_array(strtoupper($x),array('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F')));
}
}
######################
# IP Identify Class #
#####################
define('IPID_INVALID',false);
define('IPID_IPv4',2);
define('IPID_IPv6',3);
define('IPID_IPv4_Embed',6);
class IPID
{
public static function Identify($ip,&$ipconvert = false)
{
$ip = strtoupper($ip);
$ipconvert = $ip;
// Check if we are IPv4
if(strpos($ip,':') === false && strpos($ip,'.') !== false)
return IPID_IPv4;
//Is it one of those hybrids?
else if(strpos($ip,':FFFF') !== false && strpos($ip,'.') !== false)
{
$ipconvert = substr($ip,strpos($ip,':FFFF:')+6);
return IPID_IPv4_Embed;
}
// Is it IPv6?
else if(strpos($ip,':') !== false) return IPID_IPv6;
// What the...?
return IPID_INVALID;
}
}
?>
You can use it as long as you don't try and resell it and you keep the header as is.
<?php
//This function returns True if visitor IP is allowed.
//Otherwise it returns False.
function CheckAccess()
{
//allowed IP. Change it to your static IP
$allowedip = '127.0.0.1';
$ip = $_SERVER['REMOTE_ADDR'];
return ($ip == $allowedip);
}
Proxy servers should set the X-Forwarded-For HTTP header, which you could look up with $_SERVER['HTTP_X_FORWARDED_FOR']. Otherwise $_SERVER['REMOTE_ADDR'] can be used to get the IP address. As others have noted, both of these can be easily spoofed, and there is no requirement for proxies to set the X-Forwarded-For request header.
There is a ip2long() function in PHP which give you an integer to use for range checking.
To get the location of an IP address you need a lookup table which maps IP address ranges to approximate geographical locations (such lookup tables are typically not free). There are many services which offer IP address geolocation, some of which are mentioned here and here.

How to generate excerpt with most searched words in PHP?

Here is an excerpt function:
function excerpt($text, $phrase, $radius = 100, $ending = "...") {
270 if (empty($text) or empty($phrase)) {
271 return $this->truncate($text, $radius * 2, $ending);
272 }
273
274 $phraseLen = strlen($phrase);
275 if ($radius < $phraseLen) {
276 $radius = $phraseLen;
277 }
278
279 $pos = strpos(strtolower($text), strtolower($phrase));
280
281 $startPos = 0;
282 if ($pos > $radius) {
283 $startPos = $pos - $radius;
284 }
285
286 $textLen = strlen($text);
287
288 $endPos = $pos + $phraseLen + $radius;
289 if ($endPos >= $textLen) {
290 $endPos = $textLen;
291 }
292
293 $excerpt = substr($text, $startPos, $endPos - $startPos);
294 if ($startPos != 0) {
295 $excerpt = substr_replace($excerpt, $ending, 0, $phraseLen);
296 }
297
298 if ($endPos != $textLen) {
299 $excerpt = substr_replace($excerpt, $ending, -$phraseLen);
300 }
301
302 return $excerpt;
303 }
Its drawback is that it doesn't try to match as many searched words as possible,which only matches once by default.
How to implement the desired one?
The code listed here thus far has not worked for me so I spent some time thinking of an algorithm to implement. What I have now works decently, and it does not appear to be a performance problem - feel free to test. Results are not as snazzy Google's snippets as there is no detection for where sentences start and end. I could add this but it'd be that much more complicated and I'd have to throw in the towel on doing this in a single function. Already its getting crowded and could be better coded if, for example, the object manipulations were abstracted to methods.
Anyhow, this is what I have and it should be a good start. The most dense excerpt is determined and the resulting string will approximately be the span you have specified. I urge some testing of this code as I have not done a thorough job of it. Surely there are problematic cases to be found.
I also encourage anyone to improve on this algorithm, or simply the code to execute it.
Enjoy.
// string excerpt(string $text, string $phrase, int $span = 100, string $delimiter = '...')
// parameters:
// $text - text to be searched
// $phrase - search string
// $span - approximate length of the excerpt
// $delimiter - string to use as a suffix and/or prefix if the excerpt is from the middle of a text
function excerpt($text, $phrase, $span = 100, $delimiter = '...') {
$phrases = preg_split('/\s+/', $phrase);
$regexp = '/\b(?:';
foreach ($phrases as $phrase) {
$regexp .= preg_quote($phrase, '/') . '|';
}
$regexp = substr($regexp, 0, -1) . ')\b/i';
$matches = array();
preg_match_all($regexp, $text, $matches, PREG_OFFSET_CAPTURE);
$matches = $matches[0];
$nodes = array();
foreach ($matches as $match) {
$node = new stdClass;
$node->phraseLength = strlen($match[0]);
$node->position = $match[1];
$nodes[] = $node;
}
if (count($nodes) > 0) {
$clust = new stdClass;
$clust->nodes[] = array_shift($nodes);
$clust->length = $clust->nodes[0]->phraseLength;
$clust->i = 0;
$clusters = new stdClass;
$clusters->data = array($clust);
$clusters->i = 0;
foreach ($nodes as $node) {
$lastClust = $clusters->data[$clusters->i];
$lastNode = $lastClust->nodes[$lastClust->i];
$addedLength = $node->position - $lastNode->position - $lastNode->phraseLength + $node->phraseLength;
if ($lastClust->length + $addedLength <= $span) {
$lastClust->nodes[] = $node;
$lastClust->length += $addedLength;
$lastClust->i += 1;
} else {
if ($addedLength > $span) {
$newClust = new stdClass;
$newClust->nodes = array($node);
$newClust->i = 0;
$newClust->length = $node->phraseLength;
$clusters->data[] = $newClust;
$clusters->i += 1;
} else {
$newClust = clone $lastClust;
while ($newClust->length + $addedLength > $span) {
$shiftedNode = array_shift($newClust->nodes);
if ($shiftedNode === null) {
break;
}
$newClust->i -= 1;
$removedLength = $shiftedNode->phraseLength;
if (isset($newClust->nodes[0])) {
$removedLength += $newClust->nodes[0]->position - $shiftedNode->position;
}
$newClust->length -= $removedLength;
}
if ($newClust->i < 0) {
$newClust->i = 0;
}
$newClust->nodes[] = $node;
$newClust->length += $addedLength;
$clusters->data[] = $newClust;
$clusters->i += 1;
}
}
}
$bestClust = $clusters->data[0];
$bestClustSize = count($bestClust->nodes);
foreach ($clusters->data as $clust) {
$newClustSize = count($clust->nodes);
if ($newClustSize > $bestClustSize) {
$bestClust = $clust;
$bestClustSize = $newClustSize;
}
}
$clustLeft = $bestClust->nodes[0]->position;
$clustLen = $bestClust->length;
$padding = round(($span - $clustLen)/2);
$clustLeft -= $padding;
if ($clustLeft < 0) {
$clustLen += $clustLeft*-1 + $padding;
$clustLeft = 0;
} else {
$clustLen += $padding*2;
}
} else {
$clustLeft = 0;
$clustLen = $span;
}
$textLen = strlen($text);
$prefix = '';
$suffix = '';
if (!ctype_space($text[$clustLeft]) && isset($text[$clustLeft-1]) && !ctype_space($text[$clustLeft-1])) {
while (!ctype_space($text[$clustLeft])) {
$clustLeft += 1;
}
$prefix = $delimiter;
}
$lastChar = $clustLeft + $clustLen;
if (!ctype_space($text[$lastChar]) && isset($text[$lastChar+1]) && !ctype_space($text[$lastChar+1])) {
while (!ctype_space($text[$lastChar])) {
$lastChar -= 1;
}
$suffix = $delimiter;
$clustLen = $lastChar - $clustLeft;
}
if ($clustLeft > 0) {
$prefix = $delimiter;
}
if ($clustLeft + $clustLen < $textLen) {
$suffix = $delimiter;
}
return $prefix . trim(substr($text, $clustLeft, $clustLen+1)) . $suffix;
}
I came up with the below to generate excerpts. You can see the code here https://github.com/boyter/php-excerpt It works by finding all the locations of the matching words, then takes an excerpt based on which words are the closest. In theory this does not sound very good but in practice it works very well.
Its actually very close to how Sphider (for the record it lives in searchfuncs.php from line 529 to 566) generates its snippets. I think the below is much easier to read and is without bugs which exist in Sphider. It also does not use regular expressions which makes it a bit faster then other methods I have used.
I blogged about it here http://www.boyter.org/2013/04/building-a-search-result-extract-generator-in-php/
<?php
// find the locations of each of the words
// Nothing exciting here. The array_unique is required
// unless you decide to make the words unique before passing in
function _extractLocations($words, $fulltext) {
$locations = array();
foreach($words as $word) {
$wordlen = strlen($word);
$loc = stripos($fulltext, $word);
while($loc !== FALSE) {
$locations[] = $loc;
$loc = stripos($fulltext, $word, $loc + $wordlen);
}
}
$locations = array_unique($locations);
sort($locations);
return $locations;
}
// Work out which is the most relevant portion to display
// This is done by looping over each match and finding the smallest distance between two found
// strings. The idea being that the closer the terms are the better match the snippet would be.
// When checking for matches we only change the location if there is a better match.
// The only exception is where we have only two matches in which case we just take the
// first as will be equally distant.
function _determineSnipLocation($locations, $prevcount) {
// If we only have 1 match we dont actually do the for loop so set to the first
$startpos = $locations[0];
$loccount = count($locations);
$smallestdiff = PHP_INT_MAX;
// If we only have 2 skip as its probably equally relevant
if(count($locations) > 2) {
// skip the first as we check 1 behind
for($i=1; $i < $loccount; $i++) {
if($i == $loccount-1) { // at the end
$diff = $locations[$i] - $locations[$i-1];
}
else {
$diff = $locations[$i+1] - $locations[$i];
}
if($smallestdiff > $diff) {
$smallestdiff = $diff;
$startpos = $locations[$i];
}
}
}
$startpos = $startpos > $prevcount ? $startpos - $prevcount : 0;
return $startpos;
}
// 1/6 ratio on prevcount tends to work pretty well and puts the terms
// in the middle of the extract
function extractRelevant($words, $fulltext, $rellength=300, $prevcount=50, $indicator='...') {
$textlength = strlen($fulltext);
if($textlength <= $rellength) {
return $fulltext;
}
$locations = _extractLocations($words, $fulltext);
$startpos = _determineSnipLocation($locations,$prevcount);
// if we are going to snip too much...
if($textlength-$startpos < $rellength) {
$startpos = $startpos - ($textlength-$startpos)/2;
}
$reltext = substr($fulltext, $startpos, $rellength);
// check to ensure we dont snip the last word if thats the match
if( $startpos + $rellength < $textlength) {
$reltext = substr($reltext, 0, strrpos($reltext, " ")).$indicator; // remove last word
}
// If we trimmed from the front add ...
if($startpos != 0) {
$reltext = $indicator.substr($reltext, strpos($reltext, " ") + 1); // remove first word
}
return $reltext;
}
?>
function excerpt($text, $phrase, $radius = 100, $ending = "...") {
$phraseLen = strlen($phrase);
if ($radius < $phraseLen) {
$radius = $phraseLen;
}
$phrases = explode (' ',$phrase);
foreach ($phrases as $phrase) {
$pos = strpos(strtolower($text), strtolower($phrase));
if ($pos > -1) break;
}
$startPos = 0;
if ($pos > $radius) {
$startPos = $pos - $radius;
}
$textLen = strlen($text);
$endPos = $pos + $phraseLen + $radius;
if ($endPos >= $textLen) {
$endPos = $textLen;
}
$excerpt = substr($text, $startPos, $endPos - $startPos);
if ($startPos != 0) {
$excerpt = substr_replace($excerpt, $ending, 0, $phraseLen);
}
if ($endPos != $textLen) {
$excerpt = substr_replace($excerpt, $ending, -$phraseLen);
}
return $excerpt; }
I could not contact erisco, so I am posting his function with multiple fixes (most importantly multibyte support).
/**
* #param string $text text to be searched
* #param string $phrase search string
* #param int $span approximate length of the excerpt
* #param string $delimiter string to use as a suffix and/or prefix if the excerpt is from the middle of a text
*
* #return string
*/
public static function excerpt($text, $phrase, $span = 100, $delimiter = '...')
{
$phrases = preg_split('/\s+/u', $phrase);
$regexp = '/\b(?:';
foreach($phrases as $phrase)
{
$regexp.= preg_quote($phrase, '/') . '|';
}
$regexp = mb_substr($regexp, 0, -1) .')\b/ui';
$matches = [];
preg_match_all($regexp, $text, $matches, PREG_OFFSET_CAPTURE);
$matches = $matches[0];
$nodes = [];
foreach($matches as $match)
{
$node = new stdClass;
$node->phraseLength = mb_strlen($match[0]);
$node->position = mb_strlen(substr($text, 0, $match[1])); // calculate UTF-8 position (#see https://bugs.php.net/bug.php?id=67487)
$nodes[] = $node;
}
if(count($nodes) > 0)
{
$clust = new stdClass;
$clust->nodes[] = array_shift($nodes);
$clust->length = $clust->nodes[0]->phraseLength;
$clust->i = 0;
$clusters = new stdClass;
$clusters->data =
[
$clust
];
$clusters->i = 0;
foreach($nodes as $node)
{
$lastClust = $clusters->data[$clusters->i];
$lastNode = $lastClust->nodes[$lastClust->i];
$addedLength = $node->position - $lastNode->position - $lastNode->phraseLength + $node->phraseLength;
if($lastClust->length + $addedLength <= $span)
{
$lastClust->nodes[] = $node;
$lastClust->length+= $addedLength;
$lastClust->i++;
}
else
{
if($addedLength > $span)
{
$newClust = new stdClass;
$newClust->nodes =
[
$node
];
$newClust->i = 0;
$newClust->length = $node->phraseLength;
$clusters->data[] = $newClust;
$clusters->i++;
}
else
{
$newClust = clone $lastClust;
while($newClust->length + $addedLength > $span)
{
$shiftedNode = array_shift($newClust->nodes);
if($shiftedNode === null)
{
break;
}
$newClust->i--;
$removedLength = $shiftedNode->phraseLength;
if(isset($newClust->nodes[0]))
{
$removedLength+= $newClust->nodes[0]->position - $shiftedNode->position;
}
$newClust->length-= $removedLength;
}
if($newClust->i < 0)
{
$newClust->i = 0;
}
$newClust->nodes[] = $node;
$newClust->length+= $addedLength;
$clusters->data[] = $newClust;
$clusters->i++;
}
}
}
$bestClust = $clusters->data[0];
$bestClustSize = count($bestClust->nodes);
foreach($clusters->data as $clust)
{
$newClustSize = count($clust->nodes);
if($newClustSize > $bestClustSize)
{
$bestClust = $clust;
$bestClustSize = $newClustSize;
}
}
$clustLeft = $bestClust->nodes[0]->position;
$clustLen = $bestClust->length;
$padding = intval(round(($span - $clustLen) / 2));
$clustLeft-= $padding;
if($clustLeft < 0)
{
$clustLen+= $clustLeft * -1 + $padding;
$clustLeft = 0;
}
else
{
$clustLen+= $padding * 2;
}
}
else
{
$clustLeft = 0;
$clustLen = $span;
}
$textLen = mb_strlen($text);
$prefix = '';
$suffix = '';
if($clustLeft > 0 && !ctype_space(mb_substr($text, $clustLeft, 1))
&& !ctype_space(mb_substr($text, $clustLeft - 1, 1)))
{
$clustLeft++;
while(!ctype_space(mb_substr($text, $clustLeft, 1)))
{
$clustLeft++;
}
$prefix = $delimiter;
}
$lastChar = $clustLeft + $clustLen;
if($lastChar < $textLen && !ctype_space(mb_substr($text, $lastChar, 1))
&& !ctype_space(mb_substr($text, $lastChar + 1, 1)))
{
$lastChar--;
while(!ctype_space(mb_substr($text, $lastChar, 1)))
{
$lastChar--;
}
$suffix = $delimiter;
$clustLen = $lastChar - $clustLeft;
}
if($clustLeft > 0)
{
$prefix = $delimiter;
}
if($clustLeft + $clustLen < $textLen)
{
$suffix = $delimiter;
}
return $prefix . trim(mb_substr($text, $clustLeft, $clustLen + 1)) . $suffix;
}

Categories