Restricting access to a site using IP address - php

I would like to know whether there is a way of restricting the users of a site such that they can only access the inner pages of a site if they are within a certain range of IP addresses or a certain network?
The current PHP scripts I am getting cant differentiate the real IPs from the Proxies?
Thanks

i wouldn’t restrict on ip addresses. as you said, you can’t know if it’s a proxy. furthermore, ip addresses can be easily spoofed.

Have you considered using apache .htaccess files for that?
IP restriction with htaccess

You can try out a script I created that allows very advanced IP rules. I coded it years ago so I apologize in advance for the current shape of it.
Edit:
If you're looking for an "&" operator in the syntax don't bother. I forgot to add it when I coded this and looking back at this script now makes me cringe at the thought of touching it again.
<?php
##############################################################
# IP Expression Class #
# Easy IP-based Access Restrictions #
# Change Log: #
# - Added Range and limited IPv6 support #
# - Changed name from IPAR to IPEX #
# #
##############################################################
# Example Rules: #
# 69.[10-20].[^50].* #
# 69.*.[1-5 | 10-20 |^30].* #
# 60.12.2.* #
# 127.* #
# 69.1.1.1-70.1.1.1 <-- This is a range #
# #
# Usage: #
# Ipex::IsMatch($rule, $ip); #
# #
# [range] - Defines a range for a section of the IP #
# | - OR token. IP can match this range/number #
# ^ - NOT token. IP can not match this range/number #
# x-y - Defines a range from x to y #
# x - Exactly match x (x = a hex or dec number) #
# * - Match any number #
# #
#----------===============================-------------------#
# [ Written by Chris Tarquini ] #
#----------===============================-------------------#
##############################################################
define('IPR_DENY', false);
define('IPR_ALLOW', true);
define('IPR_ERR_MISMATCH',-1);
define('IPR_ERR_RANGE_MISMATCH',-2);
define('IPR_ERR_RANGE_INVALID',-3);
define('IPR_ERR_INVALID_RULE',-4);
class IPEX
{
const TOKEN_RANGE_BEGIN = '[';
const TOKEN_RANGE_END = ']';
const TOKEN_WILDCARD = '*';
const TOKEN_RANGE_SPLIT = '-';
const TOKEN_OR = '|';
const TOKEN_NOT = '^';
const DEBUG_MODE = TRUE;
private static function trace($err){if(self::DEBUG_MODE) echo "$err\r\n";}
private static function FixRule($rule,$count = 4, $split='.')
{
$rule = explode($split,$rule);
$filler = 0;
$size = sizeof($rule);
for($i = 0; $i < $count; $i++)
{
if($i > $size) { $rule[] = $filler; $size++;}
else if(empty($rule[$i])) { $filler = self::TOKEN_WILDCARD; $rule[$i] = $filler;}
}
return $rule;
}
private static function FixIP($rule,$count = 4, $split='.')
{
$rule = explode($split,$rule);
$size = sizeof($rule);
for($i = 0; $i < $count; $i++)
{
if($i > $size) { $rule[] = 0; $size++;}
else if(empty($rule[$i])) { $rule[$i] = 0;}
}
return $rule;
}
private static function GetIpType(&$ip)
{
$mode = IPID::Identify($ip,$newip);
if($mode == IPID_IPv4_Embed) { $ip = $newip; return IPID_IPv4;}
return $mode;
}
private static function FixIPRange(&$start, &$stop)
{
$count = 4; $split = '.';
if(self::GetIpType($start) == IPID_IPv6) {$count = 8; $split = ':';}
$q = 0;
while($q < 2)
{
$filler = ($q == 0) ? 0 : 255;
$arr = explode($split,($q == 0) ? $start : $stop);
$size = sizeof($arr);
for($i = 0; $i < $count; $i++)
{
if($i > $size){ $arr[] = $filler; $size++;}
else if(empty($arr[$i])){ $arr[$i] = $filler; }
}
if($q == 0) $start = implode($split, $arr);
else $stop = implode($split,$arr);
$q++;
}
}
public static function IsInRange($start, $stop, $ip)
{
//Sorry guys we only support IPv4 for this ;(
self::FixIPRange($start,$stop);
self::trace("fixed: start = $start, stop = $stop");
$start = ip2long($start); $stop = ip2long($stop);
$ip = ip2long($ip);
self::trace("start = $start, stop = $stop, ip = $ip");
return ($ip >= $start && $ip <= $stop);
}
public static function IsAllowed($rule, $ip){return self::IsMatch($rule,$ip);}
public static function IsMatch($rule,$ip)
{
$mode = self::GetIpType($ip);
self::trace("ip type: $mode");
if(strpos($rule, self::TOKEN_RANGE_SPLIT) !== false && strpos($rule,self::TOKEN_RANGE_BEGIN) === false)
{
self::trace("ip range mode");
$test = explode(self::TOKEN_RANGE_SPLIT, $rule);
self::trace("range size: ".sizeof($test));
print_r($test);
if(sizeof($test) != 2) return IPR_ERR_RANGE_INVALID;
$start = $test[0]; $end = $test[1];
if(empty($start) || empty($end)) return IPR_ERR_RANGE_INVALID;
self::trace("range start: $start, range stop: $end");
$rm1 = (self::IsHex($start)) ? $mode : self::GetIpType($start);
$rm2 = (self::IsHex($end)) ? $mode : self::GetIpType($end);
self::trace("range types: $rm1, $rm2\r\nip type: $mode");
if($rm1 != $rm2 || $rm1 != $mode) return IPR_ERR_RANGE_MISMATCH;
if($mode == IPID_IPv6) { return IPR_ERR_IPv6_NOTSUPPORTED;}
return self::IsInRange($start,$end,$ip);
}
if(self::GetIpType($rule) != $mode) return IPR_ERR_MISMATCH;
//all is good so far
$count = 4;
$split = '.'; if($mode==IPID_IPv6){$count = 8; $split=':';}
$rule = self::FixRule($rule, $count,$split);
$ip = self::FixIp($ip,$count,$split);
self::trace("ip: ".implode($split,$ip));
self::trace('rule: '.implode($split,$rule));
for($i = 0; $i < $count; $i++)
{
$r = str_replace(' ', '', $rule[$i]);
$ri = false;
if($r == self::TOKEN_WILDCARD) continue;
if($mode == IPPID_IPv6 && self::IsHex($r)) { $ri = hexdec($r);}else if(is_numeric($r)) $ri = $r;
$x = $ip[$i];
if($mode == IPPID_IPv6) $x = hexdec($x);
//* Exact Match *//
self::trace("rule[$i]: $ri");
self::trace("ip[$i]: $x");
if($ri !== false && $ri != $x) return IPR_DENY;
$len = strlen($r);
for($y = 0; $y < $len; $y++)
{
self::trace("y = $y");
if(substr($r, $y,1) == self::TOKEN_RANGE_BEGIN)
{
++$y;
self::trace("found range, y = $y");
$negflag = false;
$start = false;
$stop = false;
$allows = 0;
$denys = 0;
$q = 0;
$c = substr($r,$y,1);
while($c !== false)
{
self::trace("in range, char: $c");
//* Flags *//
$break = false;
$exec = false;
$toggle = false;
$reset = false;
if($c === self::TOKEN_RANGE_END) {$skiphex = true;$break = true; $exec = true; self::trace("found end of range");}
if($c === self::TOKEN_NOT) {if($q > 0){ $toggle = true; $exec = true;} else $negflag = !$negflag; $skiphex =false; self::trace("found TOKEN_NOT");}
if($c === self::TOKEN_OR) { $exec = true; $reset = true;$skiphex=true;self::trace("found TOKEN_OR");}
if($c === self::TOKEN_RANGE_SPLIT){ $skiphex = false;++$q; self::trace("found range split");}
//* Read Hex Tokens *//
if(!$skiphex && self::IsHexChar($c))
{
$n = self::ReadNextHexToken($r,$y);
if($mode == IPID_IPv6) $n = hexdec($n);
if($q == 0) $start = $n;
else if($q == 1) $stop = $n;
--$y; //fixes error
self::trace("parsed number: $n, y = $y");
}
if($reset) {$negflag = false; $start = false; $stop = false; $q = 0;}
if($exec)
{
self::trace("executing: start = $start, stop = $stop, x = $x");
self::trace("negflag = $negflag");
if($stop !== false && $x >= $start && $x <= $stop)
{
if($negflag) { ++$denys; $allows = 0; break;}
else ++$allows;
}
else if($stop === false && $start == $x)
{
if($negflag) { ++$denys; $allows = 0; break;}
else ++$allows;
}
self::trace("exec complete: allows = $allows, denys = $denys");
$q = 0;
}
if($toggle) $negflag = !$negflag;
if($break) break;
++$y;
$c = substr($r,$y,1);
}
if(!$allows) return IPR_DENY;
}
}
}
return IPR_ALLOW;
}
private static function ReadNextHexToken($buff, &$offset, $max = -1)
{
$str = '';
if($max == -1) { $max = strlen($buff);}
for(; $offset < $max; $offset++)
{
$c = substr($buff,$offset, 1);
if(self::IsHexChar($c))
$str .= $c;
else
return $str;
}
return $str;
}
private static function IsHex($x){ $len = strlen($x); for($i = 0; $i < $len; $i++) if(!self::IsHexChar(substr($x,$i,1))) return false; return true;}
private static function IsHexChar($x){self::trace("isHex($x);"); return (in_array(strtoupper($x),array('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F')));
}
}
######################
# IP Identify Class #
#####################
define('IPID_INVALID',false);
define('IPID_IPv4',2);
define('IPID_IPv6',3);
define('IPID_IPv4_Embed',6);
class IPID
{
public static function Identify($ip,&$ipconvert = false)
{
$ip = strtoupper($ip);
$ipconvert = $ip;
// Check if we are IPv4
if(strpos($ip,':') === false && strpos($ip,'.') !== false)
return IPID_IPv4;
//Is it one of those hybrids?
else if(strpos($ip,':FFFF') !== false && strpos($ip,'.') !== false)
{
$ipconvert = substr($ip,strpos($ip,':FFFF:')+6);
return IPID_IPv4_Embed;
}
// Is it IPv6?
else if(strpos($ip,':') !== false) return IPID_IPv6;
// What the...?
return IPID_INVALID;
}
}
?>
You can use it as long as you don't try and resell it and you keep the header as is.

<?php
//This function returns True if visitor IP is allowed.
//Otherwise it returns False.
function CheckAccess()
{
//allowed IP. Change it to your static IP
$allowedip = '127.0.0.1';
$ip = $_SERVER['REMOTE_ADDR'];
return ($ip == $allowedip);
}

Proxy servers should set the X-Forwarded-For HTTP header, which you could look up with $_SERVER['HTTP_X_FORWARDED_FOR']. Otherwise $_SERVER['REMOTE_ADDR'] can be used to get the IP address. As others have noted, both of these can be easily spoofed, and there is no requirement for proxies to set the X-Forwarded-For request header.
There is a ip2long() function in PHP which give you an integer to use for range checking.
To get the location of an IP address you need a lookup table which maps IP address ranges to approximate geographical locations (such lookup tables are typically not free). There are many services which offer IP address geolocation, some of which are mentioned here and here.

Related

automatic number increase in php based web form [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 7 years ago.
Improve this question
So I have created an html form which then posts the results to a php file that overlays them on a PDF and then emails that PDF to myself and the email that was put in the form. All I want to do now is find a simple way to make it so that the PDF includes a sequential number.
For example: When the form is filled out for the first time the number 0001 is input automatically into the PDF and 0002 for the second time and so on.
Is there an easy PHP function to accomplish this?
Essentially I am creating an online invoicing form so when I do service calls I can create an invoice on the spot from a web browser which is then emailed to my office and the client.
Any help would be greatly appreciated.
For an incrementing number, you could keep a number in a database and then extract it, add 1 to it, use it, and then put it back in the DB for next time, but this seems complicated. Somebody in the comments mentioned using the timestamp, which would be done like so:
$invoicenumber = time(); //This number will always be unique
The time function works like so (copied from w3schools):
The time() function returns the current time in the number of seconds since the Unix Epoch (January 1 1970 00:00:00 GMT).
Since actual seconds can only go up (increment), this number will never be the same twice.
I hope this is helpful.
-Edit
You can also display this date/time in a readable format like so:
$time = time();
echo date("Y-m-d H:i:s",$time);
-Edit 2
If you want an incrementing number, you basically need a very simple database to save it, which might be as simple as a table called invoices, with a column called invoicenumber, which stores your invoice number in it. You could / probably should use this to store other invoice information in it too, so you'd have each invoice number saved (which means we want to only get the highest one)
Then your code would look like this, for each time you want to use it:
Firstly you'd have a database information file (settings.php or something similar) with your database definitions in it, which might look like this:
define('DB_HOST', 'localhost');
define('DB_USER', 'db_username');
define('DB_PASS', 'db_password');
define('DB_NAME', 'database_name');
Your code would look like this:
//Establish a mysql connection
$mysqli = new mysqli(DB_HOST, DB_USER, DB_PASS, DB_NAME);
//Set up a query to get the highest number
$query = "SELECT invoicenumber FROM invoices ORDER BY invoicenumber DESC LIMIT 1";
//Get the result
$result = $mysqli->query($query);
$row = $result->fetch_assoc();
//If we have a record
if($row){
//New invoice number
$invoicenumber = $row['invoicenumber']++;
//Else (database is empty, so start at the beginning)
}else{
$invoicenumber = 1;
}
//Now we have our invoice number, so do whatever you want with it
/**
* Code here to use the number
* */
//Now we wanna add the new invoice to the database, so
/**
* Add any other info to this statement if you want.
* If any of it is user submitted data, be sure to use prepared statements
* (just look at php.net's documentation on prepared statements)
* w3schools also has some nice tutorials on how to safely insert stuff
* in to a database, so check it all out :)
* */
$query = "INSERT INTO invoices(invoicenumber) VALUES($invoicenumber)";
//Execute the query
if($mysqli->query($query)){
//Show success
echo "Invoice $invoicenumber has been added to the database.";
}else{
//Show error
echo "Unfortunately we could not add invoice $invoicenumber to the database.";
}
//Now we can clear up our resources
$stmt->free_result(); $stmt->close(); $mysqli->close();
Please note: this is a very basic example. Yours will have additions and enhanced security if you are using user submitted data, so please do your homework and make sure that you fully understand each line of this code before you proceed to use it.
I do exactly the same with patient accession numbers on patient reports.
include('/home/user/php/class.pdf2text.php');
$p2t = new PDF2Text();
$p2t->setFilename($pdf);
$p2t->decodePDF();
$data = $p2t->output();
$len = strlen($data);
$pos = strpos($data,$accession);
if (pos){
$in .= "$accession,";
$checked++;
}
else{
$missingPDF += 1;echo "\n<p> <span class='bold red'>INCORRECT ACCESSION NUMBER c=$row[0] p=$row[1]</span>\n";
}
if ($checked > 0){
$in = substr($in,0,-1) . ')';
$sql = "UPDATE `Patient` SET `PDF`=1 WHERE $in";
}
pdf2text.php
class PDF2Text {
// Some settings
var $multibyte = 4; // Use setUnicode(TRUE|FALSE)
var $convertquotes = ENT_QUOTES; // ENT_COMPAT (double-quotes), ENT_QUOTES (Both), ENT_NOQUOTES (None)
var $showprogress = true; // TRUE if you have problems with time-out
// Variables
var $filename = '';
var $decodedtext = '';
function setFilename($filename) {
// Reset
$this->decodedtext = '';
$this->filename = $filename;
}
function output($echo = false) {
if($echo) echo $this->decodedtext;
else return $this->decodedtext;
}
function setUnicode($input) {
// 4 for unicode. But 2 should work in most cases just fine
if($input == true) $this->multibyte = 4;
else $this->multibyte = 2;
}
function decodePDF() {
// Read the data from pdf file
$infile = #file_get_contents($this->filename, FILE_BINARY);
if (empty($infile))
return "";
// Get all text data.
$transformations = array();
$texts = array();
// Get the list of all objects.
preg_match_all("#obj[\n|\r](.*)endobj[\n|\r]#ismU", $infile . "endobj\r", $objects);
$objects = #$objects[1];
// Select objects with streams.
for ($i = 0; $i < count($objects); $i++) {
$currentObject = $objects[$i];
// Prevent time-out
#set_time_limit ();
if($this->showprogress) {
// echo ". ";
flush(); ob_flush();
}
// Check if an object includes data stream.
if (preg_match("#stream[\n|\r](.*)endstream[\n|\r]#ismU", $currentObject . "endstream\r", $stream )) {
$stream = ltrim($stream[1]);
// Check object parameters and look for text data.
$options = $this->getObjectOptions($currentObject);
if (!(empty($options["Length1"]) && empty($options["Type"]) && empty($options["Subtype"])) )
// if ( $options["Image"] && $options["Subtype"] )
// if (!(empty($options["Length1"]) && empty($options["Subtype"])) )
continue;
// Hack, length doesnt always seem to be correct
unset($options["Length"]);
// So, we have text data. Decode it.
$data = $this->getDecodedStream($stream, $options);
if (strlen($data)) {
if (preg_match_all("#BT[\n|\r](.*)ET[\n|\r]#ismU", $data . "ET\r", $textContainers)) {
$textContainers = #$textContainers[1];
$this->getDirtyTexts($texts, $textContainers);
} else
$this->getCharTransformations($transformations, $data);
}
}
}
// Analyze text blocks taking into account character transformations and return results.
$this->decodedtext = $this->getTextUsingTransformations($texts, $transformations);
}
function decodeAsciiHex($input) {
$output = "";
$isOdd = true;
$isComment = false;
for($i = 0, $codeHigh = -1; $i < strlen($input) && $input[$i] != '>'; $i++) {
$c = $input[$i];
if($isComment) {
if ($c == '\r' || $c == '\n')
$isComment = false;
continue;
}
switch($c) {
case '\0': case '\t': case '\r': case '\f': case '\n': case ' ': break;
case '%':
$isComment = true;
break;
default:
$code = hexdec($c);
if($code === 0 && $c != '0')
return "";
if($isOdd)
$codeHigh = $code;
else
$output .= chr($codeHigh * 16 + $code);
$isOdd = !$isOdd;
break;
}
}
if($input[$i] != '>')
return "";
if($isOdd)
$output .= chr($codeHigh * 16);
return $output;
}
function decodeAscii85($input) {
$output = "";
$isComment = false;
$ords = array();
for($i = 0, $state = 0; $i < strlen($input) && $input[$i] != '~'; $i++) {
$c = $input[$i];
if($isComment) {
if ($c == '\r' || $c == '\n')
$isComment = false;
continue;
}
if ($c == '\0' || $c == '\t' || $c == '\r' || $c == '\f' || $c == '\n' || $c == ' ')
continue;
if ($c == '%') {
$isComment = true;
continue;
}
if ($c == 'z' && $state === 0) {
$output .= str_repeat(chr(0), 4);
continue;
}
if ($c < '!' || $c > 'u')
return "";
$code = ord($input[$i]) & 0xff;
$ords[$state++] = $code - ord('!');
if ($state == 5) {
$state = 0;
for ($sum = 0, $j = 0; $j < 5; $j++)
$sum = $sum * 85 + $ords[$j];
for ($j = 3; $j >= 0; $j--)
$output .= chr($sum >> ($j * 8));
}
}
if ($state === 1)
return "";
elseif ($state > 1) {
for ($i = 0, $sum = 0; $i < $state; $i++)
$sum += ($ords[$i] + ($i == $state - 1)) * pow(85, 4 - $i);
for ($i = 0; $i < $state - 1; $i++) {
try {
if(false == ($o = chr($sum >> ((3 - $i) * 8)))) {
throw new Exception('Error');
}
$output .= $o;
} catch (Exception $e) { /*Dont do anything*/ }
}
}
return $output;
}
function decodeFlate($data) {
return #gzuncompress($data);
}
function getObjectOptions($object) {
$options = array();
if (preg_match("#<<(.*)>>#ismU", $object, $options)) {
$options = explode("/", $options[1]);
#array_shift($options);
$o = array();
for ($j = 0; $j < #count($options); $j++) {
$options[$j] = preg_replace("#\s+#", " ", trim($options[$j]));
if (strpos($options[$j], " ") !== false) {
$parts = explode(" ", $options[$j]);
$o[$parts[0]] = $parts[1];
} else
$o[$options[$j]] = true;
}
$options = $o;
unset($o);
}
return $options;
}
function getDecodedStream($stream, $options) {
$data = "";
if (empty($options["Filter"]))
$data = $stream;
else {
$length = !empty($options["Length"]) ? $options["Length"] : strlen($stream);
$_stream = substr($stream, 0, $length);
foreach ($options as $key => $value) {
if ($key == "ASCIIHexDecode")
$_stream = $this->decodeAsciiHex($_stream);
elseif ($key == "ASCII85Decode")
$_stream = $this->decodeAscii85($_stream);
elseif ($key == "FlateDecode")
$_stream = $this->decodeFlate($_stream);
elseif ($key == "Crypt") { // TO DO
}
}
$data = $_stream;
}
return $data;
}
function getDirtyTexts(&$texts, $textContainers) {
for ($j = 0; $j < count($textContainers); $j++) {
if (preg_match_all("#\[(.*)\]\s*TJ[\n|\r]#ismU", $textContainers[$j], $parts))
$texts = array_merge($texts, array(#implode('', $parts[1])));
elseif (preg_match_all("#T[d|w|m|f]\s*(\(.*\))\s*Tj[\n|\r]#ismU", $textContainers[$j], $parts))
$texts = array_merge($texts, array(#implode('', $parts[1])));
elseif (preg_match_all("#T[d|w|m|f]\s*(\[.*\])\s*Tj[\n|\r]#ismU", $textContainers[$j], $parts))
$texts = array_merge($texts, array(#implode('', $parts[1])));
}
}
function getCharTransformations(&$transformations, $stream) {
preg_match_all("#([0-9]+)\s+beginbfchar(.*)endbfchar#ismU", $stream, $chars, PREG_SET_ORDER);
preg_match_all("#([0-9]+)\s+beginbfrange(.*)endbfrange#ismU", $stream, $ranges, PREG_SET_ORDER);
for ($j = 0; $j < count($chars); $j++) {
$count = $chars[$j][1];
$current = explode("\n", trim($chars[$j][2]));
for ($k = 0; $k < $count && $k < count($current); $k++) {
if (preg_match("#<([0-9a-f]{2,4})>\s+<([0-9a-f]{4,512})>#is", trim($current[$k]), $map))
$transformations[str_pad($map[1], 4, "0")] = $map[2];
}
}
for ($j = 0; $j < count($ranges); $j++) {
$count = $ranges[$j][1];
$current = explode("\n", trim($ranges[$j][2]));
for ($k = 0; $k < $count && $k < count($current); $k++) {
if (preg_match("#<([0-9a-f]{4})>\s+<([0-9a-f]{4})>\s+<([0-9a-f]{4})>#is", trim($current[$k]), $map)) {
$from = hexdec($map[1]);
$to = hexdec($map[2]);
$_from = hexdec($map[3]);
for ($m = $from, $n = 0; $m <= $to; $m++, $n++)
$transformations[sprintf("%04X", $m)] = sprintf("%04X", $_from + $n);
} elseif (preg_match("#<([0-9a-f]{4})>\s+<([0-9a-f]{4})>\s+\[(.*)\]#ismU", trim($current[$k]), $map)) {
$from = hexdec($map[1]);
$to = hexdec($map[2]);
$parts = preg_split("#\s+#", trim($map[3]));
for ($m = $from, $n = 0; $m <= $to && $n < count($parts); $m++, $n++)
$transformations[sprintf("%04X", $m)] = sprintf("%04X", hexdec($parts[$n]));
}
}
}
}
function getTextUsingTransformations($texts, $transformations) {
$document = "";
for ($i = 0; $i < count($texts); $i++) {
$isHex = false;
$isPlain = false;
$hex = "";
$plain = "";
for ($j = 0; $j < strlen($texts[$i]); $j++) {
$c = $texts[$i][$j];
switch($c) {
case "<":
$hex = "";
$isHex = true;
$isPlain = false;
break;
case ">":
$hexs = str_split($hex, $this->multibyte); // 2 or 4 (UTF8 or ISO)
for ($k = 0; $k < count($hexs); $k++) {
$chex = str_pad($hexs[$k], 4, "0"); // Add tailing zero
if (isset($transformations[$chex]))
$chex = $transformations[$chex];
$document .= html_entity_decode("&#x".$chex.";");
}
$isHex = false;
break;
case "(":
$plain = "";
$isPlain = true;
$isHex = false;
break;
case ")":
$document .= $plain;
$isPlain = false;
break;
case "\\":
$c2 = $texts[$i][$j + 1];
if (in_array($c2, array("\\", "(", ")"))) $plain .= $c2;
elseif ($c2 == "n") $plain .= '\n';
elseif ($c2 == "r") $plain .= '\r';
elseif ($c2 == "t") $plain .= '\t';
elseif ($c2 == "b") $plain .= '\b';
elseif ($c2 == "f") $plain .= '\f';
elseif ($c2 >= '0' && $c2 <= '9') {
$oct = preg_replace("#[^0-9]#", "", substr($texts[$i], $j + 1, 3));
$j += strlen($oct) - 1;
$plain .= html_entity_decode("&#".octdec($oct).";", $this->convertquotes);
}
$j++;
break;
default:
if ($isHex)
$hex .= $c;
elseif ($isPlain)
$plain .= $c;
break;
}
}
$document .= "\n";
}
return $document;
}
}

Given an IP address and a Subnet, how do I calculate the range of IPs using php

I have the following code snippet and im trying to detect if an IP address falls into a certain range. Most of the time I have the IP address and the subnet but sometimes I just have the IP address only.
<?php
function CalculateRange($IP,$Subnet=""){
//Calculate subnetmax
if ($Subnet==""){
}
//Calculate max IP
return $MaxIP;
}
//--- IP range
$IPRange = array (
array("address"=>"196.201.26.0","subnet"=>"255.255.252.0"),
array("address"=>"196.202.43.0","subnet"=>"255.255.0.0"),
array("address"=>"196.203.44.0","subnet"=>"255.255.128.0"),
);
//Display MaxIP for each IP and Subnet
foreach ($IPRange as $pair) {
echo "<p>";
echo "For IP:{$pair['address']} with Subnet:{$pair['subnet']}.";
echo "MaxIP is ";
echo CalculateRange($pair['address'],$pair['subnet']);
echo "</p>";
}
?>
My question is how do I calculate MaxIP for the IP and Subnet combo?
<?php
$subnet = "255.255.252.0"; // provide your subnet here
$ip = "191.168.31.0"; // provide your ip here
$ip_blocks = explode(".", $ip);
$mask_blocks = explode('.', $subnet);
$ip_class = '';
if($ip_blocks[0] > 0 && $ip_blocks[0] <= 126) {
$ip_class = 'a';
} elseif($ip_blocks[0] >= 128 && $ip_blocks[0] <= 191) {
$ip_class = 'b';
} elseif($ip_blocks[0] >= 192 && $ip_blocks[0] <= 223) {
$ip_class = 'c';
} elseif($ip_blocks[0] >= 224 && $ip_blocks[0] <= 239) {
$ip_class = 'd';
} elseif($ip_blocks[0] >= 240 && $ip_blocks[0] <= 255) {
$ip_class = 'e';
} else {
die('wrong ip');
}
$subnet_class= '';
if($mask_blocks[0]=='255' && $mask_blocks[1] < '255') {
$subnet_class = 'a';
} elseif($mask_blocks[0]=='255' && $mask_blocks[1]=='255' && $mask_blocks[2] < '255') {
$subnet_class = 'b';
} else {
$subnet_class = 'c';
}
echo 'subnet class: '.$subnet_class.'<br />';
$min_ip = '';
$max_ip = '';
if($subnet_class=='b') {
if($ip_class == $subnet_class) {
$min_ip = "$ip_blocks[0].$ip_blocks[1].1";
$max_ip = "$ip_blocks[0].$ip_blocks[1].255";
echo 'minimum: '.$min_ip.' - maximum: '.$max_ip;
} else {
echo 'Error! IP does not lie in this range. ';
exit;
}
}
// you can continue for other subnet masks as well.. as this is only for subnet for class b
?>
The below worked perfectly
function isInRange() {
//--- IP range
$IPRange = array (
array("address"=>"197.207.35.238","subnet"=>"255.255.0.0"),
array("address"=>"41.207.44.232","subnet"=>"255.255.10.0"),
array("address"=>"40.207.44.250","subnet"=>"255.255.0.0")
);
foreach ($IPRange as $pair) {
//Check if we have subnet mask
if ($pair['subnet']!='') {
// simple example
$bcast = ip2long($_SERVER['REMOTE_ADDR']);
$smask = ip2long($pair['subnet']);
$nmask = $bcast & $smask;
$SourceStartIP = long2ip($nmask);
if($SourceStartIP==$pair['address']) {
//This is in range
return true;
}
} else {
//--- The header matches something in the fixed list
if ($pair['address'] == $_SERVER['REMOTE_ADDR']) {
return true;
}
}
}
return false;
}

Recursive function loops infinitely [closed]

This question is unlikely to help any future visitors; it is only relevant to a small geographic area, a specific moment in time, or an extraordinarily narrow situation that is not generally applicable to the worldwide audience of the internet. For help making this question more broadly applicable, visit the help center.
Closed 9 years ago.
I am working on a personal project, a bot that plays connect 4. So something is seriously wrong with the recursive function I have written to manage the bots move. No errors are thrown, and any debug info I can be shown does not tell me anything useful. Additionally, I am sure that I have not overflown my stack and that my php.ini file is good to go. This script just runs (consumes a sane amount of memory) and never returns. Only about 2400 function calls should be happening, so this script should return after only a second or two. This has stumped me for days. I believe there is something I have not properly researched. Additonally I should mention that the game board is a simple 2D array to simulate a 7 x 7 board. ai_move_helper is the recursive function and I just cannot figure out why it will not function correctly.
// this class is a code igniter library
class ConnectFour
{
public function __construct()
{
// these are expensive opperations this will give the server enough time
set_time_limit(0);
ini_set('memory_limit', '2048M');
}
public $board_width = 7;
public $board_length = 7;
public $game_array = array();
public $depth_limit = 3;
// this function gets a human made move from a CI controller ($game board)
// and returns the board after the new AI move is applied
public function ai_player_move($game_array, $active_players_move)
{
$this->game_array = $game_array;
$this->game_array = $this->calculate_ai_move($active_players_move);
return $this->game_array;
}
public function calculate_ai_move($active_players_move)
{
$move_weight_array = array();
$prime_game_board = array();
// we hardcast the active player because at this point we know it is the AI
// here we also have to prime the move computer
for($q = 0; $q < $this->board_length; $q++)
{
// MAGIC NUMBERS are the active players!!!
$prime_game_board[] = $this->apply_prime_move($q, 2, $this->game_array);
$move_weight_array[] = $this->ai_move_helper($prime_game_board[$q], 2, 0);
}
//choose your move
for($u = 0; $u < $this->board_length; $u)
{
if($move_weight_array[$u][0] == 1)
{
return $prime_game_board[$u];
}
}
// otherwise return a random acceptable move
$random = rand(0, 6);
return $prime_game_board[$random];
}
public function ai_move_helper($game_board, $active_player, $depth)
{
// build the object that will be needed at this level of recusion
$depth = $depth + 1;
$score_object = new stdClass;
$move_array = array();
$game_boards_generated_at_this_level = array();
$new_game_boards_generated_at_this_level = array();
$return_end_state_detected = 0;
$score_agregate = array();
if($this->depth_limit < $depth)
{
$score_agregate[0] = 0;
$score_agregate[1] = 0;
return $score_agregate;
}
$active_player = ($active_player == 1) ? 2 : 1;
// check for possible moves
for($i=0; $i < $this->board_width; $i++)
{
// calculate all of the possible recusions (all of the next moves)
$game_boards_generated_at_this_level[$i] = $this->apply_ai_move($i, $active_player, $game_board);
// this is the recusive level
$score_agregate = $this->ai_move_helper($game_boards_generated_at_this_level[$i]->game_board, $active_player, $depth);
}
// check to see if there are more moves of if it is time to return
foreach($game_boards_generated_at_this_level as $game_state)
{
//compute the agragate of the scores only for player two (AI)
if($active_player == 2)
{
$score_agregate[0] = $score_agregate[0] + $game_state->score_array[0];
$score_agregate[1] = $score_agregate[1] + $game_state->score_array[1];
}
}
return $score_agregate;
}
public function apply_ai_move($move, $active_players_move, $board_to_use)
{
$board_for_function = array();
$location_of_new_pieces = 0;
$return_object = new stdClass;
// this makes sure that this function is being called with the right board
if(!empty($board_to_use))
{
$board_for_function = $board_to_use;
} else {
$board_for_function = $this->game_array;
}
// check that this move is possible
if(!$this->move_possible($move, $board_for_function))
{
$return_object->game_board = NULL;
$return_object->score_array = NULL;
return $return_object;
}
// this part of the function applies a valid move
foreach($board_for_function[$move] as $column_key => $column_space)
{
// check if you are at the edge of an empty row
if(!array_key_exists(($location_of_new_pieces + 1), $board_for_function[$move]) && $column_space == '_')
{
$board_for_function[$move][$location_of_new_pieces] = ($active_players_move == 1) ? 'x' : 'o';
break;
}
// check if the next place has stuff in it too
if($column_space != '_')
{
// check the edge of the board to make sure that exists
if(array_key_exists(($location_of_new_pieces - 1), $board_for_function))
{
$board_for_function[$move][$location_of_new_pieces - 1] = ($active_players_move == 1) ? 'x' : 'o';
break;
} else {
echo "well fuck...1"; exit;
}
}
$location_of_new_pieces++;
}
$return_object->game_board = $board_for_function;
// now check if this state is a win loss or draw
$test_for_complete = $this->check_for_winner_or_draw($board_for_function, $active_players_move);
// this is a draw
if($test_for_complete == -1)
{
$return_object->score_array = array(0, 1);
} else if($test_for_complete > 3) {
$return_object->score_array = array(1, 0);
} else {
$return_object->score_array = array(0, 0);
}
return $return_object;
}
public function apply_prime_move($move, $active_players_move, $board_to_use)
{
$location_of_new_pieces = 0;
foreach($board_to_use[$move] as $column_key => $column_space)
{
// check if you are at the edge of an empty row
if(!array_key_exists(($location_of_new_pieces + 1), $board_to_use[$move]) && $column_space == '_')
{
$board_to_use[$move][$location_of_new_pieces] = ($active_players_move == 1) ? 'x' : 'o';
break;
}
// check if the next place has stuff in it too
if($column_space != '_')
{
// check the edge of the board to make sure that exists
if(array_key_exists(($location_of_new_pieces - 1), $board_to_use))
{
$board_to_use[$move][$location_of_new_pieces - 1] = ($active_players_move == 1) ? 'x' : 'o';
break;
} else {
echo "well fuck...1"; exit;
}
}
$location_of_new_pieces++;
}
return $board_to_use;
}
public function move_possible($move, $game_board)
{
// check that this move is not going to fall out of the board
if($game_board[$move][0] != "_")
{
return FALSE;
} else {
return TRUE;
}
}
public function check_for_winner_or_draw($game_array, $active_player_move)
{
$present_possible_winner = "";
$count_to_win = 0;
$game_not_a_draw = FALSE;
for($i = 0; $i < $this->board_length; $i++)
{
for($j = 0; $j < $this->board_width; $j++)
{
// start checking for a winner
if($game_array[$i][$j] != "_")
{
$present_possible_winner = $game_array[$i][$j];
// check for a winner horizontally
for($x = 0; $x < 4; $x++)
{
if($j+$x < $this->board_width)
{
if($game_array[$i][$j+$x] == $present_possible_winner)
{
$count_to_win = $count_to_win + 1;
}
}
}
if($count_to_win > 3)
{
return $present_possible_winner; // this player has won
} else {
$count_to_win = 0;
}
// check for a winner horizontally
for($y = 0; $y < 4; $y++)
{
if($i+$y < $this->board_width)
{
if($game_array[$i+$y][$j] == $present_possible_winner)
{
$count_to_win = $count_to_win + 1;
}
}
}
if($count_to_win > 3)
{
return $present_possible_winner; // this player has won
} else {
$count_to_win = 0;
}
// check for a winner up to down diagonal
for($z = 0; $z < 4; $z++)
{
if(($i+$z < $this->board_width) && ($j+$z < $this->board_length))
{
if($game_array[$i+$z][$j+$z] == $present_possible_winner)
{
$count_to_win = $count_to_win + 1;
}
}
}
if($count_to_win > 3)
{
return $present_possible_winner; // this player has won
} else {
$count_to_win = 0;
}
// check for a winner down to up diagonal
for($w = 0; $w < 4; $w++)
{
if(($i+$w < $this->board_width) && ($j-$w >= 0))
{
if($game_array[$i+$w][$j-$w] == $present_possible_winner)
{
$count_to_win = $count_to_win + 1;
}
}
}
if($count_to_win > 3)
{
return $present_possible_winner; // this player has won
} else {
$count_to_win = 0;
}
}
}
}
// check for a drawed game and return accordingly
for($i = 0; $i < $this->board_length; $i++)
{
for($j = 0; $j < $this->board_width; $j++)
{
if($game_array[$i][$j] == "_")
{
$game_not_a_draw = TRUE;
}
}
}
if(!$game_not_a_draw)
{
return -1;
}
return 0;
}
// this is a private debugging function that I wrote for this script
public function debug($value = NULL, $name = NULL, $exit = NULL)
{
if(!empty($name))
{
echo $name . "<br />";
}
echo "<pre>";
var_dump($value);
echo "</pre>";
if($exit)
{
exit;
}
}
}
Well I found it: The calculate ai move had an infinite loop in it...
//choose your move
for($u = 0; $u < $this->board_length; $u)
{
if($move_weight_array[$u][0] == 1)
{
return $prime_game_board[$u];
}
}
Should be
//choose your move
for($u = 0; $u < $this->board_length; $u++)
{
if($move_weight_array[$u][0] == 1)
{
return $prime_game_board[$u];
}
}
Back to work for me...

Interested in making a PHP script that increments IP address from defined starting address to defined ending address

I know I can do this easily by converting the IP addresses to decimal notation first using PHP built in functions like up2long and long2ip. I just want to be able to do the same using the standard IP address notation as an exercise.
The problem I am thinking goes like this: Given an starting IP address, say 192.168.1.100, and an ending IP address, say 201.130.22.10. Make the program that prints all the address numbers in that range (192.168.1.100, 192.168.1.101, … , 201.130.22.9, 201.130.22.10).
I was thinking that maybe the way to go would be to make a nested for loop inside a while condition until the first octet of the starting address matches the first octet of the ending address. Then execute the same block of code for the second octet and so on until the program reaches the ending address and finished.
I just started learning to program recently so it is quite possible that my of thinking and or writing code is far from elegant. If you were to this, how would you do it?
Something like this:
<?php
// works only for valid range
$start_ip = '10.0.0.1';
$end_ip = '10.0.20.1';
$start_arr = explode('.',$start_ip);
$end_arr = explode('.',$end_ip);
while($start_arr <= $end_arr)
{
echo implode('.',$start_arr) . '<br>';
$start_arr[3]++;
if($start_arr[3] == 256)
{
$start_arr[3] = 0;
$start_arr[2]++;
if($start_arr[2] == 256)
{
$start_arr[2] = 0;
$start_arr[1]++;
if($start_arr[1] == 256)
{
$start_arr[1] = 0;
$start_arr[0]++;
}
}
}
}
?>
This is much less complicated:
<?php
// works only for valid range
$start_ip = ip2long('10.0.0.1');
$end_ip = ip2long('10.0.20.1');
while($start_ip <= $end_ip){
echo long2ip($start_ip).'<br>';
$start_ip++;
}
?>
function getInBetweenIPs($startIP,$endIP){
$subIPS = array();
$start_ip = ip2long($startIP);
$end_ip = ip2long($endIP);
while($start_ip <= $end_ip){
$subIPS[]=long2ip($start_ip);
$start_ip++;
}
return $subIPS;
}
Increment (Add to):
<?php
function ipinc($i): string {
$i = explode(".", $i);
$a = $i[0];
$b = $i[1];
$c = $i[2];
$d = $i[3];
$d++;
if ($d > 255) {
$d = 0;
$c++;
}
if ($c > 255) {
$c = 0;
$b++;
}
if ($b > 255) {
$b = 0;
$a++;
}
if ($a > 255) {
die("IPv4 Range Exceeded");
}
return "$a.$b.$c.$d";
}
?>
Decrement (Take from):
<?php
function ipdec($i) {
$i = explode(".", $i);
$a = $i[0];
$b = $i[1];
$c = $i[2];
$d = $i[3];
$d--;
if ($d < 0) {
$d = 255;
$c--;
}
if ($c < 0) {
$c = 255;
$b--;
}
if ($b < 0) {
$b = 255;
$a--;
}
if ($a < 0) {
die("IPv4 Range Exceeded");
}
return "$a.$b.$c.$d";
}
?>
To test both functions, you can write a for loop to generate approximately 16 million IP addresses back and forth, you can pipe the output to a file and store the results that way.
<?php
require 'function.ipinc.php';
require 'function.ipdec.php';
print("Increment:\n");
for ($i = 0, $ip = "100.0.0.0"; $i <= 16777215; $i++) {
print("$ip\n");
$ip = ipinc($ip);
}
print("----------\n");
print("Decrement:\n");
for ($i = 0, $ip = "100.255.255.255"; $i <= 16777215; $i++) {
print("$ip\n");
$ip = ipdec($ip);
}
print("----------\n");
die("Finished!\n");
?>
If you don't like assigning values through variable declarations, modify the functions so you can use pass by reference instead.

How to generate excerpt with most searched words in PHP?

Here is an excerpt function:
function excerpt($text, $phrase, $radius = 100, $ending = "...") {
270 if (empty($text) or empty($phrase)) {
271 return $this->truncate($text, $radius * 2, $ending);
272 }
273
274 $phraseLen = strlen($phrase);
275 if ($radius < $phraseLen) {
276 $radius = $phraseLen;
277 }
278
279 $pos = strpos(strtolower($text), strtolower($phrase));
280
281 $startPos = 0;
282 if ($pos > $radius) {
283 $startPos = $pos - $radius;
284 }
285
286 $textLen = strlen($text);
287
288 $endPos = $pos + $phraseLen + $radius;
289 if ($endPos >= $textLen) {
290 $endPos = $textLen;
291 }
292
293 $excerpt = substr($text, $startPos, $endPos - $startPos);
294 if ($startPos != 0) {
295 $excerpt = substr_replace($excerpt, $ending, 0, $phraseLen);
296 }
297
298 if ($endPos != $textLen) {
299 $excerpt = substr_replace($excerpt, $ending, -$phraseLen);
300 }
301
302 return $excerpt;
303 }
Its drawback is that it doesn't try to match as many searched words as possible,which only matches once by default.
How to implement the desired one?
The code listed here thus far has not worked for me so I spent some time thinking of an algorithm to implement. What I have now works decently, and it does not appear to be a performance problem - feel free to test. Results are not as snazzy Google's snippets as there is no detection for where sentences start and end. I could add this but it'd be that much more complicated and I'd have to throw in the towel on doing this in a single function. Already its getting crowded and could be better coded if, for example, the object manipulations were abstracted to methods.
Anyhow, this is what I have and it should be a good start. The most dense excerpt is determined and the resulting string will approximately be the span you have specified. I urge some testing of this code as I have not done a thorough job of it. Surely there are problematic cases to be found.
I also encourage anyone to improve on this algorithm, or simply the code to execute it.
Enjoy.
// string excerpt(string $text, string $phrase, int $span = 100, string $delimiter = '...')
// parameters:
// $text - text to be searched
// $phrase - search string
// $span - approximate length of the excerpt
// $delimiter - string to use as a suffix and/or prefix if the excerpt is from the middle of a text
function excerpt($text, $phrase, $span = 100, $delimiter = '...') {
$phrases = preg_split('/\s+/', $phrase);
$regexp = '/\b(?:';
foreach ($phrases as $phrase) {
$regexp .= preg_quote($phrase, '/') . '|';
}
$regexp = substr($regexp, 0, -1) . ')\b/i';
$matches = array();
preg_match_all($regexp, $text, $matches, PREG_OFFSET_CAPTURE);
$matches = $matches[0];
$nodes = array();
foreach ($matches as $match) {
$node = new stdClass;
$node->phraseLength = strlen($match[0]);
$node->position = $match[1];
$nodes[] = $node;
}
if (count($nodes) > 0) {
$clust = new stdClass;
$clust->nodes[] = array_shift($nodes);
$clust->length = $clust->nodes[0]->phraseLength;
$clust->i = 0;
$clusters = new stdClass;
$clusters->data = array($clust);
$clusters->i = 0;
foreach ($nodes as $node) {
$lastClust = $clusters->data[$clusters->i];
$lastNode = $lastClust->nodes[$lastClust->i];
$addedLength = $node->position - $lastNode->position - $lastNode->phraseLength + $node->phraseLength;
if ($lastClust->length + $addedLength <= $span) {
$lastClust->nodes[] = $node;
$lastClust->length += $addedLength;
$lastClust->i += 1;
} else {
if ($addedLength > $span) {
$newClust = new stdClass;
$newClust->nodes = array($node);
$newClust->i = 0;
$newClust->length = $node->phraseLength;
$clusters->data[] = $newClust;
$clusters->i += 1;
} else {
$newClust = clone $lastClust;
while ($newClust->length + $addedLength > $span) {
$shiftedNode = array_shift($newClust->nodes);
if ($shiftedNode === null) {
break;
}
$newClust->i -= 1;
$removedLength = $shiftedNode->phraseLength;
if (isset($newClust->nodes[0])) {
$removedLength += $newClust->nodes[0]->position - $shiftedNode->position;
}
$newClust->length -= $removedLength;
}
if ($newClust->i < 0) {
$newClust->i = 0;
}
$newClust->nodes[] = $node;
$newClust->length += $addedLength;
$clusters->data[] = $newClust;
$clusters->i += 1;
}
}
}
$bestClust = $clusters->data[0];
$bestClustSize = count($bestClust->nodes);
foreach ($clusters->data as $clust) {
$newClustSize = count($clust->nodes);
if ($newClustSize > $bestClustSize) {
$bestClust = $clust;
$bestClustSize = $newClustSize;
}
}
$clustLeft = $bestClust->nodes[0]->position;
$clustLen = $bestClust->length;
$padding = round(($span - $clustLen)/2);
$clustLeft -= $padding;
if ($clustLeft < 0) {
$clustLen += $clustLeft*-1 + $padding;
$clustLeft = 0;
} else {
$clustLen += $padding*2;
}
} else {
$clustLeft = 0;
$clustLen = $span;
}
$textLen = strlen($text);
$prefix = '';
$suffix = '';
if (!ctype_space($text[$clustLeft]) && isset($text[$clustLeft-1]) && !ctype_space($text[$clustLeft-1])) {
while (!ctype_space($text[$clustLeft])) {
$clustLeft += 1;
}
$prefix = $delimiter;
}
$lastChar = $clustLeft + $clustLen;
if (!ctype_space($text[$lastChar]) && isset($text[$lastChar+1]) && !ctype_space($text[$lastChar+1])) {
while (!ctype_space($text[$lastChar])) {
$lastChar -= 1;
}
$suffix = $delimiter;
$clustLen = $lastChar - $clustLeft;
}
if ($clustLeft > 0) {
$prefix = $delimiter;
}
if ($clustLeft + $clustLen < $textLen) {
$suffix = $delimiter;
}
return $prefix . trim(substr($text, $clustLeft, $clustLen+1)) . $suffix;
}
I came up with the below to generate excerpts. You can see the code here https://github.com/boyter/php-excerpt It works by finding all the locations of the matching words, then takes an excerpt based on which words are the closest. In theory this does not sound very good but in practice it works very well.
Its actually very close to how Sphider (for the record it lives in searchfuncs.php from line 529 to 566) generates its snippets. I think the below is much easier to read and is without bugs which exist in Sphider. It also does not use regular expressions which makes it a bit faster then other methods I have used.
I blogged about it here http://www.boyter.org/2013/04/building-a-search-result-extract-generator-in-php/
<?php
// find the locations of each of the words
// Nothing exciting here. The array_unique is required
// unless you decide to make the words unique before passing in
function _extractLocations($words, $fulltext) {
$locations = array();
foreach($words as $word) {
$wordlen = strlen($word);
$loc = stripos($fulltext, $word);
while($loc !== FALSE) {
$locations[] = $loc;
$loc = stripos($fulltext, $word, $loc + $wordlen);
}
}
$locations = array_unique($locations);
sort($locations);
return $locations;
}
// Work out which is the most relevant portion to display
// This is done by looping over each match and finding the smallest distance between two found
// strings. The idea being that the closer the terms are the better match the snippet would be.
// When checking for matches we only change the location if there is a better match.
// The only exception is where we have only two matches in which case we just take the
// first as will be equally distant.
function _determineSnipLocation($locations, $prevcount) {
// If we only have 1 match we dont actually do the for loop so set to the first
$startpos = $locations[0];
$loccount = count($locations);
$smallestdiff = PHP_INT_MAX;
// If we only have 2 skip as its probably equally relevant
if(count($locations) > 2) {
// skip the first as we check 1 behind
for($i=1; $i < $loccount; $i++) {
if($i == $loccount-1) { // at the end
$diff = $locations[$i] - $locations[$i-1];
}
else {
$diff = $locations[$i+1] - $locations[$i];
}
if($smallestdiff > $diff) {
$smallestdiff = $diff;
$startpos = $locations[$i];
}
}
}
$startpos = $startpos > $prevcount ? $startpos - $prevcount : 0;
return $startpos;
}
// 1/6 ratio on prevcount tends to work pretty well and puts the terms
// in the middle of the extract
function extractRelevant($words, $fulltext, $rellength=300, $prevcount=50, $indicator='...') {
$textlength = strlen($fulltext);
if($textlength <= $rellength) {
return $fulltext;
}
$locations = _extractLocations($words, $fulltext);
$startpos = _determineSnipLocation($locations,$prevcount);
// if we are going to snip too much...
if($textlength-$startpos < $rellength) {
$startpos = $startpos - ($textlength-$startpos)/2;
}
$reltext = substr($fulltext, $startpos, $rellength);
// check to ensure we dont snip the last word if thats the match
if( $startpos + $rellength < $textlength) {
$reltext = substr($reltext, 0, strrpos($reltext, " ")).$indicator; // remove last word
}
// If we trimmed from the front add ...
if($startpos != 0) {
$reltext = $indicator.substr($reltext, strpos($reltext, " ") + 1); // remove first word
}
return $reltext;
}
?>
function excerpt($text, $phrase, $radius = 100, $ending = "...") {
$phraseLen = strlen($phrase);
if ($radius < $phraseLen) {
$radius = $phraseLen;
}
$phrases = explode (' ',$phrase);
foreach ($phrases as $phrase) {
$pos = strpos(strtolower($text), strtolower($phrase));
if ($pos > -1) break;
}
$startPos = 0;
if ($pos > $radius) {
$startPos = $pos - $radius;
}
$textLen = strlen($text);
$endPos = $pos + $phraseLen + $radius;
if ($endPos >= $textLen) {
$endPos = $textLen;
}
$excerpt = substr($text, $startPos, $endPos - $startPos);
if ($startPos != 0) {
$excerpt = substr_replace($excerpt, $ending, 0, $phraseLen);
}
if ($endPos != $textLen) {
$excerpt = substr_replace($excerpt, $ending, -$phraseLen);
}
return $excerpt; }
I could not contact erisco, so I am posting his function with multiple fixes (most importantly multibyte support).
/**
* #param string $text text to be searched
* #param string $phrase search string
* #param int $span approximate length of the excerpt
* #param string $delimiter string to use as a suffix and/or prefix if the excerpt is from the middle of a text
*
* #return string
*/
public static function excerpt($text, $phrase, $span = 100, $delimiter = '...')
{
$phrases = preg_split('/\s+/u', $phrase);
$regexp = '/\b(?:';
foreach($phrases as $phrase)
{
$regexp.= preg_quote($phrase, '/') . '|';
}
$regexp = mb_substr($regexp, 0, -1) .')\b/ui';
$matches = [];
preg_match_all($regexp, $text, $matches, PREG_OFFSET_CAPTURE);
$matches = $matches[0];
$nodes = [];
foreach($matches as $match)
{
$node = new stdClass;
$node->phraseLength = mb_strlen($match[0]);
$node->position = mb_strlen(substr($text, 0, $match[1])); // calculate UTF-8 position (#see https://bugs.php.net/bug.php?id=67487)
$nodes[] = $node;
}
if(count($nodes) > 0)
{
$clust = new stdClass;
$clust->nodes[] = array_shift($nodes);
$clust->length = $clust->nodes[0]->phraseLength;
$clust->i = 0;
$clusters = new stdClass;
$clusters->data =
[
$clust
];
$clusters->i = 0;
foreach($nodes as $node)
{
$lastClust = $clusters->data[$clusters->i];
$lastNode = $lastClust->nodes[$lastClust->i];
$addedLength = $node->position - $lastNode->position - $lastNode->phraseLength + $node->phraseLength;
if($lastClust->length + $addedLength <= $span)
{
$lastClust->nodes[] = $node;
$lastClust->length+= $addedLength;
$lastClust->i++;
}
else
{
if($addedLength > $span)
{
$newClust = new stdClass;
$newClust->nodes =
[
$node
];
$newClust->i = 0;
$newClust->length = $node->phraseLength;
$clusters->data[] = $newClust;
$clusters->i++;
}
else
{
$newClust = clone $lastClust;
while($newClust->length + $addedLength > $span)
{
$shiftedNode = array_shift($newClust->nodes);
if($shiftedNode === null)
{
break;
}
$newClust->i--;
$removedLength = $shiftedNode->phraseLength;
if(isset($newClust->nodes[0]))
{
$removedLength+= $newClust->nodes[0]->position - $shiftedNode->position;
}
$newClust->length-= $removedLength;
}
if($newClust->i < 0)
{
$newClust->i = 0;
}
$newClust->nodes[] = $node;
$newClust->length+= $addedLength;
$clusters->data[] = $newClust;
$clusters->i++;
}
}
}
$bestClust = $clusters->data[0];
$bestClustSize = count($bestClust->nodes);
foreach($clusters->data as $clust)
{
$newClustSize = count($clust->nodes);
if($newClustSize > $bestClustSize)
{
$bestClust = $clust;
$bestClustSize = $newClustSize;
}
}
$clustLeft = $bestClust->nodes[0]->position;
$clustLen = $bestClust->length;
$padding = intval(round(($span - $clustLen) / 2));
$clustLeft-= $padding;
if($clustLeft < 0)
{
$clustLen+= $clustLeft * -1 + $padding;
$clustLeft = 0;
}
else
{
$clustLen+= $padding * 2;
}
}
else
{
$clustLeft = 0;
$clustLen = $span;
}
$textLen = mb_strlen($text);
$prefix = '';
$suffix = '';
if($clustLeft > 0 && !ctype_space(mb_substr($text, $clustLeft, 1))
&& !ctype_space(mb_substr($text, $clustLeft - 1, 1)))
{
$clustLeft++;
while(!ctype_space(mb_substr($text, $clustLeft, 1)))
{
$clustLeft++;
}
$prefix = $delimiter;
}
$lastChar = $clustLeft + $clustLen;
if($lastChar < $textLen && !ctype_space(mb_substr($text, $lastChar, 1))
&& !ctype_space(mb_substr($text, $lastChar + 1, 1)))
{
$lastChar--;
while(!ctype_space(mb_substr($text, $lastChar, 1)))
{
$lastChar--;
}
$suffix = $delimiter;
$clustLen = $lastChar - $clustLeft;
}
if($clustLeft > 0)
{
$prefix = $delimiter;
}
if($clustLeft + $clustLen < $textLen)
{
$suffix = $delimiter;
}
return $prefix . trim(mb_substr($text, $clustLeft, $clustLen + 1)) . $suffix;
}

Categories