Detect Browser Language in PHP - php

I use the following PHP script as index for my website.
This script should include a specific page depending on the browser's language (automatically detected).
This script does not work correctly in all browsers: it always includes index_en.php, whatever language is detected (most probably because some Accept-Language header values are not being handled).
Could you please suggest a more robust solution?
<?php
// Open session var
session_start();
// views: 1 = first visit; >1 = second visit
// Detect language from user agent browser
/**
 * Copy a server variable into the global scope unless a non-empty global
 * of the same name already exists.
 *
 * Lookup order: $_SERVER, then the legacy $HTTP_SERVER_VARS array, then ''.
 *
 * The original used an unparenthesised nested ternary. PHP <= 7 evaluated it
 * left to right, so a value found in $_SERVER was discarded in favour of the
 * (normally missing) $HTTP_SERVER_VARS entry; PHP 8 rejects the expression
 * outright. An explicit if/elseif chain removes the ambiguity.
 *
 * @param string $Var Name of the server variable, e.g. 'HTTP_ACCEPT_LANGUAGE'.
 * @return void       The result is stored in $GLOBALS[$Var].
 */
function lixlpixel_get_env_var($Var)
{
    if (!empty($GLOBALS[$Var])) {
        return; // already populated, keep the existing value
    }

    if (!empty($_SERVER[$Var])) {
        $GLOBALS[$Var] = $_SERVER[$Var];
    } elseif (!empty($GLOBALS['HTTP_SERVER_VARS'][$Var])) {
        $GLOBALS[$Var] = $GLOBALS['HTTP_SERVER_VARS'][$Var];
    } else {
        $GLOBALS[$Var] = '';
    }
}
/**
 * Choose the site language from the request's Accept-Language header.
 *
 * Each comma-separated language range is parsed together with its optional
 * q-value. Ranges with q=0 are dropped, the rest are ordered by descending
 * quality (header order breaks ties), and the first range whose primary
 * subtag appears in $GLOBALS['_LANG'] wins.
 *
 * Differences from the previous implementation:
 *  - the trailing loop no longer returns the first configured language
 *    unconditionally, so the default language is actually reachable;
 *  - matching is done on parsed primary subtags rather than substrings, so a
 *    region such as the "ar" in "es-ar" is not mistaken for a language;
 *  - the header is read straight from $_SERVER instead of being copied into
 *    globals first, and the unused User-Agent lookup is gone.
 *
 * @return string A code from $GLOBALS['_LANG'], or $GLOBALS['_DLANG'] when
 *                nothing acceptable is found or the header is absent.
 */
function lixlpixel_detect_lang()
{
    $header = isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])
        ? strtolower((string) $_SERVER['HTTP_ACCEPT_LANGUAGE'])
        : '';

    // language-range [ ";" "q" "=" qvalue ], with optional whitespace around it
    $pattern = '/^\s*([a-z]{1,8})(?:-[a-z0-9]{1,8})*\s*(?:;\s*q\s*=\s*([01](?:\.\d*)?))?\s*$/';

    $ranges = array();
    foreach (explode(',', $header) as $position => $range) {
        if (!preg_match($pattern, $range, $m)) {
            continue; // malformed entry or wildcard: ignore it
        }
        $quality = (isset($m[2]) && $m[2] !== '') ? (float) $m[2] : 1.0;
        if ($quality > 0.0) { // q=0 means "not acceptable"
            $ranges[] = array('lang' => $m[1], 'q' => $quality, 'pos' => $position);
        }
    }

    // Highest quality first; equal qualities keep the order sent by the browser.
    usort($ranges, function ($a, $b) {
        if ($a['q'] != $b['q']) {
            return $a['q'] < $b['q'] ? 1 : -1;
        }
        return $a['pos'] - $b['pos'];
    });

    foreach ($ranges as $range) {
        if (in_array($range['lang'], $GLOBALS['_LANG'], true)) {
            return $range['lang'];
        }
    }

    // Return default language if language is not yet detected.
    return $GLOBALS['_DLANG'];
}
// Fallback language used when detection finds nothing usable.
$GLOBALS['_DLANG'] = 'en';

// Every language code the detector may return, in lookup order.
// WARNING: keep only the languages that are really available.
$GLOBALS['_LANG'] = [
    'af', 'ar', 'bg', 'ca', 'cs', // afrikaans, arabic, bulgarian, catalan, czech
    'da', 'de', 'el', 'en', 'es', // danish, german, greek, english, spanish
    'et', 'fi', 'fr', 'gl', 'he', // estonian, finnish, french, galician, hebrew
    'hi', 'hr', 'hu', 'id', 'it', // hindi, croatian, hungarian, indonesian, italian
    'ja', 'ko', 'ka', 'lt', 'lv', // japanese, korean, georgian, lithuanian, latvian
    'ms', 'nl', 'no', 'pl', 'pt', // malay, dutch, norwegian, polish, portuguese
    'ro', 'ru', 'sk', 'sl', 'sq', // romanian, russian, slovak, slovenian, albanian
    'sr', 'sv', 'th', 'tr', 'uk', // serbian, swedish, thai, turkish, ukrainian
    'zh',                         // chinese
];
// Redirect to the correct location.
// Example Implementation aff var lang to name file
/*
echo 'The Language detected is: '.lixlpixel_detect_lang(); // For Demonstration
echo "<br />";
*/
// Keep the detected code in its own variable so the branches below can test it.
$lang_var = lixlpixel_detect_lang();

// Include the page that matches the browser language.
// Only French and Italian have dedicated pages; English and every other
// detected language are served the English page.
if ($lang_var == "fr") {
    include("index_fr.php");
} elseif ($lang_var == "it") {
    include("index_it.php");
} else {
    include("index_en.php");
}
?>

Why don't you keep it simple and clean?
<?php
// Use the first two characters of Accept-Language as the language code.
// The header is optional, so fall back to '' instead of reading an undefined
// index, and lowercase the value because some clients send "EN-US".
$lang = strtolower(substr($_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '', 0, 2));
$acceptLang = ['fr', 'it', 'en'];
// Only whitelisted codes ever reach the file name below.
$lang = in_array($lang, $acceptLang, true) ? $lang : 'en';
require_once "index_{$lang}.php";
?>

Accept-Language is a list of weighted values (see q parameter). That means just looking at the first language does not mean it’s also the most preferred; in fact, a q value of 0 means not acceptable at all.
So instead of just looking at the first language, parse the list of accepted languages and available languages and find the best match:
/**
 * Parse a comma-separated list of language ranges and group the tags by
 * quality value, highest quality first.
 *
 * Fixes over the previous version:
 *  - "q=0" and "q=1" without a fractional part are now recognised (they were
 *    silently read as 1.0, so an explicitly refused language was treated as
 *    the most preferred one);
 *  - an explicit q=1 and an omitted q-value share the single key '1.0'
 *    instead of being split across '1.0' and 1;
 *  - the pattern is anchored, so entries with trailing garbage are skipped
 *    rather than partially matched;
 *  - the argument is optional; null (or no argument) reads the request header.
 *
 * @param string|null $languageList Header value, or null to use
 *                                  $_SERVER['HTTP_ACCEPT_LANGUAGE'].
 * @return array Map of quality => list of lowercase language tags, sorted by
 *               descending quality. Empty when there is nothing to parse.
 */
function parseLanguageList($languageList = null) {
    if ($languageList === null) {
        if (!isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
            return array();
        }
        $languageList = $_SERVER['HTTP_ACCEPT_LANGUAGE'];
    }

    $pattern = '/^(\*|[a-zA-Z0-9]{1,8}(?:-[a-zA-Z0-9]{1,8})*)\s*(?:;\s*q\s*=\s*(0(?:\.\d*)?|1(?:\.0*)?))?$/';

    $languages = array();
    foreach (explode(',', (string) $languageList) as $languageRange) {
        if (!preg_match($pattern, trim($languageRange), $match)) {
            continue;
        }
        $quality = isset($match[2]) && $match[2] !== '' ? (float) $match[2] : 1.0;
        // Keep the historical '1.0' key for full quality so existing lookups still work.
        $key = $quality === 1.0 ? '1.0' : (string) $quality;
        $languages[$key][] = strtolower($match[1]);
    }

    krsort($languages);
    return $languages;
}
/**
 * Cross two quality-grouped tag lists (as produced by parseLanguageList)
 * and collect every available tag that matches an accepted one.
 *
 * The score of a hit is acceptedQuality * availableQuality * matchLanguage
 * grade; hits are grouped under that score (as a string key) and the result
 * is sorted by descending score. Groups with quality 0 on either side are
 * skipped. If nothing matched but a '*' range was seen while scanning,
 * the whole $available list is returned instead.
 *
 * @param array $accepted  quality => list of tags the client accepts
 * @param array $available quality => list of tags the site offers
 * @return array score => list of available tags, best score first
 */
function findMatches($accepted, $available) {
    $found = array();
    $wildcardSeen = false;

    foreach ($accepted as $wantedKey => $wantedTags) {
        $wantedWeight = floatval($wantedKey);
        if ($wantedWeight !== 0.0) {
            foreach ($available as $offeredKey => $offeredTags) {
                $offeredWeight = floatval($offeredKey);
                if ($offeredWeight !== 0.0) {
                    foreach ($wantedTags as $wantedTag) {
                        $wildcardSeen = $wildcardSeen || $wantedTag === '*';
                        foreach ($offeredTags as $offeredTag) {
                            $grade = matchLanguage($wantedTag, $offeredTag);
                            if (!($grade > 0)) {
                                continue;
                            }
                            $bucket = (string) ($wantedWeight * $offeredWeight * $grade);
                            if (!isset($found[$bucket])) {
                                $found[$bucket] = array();
                            }
                            // list each available tag at most once per score
                            if (!in_array($offeredTag, $found[$bucket])) {
                                $found[$bucket][] = $offeredTag;
                            }
                        }
                    }
                }
            }
        }
    }

    if ($wildcardSeen && count($found) === 0) {
        $found = $available;
    }
    krsort($found);
    return $found;
}
/**
 * Grade how well language tag $b satisfies language tag $a.
 *
 * Both tags are split on '-' and compared subtag by subtag from the left.
 * The grade is the number of leading subtags they share divided by the
 * number of subtags in $a; it is the integer 0 when even the first subtag
 * differs.
 *
 * @param string $a requested tag
 * @param string $b offered tag
 * @return int|float 0 for no match, otherwise a float in (0, 1]
 */
function matchLanguage($a, $b) {
    $wanted = explode('-', $a);
    $offered = explode('-', $b);
    $limit = min(count($wanted), count($offered));

    $shared = 0;
    while ($shared < $limit && $wanted[$shared] === $offered[$shared]) {
        $shared++;
    }

    if ($shared === 0) {
        return 0;
    }
    return (float) $shared / count($wanted);
}
$accepted = parseLanguageList($_SERVER['HTTP_ACCEPT_LANGUAGE']);
var_dump($accepted);
$available = parseLanguageList('en, fr, it');
var_dump($available);
$matches = findMatches($accepted, $available);
var_dump($matches);
If findMatches returns an empty array, no match was found and you can fall back on the default language.

The existing answers are a little too verbose so I created this smaller, auto-matching version.
/**
 * Rank the supported languages by the client's Accept-Language header.
 *
 * An exact tag match keeps the q-value sent by the client; a match on the
 * primary subtag only (e.g. "es-cl" against "es") is scored 0.1 lower.
 * A later entry for the same language overwrites an earlier one.
 *
 * $langs is now initialised, so a header with no usable entries yields an
 * empty array instead of passing an undefined variable to arsort().
 *
 * @param array  $available_languages  Lowercase tags the site supports.
 * @param string $http_accept_language Raw Accept-Language header value.
 * @return array Map of language tag => score, best score first; empty if
 *               none of the supported languages was requested.
 */
function prefered_language(array $available_languages, $http_accept_language) {
    $available_languages = array_flip($available_languages);
    $langs = array();

    preg_match_all('~([\w-]+)(?:[^,\d]+([\d.]+))?~', strtolower((string) $http_accept_language), $matches, PREG_SET_ORDER);
    foreach ($matches as $match) {
        $primary = explode('-', $match[1])[0];
        $value = isset($match[2]) ? (float) $match[2] : 1.0;

        if (isset($available_languages[$match[1]])) {
            $langs[$match[1]] = $value;
            continue;
        }
        if (isset($available_languages[$primary])) {
            // prefix-only match: rank slightly below an exact match
            $langs[$primary] = $value - 0.1;
        }
    }

    arsort($langs);
    return $langs;
}
And the sample usage:
//$_SERVER["HTTP_ACCEPT_LANGUAGE"] = 'en-us,en;q=0.8,es-cl;q=0.5,zh-cn;q=0.3';
// Languages we support
$available_languages = array("en", "zh-cn", "es");
$langs = prefered_language($available_languages, $_SERVER["HTTP_ACCEPT_LANGUAGE"]);
/* Result
Array
(
[en] => 0.8
[es] => 0.4
[zh-cn] => 0.3
)*/
Full gist source here

The official way to handle this is using the PECL HTTP library. Unlike some answers here, this correctly handles the language priorities (q-values), partial language matches and will return the closest match, or when there are no matches it falls back to the first language in your array.
PECL HTTP:
http://pecl.php.net/package/pecl_http
How to use:
http://php.net/manual/fa/function.http-negotiate-language.php
$supportedLanguages = [
'en-US', // first one is the default/fallback
'fr',
'fr-FR',
'de',
'de-DE',
'de-AT',
'de-CH',
];
// Returns the negotiated language
// or the default language (i.e. first array entry) if none match.
$language = http_negotiate_language($supportedLanguages, $result);

The problem with the selected answer above is that the user's first-choice language may not be in the case structure even though one of their other language choices is. You should loop until you find a match.
This is a super simple solution that works better. Browsers return the languages in order of preference, so that simplifies the problem. While the language designator can be more than two characters (e.g. - "EN-US"), typically the first two are sufficient. In the following code example I'm looking for a match from a list of known languages my program is aware of.
// Walk the browser's languages in the order sent and stop at the first one
// the application knows.
$known_langs = array('en','fr','de','es');
// The header is optional; treat a missing header as an empty list.
$user_pref_langs = explode(',', isset($_SERVER['HTTP_ACCEPT_LANGUAGE']) ? $_SERVER['HTTP_ACCEPT_LANGUAGE'] : '');
foreach($user_pref_langs as $idx => $lang) {
    // Entries may be written as "en-US, fr;q=0.8": strip the space that can
    // follow a comma and normalise case before taking the two-letter prefix,
    // otherwise " f" or "EN" never matches the list.
    $lang = strtolower(substr(trim($lang), 0, 2));
    if (in_array($lang, $known_langs, true)) {
        echo "Preferred language is $lang";
        break;
    }
}
I hope you find this a quick and simple solution that you can easily use in your code. I've been using this in production for quite a while.

Try this one:
#########################################################
# Copyright © 2008 Darrin Yeager #
# https://www.dyeager.org/ #
# Licensed under BSD license. #
# https://www.dyeager.org/downloads/license-bsd.txt #
#########################################################
/**
 * Return the language preferred by the current request.
 *
 * Hands the Accept-Language header (or NULL when the header is absent)
 * to parseDefaultLanguage() and returns its result.
 */
function getDefaultLanguage() {
    $header = isset($_SERVER["HTTP_ACCEPT_LANGUAGE"])
        ? $_SERVER["HTTP_ACCEPT_LANGUAGE"]
        : NULL;

    return parseDefaultLanguage($header);
}
/**
 * Return the language tag with the highest q-value in an Accept-Language
 * header value.
 *
 * Entries are trimmed before use: the header may contain a space after each
 * comma, and the previous version returned tags such as " en-gb" with the
 * leading space included. The q-value pattern is anchored and its decimal
 * point escaped so only a real "q=<number>" parameter is read as a weight.
 * On equal q-values the entry listed first wins.
 *
 * @param string|null $http_accept Raw header value, or NULL when absent.
 * @param string      $deflang     Language returned when nothing can be parsed.
 * @return string Lowercase language tag.
 */
function parseDefaultLanguage($http_accept, $deflang = "en") {
    if (isset($http_accept) && strlen($http_accept) > 1) {
        # Split possible languages into array
        $lang = array();
        foreach (explode(",", $http_accept) as $val) {
            $val = trim($val);
            if ($val === '') {
                continue;
            }
            # check for q-value and create associative array. No q-value means 1 by rule
            if (preg_match('/^(.*?)\s*;\s*q\s*=\s*([01](?:\.\d*)?|\.\d+)\s*$/i', $val, $matches)) {
                $lang[trim($matches[1])] = (float) $matches[2];
            } else {
                $lang[$val] = 1.0;
            }
        }

        # return default language (highest q-value)
        $qval = 0.0;
        foreach ($lang as $key => $value) {
            if ($value > $qval) {
                $qval = (float) $value;
                $deflang = $key;
            }
        }
    }
    return strtolower($deflang);
}
https://www.dyeager.org/blog/2008/10/getting-browser-default-language-php.html

Unfortunately, none of the answers to this question takes into account some valid HTTP_ACCEPT_LANGUAGE such as:
q=0.8,en-US;q=0.5,en;q=0.3: having the q priority value at first place.
ZH-CN: old browsers that capitalise (wrongly) the whole langcode.
*: that basically say "serve whatever language you have".
After a comprehensive test with thousands of different Accept-Languages that reached my server, this is my language detection method:
define('SUPPORTED_LANGUAGES', ['en', 'es']);

/**
 * Return the first supported language named in the Accept-Language header.
 *
 * The header is split on ',' and ';'; "q=..." fragments are skipped, region
 * subtags are dropped, and the remaining primary subtags are checked in the
 * order sent. q-values are deliberately not used for ranking.
 *
 * Each fragment is now trimmed and lowercased before any test: a space after
 * a comma used to defeat both the "q=" check and the lookup, and an
 * upper-case tag was returned as sent ("EN") rather than as the supported
 * code. A missing header is treated as empty.
 *
 * @param string $fallback Language returned when no supported one is found.
 * @return string A code from SUPPORTED_LANGUAGES, or $fallback.
 */
function detect_language($fallback='en') {
    $header = $_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '';
    foreach (preg_split('/[;,]/', (string) $header) as $sub) {
        $sub = strtolower(trim($sub));
        if (substr($sub, 0, 2) == 'q=') continue;
        $sub = explode('-', $sub)[0]; // keep the primary subtag only
        if (in_array($sub, SUPPORTED_LANGUAGES, true)) return $sub;
    }
    return $fallback;
}

The following script is a modified version of Xeoncross's code (thank you for that Xeoncross) that falls-back to a default language setting if no languages match the supported ones, or if a match is found it replaces the default language setting with a new one according to the language priority.
In this scenario the user's browser is set in order of priority to Spanish, Dutch, US English and English and the application supports English and Dutch only with no regional variations and English is the default language. The order of the values in the "HTTP_ACCEPT_LANGUAGE" string is not important if for some reason the browser does not order the values correctly.
// Supported languages, flipped so that codes can be looked up as array keys.
$supported_languages = array("en","nl");
$supported_languages = array_flip($supported_languages);
var_dump($supported_languages); // array(2) { ["en"]=> int(0) ["nl"]=> int(1) }

// The header is optional; a missing header is parsed as an empty string
// instead of triggering an undefined-index warning.
$http_accept_language = isset($_SERVER["HTTP_ACCEPT_LANGUAGE"]) ? $_SERVER["HTTP_ACCEPT_LANGUAGE"] : ''; // es,nl;q=0.8,en-us;q=0.5,en;q=0.3
preg_match_all('~([\w-]+)(?:[^,\d]+([\d.]+))?~', strtolower($http_accept_language), $matches, PREG_SET_ORDER);

// One single-entry array (language code => priority) per header entry.
$available_languages = array();
foreach ($matches as $match)
{
    list($language_code,$language_region) = explode('-', $match[1]) + array('', '');
    // No explicit q-value means full priority.
    $priority = isset($match[2]) ? (float) $match[2] : 1.0;
    $available_languages[][$language_code] = $priority;
}
var_dump($available_languages);
/*
array(4) {
[0]=>
array(1) {
["es"]=>
float(1)
}
[1]=>
array(1) {
["nl"]=>
float(0.8)
}
[2]=>
array(1) {
["en"]=>
float(0.5)
}
[3]=>
array(1) {
["en"]=>
float(0.3)
}
}
*/
// Start from the fallback language and replace it whenever a supported
// language with a strictly higher priority is found.
$default_language_code = 'en';
$default_priority = 0.0;
foreach ($available_languages as $entry)
{
    // every entry holds exactly one pair: language code => priority
    $language_code = key($entry);
    $priority = $entry[$language_code];

    $is_supported = array_key_exists($language_code, $supported_languages);
    if (!($priority > $default_priority) || !$is_supported)
    {
        continue;
    }

    $default_priority = $priority;
    $default_language_code = $language_code;
    var_dump($default_priority); // float(0.8)
    var_dump($default_language_code); // string(2) "nl"
}
var_dump($default_language_code); // string(2) "nl"

Quick and simple:
$language = trim(substr( strtok(strtok($_SERVER['HTTP_ACCEPT_LANGUAGE'], ','), ';'), 0, 5));
NOTE:
The first language code is what is being used by the browser, the rest are other languages the user has setup in the browser.
Some languages have a region code, eg. en-GB, others just have the language code, eg. sk.
If you just want the language and not the region (eg. en, fr, es, etc.), you can use:
$language =substr($_SERVER['HTTP_ACCEPT_LANGUAGE'], 0, 2);

I think the cleanest way is this!
<?php
// Two-letter prefix of Accept-Language; the header is optional and some
// clients send upper-case tags, so default to '' and lowercase the value.
$lang = strtolower(substr($_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '', 0, 2));
// NOTE(review): 'gr' is not an ISO 639-1 language code (Greek is 'el'), so
// this entry can only match if a client sends 'gr' - confirm which was meant.
$supportedLanguages=['en','fr','gr'];
if(!in_array($lang,$supportedLanguages,true)){
    $lang='en';
}
// Only whitelisted codes ever reach the file name.
require("index_".$lang.".php");

There is a method in php-intl extension:
locale_accept_from_http($_SERVER['HTTP_ACCEPT_LANGUAGE'])

FOR LARAVEL USERS, here's a single line of code that returns a very clean collection (or array) of preferred languages:
$langs = Str::of($_SERVER['HTTP_ACCEPT_LANGUAGE'])
->explode(',')
->transform(fn($lang) => Str::substr($lang, 0, 2))
->unique();

All of the above with fallback to 'en':
$lang = substr(explode(',',$_SERVER['HTTP_ACCEPT_LANGUAGE'])[0],0,2)?:'en';
...or with default language fallback and known language array:
/**
 * Pick a language from $l based on the first Accept-Language entry.
 *
 * $u now has a default: a required parameter after an optional one made the
 * documented one-argument calls (lang(['de'])) fail on PHP 8. The lookup uses
 * in_array() instead of indexing the result of array_keys(), which raised an
 * undefined-offset warning whenever the language was not in the list.
 *
 * @param array       $l Known languages; the first one is the default.
 * @param string|null $u Accept-Language value; when empty, the request
 *                       header is used.
 * @return string The two-letter code if it is in $l, otherwise $l[0].
 */
function lang( $l = ['en'], $u = null ){
    $header = $u ?: ($_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '');
    $code = strtolower(substr(explode(',', (string) $header)[0], 0, 2));

    return in_array($code, $l, true) ? $code : $l[0];
}
One Line:
// $u is optional (PHP 8 rejects one-argument calls otherwise); returns the first header language if it is in $l, else $l[0].
function lang($l=['en'],$u=null){$c=strtolower(substr(explode(',',(string)($u?:($_SERVER['HTTP_ACCEPT_LANGUAGE']??'')))[0],0,2));return in_array($c,$l,true)?$c:$l[0];}
Examples:
// first known lang is always default
$_SERVER['HTTP_ACCEPT_LANGUAGE'] = 'en-us';
lang(['de']); // 'de'
lang(['de','en']); // 'en'
// manual set accept-language
lang(['de'],'en-us'); // 'de'
lang(['de'],'de-de, en-us'); // 'de'
lang(['en','fr'],'de-de, en-us'); // 'en'
lang(['en','fr'],'fr-fr, en-us'); // 'fr'
lang(['de','en'],'fr-fr, en-us'); // 'de'

Try,
// Load the language file for the browser's two-letter language code.
// Turkish is both an explicit choice and the fallback for anything else.
$lang = substr($_SERVER['HTTP_ACCEPT_LANGUAGE'], 0,2);
switch ($lang) {
    case 'en':
        include_once('include/language/en.php');
        break;
    case 'de':
        include_once('include/language/de.php');
        break;
    case 'fr':
        include_once('include/language/fr.php');
        break;
    case 'tr':
    default:
        include_once('include/language/tr.php');
        break;
}
Thanks to

Since PHP 5.3.0 there is a Locale class bundled with the php-intl extension which has a method for this:
echo Locale::acceptFromHttp($_SERVER['HTTP_ACCEPT_LANGUAGE']);
or procedural style:
locale_accept_from_http($_SERVER['HTTP_ACCEPT_LANGUAGE']);
https://www.php.net/manual/en/locale.acceptfromhttp.php

I've got this one, which sets a cookie. As you can see, it first checks whether the user has posted a language, because the browser language does not always reflect what the user actually wants.
<?php
// Store the site language in a cookie, then redirect.
// A language posted by the user takes precedence over the browser's header.
$lang = (string) getenv("HTTP_ACCEPT_LANGUAGE"); // getenv() returns false when the header is absent
$set_lang = explode(',', $lang);

// Accept the posted value only if it looks like a language tag; anything
// else falls back to the first language announced by the browser.
if (isset($_POST['lang'])
    && is_string($_POST['lang'])
    && preg_match('/^[A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*$/', $_POST['lang']))
{
    $taal = $_POST['lang'];
}
else
{
    $taal = $set_lang[0];
}

// Nothing may be echoed before this point: output sent ahead of setcookie()
// and header() prevents both the cookie and the redirect from being sent.
setcookie("lang", $taal);
header('Location: /p/');
exit; // stop here so nothing else runs after the redirect is queued
?>

Related

PHP: Detect user language and able to change language

I have a website that lets the user choose a language. When a user visits the site for the first time, I want PHP to read their system language and store it in a cookie, so that by default they get the same language on every visit. When the user wants to change the site language, they press a button for the chosen language (for example Russian); the site language is then set to Russian, and the next time they visit the site it will still be in Russian.
So far I have this code, but it's really confusing and it doesn't work properly.
HTML:
<a href="index.php?language=en">
<a href="index.php?language=ru">
PHP:
<?php
ini_set('display_errors',1);
error_reporting(E_ALL);
// Decide the page language. Priority: explicit ?language= choice, then the
// stored cookie, then the browser's Accept-Language header.
//
// The previous version called setcookie() and then read $_COOKIE in the same
// request; a cookie set now is only visible on the NEXT request, so a first
// visit always ended up in the "ru" branch. It also never stored the
// language chosen through the link.
$browser_language = strtolower(substr(isset($_SERVER['HTTP_ACCEPT_LANGUAGE']) ? $_SERVER['HTTP_ACCEPT_LANGUAGE'] : '', 0, 2));

if ( !empty($_GET['language']) ) {
    // the links only offer "en" and "ru"
    $language = $_GET['language'] === 'en' ? 'en' : 'ru';
} elseif ( !empty($_COOKIE['language']) ) {
    $language = $_COOKIE['language'] === 'en' ? 'en' : 'ru';
} else {
    // first visit: Russian browsers get Russian, everything else English
    $language = $browser_language === 'ru' ? 'ru' : 'en';
}

// Persist the choice for a year (must run before any output is sent) and
// mirror it into $_COOKIE so the rest of this request sees it too.
if ( !isset($_COOKIE['language']) || $_COOKIE['language'] !== $language ) {
    setcookie('language', $language, time() + 365 * 24 * 60 * 60, '/');
    $_COOKIE['language'] = $language;
}
// Load the translation file; abort the page if it cannot be read.
$xml = simplexml_load_file("language.xml") or die("Equestria forgot languages");

// Each string lives in an element named like the variable that receives it,
// with one child element per language: $s_title = $xml->s_title->$language.
$string_names = array(
    's_nav_main', 's_nav_more', 's_nav_bot', 's_nav_partners', 's_nav_developer',
    's_aboutus', 's_title', 's_head_title', 's_head_info',
    's_statistics_people', 's_statistics_online', 's_statistics_messages',
    's_why_we_best',
    's_why_we_best_content_title', 's_why_we_best_content_info',
    's_why_we_best_adm_title', 's_why_we_best_adm_info',
    's_why_we_best_comfort_title', 's_why_we_best_comfort_info',
    's_why_we_best_wtf_title', 's_why_we_best_wtf_info',
    's_trusted_title', 's_trusted_info',
    's_people_celestia', 's_people_celestia_comment',
    's_people_luna', 's_people_luna_comment',
    's_people_twilight', 's_people_twilight_comment',
    's_botinfo_info', 's_botinfo_more',
);
foreach ($string_names as $string_name) {
    $$string_name = $xml->$string_name->$language;
}
?>
The first place you should look for the users preferred language is the Accept-Language header. Geo-IP lookups are a dangerous and expensive waste of time (at least for determining language). Beyond that, you can set a cookie to override the choices presented by the browser, but there are legal implications around this for websites in Europe.
$avail_lang=array(
'en'=>1,
'fr'=>1,
'de'=>1,
'ru'=>1
);
define("DEFAULT_LANG", 'en');
...
// Choose the language: cookie first, GET overrides it, and the browser's
// Accept-Language header is used only when neither gave a supported value.
//
// Repairs: both if-conditions were missing a closing parenthesis; the
// header parser split on "=" (yielding codes like "en;q"); rsort() threw the
// language keys away; array_shift() was applied to a temporary and returned a
// priority rather than a language; and the fallback used the undefined
// constant DEFAULT_LANGUAGE instead of DEFAULT_LANG.
$use_lang = null;
if (!empty($_COOKIE['language']) && is_string($_COOKIE['language']) && isset($avail_lang[$_COOKIE['language']])) {
    $use_lang = $_COOKIE['language'];
}
// override with GET if provided
if (!empty($_GET['language']) && is_string($_GET['language']) && isset($avail_lang[$_GET['language']])) {
    $use_lang = $_GET['language'];
}
// no language? check browser
if (!$use_lang) {
    $pref_lang = array();
    $request_lang = explode(",", isset($_SERVER['HTTP_ACCEPT_LANGUAGE']) ? $_SERVER['HTTP_ACCEPT_LANGUAGE'] : '');
    foreach ($request_lang as $i) {
        // "en-GB;q=0.8" -> tag "en-GB", parameter "q=0.8"
        $parts = explode(";", trim($i));
        $tag = explode("-", trim($parts[0]));
        $lang = strtolower($tag[0]);
        if ($lang === '') {
            continue;
        }
        $pref = 1.0; // no q-value means full preference
        if (isset($parts[1]) && preg_match('/^\s*q\s*=\s*([0-9.]+)\s*$/i', $parts[1], $q)) {
            $pref = (float) $q[1];
        }
        // skip refused languages (q=0) and keep the best score per language
        if ($pref > 0 && (!isset($pref_lang[$lang]) || $pref > $pref_lang[$lang])) {
            $pref_lang[$lang] = $pref;
        }
    }
    arsort($pref_lang); // highest preference first, keys preserved
    $candidates = array_keys(array_intersect_key($pref_lang, $avail_lang));
    $use_lang = $candidates ? $candidates[0] : DEFAULT_LANG;
}
if (user_accepts_cookies() && $use_lang!=$_COOKIE['language']) {
set_lang_cookie($use_lang);
}
a simple logic can be adopted here -
when a user lands at your website you should track his/her IP address, we can easily get their country using that IP. Then you can easily serve language to them.
Found the way how to do this:
// Browser language: first two characters of the Accept-Language header.
$lang = substr($_SERVER['HTTP_ACCEPT_LANGUAGE'], 0, 2);

// An explicit ?language= wins; otherwise an empty cookie is seeded from the
// browser language. An existing cookie is left untouched.
if ( !empty($_GET['language']) ) {
    $_COOKIE['language'] = ($_GET['language'] === 'en') ? 'en' : 'ru';
} else {
    if ( empty($_COOKIE['language']) ) {
        $_COOKIE['language'] = $lang;
    }
}

// Send the (possibly updated) value back to the browser.
setcookie('language', $_COOKIE['language']);

// Anything other than "en" is shown in Russian.
$language = ($_COOKIE['language'] == "en") ? 'en' : 'ru';

validating a URL with PHP and returning only the host domain name.

I want to validate a domain name and then return the main domain, stripped down, e.g. http://www.example.co.uk/path/ to example.co.uk
I have made a start with help from various sources and can do this with .com, .net, .org, .info & all the .uk’s.
$targetUrl = 'http://sub.example.uk/test/';
$host = filter(get_domain($targetUrl));
/**
 * Reduce a host name to its registrable domain for the suffixes this
 * function knows: .uk (including second-level names such as co.uk) and
 * .com/.net/.org/.info.
 *
 * Changes from the previous version:
 *  - unknown suffixes, empty input and single-label names return false
 *    explicitly (they used to fall through and return null, or build
 *    nonsense like "com.com" while reading undefined offsets);
 *  - for .uk the decision is based on the second-to-last label only, so a
 *    deep name whose *second* label happens to be "co" is no longer returned
 *    unchanged.
 *
 * @param string|false|null $domain Host name, e.g. "sub.example.co.uk".
 * @return string|false Registrable domain, or false when it cannot be determined.
 */
function filter($domain){
    if (!$domain) {
        return false;
    }

    $labels = explode(".", $domain);
    $count = count($labels);
    if ($count < 2) {
        return false; // a bare suffix or single label has no registrable domain
    }

    $last = $labels[$count - 1];
    $second_last = $labels[$count - 2];

    // UK Validation
    if ($last === 'uk') {
        $uk_second = array('ac', 'co', 'gov', 'judiciary', 'ltd', 'me', 'mod', 'net', 'nhs', 'nic', 'org', 'parliament', 'plc', 'police', 'sch');
        if ($count > 2 && in_array($second_last, $uk_second, true)) {
            // name.co.uk style: keep the last three labels
            return implode('.', array_slice($labels, -3));
        }
        // name.uk style: keep the last two labels
        return $second_last.'.'.$last;
    }
    // END .UK

    // Simple single TLDs
    $single_tlds = array('com', 'net', 'org', 'info');
    if (in_array($last, $single_tlds, true)) {
        return $second_last.'.'.$last;
    }

    return false; // suffix not recognised
}
/**
 * Extract the host from a URL and trim leading sub-domains using a set of
 * heuristics (two-letter country TLDs, a list of generic TLDs).
 *
 * Changes from the previous version:
 *  - only a leading "www." is removed; str_ireplace() used to delete "www."
 *    anywhere in the host, turning "mywww.com" into "mycom";
 *  - an invalid URL, or one without a host part, returns false explicitly
 *    instead of falling off the end of the function.
 * The trimming heuristics themselves are unchanged.
 *
 * @param string $domain Absolute URL including the scheme.
 * @return string|false Host (possibly shortened), the IP address for IP
 *                      hosts, or false when the URL is invalid.
 */
function get_domain($domain) {
    $domain = strtolower($domain);
    if (filter_var($domain, FILTER_VALIDATE_URL) === false) {
        return false;
    }

    $host = parse_url($domain, PHP_URL_HOST);
    if (!is_string($host) || $host === '') {
        return false; // e.g. "mailto:" style URLs validate but have no host
    }

    $domain = preg_replace('/^www\./', '', $host);
    if (filter_var($domain, FILTER_VALIDATE_IP)) { return $domain; }

    // At most four pieces: anything beyond the third dot stays in the last one.
    $arr = array_values(array_filter(explode('.', $domain, 4), function($value){
        return $value !== 'www'; })); //rebuild array indexes

    if (count($arr) > 2) {
        $count = count($arr);
        $_sub = explode('.', $count === 4 ? $arr[3] : $arr[2]);
        if (count($_sub) === 2) { // two level TLD
            array_shift($arr);
            if ($count === 4) // got a subdomain acting as a domain
                array_shift($arr);
        }
        elseif (count($_sub) === 1){ // one level TLD
            $removed = array_shift($arr); //remove the subdomain
            if (strlen($_sub[0]) === 2 && $count === 3) // TLD domain must be 2 letters
                array_unshift($arr, $removed);
            else{
                // non country TLD according to IANA
                $tlds = array( 'aero', 'arpa', 'asia', 'biz', 'cat', 'com', 'coop', 'edu', 'gov', 'info', 'jobs', 'mil', 'mobi', 'museum', 'name', 'net', 'org', 'post', 'pro', 'tel', 'travel', 'xxx', );
                if (count($arr) > 2 && in_array($_sub[0], $tlds) !== false) {//special TLD don't have a country
                    array_shift($arr);
                }
            }
        }
        else { // more than 3 levels, something is wrong
            for ($i = count($_sub); $i > 1; $i--)
                array_shift($arr);
        }
    }
    elseif (count($arr) === 2) {
        $arr0 = array_shift($arr);
        if (strpos(join('.', $arr), '.') === false
            && in_array($arr[0], array('localhost','test','invalid')) === false) // not a reserved domain
        {
            // seems invalid domain, restore it
            array_unshift($arr, $arr0);
        }
    }
    return join('.', $arr);
}
It’s just not very scalable: I’m going to have to go through all the domain suffixes and add them. I’m sure there must be a simpler way. Would someone be so kind as to help out? Maybe there is some way of loading the list from https://publicsuffix.org/list/public_suffix_list.dat
So, for a list of data and the results I would expect to see are:
http://subdomain.example.co.uk/path/site.php -> example.co.uk
http://subdomain.example.uk/path/site.php -> example.uk
www.subdomain.example.uk/path/site.php -> example.uk
subdomain.example.uk -> example.uk
http://gobble.gobble.notavalidsuffix -> false
The below will validate a URL by stripping the unnecessary URL parameters etc.. from a domain and then pass this string into gethostbyname(). This will then query a DNS server for the given root domain, if successful, you will be presented back with an IP, if not, the same input string will be returned. I have then passed this result into a filter which validates IP strings. If it's successful, it will then return the domain in the format given. Just make sure you are pointing to a DNS provider which will not resolve every DNS lookup...for example, my ISP in the UK automatically resolves every failed DNS lookup with a valid A record which in-turn resolves to web page saying "No Such Webpage". Google DNS works fine so use that if you can.
/**
 * Validate a URL, drop everything up to its first dot and confirm through a
 * DNS lookup that the remainder resolves.
 *
 * The host is taken to be the last "/"-separated segment containing at least
 * two dots; if no segment qualifies, the whole URL is used. Whatever precedes
 * the first dot of that string (normally "www" or a sub-domain) is removed
 * before the lookup.
 *
 * @param string $url Absolute URL.
 * @return string|false The shortened domain when gethostbyname() returns an
 *                      IP address for it, false otherwise.
 */
function validDom($url) {
    if (!filter_var($url, FILTER_VALIDATE_URL)) {
        return FALSE;
    }

    $candidate = $url;
    foreach (explode('/', $url) as $segment) {
        if (substr_count($segment, '.') >= 2) {
            $candidate = $segment; // the last qualifying segment wins
        }
    }

    $cut = strpos($candidate, '.') + 1;
    $cleanDomain = substr_replace($candidate, '', 0, $cut);

    // gethostbyname() returns its input unchanged when the lookup fails.
    $resolved = gethostbyname($cleanDomain);
    return filter_var($resolved, FILTER_VALIDATE_IP) !== FALSE ? $cleanDomain : false;
}
Test Domains
$domainArr = [
'https://www.facebook.com',
'https://www.care.org.uk',
'https://www.facebook.co.uk',
'https://www.google.com/dfsdfsdfsd/sdfsdf',
'https://sub.fsdfsdfsdfsdfsd.co.uk/dfsdfsdf',
'https://www.nhs.uk/dfsdfsdfsdfsd?fgfg=fgfg',
'javascript://comment%0Aalert(1)"hello',
];
foreach($domainArr as $k=>$v) {
var_dump(validDom($v));
echo '<br>';
}
Output:
string(12) "facebook.com"
string(11) "care.org.uk"
string(14) "facebook.co.uk"
string(10) "google.com"
bool(false)
string(6) "nhs.uk"
bool(false)
Edit:
This function will also get around the issue with malicious code bypassing FILTER_VALIDATE_URL due to javascript://comment%0Aalert(1)"hello' not resolving via DNS which ultimately ends in a fail.
The truth is that validating a url in PHP is a complex task.
You could use the built-in parse_url() and filter_var() functions, but as a number of user comments on PHP.net, and even the documentation, point out, they're not very reliable.
For one, they don't support internationalized domain names (URLs containing non-ASCII, e.g. Unicode characters).
Note that the function will only find ASCII URLs to be valid; internationalized domain names (containing non-ASCII characters) will fail.
For another, they pass a lot of false positives. The documentation states:
Beware a valid URL may not specify the HTTP protocol http:// so further validation may be required to determine the URL uses an expected protocol, e.g. ssh:// or mailto:.
They also don't have a list of valid name extensions. This means something like asdf://asdf.asdf gets passed by filter_var. I tried it, and it actually was.
filter_var could also be a potential XSS vulnerability, because it passes something like javascript://comment%0Aalert(1)"hello as valid.
Sorry to be the bearer of bad tidings, but that's the truth. I did spot a number of PHP validation libraries that handle URLs, but they all still build upon parse_url or filter_var. I'm also not confident that a regex could do the job.
However, (plug time:) I'm working on a PHP library that should be able to achieve what you want, and I hope to get it done in a couple of days. 😊
Here you are:
/**
 * Return the last three dot-separated labels of a URL's host (or the whole
 * host when it has fewer than three labels).
 *
 * @param string $url Absolute URL.
 * @return string|false The shortened host, or false if $url does not pass
 *                      FILTER_VALIDATE_URL.
 */
function filterUrl ($url) {
    if (!filter_var($url, FILTER_VALIDATE_URL)) {
        return false;
    }

    $labels = explode('.', parse_url($url, PHP_URL_HOST));
    return implode('.', array_slice($labels, -3));
}

How to use Locale::lookup with locale from $_SERVER['HTTP_ACCEPT_LANGUAGE']?

I create a function, which should return the best translation depending of user's locale.
The input is an array (from the BDD) of translation of the same text in many languages, the language is stored as the key like following:
$textes = array(2) {
["fr-fr"]=> string(24) "Editer l'item"
["en-en"]=> string(22) "Edit the item"
}
The function returns the item when $textes contains only one item; otherwise it should return the best translation. I tried to use intl's Locale functions, but they always return an empty string:
/**
 * Pick the translation that best fits the visitor's language.
 *
 * With a single translation available, that translation is returned.
 * With several, the code below only experiments with the intl Locale
 * helpers and returns nothing yet (the final return is commented out).
 *
 * @param array $textes Translations keyed by locale tag (e.g. "fr-fr").
 * @return mixed The only element of $textes when it has exactly one;
 *               otherwise no value is returned.
 */
private static function getMeilleureTraduction(array $textes) {
if (count($textes) === 1) {
return array_pop($textes);
}
// Raw header; the author observed "fr,en;q=0.8,fr-fr;q=0.5,en-us;q=0.3"
$a = $_SERVER['HTTP_ACCEPT_LANGUAGE'];
// Best locale according to intl; the author observed "fr"
$b = Locale::acceptFromHttp($_SERVER['HTTP_ACCEPT_LANGUAGE']);
// Lookup against the raw header; the author observed "" (no match)
$c = Locale::lookup(array_keys($textes), $a, true);
// Lookup against the negotiated locale; the author observed "" as well.
// NOTE(review): presumably "fr" cannot match the more specific key "fr-fr" - confirm against the Locale::lookup documentation.
$d = Locale::lookup(array_keys($textes), $b, true);
// Intended result once a best locale is known:
// return $textes[$bestLocale];
}
So the lookup function doesn't help me, because it is not able to make a decision. Am I using it wrong? Or maybe I do not understand the purpose of php-intl's functions and should code it myself?
Finally found a working function HERE.
The function:
<?php
/*
  Determine which language out of an available set the user prefers most.

  $available_languages   array of language-tag strings (must be lowercase)
                         that are available; the first one is the default
  $http_accept_language  an HTTP_ACCEPT_LANGUAGE string (read from
                         $_SERVER['HTTP_ACCEPT_LANGUAGE'] if left out)

  Returns the available language with the highest q-value. A tag that only
  matches on its primary subtag (e.g. "en-us" against "en") counts with 90%
  of its q-value. Returns the first available language when nothing matches,
  or null when $available_languages is empty.

  Changes from the previous version: q-values written without a fraction
  ("q=1", "q=0") and tags with numeric or multiple subtags ("es-419",
  "zh-hant-tw") are now parsed instead of being skipped, and a literal "|"
  is no longer accepted inside a subtag.
*/
function prefered_language ($available_languages,$http_accept_language="auto") {
    // if $http_accept_language was left out, read it from the HTTP-Header
    if ($http_accept_language === "auto") {
        $http_accept_language = isset($_SERVER['HTTP_ACCEPT_LANGUAGE']) ? $_SERVER['HTTP_ACCEPT_LANGUAGE'] : '';
    }

    // Accept-Language grammar:
    //   1#( language-range [ ";" "q" "=" qvalue ] )
    //   qvalue = ( "0" [ "." 0*3DIGIT ] ) | ( "1" [ "." 0*3("0") ] )
    // Every entry must start at the beginning or after a comma and end at a
    // comma or the end of the string; the lookahead leaves the comma in
    // place for the next entry.
    preg_match_all(
        '/(?:^|,)\s*([a-z]{1,8})((?:-[a-z0-9]{1,8})*)\s*' .
        '(?:;\s*q\s*=\s*(1(?:\.0{0,3})?|0(?:\.\d{0,3})?))?\s*(?=,|$)/i',
        (string) $http_accept_language, $hits, PREG_SET_ORDER);

    // default language (in case of no hits) is the first in the array
    $bestlang = isset($available_languages[0]) ? $available_languages[0] : null;
    $bestqval = 0;

    foreach ($hits as $arr) {
        // primary subtag, and the full tag including any further subtags
        $langprefix = strtolower($arr[1]);
        $language = $langprefix . strtolower($arr[2]);

        // "0" is a valid q-value, so test for presence rather than emptiness
        $qvalue = (isset($arr[3]) && $arr[3] !== '') ? floatval($arr[3]) : 1.0;

        // find q-maximal language
        if (in_array($language, $available_languages, true) && ($qvalue > $bestqval)) {
            $bestlang = $language;
            $bestqval = $qvalue;
        }
        // if no direct hit, try the prefix only but decrease q-value by 10% (as http_negotiate_language does)
        else if (in_array($langprefix, $available_languages, true) && (($qvalue*0.9) > $bestqval)) {
            $bestlang = $langprefix;
            $bestqval = $qvalue*0.9;
        }
    }
    return $bestlang;
}
?>

Why is require_once echoing entire file contents? [closed]

This question is unlikely to help any future visitors; it is only relevant to a small geographic area, a specific moment in time, or an extraordinarily narrow situation that is not generally applicable to the worldwide audience of the internet. For help making this question more broadly applicable, visit the help center.
Closed 10 years ago.
I have a class in a file "evalmath.php".
If I require it like this: require_once('evalmath.php'); the entire contents of that file is echoed out to the screen.
If I do it like this, require_once( 'evalmath.php' );, it doesn't.
HUH?
EDIT - SOURCE CODE OF EVALMATH.PHP
<?
/*
================================================================================
EvalMath - PHP Class to safely evaluate math expressions
Copyright (C) 2005 Miles Kaufmann <http://www.twmagic.com/>
================================================================================
NAME
EvalMath - safely evaluate math expressions
SYNOPSIS
<?
include('evalmath.class.php');
$m = new EvalMath;
// basic evaluation:
$result = $m->evaluate('2+2');
// supports: order of operation; parentheses; negation; built-in functions
$result = $m->evaluate('-8(5/2)^2*(1-sqrt(4))-8');
// create your own variables
$m->evaluate('a = e^(ln(pi))');
// or functions
$m->evaluate('f(x,y) = x^2 + y^2 - 2x*y + 1');
// and then use them
$result = $m->evaluate('3*f(42,a)');
?>
DESCRIPTION
Use the EvalMath class when you want to evaluate mathematical expressions
from untrusted sources. You can define your own variables and functions,
which are stored in the object. Try it, it's fun!
METHODS
$m->evaluate($expr)
Evaluates the expression and returns the result. If an error occurs,
prints a warning and returns false. If $expr is a function assignment,
returns true on success.
$m->e($expr)
A synonym for $m->evaluate().
$m->vars()
Returns an associative array of all user-defined variables and values.
$m->funcs()
Returns an array of all user-defined functions.
PARAMETERS
$m->suppress_errors
Set to true to turn off warnings when evaluating expressions
$m->last_error
If the last evaluation failed, contains a string describing the error.
(Useful when suppress_errors is on).
AUTHOR INFORMATION
Copyright 2005, Miles Kaufmann.
LICENSE
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1 Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * EvalMath - evaluates mathematical expressions supplied as strings.
 *
 * An expression is converted from infix to postfix notation by nfx() and the
 * resulting token list is evaluated by pfx(). Variables and user-defined
 * functions are stored on the instance and persist between evaluate() calls.
 * Every failure is reported through trigger(), which records the message in
 * $last_error and makes the public methods return false.
 */
class EvalMath {
    // when true, trigger() records the error without raising a PHP warning
    public $suppress_errors = false;
    // message of the last failed evaluation, or null
    public $last_error = null;
    public $v = array('e'=>2.71,'pi'=>3.14); // variables (and constants)
    public $f = array(); // user-defined functions
    public $vb = array('e', 'pi'); // constants
    public $fb = array( // built-in functions
        'sin','sinh','arcsin','asin','arcsinh','asinh',
        'cos','cosh','arccos','acos','arccosh','acosh',
        'tan','tanh','arctan','atan','arctanh','atanh',
        'sqrt','abs','ln','log');

    /**
     * Replace the rough constant values with accurate ones.
     *
     * Declared as __construct(): a method named after the class is no longer
     * treated as a constructor on PHP 8, which would leave pi/e at 3.14/2.71.
     */
    function __construct() {
        // make the variables a little more accurate
        $this->v['pi'] = pi();
        $this->v['e'] = exp(1);
    }

    /** A synonym for evaluate(). */
    function e($expr) {
        return $this->evaluate($expr);
    }

    /**
     * Evaluate an expression, a variable assignment ("a = ...") or a function
     * definition ("f(x,y) = ...").
     *
     * @param string $expr the expression text; one trailing ';' is ignored
     * @return mixed the numeric result, true for a successful function
     *               definition, or false on error (see $last_error)
     */
    function evaluate($expr) {
        $this->last_error = null;
        $expr = trim($expr);
        if (substr($expr, -1, 1) == ';') $expr = substr($expr, 0, strlen($expr)-1); // strip semicolons at the end
        //===============
        // is it a variable assignment?
        if (preg_match('/^\s*([a-z]\w*)\s*=\s*(.+)$/', $expr, $matches)) {
            if (in_array($matches[1], $this->vb)) { // make sure we're not assigning to a constant
                return $this->trigger("cannot assign to constant '$matches[1]'");
            }
            if (($tmp = $this->pfx($this->nfx($matches[2]))) === false) return false; // get the result and make sure it's good
            $this->v[$matches[1]] = $tmp; // if so, stick it in the variable array
            return $this->v[$matches[1]]; // and return the resulting value
        //===============
        // is it a function assignment?
        } elseif (preg_match('/^\s*([a-z]\w*)\s*\(\s*([a-z]\w*(?:\s*,\s*[a-z]\w*)*)\s*\)\s*=\s*(.+)$/', $expr, $matches)) {
            $fnn = $matches[1]; // get the function name
            if (in_array($matches[1], $this->fb)) { // make sure it isn't built in
                return $this->trigger("cannot redefine built-in function '$matches[1]()'");
            }
            $args = explode(",", preg_replace("/\s+/", "", $matches[2])); // get the arguments
            if (($stack = $this->nfx($matches[3])) === false) return false; // see if it can be converted to postfix
            for ($i = 0; $i<count($stack); $i++) { // freeze the state of the non-argument variables
                $token = $stack[$i];
                if (preg_match('/^[a-z]\w*$/', $token) and !in_array($token, $args)) {
                    if (array_key_exists($token, $this->v)) {
                        $stack[$i] = $this->v[$token];
                    } else {
                        return $this->trigger("undefined variable '$token' in function definition");
                    }
                }
            }
            $this->f[$fnn] = array('args'=>$args, 'func'=>$stack);
            return true;
        //===============
        } else {
            return $this->pfx($this->nfx($expr)); // straight up evaluation, woo
        }
    }

    /** Return all variables except the built-in constants pi and e. */
    function vars() {
        $output = $this->v;
        unset($output['pi']);
        unset($output['e']);
        return $output;
    }

    /** Return the signatures ("name(arg,arg)") of all user-defined functions. */
    function funcs() {
        $output = array();
        foreach ($this->f as $fnn=>$dat)
            $output[] = $fnn . '(' . implode(',', $dat['args']) . ')';
        return $output;
    }

    //===================== HERE BE INTERNAL METHODS ====================\\

    /**
     * Convert infix to postfix notation.
     *
     * Working stack layout for a function call: the token "name(", then the
     * argument count collected so far, then "(".
     *
     * @param string $expr infix expression
     * @return array|false list of postfix tokens, or false on a syntax error
     */
    function nfx($expr) {
        $index = 0;
        $stack = new EvalMathStack;
        $output = array(); // postfix form of expression, to be passed to pfx()
        $expr = trim(strtolower($expr));

        $ops   = array('+', '-', '*', '/', '^', '_');
        $ops_r = array('+'=>0,'-'=>0,'*'=>0,'/'=>0,'^'=>1); // right-associative operator?
        $ops_p = array('+'=>0,'-'=>0,'*'=>1,'/'=>1,'_'=>1,'^'=>2); // operator precedence

        $expecting_op = false; // we use this in syntax-checking the expression
                               // and determining when a - is a negation

        if (preg_match("/[^\w\s+*^\/()\.,-]/", $expr, $matches)) { // make sure the characters are all good
            return $this->trigger("illegal character '{$matches[0]}'");
        }

        while(1) { // 1 Infinite Loop ;)
            $op = substr($expr, $index, 1); // get the first character at the current index
            // find out if we're currently at the beginning of a number/variable/function/parenthesis/operand
            $ex = preg_match('/^([a-z]\w*\(?|\d+(?:\.\d*)?|\.\d+|\()/', substr($expr, $index), $match);
            //===============
            if ($op == '-' and !$expecting_op) { // is it a negation instead of a minus?
                $stack->push('_'); // put a negation on the stack
                $index++;
            } elseif ($op == '_') { // we have to explicitly deny this, because it's legal on the stack
                return $this->trigger("illegal character '_'"); // but not in the input expression
            //===============
            } elseif ((in_array($op, $ops) or $ex) and $expecting_op) { // are we putting an operator on the stack?
                if ($ex) { // are we expecting an operator but have a number/variable/function/opening parethesis?
                    $op = '*'; $index--; // it's an implicit multiplication
                }
                // heart of the algorithm:
                while($stack->count > 0 and ($o2 = $stack->last()) and in_array($o2, $ops) and ($ops_r[$op] ? $ops_p[$op] < $ops_p[$o2] : $ops_p[$op] <= $ops_p[$o2])) {
                    $output[] = $stack->pop(); // pop stuff off the stack into the output
                }
                // many thanks: http://en.wikipedia.org/wiki/Reverse_Polish_notation#The_algorithm_in_detail
                $stack->push($op); // finally put OUR operator onto the stack
                $index++;
                $expecting_op = false;
            //===============
            } elseif ($op == ')' and $expecting_op) { // ready to close a parenthesis?
                while (($o2 = $stack->pop()) != '(') { // pop off the stack back to the last (
                    if (is_null($o2)) return $this->trigger("unexpected ')'");
                    else $output[] = $o2;
                }
                // only look two entries down when the stack really is that deep
                if ($stack->count >= 2 and preg_match("/^([a-z]\w*)\($/", $stack->last(2), $matches)) { // did we just close a function?
                    $fnn = $matches[1]; // get the function name
                    $arg_count = $stack->pop(); // see how many arguments there were (cleverly stored on the stack, thank you)
                    $output[] = $stack->pop(); // pop the function and push onto the output
                    if (in_array($fnn, $this->fb)) { // check the argument count
                        if($arg_count > 1)
                            return $this->trigger("too many arguments ($arg_count given, 1 expected)");
                    } elseif (array_key_exists($fnn, $this->f)) {
                        if ($arg_count != count($this->f[$fnn]['args']))
                            return $this->trigger("wrong number of arguments ($arg_count given, " . count($this->f[$fnn]['args']) . " expected)");
                    } else { // did we somehow push a non-function on the stack? this should never happen
                        return $this->trigger("internal error");
                    }
                }
                $index++;
            //===============
            } elseif ($op == ',' and $expecting_op) { // did we just finish a function argument?
                while (($o2 = $stack->pop()) != '(') {
                    if (is_null($o2)) return $this->trigger("unexpected ','"); // oops, never had a (
                    else $output[] = $o2; // pop the argument expression stuff and push onto the output
                }
                // make sure there was a function
                if ($stack->count < 2 or !preg_match("/^([a-z]\w*)\($/", $stack->last(2), $matches))
                    return $this->trigger("unexpected ','");
                $stack->push($stack->pop()+1); // increment the argument count
                $stack->push('('); // put the ( back on, we'll need to pop back to it again
                $index++;
                $expecting_op = false;
            //===============
            } elseif ($op == '(' and !$expecting_op) {
                $stack->push('('); // that was easy
                $index++;
            //===============
            } elseif ($ex and !$expecting_op) { // do we now have a function/variable/number?
                $expecting_op = true;
                $val = $match[1];
                if (preg_match("/^([a-z]\w*)\($/", $val, $matches)) { // may be func, or variable w/ implicit multiplication against parentheses...
                    if (in_array($matches[1], $this->fb) or array_key_exists($matches[1], $this->f)) { // it's a func
                        $stack->push($val);
                        $stack->push(1);
                        $stack->push('(');
                        $expecting_op = false;
                    } else { // it's a var w/ implicit multiplication
                        $val = $matches[1];
                        $output[] = $val;
                    }
                } else { // it's a plain old var or num
                    $output[] = $val;
                }
                $index += strlen($val);
            //===============
            } elseif ($op == ')') { // miscellaneous error checking
                return $this->trigger("unexpected ')'");
            } elseif (in_array($op, $ops) and !$expecting_op) {
                return $this->trigger("unexpected operator '$op'");
            } else { // I don't even want to know what you did to get here
                return $this->trigger("an unexpected error occured");
            }
            if ($index == strlen($expr)) {
                if (in_array($op, $ops)) { // did we end with an operator? bad.
                    return $this->trigger("operator '$op' lacks operand");
                } else {
                    break;
                }
            }
            while (substr($expr, $index, 1) == ' ') { // step the index past whitespace (pretty much turns whitespace
                $index++;                             // into implicit multiplication if no operator is there)
            }
        }
        while (!is_null($op = $stack->pop())) { // pop everything off the stack and push onto output
            if ($op == '(') return $this->trigger("expecting ')'"); // if there are (s on the stack, ()s were unbalanced
            $output[] = $op;
        }
        return $output;
    }

    /**
     * Evaluate postfix notation.
     *
     * @param array|false $tokens postfix tokens as produced by nfx()
     * @param array       $vars   argument values (name => value) when evaluating
     *                            the body of a user-defined function
     * @return mixed the result, or false on error
     */
    function pfx($tokens, $vars = array()) {
        if ($tokens == false) return false;

        $stack = new EvalMathStack;

        foreach ($tokens as $token) { // nice and easy
            // if the token is a binary operator, pop two values off the stack, do the operation, and push the result back on
            // (strict comparison: a frozen numeric 0 must never be mistaken for an operator)
            if (in_array($token, array('+', '-', '*', '/', '^'), true)) {
                if (is_null($op2 = $stack->pop())) return $this->trigger("internal error");
                if (is_null($op1 = $stack->pop())) return $this->trigger("internal error");
                switch ($token) {
                    case '+':
                        $stack->push($op1+$op2); break;
                    case '-':
                        $stack->push($op1-$op2); break;
                    case '*':
                        $stack->push($op1*$op2); break;
                    case '/':
                        if ($op2 == 0) return $this->trigger("division by zero");
                        $stack->push($op1/$op2); break;
                    case '^':
                        $stack->push(pow($op1, $op2)); break;
                }
            // if the token is a unary operator, pop one value off the stack, do the operation, and push it back on
            } elseif ($token === '_') {
                if (is_null($op1 = $stack->pop())) return $this->trigger("internal error");
                $stack->push(-1*$op1);
            // if the token is a function, pop arguments off the stack, hand them to the function, and push the result back on
            } elseif (preg_match("/^([a-z]\w*)\($/", $token, $matches)) { // it's a function!
                $fnn = $matches[1];
                if (in_array($fnn, $this->fb)) { // built-in function:
                    if (is_null($op1 = $stack->pop())) return $this->trigger("internal error");
                    $fnn = preg_replace("/^arc/", "a", $fnn); // for the 'arc' trig synonyms
                    if ($fnn == 'ln') $fnn = 'log';
                    // $fnn comes from the $fb whitelist, so a variable function call
                    // is sufficient; no eval() needed
                    $stack->push($fnn($op1));
                } elseif (array_key_exists($fnn, $this->f)) { // user function
                    // get args
                    $args = array();
                    for ($i = count($this->f[$fnn]['args'])-1; $i >= 0; $i--) {
                        if (is_null($args[$this->f[$fnn]['args'][$i]] = $stack->pop())) return $this->trigger("internal error");
                    }
                    $result = $this->pfx($this->f[$fnn]['func'], $args); // yay... recursion!!!!
                    // the nested call has already recorded its error; do not keep
                    // calculating with "false" as if it were a number
                    if ($result === false) return false;
                    $stack->push($result);
                }
            // if the token is a number or variable, push it on the stack
            } else {
                if (is_numeric($token)) {
                    $stack->push($token);
                // function arguments shadow same-named variables and constants
                } elseif (array_key_exists($token, $vars)) {
                    $stack->push($vars[$token]);
                } elseif (array_key_exists($token, $this->v)) {
                    $stack->push($this->v[$token]);
                } else {
                    return $this->trigger("undefined variable '$token'");
                }
            }
        }
        // when we're out of tokens, the stack should have a single element, the final result
        if ($stack->count != 1) return $this->trigger("internal error");
        return $stack->pop();
    }

    /**
     * Trigger an error, but nicely, if need be.
     *
     * @param string $msg error description, stored in $last_error
     * @return false always, so callers can "return $this->trigger(...)"
     */
    function trigger($msg) {
        $this->last_error = $msg;
        if (!$this->suppress_errors) trigger_error($msg, E_USER_WARNING);
        return false;
    }
}
// for internal use
/**
 * Minimal LIFO stack used internally by EvalMath.
 *
 * $count is the number of live elements; $stack[0 .. $count-1] holds them,
 * with the top of the stack at index $count-1.
 */
class EvalMathStack {
    public $stack = array();
    public $count = 0;

    /** Put $val on top of the stack. */
    function push($val) {
        $this->stack[$this->count] = $val;
        $this->count++;
    }

    /** Remove and return the top element, or null when the stack is empty. */
    function pop() {
        if ($this->count > 0) {
            $this->count--;
            $val = $this->stack[$this->count];
            unset($this->stack[$this->count]); // do not keep stale values around
            return $val;
        }
        return null;
    }

    /**
     * Peek at the $n-th element from the top (1 = top) without removing it.
     * Returns null when the stack holds fewer than $n elements, instead of
     * reading an undefined (negative) offset.
     */
    function last($n=1) {
        $pos = $this->count - $n;
        if ($n < 1 || $pos < 0) {
            return null;
        }
        return $this->stack[$pos];
    }
}
The class file starts with a short tag [<?] - if short tags are disabled [which should be default behavior], whole file is considered plain text and thus not parsed.
Just replace it with a full tag - <?php.
Try opening the script with <?php instead of <?; the server's configuration may simply not accept short open tags.
Leading off your confirmation that short tags are disabled, I am guessing you are debugging your script like this?
// dumps everything!
require_once('evalmath.php');
// dumps nothing??
require_once( 'evalmath.php' );
If your testing looks something like that, then the only reason the second form doesn't dump your file is because it doesn't actually include it a second time. require_once.

How to evaluate formula passed as string in PHP?

Just trying to figure out the proper and safer way to execute mathematical operation passed as string. In my scenario it is values fetched from image EXIF data.
After a little research I found two ways of doing it.
first, using eval:
/**
 * Evaluate a plain arithmetic expression given as a string, using eval().
 *
 * SECURITY: eval() runs its argument as PHP code, so the input is first
 * checked against a whitelist of digits, whitespace, ".", "(", ")" and the
 * operators + - * / %. Anything else is rejected before eval() is reached.
 * NOTE(review): syntactically invalid or undefined arithmetic (e.g. "1+" or
 * "1/0") is still passed to eval() and fails there.
 *
 * @param string $str arithmetic expression, e.g. "1/250"
 * @return int|float the computed value
 * @throws InvalidArgumentException when $str contains anything but plain arithmetic
 */
function calculator1($str){
    $expr = is_scalar($str) ? (string) $str : '';
    if (!preg_match('/^[\d\s.+\-*\/%()]+$/', $expr)) {
        throw new InvalidArgumentException('calculator1() accepts plain arithmetic expressions only');
    }
    eval("\$str = $expr;");
    return $str;
}
second, using create_function:
/**
 * Evaluate a plain arithmetic expression given as a string, using a
 * dynamically created function.
 *
 * SECURITY: the expression ends up being compiled as PHP code, so the input
 * is first checked against a whitelist of digits, whitespace, ".", "(", ")"
 * and the operators + - * / %.
 *
 * create_function() was removed in PHP 8.0; when it is not available an
 * equivalent closure is built instead.
 *
 * @param string $str arithmetic expression, e.g. "28/10"
 * @return int|float the computed value
 * @throws InvalidArgumentException when $str contains anything but plain arithmetic
 */
function calculator2($str){
    $expr = is_scalar($str) ? (string) $str : '';
    if (!preg_match('/^[\d\s.+\-*\/%()]+$/', $expr)) {
        throw new InvalidArgumentException('calculator2() accepts plain arithmetic expressions only');
    }
    if (function_exists('create_function')) {
        $fn = create_function("", "return ({$expr});" );
    } else {
        $fn = eval("return function () { return ({$expr}); };");
    }
    return $fn();
}
Both examples require string cleanup to avoid malicious code execution. Is there any other or shorter way of doing so?
This might help.
http://www.phpclasses.org/browse/package/2695.html
Annoying login required to download. I copied and pasted it here for you.
This class can be used to safely evaluate mathematical expressions.
The class can take an expression in a text string and evaluate it by replacing values of variables and calculating the results of mathematical functions and operations.
It supports implicit multiplication, multivariable functions and nested functions.
It can be used to evaluate expressions from untrusted sources. It provides robust error checking and only evaluates a limited set of functions.
It could be used to generate graphs from expressions of formulae.
/*
================================================================================
EvalMath - PHP Class to safely evaluate math expressions
Copyright (C) 2005 Miles Kaufmann <http://www.twmagic.com/>
================================================================================
NAME
EvalMath - safely evaluate math expressions
SYNOPSIS
<?
include('evalmath.class.php');
$m = new EvalMath;
// basic evaluation:
$result = $m->evaluate('2+2');
// supports: order of operation; parentheses; negation; built-in functions
$result = $m->evaluate('-8(5/2)^2*(1-sqrt(4))-8');
// create your own variables
$m->evaluate('a = e^(ln(pi))');
// or functions
$m->evaluate('f(x,y) = x^2 + y^2 - 2x*y + 1');
// and then use them
$result = $m->evaluate('3*f(42,a)');
?>
DESCRIPTION
Use the EvalMath class when you want to evaluate mathematical expressions
from untrusted sources. You can define your own variables and functions,
which are stored in the object. Try it, it's fun!
METHODS
$m->evaluate($expr)
Evaluates the expression and returns the result. If an error occurs,
prints a warning and returns false. If $expr is a function assignment,
returns true on success.
$m->e($expr)
A synonym for $m->evaluate().
$m->vars()
Returns an associative array of all user-defined variables and values.
$m->funcs()
Returns an array of all user-defined functions.
PARAMETERS
$m->suppress_errors
Set to true to turn off warnings when evaluating expressions
$m->last_error
If the last evaluation failed, contains a string describing the error.
(Useful when suppress_errors is on).
AUTHOR INFORMATION
Copyright 2005, Miles Kaufmann.
LICENSE
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1 Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * EvalMath - evaluates mathematical expressions supplied as strings.
 *
 * An expression is converted from infix to postfix notation by nfx() and the
 * resulting token list is evaluated by pfx(). Variables and user-defined
 * functions are stored on the instance and persist between evaluate() calls.
 * Every failure is reported through trigger(), which records the message in
 * $last_error and makes the public methods return false.
 */
class EvalMath {
    // when true, trigger() records the error without raising a PHP warning
    public $suppress_errors = false;
    // message of the last failed evaluation, or null
    public $last_error = null;
    public $v = array('e'=>2.71,'pi'=>3.14); // variables (and constants)
    public $f = array(); // user-defined functions
    public $vb = array('e', 'pi'); // constants
    public $fb = array( // built-in functions
        'sin','sinh','arcsin','asin','arcsinh','asinh',
        'cos','cosh','arccos','acos','arccosh','acosh',
        'tan','tanh','arctan','atan','arctanh','atanh',
        'sqrt','abs','ln','log');

    /**
     * Replace the rough constant values with accurate ones.
     *
     * Declared as __construct(): a method named after the class is no longer
     * treated as a constructor on PHP 8, which would leave pi/e at 3.14/2.71.
     */
    function __construct() {
        // make the variables a little more accurate
        $this->v['pi'] = pi();
        $this->v['e'] = exp(1);
    }

    /** A synonym for evaluate(). */
    function e($expr) {
        return $this->evaluate($expr);
    }

    /**
     * Evaluate an expression, a variable assignment ("a = ...") or a function
     * definition ("f(x,y) = ...").
     *
     * @param string $expr the expression text; one trailing ';' is ignored
     * @return mixed the numeric result, true for a successful function
     *               definition, or false on error (see $last_error)
     */
    function evaluate($expr) {
        $this->last_error = null;
        $expr = trim($expr);
        if (substr($expr, -1, 1) == ';') $expr = substr($expr, 0, strlen($expr)-1); // strip semicolons at the end
        //===============
        // is it a variable assignment?
        if (preg_match('/^\s*([a-z]\w*)\s*=\s*(.+)$/', $expr, $matches)) {
            if (in_array($matches[1], $this->vb)) { // make sure we're not assigning to a constant
                return $this->trigger("cannot assign to constant '$matches[1]'");
            }
            if (($tmp = $this->pfx($this->nfx($matches[2]))) === false) return false; // get the result and make sure it's good
            $this->v[$matches[1]] = $tmp; // if so, stick it in the variable array
            return $this->v[$matches[1]]; // and return the resulting value
        //===============
        // is it a function assignment?
        } elseif (preg_match('/^\s*([a-z]\w*)\s*\(\s*([a-z]\w*(?:\s*,\s*[a-z]\w*)*)\s*\)\s*=\s*(.+)$/', $expr, $matches)) {
            $fnn = $matches[1]; // get the function name
            if (in_array($matches[1], $this->fb)) { // make sure it isn't built in
                return $this->trigger("cannot redefine built-in function '$matches[1]()'");
            }
            $args = explode(",", preg_replace("/\s+/", "", $matches[2])); // get the arguments
            if (($stack = $this->nfx($matches[3])) === false) return false; // see if it can be converted to postfix
            for ($i = 0; $i<count($stack); $i++) { // freeze the state of the non-argument variables
                $token = $stack[$i];
                if (preg_match('/^[a-z]\w*$/', $token) and !in_array($token, $args)) {
                    if (array_key_exists($token, $this->v)) {
                        $stack[$i] = $this->v[$token];
                    } else {
                        return $this->trigger("undefined variable '$token' in function definition");
                    }
                }
            }
            $this->f[$fnn] = array('args'=>$args, 'func'=>$stack);
            return true;
        //===============
        } else {
            return $this->pfx($this->nfx($expr)); // straight up evaluation, woo
        }
    }

    /** Return all variables except the built-in constants pi and e. */
    function vars() {
        $output = $this->v;
        unset($output['pi']);
        unset($output['e']);
        return $output;
    }

    /** Return the signatures ("name(arg,arg)") of all user-defined functions. */
    function funcs() {
        $output = array();
        foreach ($this->f as $fnn=>$dat)
            $output[] = $fnn . '(' . implode(',', $dat['args']) . ')';
        return $output;
    }

    //===================== HERE BE INTERNAL METHODS ====================\\

    /**
     * Convert infix to postfix notation.
     *
     * Working stack layout for a function call: the token "name(", then the
     * argument count collected so far, then "(".
     *
     * @param string $expr infix expression
     * @return array|false list of postfix tokens, or false on a syntax error
     */
    function nfx($expr) {
        $index = 0;
        $stack = new EvalMathStack;
        $output = array(); // postfix form of expression, to be passed to pfx()
        $expr = trim(strtolower($expr));

        $ops   = array('+', '-', '*', '/', '^', '_');
        $ops_r = array('+'=>0,'-'=>0,'*'=>0,'/'=>0,'^'=>1); // right-associative operator?
        $ops_p = array('+'=>0,'-'=>0,'*'=>1,'/'=>1,'_'=>1,'^'=>2); // operator precedence

        $expecting_op = false; // we use this in syntax-checking the expression
                               // and determining when a - is a negation

        if (preg_match("/[^\w\s+*^\/()\.,-]/", $expr, $matches)) { // make sure the characters are all good
            return $this->trigger("illegal character '{$matches[0]}'");
        }

        while(1) { // 1 Infinite Loop ;)
            $op = substr($expr, $index, 1); // get the first character at the current index
            // find out if we're currently at the beginning of a number/variable/function/parenthesis/operand
            $ex = preg_match('/^([a-z]\w*\(?|\d+(?:\.\d*)?|\.\d+|\()/', substr($expr, $index), $match);
            //===============
            if ($op == '-' and !$expecting_op) { // is it a negation instead of a minus?
                $stack->push('_'); // put a negation on the stack
                $index++;
            } elseif ($op == '_') { // we have to explicitly deny this, because it's legal on the stack
                return $this->trigger("illegal character '_'"); // but not in the input expression
            //===============
            } elseif ((in_array($op, $ops) or $ex) and $expecting_op) { // are we putting an operator on the stack?
                if ($ex) { // are we expecting an operator but have a number/variable/function/opening parethesis?
                    $op = '*'; $index--; // it's an implicit multiplication
                }
                // heart of the algorithm:
                while($stack->count > 0 and ($o2 = $stack->last()) and in_array($o2, $ops) and ($ops_r[$op] ? $ops_p[$op] < $ops_p[$o2] : $ops_p[$op] <= $ops_p[$o2])) {
                    $output[] = $stack->pop(); // pop stuff off the stack into the output
                }
                // many thanks: http://en.wikipedia.org/wiki/Reverse_Polish_notation#The_algorithm_in_detail
                $stack->push($op); // finally put OUR operator onto the stack
                $index++;
                $expecting_op = false;
            //===============
            } elseif ($op == ')' and $expecting_op) { // ready to close a parenthesis?
                while (($o2 = $stack->pop()) != '(') { // pop off the stack back to the last (
                    if (is_null($o2)) return $this->trigger("unexpected ')'");
                    else $output[] = $o2;
                }
                // only look two entries down when the stack really is that deep
                if ($stack->count >= 2 and preg_match("/^([a-z]\w*)\($/", $stack->last(2), $matches)) { // did we just close a function?
                    $fnn = $matches[1]; // get the function name
                    $arg_count = $stack->pop(); // see how many arguments there were (cleverly stored on the stack, thank you)
                    $output[] = $stack->pop(); // pop the function and push onto the output
                    if (in_array($fnn, $this->fb)) { // check the argument count
                        if($arg_count > 1)
                            return $this->trigger("too many arguments ($arg_count given, 1 expected)");
                    } elseif (array_key_exists($fnn, $this->f)) {
                        if ($arg_count != count($this->f[$fnn]['args']))
                            return $this->trigger("wrong number of arguments ($arg_count given, " . count($this->f[$fnn]['args']) . " expected)");
                    } else { // did we somehow push a non-function on the stack? this should never happen
                        return $this->trigger("internal error");
                    }
                }
                $index++;
            //===============
            } elseif ($op == ',' and $expecting_op) { // did we just finish a function argument?
                while (($o2 = $stack->pop()) != '(') {
                    if (is_null($o2)) return $this->trigger("unexpected ','"); // oops, never had a (
                    else $output[] = $o2; // pop the argument expression stuff and push onto the output
                }
                // make sure there was a function
                if ($stack->count < 2 or !preg_match("/^([a-z]\w*)\($/", $stack->last(2), $matches))
                    return $this->trigger("unexpected ','");
                $stack->push($stack->pop()+1); // increment the argument count
                $stack->push('('); // put the ( back on, we'll need to pop back to it again
                $index++;
                $expecting_op = false;
            //===============
            } elseif ($op == '(' and !$expecting_op) {
                $stack->push('('); // that was easy
                $index++;
            //===============
            } elseif ($ex and !$expecting_op) { // do we now have a function/variable/number?
                $expecting_op = true;
                $val = $match[1];
                if (preg_match("/^([a-z]\w*)\($/", $val, $matches)) { // may be func, or variable w/ implicit multiplication against parentheses...
                    if (in_array($matches[1], $this->fb) or array_key_exists($matches[1], $this->f)) { // it's a func
                        $stack->push($val);
                        $stack->push(1);
                        $stack->push('(');
                        $expecting_op = false;
                    } else { // it's a var w/ implicit multiplication
                        $val = $matches[1];
                        $output[] = $val;
                    }
                } else { // it's a plain old var or num
                    $output[] = $val;
                }
                $index += strlen($val);
            //===============
            } elseif ($op == ')') { // miscellaneous error checking
                return $this->trigger("unexpected ')'");
            } elseif (in_array($op, $ops) and !$expecting_op) {
                return $this->trigger("unexpected operator '$op'");
            } else { // I don't even want to know what you did to get here
                return $this->trigger("an unexpected error occured");
            }
            if ($index == strlen($expr)) {
                if (in_array($op, $ops)) { // did we end with an operator? bad.
                    return $this->trigger("operator '$op' lacks operand");
                } else {
                    break;
                }
            }
            while (substr($expr, $index, 1) == ' ') { // step the index past whitespace (pretty much turns whitespace
                $index++;                             // into implicit multiplication if no operator is there)
            }
        }
        while (!is_null($op = $stack->pop())) { // pop everything off the stack and push onto output
            if ($op == '(') return $this->trigger("expecting ')'"); // if there are (s on the stack, ()s were unbalanced
            $output[] = $op;
        }
        return $output;
    }

    /**
     * Evaluate postfix notation.
     *
     * @param array|false $tokens postfix tokens as produced by nfx()
     * @param array       $vars   argument values (name => value) when evaluating
     *                            the body of a user-defined function
     * @return mixed the result, or false on error
     */
    function pfx($tokens, $vars = array()) {
        if ($tokens == false) return false;

        $stack = new EvalMathStack;

        foreach ($tokens as $token) { // nice and easy
            // if the token is a binary operator, pop two values off the stack, do the operation, and push the result back on
            // (strict comparison: a frozen numeric 0 must never be mistaken for an operator)
            if (in_array($token, array('+', '-', '*', '/', '^'), true)) {
                if (is_null($op2 = $stack->pop())) return $this->trigger("internal error");
                if (is_null($op1 = $stack->pop())) return $this->trigger("internal error");
                switch ($token) {
                    case '+':
                        $stack->push($op1+$op2); break;
                    case '-':
                        $stack->push($op1-$op2); break;
                    case '*':
                        $stack->push($op1*$op2); break;
                    case '/':
                        if ($op2 == 0) return $this->trigger("division by zero");
                        $stack->push($op1/$op2); break;
                    case '^':
                        $stack->push(pow($op1, $op2)); break;
                }
            // if the token is a unary operator, pop one value off the stack, do the operation, and push it back on
            } elseif ($token === '_') {
                if (is_null($op1 = $stack->pop())) return $this->trigger("internal error");
                $stack->push(-1*$op1);
            // if the token is a function, pop arguments off the stack, hand them to the function, and push the result back on
            } elseif (preg_match("/^([a-z]\w*)\($/", $token, $matches)) { // it's a function!
                $fnn = $matches[1];
                if (in_array($fnn, $this->fb)) { // built-in function:
                    if (is_null($op1 = $stack->pop())) return $this->trigger("internal error");
                    $fnn = preg_replace("/^arc/", "a", $fnn); // for the 'arc' trig synonyms
                    if ($fnn == 'ln') $fnn = 'log';
                    // $fnn comes from the $fb whitelist, so a variable function call
                    // is sufficient; no eval() needed
                    $stack->push($fnn($op1));
                } elseif (array_key_exists($fnn, $this->f)) { // user function
                    // get args
                    $args = array();
                    for ($i = count($this->f[$fnn]['args'])-1; $i >= 0; $i--) {
                        if (is_null($args[$this->f[$fnn]['args'][$i]] = $stack->pop())) return $this->trigger("internal error");
                    }
                    $result = $this->pfx($this->f[$fnn]['func'], $args); // yay... recursion!!!!
                    // the nested call has already recorded its error; do not keep
                    // calculating with "false" as if it were a number
                    if ($result === false) return false;
                    $stack->push($result);
                }
            // if the token is a number or variable, push it on the stack
            } else {
                if (is_numeric($token)) {
                    $stack->push($token);
                // function arguments shadow same-named variables and constants
                } elseif (array_key_exists($token, $vars)) {
                    $stack->push($vars[$token]);
                } elseif (array_key_exists($token, $this->v)) {
                    $stack->push($this->v[$token]);
                } else {
                    return $this->trigger("undefined variable '$token'");
                }
            }
        }
        // when we're out of tokens, the stack should have a single element, the final result
        if ($stack->count != 1) return $this->trigger("internal error");
        return $stack->pop();
    }

    /**
     * Trigger an error, but nicely, if need be.
     *
     * @param string $msg error description, stored in $last_error
     * @return false always, so callers can "return $this->trigger(...)"
     */
    function trigger($msg) {
        $this->last_error = $msg;
        if (!$this->suppress_errors) trigger_error($msg, E_USER_WARNING);
        return false;
    }
}
// for internal use
/**
 * Minimal LIFO stack used internally by EvalMath.
 *
 * $count is the number of live elements; $stack[0 .. $count-1] holds them,
 * with the top of the stack at index $count-1.
 */
class EvalMathStack {
    public $stack = array();
    public $count = 0;

    /** Put $val on top of the stack. */
    function push($val) {
        $this->stack[$this->count] = $val;
        $this->count++;
    }

    /** Remove and return the top element, or null when the stack is empty. */
    function pop() {
        if ($this->count > 0) {
            $this->count--;
            $val = $this->stack[$this->count];
            unset($this->stack[$this->count]); // do not keep stale values around
            return $val;
        }
        return null;
    }

    /**
     * Peek at the $n-th element from the top (1 = top) without removing it.
     * Returns null when the stack holds fewer than $n elements, instead of
     * reading an undefined (negative) offset.
     */
    function last($n=1) {
        $pos = $this->count - $n;
        if ($n < 1 || $pos < 0) {
            return null;
        }
        return $this->stack[$pos];
    }
}
EDIT: Jitters wanted the version that supports reverse polish notation. Reminds me of my college days when I had an HP calculator :)
<?php
/* This Class can be useful for writting RPN macros or FORTH like parsers
#Author: Arturo Gonzalez-Mata Santana (Spain)
arturogmata#gmail.com
#copyright 2007: www.phpsqlasp.com
It is part of a project to recover "macros" from some old aplications
This code is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 3
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
/**
 * Small reverse-polish-notation (RPN) stack machine.
 *
 * The stack is kept in $data with the top of the stack at index 0 and the
 * element below it at index 1. parse_line() tokenises a space-separated
 * program, pushes numeric tokens and hands every other token to parse().
 */
class RPNstack
{
    // Stack contents; index 0 is the top of the stack.
    var $data=array();
    // Not read or written by any method of this class.
    var $compare=0;

    /** Remove and return the top element (null when the stack is empty). */
    function pop() {return array_shift ($this->data);}

    /** Put $x on top of the stack. */
    function push($x) {array_unshift($this->data, $x);}

    /** Number of elements currently on the stack. */
    function count() {return count($this->data);}

    /** Top element of the stack (index 0), without removing it. */
    function first() {return $this->data[0];}

    /** Last array element, i.e. the bottom of the stack (false when empty). */
    function top() {return end($this->data);}

    /** Exchange the two topmost elements. */
    function swap() {
        $t = $this->data[1];
        $this->data[1] = $this->data[0];
        $this->data[0] = $t;
    }

    /** Push a copy of the top element onto the stack. */
    function dup() {
        array_unshift($this->data, $this->data[0]);
    }

    /** Print the raw stack array, for debugging. */
    function dump(){
        print_r($this->data);
    }

    /**
     * Push 1 or 0 on the stack depending on a comparison outcome.
     *
     * @param bool $outcome result of the comparison
     */
    function push_flag($outcome) {
        $this->push($outcome ? 1 : 0);
    }

    /**
     * Execute one non-numeric token against the stack.
     *
     * Arithmetic operators pop two operands and push the result, which is
     * also returned. Stack and comparison operators return null. Comparison
     * operators test the top element against the one below it, leave both
     * in place and push 1 or 0 on top (NOTE(review): this looks like the
     * HP-calculator "X<Y?" convention - confirm before changing it).
     * "IF" pops a flag; when the flag is 0 the following tokens of the line
     * currently being tokenised with strtok() are skipped up to the first
     * "THEN" (nested IFs are not tracked).
     *
     * @param string $tok token to execute (case-insensitive)
     * @return mixed arithmetic result, null, or an error string for an
     *               unknown token
     */
    function parse($tok)
    {
        $r = null;
        $tok = strtoupper(trim($tok));
        //$this->dump(); // this line is for debugging purpose only
        switch ($tok) {
            // flow control
            case 'THEN':
                break;
            case 'IF':
                if ($this->pop() == 0) {
                    // Condition is false: swallow tokens up to "THEN".
                    do {
                        $skipped = strtok(" ");
                        // strtok() returns false once the line is exhausted;
                        // without this check a missing "THEN" looped forever.
                        if ($skipped === false) {
                            break;
                        }
                    } while (strtoupper(trim($skipped)) <> "THEN");
                }
                break;

            // basic math operators
            case '+':
                $r = $this->pop() + $this->pop();
                break;
            case '-':
                // The top element is the right-hand operand.
                $subtrahend = $this->pop();
                $r = $this->pop() - $subtrahend;
                break;
            case '*':
                $r = $this->pop() * $this->pop();
                break;
            case '/':
                // The top element is the divisor.
                $divisor = $this->pop();
                $r = $this->pop() / $divisor;
                break;

            // stack operators
            case 'DUP':
                $this->dup();
                break;
            case 'SWAP':
                $this->swap();
                break;

            // comparison operators: top element versus the one below it
            case '=':
                $this->push_flag($this->data[0] == $this->data[1]);
                break;
            case '<>':
                $this->push_flag($this->data[0] <> $this->data[1]);
                break;
            case '<':
                $this->push_flag($this->data[0] < $this->data[1]);
                break;
            case '>':
                $this->push_flag($this->data[0] > $this->data[1]);
                break;
            case '>=':
                $this->push_flag($this->data[0] >= $this->data[1]);
                break;
            case '<=':
                $this->push_flag($this->data[0] <= $this->data[1]);
                break;

            // warning for tokens that are not implemented
            default:
                return sprintf('I don\'t know how to "%s" ', $tok);
        }
        if (!is_null($r)) $this->push($r);
        return $r;
    } // parse

    /**
     * Run a whole space-separated RPN program.
     *
     * @param string $cadena program text, tokens separated by spaces
     * @return mixed value returned by the last non-numeric token, or null
     *               when the line contains no such token
     */
    function parse_line($cadena)
    {
        // Initialised so that an empty or numbers-only line returns null
        // instead of reading an undefined variable.
        $r = null;
        $tok = strtok ($cadena," ");
        while ($tok !== false && $tok != '') {
            if (is_numeric ($tok)) {
                $this->push($tok);
            } else {
                $r = $this->parse($tok);
            }
            $tok = strtok (" ");
        }
        return $r;
    }
} // class RPN
?>

Categories