I am trying to print an integer in JavaScript with commas as thousands separators. For example, I want to show the number 1234567 as "1,234,567". How would I go about doing this?
Here is how I am doing it:
/**
 * Formats a number with commas as thousands separators.
 *
 * Repeatedly splits the last three digits off the left-most digit run until
 * no run of four or more digits is left.
 *
 * @param {number|string} x - Value to format (converted with toString()).
 * @returns {string} The text of `x` with commas inserted.
 */
function numberWithCommas(x) {
  const lastGroup = /(-?\d+)(\d{3})/;
  let text = x.toString();
  let next = text.replace(lastGroup, "$1,$2");
  // A replacement always inserts a comma, so an unchanged string means "no match".
  while (next !== text) {
    text = next;
    next = text.replace(lastGroup, "$1,$2");
  }
  return text;
}
console.log(numberWithCommas(1000));
Is there a simpler or more elegant way to do it? It would be nice if it works with floats also, but that is not necessary. It does not need to be locale-specific to decide between periods and commas.
I used the idea from Kerry's answer, but simplified it since I was just looking for something simple for my specific purpose. Here is what I have:
/**
 * Inserts a comma at every non-boundary position that is followed by a whole
 * number of three-digit groups and then a non-digit (or the end of the text).
 *
 * @param {number|string} x - Value to format (converted with toString()).
 * @returns {string} The comma-grouped text.
 */
function numberWithCommas(x) {
  const text = x.toString();
  const groupStart = /\B(?=(\d{3})+(?!\d))/g;
  return text.replace(groupStart, ",");
}
/**
 * Comma-groups the integer part of a number; the negative lookbehind skips
 * every position that comes after a decimal point.
 *
 * @param {number|string} x - Value to format (converted with toString()).
 * @returns {string} The comma-grouped text.
 */
function numberWithCommas(x) {
  const text = x.toString();
  const integerGroupStart = /\B(?<!\.\d*)(?=(\d{3})+(?!\d))/g;
  return text.replace(integerGroupStart, ",");
}
/**
 * Runs numberWithCommas on one input, logs a pass/fail line, and reports the outcome.
 *
 * @param {number} x - Input handed to numberWithCommas.
 * @param {string} expect - Exact string the formatter should return.
 * @returns {boolean} true when the result matches `expect`.
 */
function test(x, expect) {
  const actual = numberWithCommas(x);
  const matched = actual === expect;
  const marker = matched ? "✓" : "ERROR ====>";
  console.log(`${marker} ${x} => ${actual}`);
  return matched;
}
// Count failing cases; each pair is [input, expected formatted text].
let failures = 0;
for (const [input, expected] of [
  [0, "0"],
  [100, "100"],
  [1000, "1,000"],
  [10000, "10,000"],
  [100000, "100,000"],
  [1000000, "1,000,000"],
  [10000000, "10,000,000"],
]) {
  failures += !test(input, expected);
}
console.log(failures ? `${failures} test(s) failed` : "All tests passed");
/* Let elements with class "as-console-wrapper" (presumably the snippet's
   console panel) grow to the full available height; !important overrides
   any competing max-height rule. */
.as-console-wrapper {
max-height: 100% !important;
}
The regex uses 2 lookahead assertions:
a positive one to look for any point in the string that has a multiple of 3 digits in a row after it,
a negative assertion to make sure that point only has exactly a multiple of 3 digits. The replacement expression puts a comma there.
For example, if you pass it 123456789.01, the positive assertion will match every spot to the left of the 7 (since 789 is a multiple of 3 digits, 678 is a multiple of 3 digits, 567, etc.). The negative assertion checks that the multiple of 3 digits does not have any digits after it. 789 has a period after it so it is exactly a multiple of 3 digits, so a comma goes there. 678 is a multiple of 3 digits but it has a 9 after it, so those 3 digits are part of a group of 4, and a comma does not go there. Similarly for 567. 456789 is 6 digits, which is a multiple of 3, so a comma goes before that. 345678 is a multiple of 3, but it has a 9 after it, so no comma goes there. And so on. The \B keeps the regex from putting a comma at the beginning of the string.
@neu-rah mentioned that this function adds commas in undesirable places if there are more than 3 digits after the decimal point. If this is a problem, you can use this function:
/**
 * Comma-groups only the text before the first "." and leaves everything
 * after it untouched.
 *
 * @param {number|string} x - Value to format (converted with toString()).
 * @returns {string} The formatted text.
 */
function numberWithCommas(x) {
  const [whole, ...afterPoint] = x.toString().split(".");
  const grouped = whole.replace(/\B(?=(\d{3})+(?!\d))/g, ",");
  return [grouped, ...afterPoint].join(".");
}
/**
 * Formats the integer part of a number with commas; the fractional part
 * (anything after the first ".") is passed through unchanged.
 *
 * @param {number|string} x - Value to format (converted with toString()).
 * @returns {string} The formatted text.
 */
function numberWithCommas(x) {
  const pieces = x.toString().split(".");
  const [integerDigits] = pieces;
  pieces[0] = integerDigits.replace(/\B(?=(\d{3})+(?!\d))/g, ",");
  return pieces.join(".");
}
/**
 * Checks one numberWithCommas case and logs the outcome.
 *
 * @param {number} x - Input handed to numberWithCommas.
 * @param {string} expect - Exact string the formatter should return.
 * @returns {boolean} true when the result matches `expect`.
 */
function test(x, expect) {
  const actual = numberWithCommas(x);
  const matched = actual === expect;
  const marker = matched ? "✓" : "ERROR ====>";
  console.log(`${marker} ${x} => ${actual}`);
  return matched;
}
// Count failing cases; each pair is [input, expected formatted text].
let failures = 0;
for (const [input, expected] of [
  [0, "0"],
  [0.123456, "0.123456"],
  [100, "100"],
  [100.123456, "100.123456"],
  [1000, "1,000"],
  [1000.123456, "1,000.123456"],
  [10000, "10,000"],
  [10000.123456, "10,000.123456"],
  [100000, "100,000"],
  [100000.123456, "100,000.123456"],
  [1000000, "1,000,000"],
  [1000000.123456, "1,000,000.123456"],
  [10000000, "10,000,000"],
  [10000000.123456, "10,000,000.123456"],
]) {
  failures += !test(input, expected);
}
console.log(failures ? `${failures} test(s) failed` : "All tests passed");
/* Let elements with class "as-console-wrapper" (presumably the snippet's
   console panel) grow to the full available height; !important overrides
   any competing max-height rule. */
.as-console-wrapper {
max-height: 100% !important;
}
@t.j.crowder pointed out that now that JavaScript has lookbehind (support info), it can be solved in the regular expression itself:
/**
 * Comma-groups the integer digits of a number using a single regex.
 * The lookbehind `(?<!\.\d*)` rejects positions that follow a decimal point,
 * so fraction digits are never grouped.
 *
 * @param {number|string} x - Value to format (converted with toString()).
 * @returns {string} The comma-grouped text.
 */
function numberWithCommas(x) {
  const integerGroupStart = /\B(?<!\.\d*)(?=(\d{3})+(?!\d))/g;
  const text = x.toString();
  return text.replace(integerGroupStart, ",");
}
/**
 * Adds thousands separators to the integer part of a number; positions after
 * a decimal point are excluded by the negative lookbehind.
 *
 * @param {number|string} x - Value to format (converted with toString()).
 * @returns {string} The comma-grouped text.
 */
function numberWithCommas(x) {
  const asText = x.toString();
  return asText.replace(
    /\B(?<!\.\d*)(?=(\d{3})+(?!\d))/g,
    ","
  );
}
/**
 * Checks one numberWithCommas case and logs the outcome.
 *
 * @param {number} x - Input handed to numberWithCommas.
 * @param {string} expect - Exact string the formatter should return.
 * @returns {boolean} true when the result matches `expect`.
 */
function test(x, expect) {
  const actual = numberWithCommas(x);
  const matched = actual === expect;
  const marker = matched ? "✓" : "ERROR ====>";
  console.log(`${marker} ${x} => ${actual}`);
  return matched;
}
// Count failing cases; each pair is [input, expected formatted text].
let failures = 0;
for (const [input, expected] of [
  [0, "0"],
  [0.123456, "0.123456"],
  [100, "100"],
  [100.123456, "100.123456"],
  [1000, "1,000"],
  [1000.123456, "1,000.123456"],
  [10000, "10,000"],
  [10000.123456, "10,000.123456"],
  [100000, "100,000"],
  [100000.123456, "100,000.123456"],
  [1000000, "1,000,000"],
  [1000000.123456, "1,000,000.123456"],
  [10000000, "10,000,000"],
  [10000000.123456, "10,000,000.123456"],
]) {
  failures += !test(input, expected);
}
console.log(failures ? `${failures} test(s) failed` : "All tests passed");
/* Let elements with class "as-console-wrapper" (presumably the snippet's
   console panel) grow to the full available height; !important overrides
   any competing max-height rule. */
.as-console-wrapper {
max-height: 100% !important;
}
(?<!\.\d*) is a negative lookbehind that says the match can't be preceded by a . followed by zero or more digits. The negative lookbehind is faster than the split and join solution (comparison), at least in V8.
I'm surprised nobody mentioned Number.prototype.toLocaleString.
It's implemented in JavaScript 1.5 (which was introduced in 1999) so it's basically supported across all major browsers.
// With no arguments, toLocaleString() formats using the runtime's default locale.
const n = 34523453.345;
const localized = n.toLocaleString();
console.log(localized); // "34,523,453.345"
It also works in Node.js as of v0.12 via inclusion of Intl
If you want something different, Numeral.js might be interesting.
Below are two different browser APIs that can transform Numbers into structured Strings. Keep in mind that not all users' machines have a locale that uses commas in numbers. To enforce commas to the output, any "western" locale may be used, such as en-US
let number = 1234567890; // Example number to be formatted with thousands separators
⚠️ Mind that javascript has a maximum integer value of 9007199254740991
toLocaleString
// Default behaviour on a machine whose locale uses commas as the grouping separator
let number = 1234567890;
number.toLocaleString(); // "1,234,567,890"
// With custom settings: forcing the 'en-US' locale guarantees commas in the output,
// and maximumFractionDigits rounds to at most two decimals
let number2 = 1234.56789; // floating point example
number2.toLocaleString('en-US', {maximumFractionDigits:2}); // "1,234.57"
// You can also force exactly 2 fraction digits (short fractions are zero-padded)
let number3 = 1.5;
number3.toLocaleString('en-US', {minimumFractionDigits:2, maximumFractionDigits:2}); // "1.50"
NumberFormat
let number = 1234567890;
// Create the formatter for the 'en-US' locale, then call format() on each value.
let nf = new Intl.NumberFormat('en-US');
nf.format(number); // "1,234,567,890"
From what I checked (Firefox at least) they are both more or less same regarding performance.
⚡ Live demo: https://codepen.io/vsync/pen/MWjdbgL?editors=1000
I suggest using phpjs.org 's number_format()
/**
 * Formats a number in the style of PHP's number_format().
 *
 * @param {number|string} number - Value to format; non-finite input is treated as 0.
 * @param {number} [decimals] - Number of fraction digits (absolute value is used; default 0).
 * @param {string} [dec_point] - Decimal mark (default ".").
 * @param {string} [thousands_sep] - Grouping separator (default ",").
 * @returns {string} The formatted number.
 */
function number_format(number, decimals, dec_point, thousands_sep) {
  const value = isFinite(+number) ? +number : 0;
  const precision = isFinite(+decimals) ? Math.abs(decimals) : 0;
  const groupSeparator = typeof thousands_sep === 'undefined' ? ',' : thousands_sep;
  const decimalMark = typeof dec_point === 'undefined' ? '.' : dec_point;

  // Round by scaling instead of toFixed (original note: IE returned 0 for
  // parseFloat(0.55).toFixed(0)).
  let rounded;
  if (precision) {
    const scale = Math.pow(10, precision);
    rounded = Math.round(value * scale) / scale;
  } else {
    rounded = Math.round(value);
  }

  const pieces = rounded.toString().split('.');
  if (pieces[0].length > 3) {
    pieces[0] = pieces[0].replace(/\B(?=(?:\d{3})+(?!\d))/g, groupSeparator);
  }

  // Right-pad the fraction with zeros up to the requested precision.
  const fraction = pieces[1] || '';
  if (fraction.length < precision) {
    pieces[1] = fraction + new Array(precision - fraction.length + 1).join('0');
  }
  return pieces.join(decimalMark);
}
UPDATE 02/13/14
People have been reporting this doesn't work as expected, so I did a JS Fiddle that includes automated tests.
Update 26/11/2017
Here's that fiddle as a Stack Snippet with slightly modified output:
/**
 * Formats a number in the style of PHP's number_format().
 *
 * @param {number|string} number - Value to format; non-finite input is treated as 0.
 * @param {number} [decimals] - Number of fraction digits (absolute value is used; default 0).
 * @param {string} [dec_point] - Decimal mark (default ".").
 * @param {string} [thousands_sep] - Grouping separator (default ",").
 * @returns {string} The formatted number.
 */
function number_format(number, decimals, dec_point, thousands_sep) {
  const value = isFinite(+number) ? +number : 0;
  const precision = isFinite(+decimals) ? Math.abs(decimals) : 0;
  const groupSeparator = typeof thousands_sep === 'undefined' ? ',' : thousands_sep;
  const decimalMark = typeof dec_point === 'undefined' ? '.' : dec_point;

  // Round by scaling instead of toFixed (original note: IE returned 0 for
  // parseFloat(0.55).toFixed(0)).
  let rounded;
  if (precision) {
    const scale = Math.pow(10, precision);
    rounded = Math.round(value * scale) / scale;
  } else {
    rounded = Math.round(value);
  }

  const pieces = rounded.toString().split('.');
  if (pieces[0].length > 3) {
    pieces[0] = pieces[0].replace(/\B(?=(?:\d{3})+(?!\d))/g, groupSeparator);
  }

  // Right-pad the fraction with zeros up to the requested precision.
  const fraction = pieces[1] || '';
  if (fraction.length < precision) {
    pieces[1] = fraction + new Array(precision - fraction.length + 1).join('0');
  }
  return pieces.join(decimalMark);
}
// Running index used to label each logged test case.
var exampleNumber = 1;

/**
 * Runs number_format with the given arguments and logs whether the result
 * equals `expected`. Omitted optional arguments are reported as "(default)".
 *
 * @param {string} expected - Exact string number_format should return.
 * @param {number|string} number - Value to format.
 * @param {number} [decimals] - Fraction digits.
 * @param {string} [dec_point] - Decimal mark.
 * @param {string} [thousands_sep] - Grouping separator.
 */
function test(expected, number, decimals, dec_point, thousands_sep) {
  const shown = (option) => (typeof option === 'undefined' ? '(default)' : option);
  const actual = number_format(number, decimals, dec_point, thousands_sep);
  const verdict = actual === expected ? 'Passed' : 'FAILED';

  console.log(
    `Test case ${exampleNumber}: (decimals: ${shown(decimals)}, dec_point: "${shown(dec_point)}", thousands_sep: "${shown(thousands_sep)}")`
  );
  console.log(` => ${verdict}, got "${actual}", expected "${expected}".`);
  exampleNumber += 1;
}
// Each row is the argument list for test(): expected output first, then the
// number_format arguments. Rows keep their original length so omitted
// options still arrive as undefined.
for (const args of [
  ['1,235', 1234.56],
  ['1 234,56', 1234.56, 2, ',', ' '],
  ['1234.57', 1234.5678, 2, '.', ''],
  ['67,00', 67, 2, ',', '.'],
  ['1,000', 1000],
  ['67.31', 67.311, 2],
  ['1,000.6', 1000.55, 1],
  ['67.000,00000', 67000, 5, ',', '.'],
  ['1', 0.9, 0],
  ['1.20', '1.20', 2],
  ['1.2000', '1.20', 4],
  ['1.200', '1.2000', 3],
]) {
  test(...args);
}
/* Let elements with class "as-console-wrapper" (presumably the snippet's
   console panel) grow to the full available height; !important overrides
   any competing max-height rule. */
.as-console-wrapper {
max-height: 100% !important;
}
This is a variation of @mikez302's answer, but modified to support numbers with decimals (per @neu-rah's feedback that numberWithCommas(12345.6789) returns "12,345.6,789" instead of "12,345.6789"):
/**
 * Comma-groups the integer part of a number and re-attaches the fraction
 * (the text between the first and second ".") when it is non-empty.
 *
 * @param {number|string} n - Value to format (converted with toString()).
 * @returns {string} The formatted text.
 */
function numberWithCommas(n) {
  const [whole, fraction] = n.toString().split(".");
  const grouped = whole.replace(/\B(?=(\d{3})+(?!\d))/g, ",");
  if (fraction) {
    return grouped + "." + fraction;
  }
  return grouped;
}
/**
 * Formats a number with commas as thousands separators.
 *
 * Only the digits before the decimal point are grouped; the previous
 * single-regex version also grouped fraction digits
 * (1234.56789 became "1,234.56,789").
 *
 * @param {number|string} num - Value to format (converted with toString()).
 * @returns {string} The formatted text.
 */
function formatNumber(num) {
  const [integerPart, ...afterPoint] = num.toString().split(".");
  const grouped = integerPart.replace(/(\d)(?=(\d{3})+(?!\d))/g, "$1,");
  return [grouped, ...afterPoint].join(".");
}
// Log through console.log: a global `print` is not defined in Node.js, and in
// browsers `print` is window.print, which opens the print dialog instead of
// writing output.
console.log(formatNumber(2665)); // 2,665
console.log(formatNumber(102665)); // 102,665
console.log(formatNumber(111102665)); // 111,102,665
Using Regular expression
/**
 * Inserts a comma before every trailing group of three digits in a digit run.
 *
 * @param {number|string} value - Value to format (converted with toString()).
 * @returns {string} The comma-grouped text.
 */
function toCommas(value) {
  const text = value.toString();
  const groupStart = /\B(?=(\d{3})+(?!\d))/g;
  return text.replace(groupStart, ",");
}
for (const sample of [123456789, 1234567890, 1234]) {
  console.log(toCommas(sample)); // 123,456,789 / 1,234,567,890 / 1,234
}
Using toLocaleString()
// Number.prototype.toLocaleString(locale, options) examples. The "→" comments
// show the output expected for each locale/options pair.
var number = 123456.789;
// request a currency format
console.log(number.toLocaleString('de-DE', { style: 'currency', currency: 'EUR' }));
// → 123.456,79 €
// the Japanese yen doesn't use a minor unit
console.log(number.toLocaleString('ja-JP', { style: 'currency', currency: 'JPY' }))
// → ¥123,457
// limit to three significant digits
console.log(number.toLocaleString('en-IN', { maximumSignificantDigits: 3 }));
// → 1,23,000
ref MDN:Number.prototype.toLocaleString()
Using Intl.NumberFormat()
// The same three examples using Intl.NumberFormat: a formatter is created for
// each locale/options pair and format() is called on the number.
var number = 123456.789;
console.log(new Intl.NumberFormat('de-DE', { style: 'currency', currency: 'EUR' }).format(number));
// expected output: "123.456,79 €"
// the Japanese yen doesn't use a minor unit
console.log(new Intl.NumberFormat('ja-JP', { style: 'currency', currency: 'JPY' }).format(number));
// expected output: "¥123,457"
// limit to three significant digits
console.log(new Intl.NumberFormat('en-IN', { maximumSignificantDigits: 3 }).format(number));
// expected output: "1,23,000"
ref Intl.NumberFormat
DEMO AT HERE
<script type="text/javascript">
// Using Regular expression
// Inserts "," before each trailing group of three digits of value.toString().
function toCommas(value) {
return value.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
}
// Click handler for the "=>" button: reads the text of input "number1" and
// writes three differently formatted versions into result1..result3.
function commas() {
var num1 = document.myform.number1.value;
// Using Regular expression
// NOTE(review): parseInt is called without a radix here and below.
document.getElementById('result1').value = toCommas(parseInt(num1));
// Using toLocaleString()
document.getElementById('result2').value = parseInt(num1).toLocaleString('ja-JP', {
style: 'currency',
currency: 'JPY'
});
// Using Intl.NumberFormat()
// NOTE(review): unlike the two cases above, the raw input string (not the
// parseInt result) is passed to format() - confirm this is intended.
document.getElementById('result3').value = new Intl.NumberFormat('ja-JP', {
style: 'currency',
currency: 'JPY'
}).format(num1);
}
</script>
<!-- Demo form: one input, a button that calls commas(), and three output fields. -->
<FORM NAME="myform">
<INPUT TYPE="text" NAME="number1" VALUE="123456789">
<br>
<INPUT TYPE="button" NAME="button" Value="=>" onClick="commas()">
<br>Using Regular expression
<br>
<INPUT TYPE="text" ID="result1" NAME="result1" VALUE="">
<br>Using toLocaleString()
<br>
<INPUT TYPE="text" ID="result2" NAME="result2" VALUE="">
<br>Using Intl.NumberFormat()
<br>
<INPUT TYPE="text" ID="result3" NAME="result3" VALUE="">
</FORM>
Performance
http://jsben.ch/sifRd
Intl.NumberFormat
Native JS function. Supported by IE11, Edge, latest Safari, Chrome, Firefox, Opera, Safari on iOS and Chrome on Android.
// No locale is passed to Intl.NumberFormat, so the runtime's default locale is used.
var number = 3500;
console.log(new Intl.NumberFormat().format(number));
// → '3,500' if in US English locale
I am quite impressed by the number of answers this question has got. I like the answer by uKolka:
n.toLocaleString()
But unfortunately, in some locales like Spanish, it does not work (IMHO) as expected for numbers below 10,000:
Number(1000).toLocaleString('ES-es')
Gives 1000 and not 1.000.
See toLocaleString not working on numbers less than 10000 in all browsers to know why.
So I had to use the answer by Elias Zamaria choosing the right thousands separator character:
n.toString().replace(/\B(?=(\d{3})+(?!\d))/g, Number(10000).toLocaleString().substring(2, 3))
This one works well as a one-liner for both locales that use , or . as the thousands separator and starts working from 1,000 in all cases.
Number(1000).toString().replace(/\B(?=(\d{3})+(?!\d))/g, Number(10000).toLocaleString().substring(2, 3))
Gives 1.000 with a Spanish locale context.
Should you want to have absolute control over the way a number is formatted, you may also try the following:
// Inputs: the value to format and the formatting choices.
let number = 1234.567;
let decimals = 2;
let decpoint = '.'; // Or Number(0.1).toLocaleString().substring(1, 2)
let thousand = ','; // Or Number(10000).toLocaleString().substring(2, 3)

// n[0] holds the integer digits, n[1] the fraction digits (sign removed).
let n = Math.abs(number).toFixed(decimals).split('.');

// Walk the integer digits from the right; every third digit (except the
// right-most) gets the separator appended, so it follows that digit once the
// order is restored.
n[0] = Array.from(n[0])
  .reverse()
  .map((digit, position) =>
    position !== 0 && position % 3 === 0 ? digit + thousand : digit
  )
  .reverse()
  .join('');

// Put the sign back and join the two parts with the decimal mark.
let final = (Math.sign(number) < 0 ? '-' : '') + n.join(decpoint);
console.log(final);
Gives 1,234.57.
This one does not need a regular expression. It works by adjusting the number to the desired amount of decimals with toFixed first, then dividing it around the decimal point . if there is one. The left side is then turned into an array of digits which is reversed. Then a thousands separator is added every three digits from the start and the result reversed again. The final result is the union of the two parts. The sign of the input number is removed with Math.abs first and then put back if necessary.
It is not a one-liner but not much longer and easily turned into a function. Variables have been added for clarity, but those may be substituted by their desired values if known in advance. You may use the expressions that use toLocaleString as a way to find out the right characters for the decimal point and the thousands separator for the current locale (bear in mind that those require a more modern Javascript.)
Thanks to everyone for their replies. I have built off of some of the answers to make a more "one-size-fits-all" solution.
The first snippet adds a function that mimics PHP's number_format() to the Number prototype. If I am formatting a number, I usually want decimal places so the function takes in the number of decimal places to show. Some countries use commas as the decimal and decimals as the thousands separator so the function allows these separators to be set.
/**
 * Formats this number with a fixed number of decimals and configurable
 * separators, in the spirit of PHP's number_format().
 *
 * @param {number} decimals - Fraction digits passed to toFixed().
 * @param {string} [dec_point="."] - Decimal mark.
 * @param {string} [thousands_sep=","] - Grouping separator.
 * @returns {string} The formatted number.
 */
Number.prototype.numberFormat = function (decimals, dec_point, thousands_sep) {
  const decimalMark = dec_point === undefined ? '.' : dec_point;
  const groupMark = thousands_sep === undefined ? ',' : thousands_sep;

  const [whole, ...afterPoint] = this.toFixed(decimals).split('.');
  const grouped = whole.replace(/\B(?=(\d{3})+(?!\d))/g, groupMark);
  return [grouped, ...afterPoint].join(decimalMark);
};
You would use this as follows:
// Usage of Number.prototype.numberFormat(decimals, dec_point, thousands_sep).
var foo = 5000;
console.log(foo.numberFormat(2)); // us format: 5,000.00
console.log(foo.numberFormat(2, ',', '.')); // european format: 5.000,00
I found that I often needed to get the number back for math operations, but parseFloat converts 5,000 to 5, simply taking the first sequence of integer values. So I created my own float conversion function and added it to the String prototype.
/**
 * Parses a formatted number string (e.g. "1,234,567.89") back into a float.
 *
 * Fixes over the earlier version:
 *  - every thousands separator is removed, not only the first one;
 *  - separator characters are escaped before being placed in the regex
 *    character class, and an empty separator is handled;
 *  - the parts are re-joined with "." so parseFloat keeps the fraction even
 *    when a different decimal mark (e.g. ",") is used.
 *
 * @param {string} [dec_point="."] - Decimal mark used in the string.
 * @param {string} [thousands_sep=","] - Grouping separator character(s); each
 *   character of this string is stripped from the integer part.
 * @returns {number} The parsed value (NaN if the text is not numeric).
 */
String.prototype.getFloat = function (dec_point, thousands_sep) {
  const decimalMark = dec_point !== undefined ? dec_point : '.';
  const groupMark = thousands_sep !== undefined ? thousands_sep : ',';

  const parts = this.split(decimalMark);
  if (groupMark !== '') {
    // Escape the characters that are special inside a [...] class.
    const classBody = String(groupMark).replace(/[\\\]^-]/g, '\\$&');
    parts[0] = parts[0].replace(new RegExp('[' + classBody + ']', 'g'), '');
  }
  // parseFloat only understands "." as the decimal mark.
  return parseFloat(parts.join('.'));
};
Now you can use both functions as follows:
// Round trip: number -> formatted string -> number -> formatted string.
var foo = 5000;
var fooString = foo.numberFormat(2); // The string 5,000.00
var fooFloat = fooString.getFloat(); // The number 5000;
console.log((fooString.getFloat() + 1).numberFormat(2)); // The string 5,001.00
I think this is the shortest regular expression that does it:
/\B(?=(\d{3})+\b)/g
"123456".replace(/\B(?=(\d{3})+\b)/g, ",")
I checked it on a few numbers and it worked.
Number.prototype.toLocaleString() would have been awesome if it was provided natively by all browsers (Safari).
I checked all the other answers, but no one seemed to polyfill it. Here is a proof of concept towards that, which is actually a combination of the first two answers: if toLocaleString works, it uses it; if it doesn't, it falls back to a custom function.
/**
 * Returns `value` with thousands separators.
 *
 * When toLocaleString() changes the text, its result is returned as-is.
 * When it returns the same text as toString(), the integer part is grouped
 * manually with `sep`.
 *
 * @param {number|string} value - Value to format.
 * @param {string} [sep=","] - Separator for the manual fallback (null/undefined
 *   select the default).
 * @returns {string} The formatted value.
 */
var putThousandsSeparators = function (value, sep) {
  const separator = sep == null ? ',' : sep;
  const plain = value.toString();
  const localized = value.toLocaleString();

  // toLocaleString already produced something different: trust it.
  if (plain !== localized) {
    return localized;
  }

  // Manual fallback: group the digits before the first "." only.
  const parts = plain.split('.');
  parts[0] = parts[0].replace(/\B(?=(\d{3})+(?!\d))/g, separator);
  return parts[1] ? parts.join('.') : parts[0];
};
alert(putThousandsSeparators(1234567.890));
The thousands separator can be inserted in an international-friendly manner using the browser's Intl object:
Intl.NumberFormat().format(1234);
// returns "1,234" if the user's locale is en_US, for example
See MDN's article on NumberFormat for more, you can specify locale behavior or default to the user's. This is a little more foolproof because it respects local differences; many countries use periods to separate digits while a comma denotes the decimals.
Intl.NumberFormat isn't available in all browsers yet, but it works in latest Chrome, Opera, & IE. Firefox's next release should support it. Webkit doesn't seem to have a timeline for implementation.
You can also use this approach to format currency values.
// Currency formatter: 'en-US' conventions, USD, always exactly two fraction digits.
var nf = new Intl.NumberFormat('en-US', {
style: 'currency',
currency: 'USD',
minimumFractionDigits: 2,
maximumFractionDigits: 2
});
nf.format(123456.789); // "$123,456.79"
For more info you can access this link.
https://www.justinmccandless.com/post/formatting-currency-in-javascript/
If you are dealing with currency values and do a lot of formatting, it might be worth adding the tiny accounting.js library, which handles a lot of edge cases and localization:
// Default usage:
// Usage examples for accounting.js. The `accounting` object is not defined in
// this snippet; the library has to be loaded first.
accounting.formatMoney(12345678); // $12,345,678.00
// European formatting (custom symbol and separators), could also use options object as second param:
accounting.formatMoney(4999.99, "€", 2, ".", ","); // €4.999,99
// Negative values are formatted nicely, too:
accounting.formatMoney(-500000, "£ ", 0); // £ -500,000
// Simple `format` string allows control of symbol position [%v = value, %s = symbol]:
accounting.formatMoney(5318008, { symbol: "GBP", format: "%v %s" }); // 5,318,008.00 GBP
The following code uses char scan, so there's no regex.
/**
 * Adds commas as thousands separators by scanning characters (no regex).
 *
 * The sign is stripped first and re-attached at the end; the fraction (text
 * between the first and second ".") is appended unchanged when non-empty.
 *
 * @param {number} num - Value to format.
 * @returns {string} The formatted text.
 */
function commafy(num) {
  const isNegative = num < 0;
  const [whole, fraction] = ('' + (isNegative ? -num : num)).split(".");

  let grouped = '';
  for (let pos = 0; pos < whole.length; pos += 1) {
    // A comma goes before a digit when a whole number of three-digit groups
    // remains from that digit to the end (never before the first digit).
    const remaining = whole.length - pos;
    if (pos !== 0 && remaining % 3 === 0) {
      grouped += ',';
    }
    grouped += whole.charAt(pos);
  }

  const sign = isNegative ? '-' : '';
  const tail = fraction ? '.' + fraction : '';
  return sign + grouped + tail;
}
It shows promising performance: http://jsperf.com/number-formatting-with-commas/5
2015.4.26: Minor fix to resolve issue when num<0. See https://jsfiddle.net/runsun/p5tqqvs3/
Here's a simple function that inserts commas for thousand separators. It uses array functions rather than a RegEx.
/**
 * Format a number as a string with commas separating the thousands.
 *
 * Uses array splicing rather than a regular expression. Only the integer
 * digits are grouped: a leading minus sign and any fraction are set aside
 * first, so -123 stays "-123" (previously "-,123") and 1234.5678 becomes
 * "1,234.5678". Text whose integer part is not purely digits (for example
 * exponent notation such as "1e+21") is returned unchanged.
 *
 * @param {number|string} num - The number to be formatted (e.g. 10000)
 * @returns {string} A string representing the formatted number (e.g. "10,000")
 */
var formatNumber = function (num) {
  const text = num.toString();
  const sign = text.startsWith('-') ? '-' : '';
  const unsigned = sign ? text.slice(1) : text;

  const dot = unsigned.indexOf('.');
  const integerPart = dot === -1 ? unsigned : unsigned.slice(0, dot);
  const fraction = dot === -1 ? '' : unsigned.slice(dot);

  if (!/^\d+$/.test(integerPart)) {
    return text;
  }

  const digits = integerPart.split('');
  // Insert from the right so earlier insertions do not shift later positions.
  for (let index = digits.length - 3; index > 0; index -= 3) {
    digits.splice(index, 0, ',');
  }
  return sign + digits.join('') + fraction;
};
CodeSandbox link with examples: https://codesandbox.io/s/p38k63w0vq
Use this code to format currency amounts for India. The locale code can be changed to handle other countries' formats.
let amount =350256.95
// 'en-IN' formatter that always shows at least two fraction digits.
var formatter = new Intl.NumberFormat('en-IN', {
minimumFractionDigits: 2,
});
// Use it.
formatter.format(amount); // "3,50,256.95"
output:
3,50,256.95
You can also use the Intl.NumberFormat constructor. Here is how you can do it.
resultNumber = new Intl.NumberFormat('en-IN', { maximumSignificantDigits: 3 }).format(yourNumber);
Universal, fast, accurate, simple function
Using RegEx (Fast & Accurate)
Support Numbers(Float/Integer)/String/Multiple numbers in a string
Smart well (Not grouping decimals - Compatible with different types of grouping)
Support all browsers specially 'Safari' & 'IE' & many older browsers
[Optional] Respecting non-English (Persian/Arabic) digits (+ Pre-fix)
TL;DR - Full version function (minified):
/**
 * Inserts thousands separators into every number found in the input while
 * leaving fraction digits untouched.
 *
 * @param {number|string} num - Number, or a string containing one or more numbers.
 * @param {string} [sep=','] - Thousands separator.
 * @param {string} [dec='.'] - Decimal separator (just one character).
 * @param {string} [u='\\d'] - Regex character set/class that counts as a digit,
 *   e.g. '[\\d\\u0660-\\u0669\\u06f0-\\u06f9]' for English/Persian/Arabic digits.
 * @returns {string} The formatted text.
 */
function formatNums(num, sep, dec, u) {
  const separator = sep || ',';
  const digit = u || '\\d';

  let text = num;
  if (typeof text != 'string') {
    text = String(text);
    // A converted number always uses "."; switch to the custom decimal mark.
    if (dec && dec != '.') text = text.replace('.', dec);
  }

  // Alternative 1 swallows a fraction (decimal mark + digits) so it is skipped;
  // alternative 2 matches a single digit that must be followed by a separator.
  const pattern = RegExp(
    `\\${dec || '.'}${digit}+|${digit}(?=(?:${digit}{3})+(?!${digit}))`,
    'g'
  );
  return text.replace(pattern, (match) =>
    match.length == 1 ? match + separator : match
  );
}
// Declared with const: a bare `text = ...` assignment creates an implicit
// global and throws a ReferenceError in strict mode and ES modules.
const text = '100000000 English or Persian/Arabic ۱۲۳۴۵۶۷۸۹/٠١٢٣٤٥٦٧٨٩ this is 123123123.123123123 with this -123123 and these 10 100 1000 123123/123123 (2000000) .33333 100.00 or any like 500000Kg';
console.log(formatNums(10000000.0012));
console.log(formatNums(10000000.0012,'.',',')); // German
console.log(formatNums(text,',','.','[\\d\\u0660-\\u0669\\u06f0-\\u06f9]')); // Respect Persian/Arabic digits
<!-- Live demo: on every input event the typed text is passed through formatNums() and shown in #result. -->
<input oninput="document.getElementById('result').textContent=formatNums(this.value)" placeholder="Type a number here">
<div id="result"></div>
Why NOT satisfied with other answers?
Number.prototype.toLocaleString() / Intl.NumberFormat
(Right answer)
If no well arguments, we can't expect same result. Also with arguments options we still can't be sure what can be the result because it will use local settings and possible client modifications effect on it or the browser/device not support it.
>~2016 browsers support and still in 2021 some reports that in some cases like Safari or IE/Edge do not return as expected.
toLocaleString() Work with numbers, Intl.NumberFormat Work with String/Numbers; Strings will be/have to be parsed and also rounded if necessary, so:
If we already have a localized string with non-English digits we have to replace numbers with the English one, then parse it, then use it again with the local options. (If it return what we expect)
Generally while parsing we cant expect not missing decimal zeros or details in big numbers or respecting other languages numeral characters
Decimal / Thousand separator characters can not be customized more than language options, except with post-fixings with replace() + RegEx again. (For example in Persian usually we don't use the suggested Arabic comma and also sometime we use ∕ Fraction/Division slash as decimal separator)
Slow performance in loops
Not so good RegEx ways (Fastest & One-liner ways)
/\B(?=(\d{3})+\b)/ it will group decimals too. // 123,123.123,123 !!!
/(?<!\.\d+)\B(?=(\d{3})+\b)/ used look-behind that not supported well yet. Please check:
https://caniuse.com/js-regexp-lookbehind
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#browser_compatibility
Note: Generally lookbehind can be against of original RegEx structure (because of how the analyzer should work like do not buffer the raw behind as a parser) and actually it can make the performance seriously low (In this case ~30%). I think it pushed inside over the time by requests.
/\B(?=(?=\d*\.)(\d{3})+(?!\d))/ just work with float numbers and ignore integers.
.replace(/(?:[^.\d]|^)\d+/g,function(a){return a.replace(/\B(?=(?:\d{3})+\b)/g,',');}) (My old idea) Using 2 RegEx. First one find the integer parts, second one put separator. Why 2 functions, when it can be mixed?
/(\..*)$|(\d)(?=(\d{3})+(?!\d))/g (a good idea by @djulien, which I upvoted), but when the RegEx is global, (\..*)$ can produce a wrong result even if there is just a space at the end.
Also using capturing groups (Example: (\d)) will make the performance low so if it possible, use non-capturing groups (Example: (?:\d)) or if an statement already exist in our function let's mix it.
In this case, not using capturing groups improve performance about ~20% and in case of /\B(?=(\d{3})+\b)/g vs /\B(?=(?:\d{3})+\b)/g, the second one is ~8% faster.
About regex performances:
Note: Sure different methods, browsers, hardware, system status, cases and even changes on ECMAScript will effect on result of checking performance. But some changes logically should effect result and i used this one just as visual example.
Using library's like Numeral.js so much not necessary functions for a simple task.
Heavy code / Not accurate functions that used .split('.') or .toFixed() or Math.floor() ...
Final result:
There is no best of all and it should be chosen based on the need. My priority of sorting;
Compatibility
Capability
Universality
Ease of use
Performance
toLocaleString() (Compatibility - Universality) [Native function]
If you have to change digits and grouping from English to another language
If you are not sure about your client language
If you don't need to have exact expected result
If you don't care about older version of Safari
// Input: 1000000.2301 (`num` is assumed to be defined by the caller)
parseFloat(num) // (Pre-fix) Converts the input first in case it is a string
.toLocaleString('en-US', {
useGrouping: true // (Default is true; shown here only for illustration)
});
// Result: 1,000,000.23
// 1,000,000.23
Read more: https://www.w3schools.com/jsref/jsref_tolocalestring_number.asp
Intl.NumberFormat() (Capability - Universality - Compatibility) [Native function]
Almost same as toLocaleString() +
Great capability of supporting currency, units, etc... any language (Modern browsers)
// Input: 1000000.2301 (`num` is assumed to be defined by the caller)
new Intl.NumberFormat('en-US', { // It can be 'fa-IR' : Farsi - Iran
numberingSystem: 'arab' // render with the 'arab' numbering system's digits
}).format(num)
// Result: ١٬٠٠٠٬٠٠٠٫٢٣
// ١٬٠٠٠٬٠٠٠٫٢٣
Read more: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/NumberFormat/NumberFormat
With these much options of the native functions, we still can not expect:
Exact result (+ Not parsing the input / Not rounding / Not converting big numbers)
Accepting other languages digits as input
Customizing separators
Trusting browsers support
Performance
So you maybe need a function like any of these:
formatNums() (Compatibility - Ease of use)
Full version (Capability) (Not faster than toLocaleString) - Explain:
/**
 * Full version: adds thousands separators to every number in the input and
 * never groups fraction digits.
 *
 * @param {number|string} num - Number, or a string containing numbers.
 * @param {string} [sep=','] - Thousands separator.
 * @param {string} [dec='.'] - Decimal separator (one character).
 * @param {string} [u='\\d'] - Regex character set/class treated as a digit.
 * @returns {string} The formatted text.
 */
function formatNums(num, sep, dec, u) {
  // Defaults
  const separator = sep || ',';
  const digit = u || '\\d'; // \d: 0-9 (English digits)

  // Make sure we work on a string. A converted number always uses "." as its
  // decimal mark, so swap it for the custom one when requested.
  let text = num;
  if (typeof text != 'string') {
    text = String(text);
    if (dec && dec != '.') text = text.replace('.', dec);
  }

  // With the defaults the pattern is /\.\d+|\d(?=(?:\d{3})+(?!\d))/g:
  //   \.\d+                    a decimal mark plus its digits (matched only to be skipped)
  //   |                        or
  //   \d(?=(?:\d{3})+(?!\d))   one digit followed by one or more complete
  //                            three-digit groups and then a non-digit
  const fractionPart = '\\' + (dec || '.') + digit + '+';
  const groupingDigit = digit + '(?=(?:' + digit + '{3})+(?!' + digit + '))';
  const pattern = RegExp(fractionPart + '|' + groupingDigit, 'g');

  // A one-character match is a grouping digit and gets the separator appended;
  // anything longer is a fraction and is returned unchanged.
  return text.replace(pattern, function (match) {
    if (match.length == 1) {
      return match + separator;
    }
    return match;
  });
}
/**
 * Adds thousands separators to every number in the input without grouping
 * fraction digits.
 *
 * @param {number|string} num - Number, or a string containing numbers.
 * @param {string} [sep=','] - Thousands separator.
 * @param {string} [dec='.'] - Decimal separator (one character).
 * @param {string} [u='\\d'] - Regex character set/class treated as a digit.
 * @returns {string} The formatted text.
 */
function formatNums(num, sep, dec, u) {
  const separator = sep || ',';
  const digit = u || '\\d';

  let text = num;
  if (typeof text != 'string') {
    text = String(text);
    if (dec && dec != '.') text = text.replace('.', dec);
  }

  // Fractions are matched (and returned as-is) so their digits are skipped;
  // single-digit matches mark the places where a separator belongs.
  const pattern = RegExp(
    `\\${dec || '.'}${digit}+|${digit}(?=(?:${digit}{3})+(?!${digit}))`,
    'g'
  );
  const withSeparator = (match) => (match.length == 1 ? match + separator : match);
  return text.replace(pattern, withSeparator);
}
// Demo calls for the full formatNums(num, sep, dec, u); the trailing comments
// give the output worked out from the function's regular expression.
console.log(formatNums(1000000.2301)); // 1,000,000.2301
console.log(formatNums(100.2301)); // 100.2301
console.log(formatNums(-2000.2301)); // -2,000.2301
console.log(formatNums(123123123,' , ')); // 123 , 123 , 123
console.log(formatNums('0000.0000')); // 0,000.0000
console.log(formatNums('5000000.00')); // 5,000,000.00
console.log(formatNums('5000000,00',' ',',')); // 5 000 000,00
console.log(formatNums(5000000.1234,' ',',')); // 5 000 000,1234
// Persian digits, "/" as the decimal mark, and a widened digit class:
console.log(formatNums('۱۲۳۴۵۶۷۸۹/۹۰۰۰',',','/','[\\d\\u0660-\\u0669\\u06f0-\\u06f9]'));
Play with the examples here:
https://jsfiddle.net/PAPIONbit/198xL3te/
Light version (Performance) (~30% faster than toLocaleString)
/**
 * Light version: adds thousands separators to the numbers in `num`, using "."
 * as the fixed decimal mark.
 *
 * @param {number|string} num - Value to format.
 * @param {string} [sep=','] - Thousands separator.
 * @returns {string} The formatted text.
 */
function formatNums(num, sep) {
  const separator = sep || ',';
  // One-character matches are grouping digits; longer matches are fractions
  // (".digits") that must pass through unchanged.
  const withSeparator = (match) => (match.length == 1 ? match + separator : match);
  return String(num).replace(/\.\d+|\d(?=(?:\d{3})+(?!\d))/g, withSeparator);
}
// Demo calls for the light formatNums(num, sep); the trailing comments give the
// output worked out from the function's regular expression.
console.log(formatNums(1000000.2301)); // 1,000,000.2301
console.log(formatNums(100.2301)); // 100.2301
console.log(formatNums(-2000.2301)); // -2,000.2301
console.log(formatNums(123123123,' ')); // 123 123 123
Check the RegEx (Without the necessary function) : https://regexr.com/66ott
(num+'').replace(/\B(?=(?:\d{3})+\b)/g,','); (Performance - Compatibility)
Best choose if The input is Specified / Predefined. (Like usual prices that sure will not have more than 3 decimals)
(~65% faster than toLocaleString)
// Declared with const: bare `num = ...` / `str = ...` assignments create
// implicit globals and throw a ReferenceError in strict mode and ES modules.
const num = 1000000;
const str = '123123.100';
// The \b-based pattern is safe here because the fraction has at most three
// digits (a longer fraction would be grouped too).
console.log((num + '').replace(/\B(?=(?:\d{3})+\b)/g, ',')); // 1,000,000
console.log(str.replace(/\B(?=(?:\d{3})+\b)/g, ',')); // 123,123.100
+
For Persian/Arabic local clients:
If your clients are going to use Persian/Arabic digits for input, as is usual in Iran, I think the best approach is to convert them to English digits before processing them, instead of keeping the original characters, so that you can do calculations with them.
// ۱۲۳۴۵۶۷۸۹۰
/**
 * Replaces Persian (U+06F0-U+06F9) and Arabic-Indic (U+0660-U+0669) digits
 * with the corresponding ASCII digits 0-9.
 *
 * The earlier version discarded the result of replace() and therefore always
 * returned undefined; the converted string is now returned.
 *
 * @param {string|number} n - Text that may contain Persian/Arabic digits.
 * @returns {string} The text with those digits converted to 0-9.
 */
function toEnNum(n) {
  return String(n).replace(/[\u0660-\u0669\u06f0-\u06f9]/g, function (c) {
    // Both ranges start at a code point ending in 0x0, so the low four bits
    // of the code are the digit's numeric value.
    return String(c.charCodeAt(0) & 0xf);
  });
}
// 1234567890
And for still showing them as original looking there is 2 ways:
CSS Using Persian/Arabic fonts with local digits (My choose)
Convert the result back with Intl.NumberFormat or a function like: https://stackoverflow.com/a/13787021/7514010
My old-school function from this post (~15% faster than toLocaleString):
// 10000000.0012
/**
 * Two-step formatter: the outer regex picks out integer runs (digits not
 * preceded by "." or another digit) and the inner regex groups each run.
 *
 * @param {number|string} n - Value to format.
 * @param {string} [s=","] - Thousands separator.
 * @returns {string} The formatted text.
 */
function formatNums(n, s) {
  const separator = s || ",";
  // Groups one integer run; the run may start with the non-digit that precedes it.
  const groupIntegerRun = (run) => run.replace(/\B(?=(?:\d{3})+\b)/g, separator);
  return String(n).replace(/(?:^|[^.\d])\d+/g, groupIntegerRun);
}
// 10,000,000.0012
/**
 * Formats the absolute value of a number with two decimals and commas as
 * thousands separators. The sign is discarded by Math.abs.
 *
 * @param {number} number - Value to format.
 * @returns {string} The formatted magnitude, e.g. "1,234.50".
 */
var formatNumber = function (number) {
  const [whole, ...afterPoint] = Math.abs(number).toFixed(2).split('.');
  const grouped = whole.replace(/\B(?=(\d{3})+(?!\d))/g, ",");
  return [grouped, ...afterPoint].join(".");
};
EDIT:
The function only works with positive numbers. For example:
var number = -123123231232;
formatNumber(number)
Output: "123,123,231,232.00"
But to answer the question above toLocaleString() method just solves the problem.
var number = 123123231232;
number.toLocaleString()
Output: "123,123,231,232"
Cheer!
My answer is the only answer that completely replaces jQuery with a much more sensible alternative:
/**
 * Formats an amount as US dollars (en-US locale), rounding to whole cents.
 *
 * @param {number|string} dollarAmount - Amount to format.
 * @returns {string} e.g. "$1,001.00".
 */
function $(dollarAmount) {
  const usdFormatter = new Intl.NumberFormat('en-US', {
    style: 'currency',
    currency: 'USD',
  });
  return usdFormatter.format(dollarAmount);
}
This solution not only adds commas, but it also rounds to the nearest penny in the event that you input an amount like $(1000.9999) you'll get $1,001.00. Additionally, the value you input can safely be a number or a string; it doesn't matter.
If you're dealing with money, but don't want a leading dollar sign shown on the amount, you can also add this function, which uses the previous function but removes the $:
/**
 * Formats an amount like `$()` does, then strips the first dollar sign.
 *
 * @param {number|string} dollarAmount - Amount to format.
 * @returns {string} The currency-formatted amount without its "$".
 */
function no$(dollarAmount) {
  const withSymbol = $(dollarAmount);
  return withSymbol.replace('$', '');
}
If you're not dealing with money, and have varying decimal formatting requirements, here's a more versatile function:
/**
 * Formats a number with en-US grouping and a configurable number of decimals.
 *
 * @param {number|string} number - Value to format.
 * @param {number} [minDecimalPlaces=0] - Minimum fraction digits to show.
 * @param {number} [maxDecimalPlaces] - Maximum fraction digits; defaults to
 *   the larger of 3 and `minDecimalPlaces`.
 * @returns {string} e.g. "1,234,567.891".
 */
function addCommas(number, minDecimalPlaces = 0, maxDecimalPlaces = Math.max(3,minDecimalPlaces)) {
  const formatter = new Intl.NumberFormat('en-US', {
    minimumFractionDigits: minDecimalPlaces,
    maximumFractionDigits: maxDecimalPlaces,
  });
  return formatter.format(number);
}
Oh, and by the way, the fact that this code does not work in some old version of Internet Explorer is completely intentional. I try to break IE anytime that I can catch it not supporting modern standards.
Please remember that excessive praise, in the comment section, is considered off-topic. Instead, just shower me with up-votes.
I wrote this one before stumbling on this post. It uses no regex, and you can actually understand the code.
$(function(){
    /**
     * Inserts a comma every three digits into the integer part of a numeric
     * string. Everything from the first "." onwards is appended unchanged.
     *
     * @param {string} s - Numeric text, optionally signed, e.g. "-1234567.89".
     * @returns {string} The text with thousands separators added.
     */
    function insertCommas(s) {
        // Set a leading sign aside so it is not counted as a digit; otherwise
        // "-123456" would come out as "-,123,456".
        var first = s.charAt(0);
        var sign = (first === '-' || first === '+') ? first : '';
        var unsigned = s.slice(sign.length);

        // get stuff before the dot
        var d = unsigned.indexOf('.');
        var s2 = d === -1 ? unsigned : unsigned.slice(0, d);

        // insert commas every 3 digits from the right
        for (var i = s2.length - 3; i > 0; i -= 3)
            s2 = s2.slice(0, i) + ',' + s2.slice(i);

        // append fractional part
        if (d !== -1)
            s2 += unsigned.slice(d);

        return sign + s2;
    }

    $('#theDudeAbides').text( insertCommas('1234567.89012' ) );
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
<div id="theDudeAbides"></div>
For anyone who likes 1-liners and a single regex, but doesn't want to use split(), here is an enhanced version of the regex from other answers that handles (ignores) decimal places:
var formatted = (x+'').replace(/(\..*)$|(\d)(?=(\d{3})+(?!\d))/g, (digit, fract) => fract || digit + ',');
The regex first matches a substring starting with a literal "." and replaces it with itself ("fract"), and then matches any digit followed by multiples of 3 digits and puts "," after it.
For example, x = 12345678.12345678 will give formatted = '12,345,678.12345678'.
Let me try to improve uKolka's answer and maybe help others save some time.
Use Numeral.js.
document.body.textContent = numeral(1234567).format('0,0');
<script src="//cdnjs.cloudflare.com/ajax/libs/numeral.js/1.4.5/numeral.min.js"></script>
You should go with Number.prototype.toLocaleString() only if its browser compatibilty is not an issue.
Just for future Googlers (or not necessarily 'Googlers'):
All of solutions mentioned above are wonderful, however, RegExp might be awfully bad thing to use in a situation like that.
So, yes, you might use some of the options proposed or even write something primitive yet useful like:
/**
 * Adds comma thousands separators to the integer part of a numeric string.
 *
 * @param {string} str - Numeric text such as `-1234567.0987`.
 * @returns {string} The grouped text; a non-empty fractional part is re-attached.
 */
const strToNum = str => {
  // 1-3 digits followed by exactly 3 digits and then a comma or end of string
  const groupPattern = /(\d{1,3})(\d{3}(?:,|$))/;
  const [integerPart, fractionPart] = str.split(`.`);

  // Each pass peels one more three-digit group off the right-hand side.
  let grouped = integerPart.replace(groupPattern, `$1,$2`);
  while (groupPattern.test(grouped)) {
    grouped = grouped.replace(groupPattern, `$1,$2`);
  }

  if (fractionPart) {
    return grouped.concat(`.`, fractionPart);
  }
  return grouped;
};
strToNum(`123`) // => 123
strToNum(`123456`) // => 123,456
strToNum(`-1234567.0987`) // => -1,234,567.0987
The regexp that's used here is fairly simple and the loop will go precisely the number of times it takes to get the job done.
And you might optimize it far better, "DRYify" code & so on.
Yet,
(-1234567.0987).toLocaleString();
(in most situations) would be a far better choice.
The point is not in the speed of execution or in cross-browser compatibility.
In situations when you'd like to show the resulting number to user, .toLocaleString() method gives you superpower to speak the same language with the user of your website or app (whatever her/his language is).
According to the ECMAScript documentation, this method was introduced in 1999, and I believe the reason was the hope that the Internet would at some point connect people all around the world, so some "internationalization" tools were needed.
Today the Internet does connect all of us, so, it is important to remember that the world is a way more complex that we might imagine & that (/almost) all of us are here, in the Internet.
Obviously, considering the diversity of people, it is impossible to guarantee perfect UX for everybody because we speak different languages, value different things, etc. And exactly because of this, it is even more important to try to localize things as much as it's possible.
So, considering that there're some particular standards for representation of date, time, numbers, etc. & that we have a tool to display those things in the format preferred by the final user, isn't that rare and almost irresponsible not to use that tool (especially in situations when we want to display this data to the user)?
For me, using RegExp instead of .toLocaleString() in situation like that sounds a little bit like creating a clock app with JavaScript & hard-coding it in such a way so it'll display Prague time only (which would be quite useless for people who don't live in Prague) even though the default behaviour of
new Date();
is to return the data according to final user's clock.
An alternative way, supporting decimals, different separators and negatives.
/**
 * Formats a number with thousands separators and a fixed number of decimals.
 *
 * @param {number|string} number - Value to format.
 * @param {number} [decimal_pos=2] - Number of decimals. When it is 0 or less
 *   no fractional part is emitted and the integer part is truncated.
 * @param {string} [decimal_sep="."] - Decimal separator.
 * @param {string} [thousand_sep=","] - Thousands separator.
 * @returns {string} e.g. "-1,234,567.89".
 */
var number_format = function(number, decimal_pos, decimal_sep, thousand_sep) {
  const ts = thousand_sep == null ? ',' : thousand_sep;
  const ds = decimal_sep == null ? '.' : decimal_sep;
  const dp = decimal_pos == null ? 2 : decimal_pos;
  const sign = number < 0 ? '-' : '';
  const magnitude = Math.abs(number);

  let intDigits;
  let fraction = '';
  if (dp > 0) {
    // Take BOTH parts from the same rounded string. Flooring the integer
    // part separately loses the carry: 1.999 would render as "1.00".
    const parts = magnitude.toFixed(dp).split('.');
    intDigits = parts[0];
    fraction = ds + parts[1];
  } else {
    intDigits = Math.floor(magnitude).toString();
  }

  // The leading group holds the 0-2 digits left over after whole triples.
  const head = intDigits.length % 3;
  let grouped = intDigits.slice(0, head);
  for (let i = head; i < intDigits.length; i += 3) {
    if (i !== 0) {
      grouped += ts;
    }
    grouped += intDigits.slice(i, i + 3);
  }

  return sign + grouped + fraction;
};
Some corrections by #Jignesh Sanghani, don't forget to upvote his comment.
I think this function will take care of all the issues related to this problem.
/**
 * Adds comma thousands separators to the integer part of a number.
 *
 * @param {number|string} inputString - Value to format; it is stringified first.
 * @returns {string} The grouped text, keeping a leading "-" and the text
 *   between the first and second "." (if any) as the decimal part.
 */
function commaFormat(inputString) {
  const text = inputString.toString();
  const pieces = text.split(".");
  const integerText = pieces[0];
  const decimalPart = pieces.length > 1 ? "." + pieces[1] : "";

  // Digits are whatever follows the last "-" in the integer text.
  const digits = integerText.slice(integerText.lastIndexOf('-') + 1);
  const sign = integerText.charAt(0) == '-' ? "-" : "";

  // Cut three-character groups from the right-hand end.
  const groups = [];
  for (let end = digits.length; end > 0; end -= 3) {
    groups.unshift(digits.slice(Math.max(0, end - 3), end));
  }

  return sign + groups.join(",") + decimalPart;
}
If you're looking for a short and sweet solution:
const number = 12345678.99;
// Only the leading run of digits (the integer part) is rewritten; a comma is
// placed before every digit that has a multiple of three digits remaining.
const numberString = String(number).replace(/^\d+/, (integerPart) => {
  let grouped = '';
  for (let pos = 0; pos < integerPart.length; pos += 1) {
    const remaining = integerPart.length - pos;
    if (pos > 0 && remaining % 3 === 0) {
      grouped += ',';
    }
    grouped += integerPart[pos];
  }
  return grouped;
});
// numberString: 12,345,678.99
I think your solution is one of the shorter ones I've seen for this. I don't think there are any standard JavaScript functions to do this sort of thing, so you're probably on your own.
I checked the CSS 3 specifications to see whether it's possible to do this in CSS, but unless you want every digit in its own <span>, I don't think that's possible.
I did find one project on Google Code that looked promising: flexible-js-formatting. I haven't used it, but it looks pretty flexible and has unit tests using JsUnit. The developer also has a lot of posts (though old) about this topic.
Be sure to consider international users: lots of nations use a space as the separator and use the comma for separating the decimal from the integral part of the number.
Closed. This question needs to be more focused. It is not currently accepting answers.
Closed 1 year ago.
Locked. This question and its answers are locked because the question is off-topic but has historical significance. It is not currently accepting new answers or interactions.
I want to create a URL shortener service where you can write a long URL into an input field and the service shortens the URL to "http://www.example.org/abcdef".
Instead of "abcdef" there can be any other string with six characters containing a-z, A-Z and 0-9. That makes 56~57 billion possible strings.
My approach:
I have a database table with three columns:
id, integer, auto-increment
long, string, the long URL the user entered
short, string, the shortened URL (or just the six characters)
I would then insert the long URL into the table. Then I would select the auto-increment value for "id" and build a hash of it. This hash should then be inserted as "short". But what sort of hash should I build? Hash algorithms like MD5 create too long strings. I don't use these algorithms, I think. A self-built algorithm will work, too.
My idea:
For "http://www.google.de/" I get the auto-increment id 239472. Then I do the following steps:
short = '';
if divisible by 2, add "a"+the result to short
if divisible by 3, add "b"+the result to short
... until I have divisors for a-z and A-Z.
That could be repeated until the number isn't divisible any more. Do you think this is a good approach? Do you have a better idea?
Due to the ongoing interest in this topic, I've published an efficient solution to GitHub, with implementations for JavaScript, PHP, Python and Java. Add your solutions if you like :)
I would continue your "convert number to string" approach. However, you will realize that your proposed algorithm fails if your ID is a prime and greater than 52.
Theoretical background
You need a Bijective Function f. This is necessary so that you can find a inverse function g('abc') = 123 for your f(123) = 'abc' function. This means:
There must be no x1, x2 (with x1 ≠ x2) that will make f(x1) = f(x2),
and for every y you must be able to find an x so that f(x) = y.
How to convert the ID to a shortened URL
Think of an alphabet we want to use. In your case, that's [a-zA-Z0-9]. It contains 62 letters.
Take an auto-generated, unique numerical key (the auto-incremented id of a MySQL table for example).
For this example, I will use 125₁₀ (125 in base 10).
Now you have to convert 125₁₀ to X₆₂ (base 62).
125₁₀ = 2×62¹ + 1×62⁰ = [2,1]
This requires the use of integer division and modulo. A pseudo-code example:
digits = []
while num > 0
remainder = modulo(num, 62)
digits.push(remainder)
num = divide(num, 62)
digits = digits.reverse
Now map the indices 2 and 1 to your alphabet. This is how your mapping (with an array for example) could look like:
0 → a
1 → b
...
25 → z
...
52 → 0
61 → 9
With 2 → c and 1 → b, you will receive cb₆₂ as the shortened URL.
http://shor.ty/cb
How to resolve a shortened URL to the initial ID
The reverse is even easier. You just do a reverse lookup in your alphabet.
e9a₆₂ will be resolved to "4th, 61st, and 0th letter in the alphabet".
e9a₆₂ = [4,61,0] = 4×62² + 61×62¹ + 0×62⁰ = 19158₁₀
Now find your database-record with WHERE id = 19158 and do the redirect.
Example implementations (provided by commenters)
C++
Python
Ruby
Haskell
C#
CoffeeScript
Perl
Why would you want to use a hash?
You can just use a simple translation of your auto-increment value to an alphanumeric value. You can do that easily by using some base conversion. Say your character space (A-Z, a-z, 0-9, etc.) has 62 characters: convert the id to a base-62 number and use the characters as the digits.
/**
 * Bijective conversion between non-negative integer ids and short base-62
 * strings over the alphabet a-z, A-Z, 0-9.
 */
public class UrlShortener {

    private static final String ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    private static final int BASE = ALPHABET.length();

    /**
     * Encodes an id as a base-62 string, most significant digit first.
     *
     * @param num non-negative id
     * @return the encoded string; 0 encodes to "a" rather than to an empty string
     * @throws IllegalArgumentException if {@code num} is negative
     */
    public static String encode(int num) {
        if (num < 0) {
            throw new IllegalArgumentException("num must be non-negative: " + num);
        }
        if (num == 0) {
            // The digit loop below emits nothing for 0, which would yield "".
            return String.valueOf(ALPHABET.charAt(0));
        }
        StringBuilder sb = new StringBuilder();
        while (num > 0) {
            sb.append(ALPHABET.charAt(num % BASE));
            num /= BASE;
        }
        // Digits were produced least significant first.
        return sb.reverse().toString();
    }

    /**
     * Decodes a string produced by {@link #encode(int)} back to its id.
     *
     * @param str encoded string; the empty string decodes to 0
     * @return the decoded id
     * @throws IllegalArgumentException if {@code str} contains a character outside the alphabet
     */
    public static int decode(String str) {
        int num = 0;
        for (int i = 0; i < str.length(); i++) {
            int digit = ALPHABET.indexOf(str.charAt(i));
            if (digit < 0) {
                // indexOf returns -1 here; folding that in would corrupt the id silently.
                throw new IllegalArgumentException("Invalid character '" + str.charAt(i) + "' at index " + i);
            }
            num = num * BASE + digit;
        }
        return num;
    }
}
Not an answer to your question, but I wouldn't use case-sensitive shortened URLs. They are hard to remember, usually unreadable (many fonts render 1 and l, 0 and O and other characters very very similar that they are near impossible to tell the difference) and downright error prone. Try to use lower or upper case only.
Also, try to have a format where you mix the numbers and characters in a predefined form. There are studies that show that people tend to remember one form better than others (think phone numbers, where the numbers are grouped in a specific form). Try something like num-char-char-num-char-char. I know this will lower the combinations, especially if you don't have upper and lower case, but it would be more usable and therefore useful.
My approach: Take the Database ID, then Base36 Encode it. I would NOT use both Upper AND Lowercase letters, because that makes transmitting those URLs over the telephone a nightmare, but you could of course easily extend the function to be a base 62 en/decoder.
Here is my PHP 5 class.
<?php
/**
 * Bijective conversion between non-negative integers and short strings
 * over a 62-character dictionary (a-z, A-Z, 0-9).
 */
class Bijective
{
    /** @var array Dictionary characters; the constructor splits the string into an array. */
    public $dictionary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

    public function __construct()
    {
        $this->dictionary = str_split($this->dictionary);
    }

    /**
     * Encodes an integer, most significant digit first.
     *
     * @param int $i Non-negative integer.
     * @return string Encoded text; 0 encodes to the first dictionary character.
     */
    public function encode($i)
    {
        if ($i == 0)
            return $this->dictionary[0];

        // Must be an array: appending with [] to a string is a fatal error on PHP 7.1+.
        $result = array();
        $base = count($this->dictionary);

        while ($i > 0)
        {
            $result[] = $this->dictionary[($i % $base)];
            $i = floor($i / $base);
        }

        // Digits were collected least significant first.
        $result = array_reverse($result);

        return join("", $result);
    }

    /**
     * Decodes text produced by encode() back to its integer.
     *
     * @param string $input Encoded text.
     * @return int Decoded value.
     * @throws InvalidArgumentException If $input contains a character outside the dictionary.
     */
    public function decode($input)
    {
        $i = 0;
        $base = count($this->dictionary);

        $input = str_split($input);

        foreach($input as $char)
        {
            $pos = array_search($char, $this->dictionary, true);
            if ($pos === false)
            {
                // A failed search would otherwise be folded in silently as digit 0.
                throw new InvalidArgumentException("Invalid character in input: " . $char);
            }

            $i = $i * $base + $pos;
        }

        return $i;
    }
}
A Node.js and MongoDB solution
Since we know the format that MongoDB uses to create a new ObjectId with 12 bytes.
a 4-byte value representing the seconds since the Unix epoch,
a 3-byte machine identifier,
a 2-byte process id
a 3-byte counter (in your machine), starting with a random value.
Example (I choose a random sequence)
a1b2c3d4e5f6g7h8i9j1k2l3
a1b2c3d4 represents the seconds since the Unix epoch,
4e5f6g7 represents machine identifier,
h8i9 represents process id
j1k2l3 represents the counter, starting with a random value.
Since the counter is unique as long as all the data is stored from the same machine, we can use it without worrying about duplicates.
So the short URL will be the counter and here is a code snippet assuming that your server is running properly.
const mongoose = require('mongoose');
const Schema = mongoose.Schema;

// Create a schema
const shortUrl = new Schema({
  long_url: { type: String, required: true },
  short_url: { type: String, required: true, unique: true },
});
const ShortUrl = mongoose.model('ShortUrl', shortUrl);

// The user can request to get a short URL by providing a long URL using a form
app.post('/shorten', function(req, res) {
  // The submit form has an input with longURL as its name attribute.
  const longUrl = req.body["longURL"];
  if (!longUrl) {
    // Reject early instead of letting schema validation fail inside save().
    return res.status(400).send("longURL is required");
  }

  // Create a new ShortUrl document; its _id is generated on construction.
  const newUrl = new ShortUrl({
    long_url: longUrl,
    short_url: "",
  });

  // The last six hex characters of the ObjectId are its counter bytes.
  const shortCode = newUrl._id.toString().slice(-6);
  newUrl.short_url = shortCode;
  console.log(newUrl);

  newUrl.save(function(err) {
    if (err) {
      // Report the failure (e.g. a duplicate short_url) instead of
      // swallowing it and leaving the request without a response.
      console.error("failed to save the new URL", err);
      return res.status(500).send("could not shorten URL");
    }
    console.log("the new URL is added");
    res.send(shortCode);
  });
});
I keep incrementing an integer sequence per domain in the database and use Hashids to encode the integer into a URL path.
static hashids = Hashids(salt = "my app rocks", minSize = 6)
I ran a script to see how long it takes until it exhausts the character length. For six characters it can do 164,916,224 links and then goes up to seven characters. Bitly uses seven characters. Under five characters looks weird to me.
Hashids can decode the URL path back to a integer but a simpler solution is to use the entire short link sho.rt/ka8ds3 as a primary key.
Here is the full concept:
// Registers a domain: inserts a "domains" row for it with its "seq" counter
// set to 0. `table` is an opaque storage helper in this concept sketch.
function addDomain(domain) {
table("domains").insert("domain", domain, "seq", 0)
}
// Creates a short link for longURL under the given domain and returns it.
// The domain's "seq" counter is incremented, the new value is encoded with
// hashids, and the pair (short, long) is stored in "links".
// NOTE(review): presumably increment() returns the new counter value and is
// atomic in the backing store - confirm against the real storage API.
function addURL(domain, longURL) {
seq = table("domains").where("domain = ?", domain).increment("seq")
shortURL = domain + "/" + hashids.encode(seq)
table("links").insert("short", shortURL, "long", longURL)
return shortURL
}
// GET /:hashcode
// Resolves a short link: rebuilds the full short URL (host + "/" + hashcode),
// looks up its long URL in "links" and answers with a 301 redirect to it.
// NOTE(review): there is no branch for an unknown hashcode; what get() returns
// when no row matches is not visible here.
function handleRequest(req, res) {
shortURL = req.host + "/" + req.param("hashcode")
longURL = table("links").where("short = ?", shortURL).get("long")
res.redirect(301, longURL)
}
C# version:
/// <summary>
/// Bijective conversion between non-negative integer ids and short base-62
/// strings over the alphabet a-z, A-Z, 0-9.
/// </summary>
public class UrlShortener
{
    private static readonly String ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    // Derived from the alphabet so the two can never drift apart.
    private static readonly int BASE = ALPHABET.Length;

    /// <summary>Encodes an id as a base-62 string, most significant digit first.</summary>
    /// <param name="num">Non-negative id.</param>
    /// <returns>The encoded string; 0 encodes to "a" rather than to an empty string.</returns>
    /// <exception cref="ArgumentOutOfRangeException">num is negative.</exception>
    public static String encode(int num)
    {
        if (num < 0)
        {
            throw new ArgumentOutOfRangeException("num", "num must be non-negative");
        }
        if (num == 0)
        {
            // The digit loop below emits nothing for 0, which would yield "".
            return ALPHABET[0].ToString();
        }

        StringBuilder sb = new StringBuilder();
        while (num > 0)
        {
            // Digits come out least significant first, so prepend each one.
            sb.Insert(0, ALPHABET[num % BASE]);
            num /= BASE;
        }
        return sb.ToString();
    }

    /// <summary>Decodes a string produced by <see cref="encode"/> back to its id.</summary>
    /// <param name="str">Encoded string; the empty string decodes to 0.</param>
    /// <returns>The decoded id.</returns>
    /// <exception cref="ArgumentException">str contains a character outside the alphabet.</exception>
    public static int decode(String str)
    {
        int num = 0;
        for (int i = 0, len = str.Length; i < len; i++)
        {
            int digit = ALPHABET.IndexOf(str[i]);
            if (digit < 0)
            {
                // IndexOf returns -1 here; folding that in would corrupt the id silently.
                throw new ArgumentException("Invalid character '" + str[i] + "' at index " + i, "str");
            }
            num = num * BASE + digit;
        }
        return num;
    }
}
You could hash the entire URL, but if you just want to shorten the id, do as marcel suggested. I wrote this Python implementation:
https://gist.github.com/778542
Take a look at https://hashids.org/ it is open source and in many languages.
Their page outlines some of the pitfalls of other approaches.
If you don't want re-invent the wheel ... http://lilurl.sourceforge.net/
// simple approach
// Example id to shorten.
$original_id = 56789;
// Re-express the decimal id in base 36; base_convert() returns a string.
$shortened_id = base_convert($original_id, 10, 36);
// Convert the base-36 string back to its decimal representation.
$un_shortened_id = base_convert($shortened_id, 36, 10);
# Base-62 alphabet: a-z, A-Z, then 0-9. A character's index is its digit value.
alphabet = (
    [chr(c) for c in range(97, 123)]
    + [chr(c) for c in range(65, 91)]
    + [str(d) for d in range(10)]
)


def lookup(k, a=alphabet):
    '''Maps a digit value to its character (int input) or a character to its digit value (str input).'''
    if isinstance(k, int):
        return a[k]
    elif isinstance(k, str):
        return a.index(k)


def encode(i, a=alphabet):
    '''Takes an integer and returns it in the given base with mappings for upper/lower case letters and numbers 0-9.

    Raises TypeError if the input cannot be converted to an integer and
    ValueError if it is negative.
    '''
    try:
        i = int(i)
    except Exception:
        raise TypeError("Input must be an integer.")
    if i < 0:
        raise ValueError("Input must be non-negative.")

    base = len(a)
    if i == 0:
        return lookup(0, a)

    # Repeated divmod emits every digit, including interior zeros
    # (3844 -> "baa"), least significant first.
    digits = []
    while i > 0:
        i, remainder = divmod(i, base)
        digits.append(lookup(remainder, a))
    return ''.join(reversed(digits))


def decode(s, a=alphabet):
    '''Takes a base 62 string in our alphabet and returns it in base10.'''
    try:
        s = str(s)
    except Exception:
        raise TypeError("Input must be a string.")
    base = len(a)
    return sum(lookup(ch, a) * pow(base, p) for p, ch in enumerate(reversed(s)))
Here's my version for whomever needs it.
Why not just translate your id to a string? You just need a function that maps a digit between, say, 0 and 61 to a single letter (upper/lower case) or digit. Then apply this to create, say, 4-letter codes, and you've got 14.7 million URLs covered.
Here is a decent URL encoding function for PHP...
// From http://snipplr.com/view/22246/base62-encode--decode/
/**
 * Encodes a non-negative number in the given base, most significant digit first.
 *
 * @param int|float $val   Value to encode.
 * @param int       $base  Target base; must not exceed strlen($chars).
 * @param string    $chars Digit characters, indexed by digit value.
 * @return string Encoded text; 0 encodes to the first character of $chars.
 */
private function base_encode($val, $base=62, $chars='0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') {
    $str = '';
    do {
        // fmod() returns a float; cast it so the string offset below is an
        // integer and no "String offset cast occurred" diagnostic is raised.
        $i = (int) fmod($val, $base);
        $str = $chars[$i] . $str;
        $val = ($val - $i) / $base;
    } while($val > 0);
    return $str;
}
Don't know if anyone will find this useful - it is more of a 'hack n slash' method, yet is simple and works nicely if you want only specific chars.
// Restricted alphabet of 30 characters; its size becomes the number base.
$dictionary = "abcdfghjklmnpqrstvwxyz23456789";
$dictionary = str_split($dictionary);
// Encode
// Expects $id to be set by the caller. Digits are appended least significant
// first, so the resulting string is in reversed positional order.
$str_id = '';
$base = count($dictionary);
while($id > 0) {
$rem = $id % $base;
$id = ($id - $rem) / $base;
$str_id .= $dictionary[$rem];
}
// Decode
// Mirrors the encoder's order: the character at index k is weighted by $base^k.
$id_ar = str_split($str_id);
$id = 0;
for($i = count($id_ar); $i > 0; $i--) {
$id += array_search($id_ar[$i-1], $dictionary) * pow($base, $i - 1);
}
Did you omit O, 0, and i on purpose?
I just created a PHP class based on Ryan's solution.
<?php
$shorty = new App_Shorty();
echo 'ID: ' . 1000;
echo '<br/> Short link: ' . $shorty->encode(1000);
echo '<br/> Decoded Short Link: ' . $shorty->decode($shorty->encode(1000));
/**
* A nice shorting class based on Ryan Charmley's suggestion see the link on Stack Overflow below.
* #author Svetoslav Marinov (Slavi) | http://WebWeb.ca
* #see http://stackoverflow.com/questions/742013/how-to-code-a-url-shortener/10386945#10386945
*/
/**
 * Converts numeric IDs to short strings and back, using a reduced
 * lowercase dictionary.
 */
class App_Shorty {
    /**
     * Explicitly omitted: e, i, o, u, 1, 0 because they are confusing. Also use only lowercase ... as
     * dictating this over the phone might be tough.
     * @var string
     */
    private $dictionary = "abcdfghjklmnpqrstvwxyz23456789";
    private $dictionary_array = array();

    public function __construct() {
        $this->dictionary_array = str_split($this->dictionary);
    }

    /**
     * Gets ID and converts it into a string.
     * Digits are emitted least significant first; decode() expects that order.
     * @param int $id Non-negative ID.
     * @return string Encoded ID; 0 encodes to the first dictionary character.
     */
    public function encode($id) {
        if ($id == 0) {
            // The loop below emits nothing for 0, which would yield an empty link.
            return $this->dictionary_array[0];
        }

        $str_id = '';
        $base = count($this->dictionary_array);

        while ($id > 0) {
            $rem = $id % $base;
            $id = ($id - $rem) / $base;
            $str_id .= $this->dictionary_array[$rem];
        }

        return $str_id;
    }

    /**
     * Converts /abc into an integer ID
     * @param string $str_id Text produced by encode().
     * @return int $id
     * @throws InvalidArgumentException If $str_id contains a character outside the dictionary.
     */
    public function decode($str_id) {
        $id = 0;
        $id_ar = str_split($str_id);
        $base = count($this->dictionary_array);

        for ($i = count($id_ar); $i > 0; $i--) {
            $pos = array_search($id_ar[$i - 1], $this->dictionary_array, true);
            if ($pos === false) {
                // A failed search would otherwise be counted silently as digit 0.
                throw new InvalidArgumentException("Invalid character in short link: " . $id_ar[$i - 1]);
            }
            // The character at index k carries weight base^k (least significant first).
            $id += $pos * pow($base, $i - 1);
        }
        return $id;
    }
}
?>
/**
 * Converts non-negative integer ids to short base-62 strings and back,
 * using the character map a-z, A-Z, 0-9.
 */
public class TinyUrl {
    private final String characterMap = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    private final int charBase = characterMap.length();

    /**
     * Encodes an id, most significant digit first.
     *
     * @param num non-negative id
     * @return the encoded string; 0 encodes to "a" rather than to an empty string
     * @throws IllegalArgumentException if {@code num} is negative
     */
    public String covertToCharacter(int num){
        if (num < 0) {
            throw new IllegalArgumentException("num must be non-negative: " + num);
        }
        if (num == 0) {
            // The digit loop below emits nothing for 0, which would yield "".
            return String.valueOf(characterMap.charAt(0));
        }
        StringBuilder sb = new StringBuilder();
        while (num > 0){
            sb.append(characterMap.charAt(num % charBase));
            num /= charBase;
        }
        // Digits were produced least significant first.
        return sb.reverse().toString();
    }

    /**
     * Decodes a string produced by {@link #covertToCharacter(int)}.
     *
     * @param str encoded string; the empty string decodes to 0
     * @return the decoded id
     * @throws IllegalArgumentException if {@code str} contains a character outside the map
     */
    public int covertToInteger(String str){
        int num = 0;
        for (int i = 0; i < str.length(); i++) {
            int digit = characterMap.indexOf(str.charAt(i));
            if (digit < 0) {
                throw new IllegalArgumentException("Invalid character '" + str.charAt(i) + "' at index " + i);
            }
            // Integer Horner evaluation; avoids the floating-point Math.pow round trip.
            num = num * charBase + digit;
        }
        return num;
    }
}
/** Small demo driver: encodes a sample id, then decodes it again, printing both. */
class TinyUrlTest{

    public static void main(String[] args) {
        final int sampleId = 122312215;
        final TinyUrl converter = new TinyUrl();

        final String shortCode = converter.covertToCharacter(sampleId);
        System.out.println("Tiny url: " + shortCode);

        final int decodedId = converter.covertToInteger(shortCode);
        System.out.println("Id: " + decodedId);
    }
}
This is what I use:
# Generate a [0-9a-zA-Z] alphabet as a list of single characters.
# Built with comprehensions so it also works on Python 3, where map() returns
# an iterator that cannot be concatenated with "+".
ALPHABET = (
    [str(d) for d in range(0, 10)]
    + [chr(c) for c in range(97, 123)]
    + [chr(c) for c in range(65, 91)]
)


def encode_id(id_number, alphabet=ALPHABET):
    """Convert an integer to a string.

    Digits are emitted most significant first; 0 maps to the first alphabet
    character. Negative input yields an empty string.
    """
    if id_number == 0:
        return alphabet[0]

    alphabet_len = len(alphabet)  # Cache

    result = ''
    while id_number > 0:
        id_number, mod = divmod(id_number, alphabet_len)
        result = alphabet[mod] + result

    return result


def decode_id(id_string, alphabet=ALPHABET):
    """Convert a string to an integer.

    Raises ValueError (from ``alphabet.index``) if a character is not in the
    alphabet.
    """
    alphabet_len = len(alphabet)  # Cache
    return sum(
        alphabet.index(char) * pow(alphabet_len, power)
        for power, char in enumerate(reversed(id_string))
    )
It's very fast and can take long integers.
For a similar project, to get a new key, I make a wrapper function around a random string generator that calls the generator until I get a string that hasn't already been used in my hashtable. This method will slow down once your name space starts to get full, but as you have said, even with only 6 characters, you have plenty of namespace to work with.
I have a variant of the problem, in that I store web pages from many different authors and need to prevent discovery of pages by guesswork. So my short URLs add a couple of extra digits to the Base-62 string for the page number. These extra digits are generated from information in the page record itself and they ensure that only 1 in 3844 URLs are valid (assuming 2-digit Base-62). You can see an outline description at http://mgscan.com/MBWL.
Very good answer, I have created a Golang implementation of the bjf:
package bjf
import (
"math"
"strings"
"strconv"
)
const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
// Encode parses num as a base-10 unsigned integer and returns its
// representation over alphabet, most significant digit first. Input that
// fails to parse is treated as whatever value ParseUint returned.
func Encode(num string) string {
	value, _ := strconv.ParseUint(num, 10, 64)

	/* Special case */
	if value == 0 {
		return string(alphabet[0])
	}

	/* Map: collect digits, least significant first */
	base := uint64(len(alphabet))
	var reversed []byte
	for ; value > 0; value /= base {
		reversed = append(reversed, alphabet[value%base])
	}

	/* Reverse into most-significant-first order */
	encoded := make([]byte, len(reversed))
	for i, b := range reversed {
		encoded[len(reversed)-1-i] = b
	}

	return string(encoded)
}
// Decode converts a token over alphabet back to its integer value by
// weighting each byte's alphabet index with the matching power of the base.
func Decode(token string) int {
	base := float64(len(alphabet))
	total := 0

	for pos := 0; pos < len(token); pos++ {
		// The first byte is the most significant digit.
		exponent := float64(len(token)) - 1 - float64(pos)
		digit := strings.Index(alphabet, string(token[pos]))
		total += digit * int(math.Pow(base, exponent))
	}

	return total
}
Hosted at github: https://github.com/xor-gate/go-bjf
Implementation in Scala:
/**
 * Encodes non-negative numbers as strings over the given alphabet,
 * most significant digit first.
 *
 * @param alphabet digit characters, indexed by digit value
 */
class Encoder(alphabet: String) extends (Long => String) {

  val Base = alphabet.size

  /**
   * @param number non-negative value to encode
   * @return the encoded text; 0 encodes to the first alphabet character
   * @throws IllegalArgumentException if `number` is negative
   */
  override def apply(number: Long): String = {
    require(number >= 0, s"number must be non-negative: $number")

    if (number == 0) {
      // The digit loop below emits nothing for 0, which would yield "".
      alphabet.charAt(0).toString
    } else {
      val digits = new StringBuilder
      var current = number
      while (current > 0) {
        digits.append(alphabet.charAt((current % Base).toInt))
        current /= Base
      }
      // Digits were produced least significant first.
      digits.reverse.toString
    }
  }
}
/**
 * Decodes strings over the given alphabet back into numbers, reading the
 * first character as the most significant digit.
 *
 * @param alphabet digit characters, indexed by digit value
 */
class Decoder(alphabet: String) extends (String => Long) {

  val Base = alphabet.size

  /** Folds the characters left to right: accumulated value * Base + digit value. */
  override def apply(string: String) =
    string.foldLeft(0L) { (accumulated, symbol) =>
      accumulated * Base + alphabet.indexOf(symbol)
    }
}
Test example with Scala test:
import org.scalatest.{FlatSpec, Matchers}
/** Checks Encoder and Decoder against two known base-62 pairs (127 <-> "cd", 543513414 <-> "KWGPy"). */
class DecoderAndEncoderTest extends FlatSpec with Matchers {
// Digit order: a-z (0-25), A-Z (26-51), 0-9 (52-61).
val Alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
"A number with base 10" should "be correctly encoded into base 62 string" in {
val encoder = new Encoder(Alphabet)
// 127 = 2*62 + 3, i.e. digits [2, 3] -> "cd"
encoder(127) should be ("cd")
encoder(543513414) should be ("KWGPy")
}
"A base 62 string" should "be correctly decoded into a number with base 10" in {
val decoder = new Decoder(Alphabet)
// Same pairs as the encoder test, checked in the opposite direction.
decoder("cd") should be (127)
decoder("KWGPy") should be (543513414)
}
}
Function based in Xeoncross Class
/**
 * Two-way base-62 converter: numeric input is encoded to a short string,
 * any other input is decoded back to its number.
 *
 * @param int|float|string $input Number to encode, or text to decode.
 * @return string|int|float Encoded text for numeric input, decoded value otherwise.
 */
function shortly($input){
    $dictionary = str_split('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789');

    // Integer zero is special-cased: the encoding loop would emit nothing for it.
    if($input===0)
        return $dictionary[0];

    $base = count($dictionary);

    if(!is_numeric($input)){
        // Decode: fold the characters left to right, most significant first.
        $value = 0;
        foreach(str_split($input) as $symbol){
            $value = $value * $base + array_search($symbol, $dictionary);
        }
        return $value;
    }

    // Encode: prepend each digit so the most significant one ends up first.
    $encoded = "";
    while($input > 0){
        $encoded = $dictionary[($input % $base)] . $encoded;
        $input = floor($input / $base);
    }
    return $encoded;
}
Here is a Node.js implementation that works similarly to bit.ly: it generates a highly random seven-character string.
It uses Node.js crypto to generate a highly random 25 charset rather than randomly selecting seven characters.
var crypto = require("crypto");
exports.shortURL = new function () {
this.getShortURL = function () {
var sURL = '',
_rand = crypto.randomBytes(25).toString('hex'),
_base = _rand.length;
for (var i = 0; i < 7; i++)
sURL += _rand.charAt(Math.floor(Math.random() * _rand.length));
return sURL;
};
}
My Python 3 version
# Digit characters of the base-62 system; a character's index is its value.
base_list = list("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
base = len(base_list)


def encode(num: int) -> str:
    """Print and return the base-62 encoding of a non-negative integer.

    Digits are emitted most significant first; 0 encodes to "0".
    """
    result = []
    if num == 0:
        result.append(base_list[0])

    while num > 0:
        result.append(base_list[num % base])
        num //= base

    encoded = "".join(reversed(result))
    print(encoded)
    # Returned as well as printed so callers can actually use the value.
    return encoded


def decode(code: str) -> int:
    """Print and return the integer represented by a base-62 string.

    Raises ValueError (from ``list.index``) for characters outside base_list.
    """
    num = 0
    # Distinct loop variable: do not shadow the ``code`` parameter.
    for index, symbol in enumerate(reversed(code)):
        num += base_list.index(symbol) * base ** index

    print(num)
    return num


if __name__ == '__main__':
    encode(341413134141)
    decode("60FoItT")
For a quality Node.js / JavaScript solution, see the id-shortener module, which is thoroughly tested and has been used in production for months.
It provides an efficient id / URL shortener backed by pluggable storage defaulting to Redis, and you can even customize your short id character set and whether or not shortening is idempotent. This is an important distinction that not all URL shorteners take into account.
In relation to other answers here, this module implements the Marcel Jackwerth's excellent accepted answer above.
The core of the solution is provided by the following Redis Lua snippet:
-- 60-character slug alphabet; a digit's value is its 0-based position.
local alphabet = '0123456789ABCDEFGHJKLMNPQRSTUVWXYZ_abcdefghijkmnopqrstuvwxyz'

-- Take the next value of the counter stored at KEYS[1].
local id = redis.call('incr', KEYS[1])

-- Convert it to base 60, inserting each new digit at the front so the most
-- significant digit ends up first.
local pieces = {}
local rest = id
while (rest > 0) do
  local digit = (rest % 60)
  table.insert(pieces, 1, string.sub(alphabet, digit + 1, digit + 1))
  rest = (rest - digit) / 60
end
local slug = table.concat(pieces)

-- Store slug -> ARGV[1] in the hash at KEYS[2] and hand the slug back.
redis.call('hset', KEYS[2], slug, ARGV[1])

return slug
Why not just generate a random string and append it to the base URL? This is a very simplified version of doing this in C#.
static string chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";
static string baseUrl = "https://google.com/";
// One shared generator for all calls. Creating a new Random per call can
// yield identical sequences for calls made in quick succession, which the
// previous Thread.Sleep(10) only papered over.
private static readonly Random randomSource = new Random();
// Random is not thread-safe, so access to the shared instance is serialized.
private static readonly object randomSourceLock = new object();

/// <summary>Builds a random string of the given length from <c>chars</c>.</summary>
/// <param name="length">Number of characters to generate.</param>
/// <returns>A string of <paramref name="length"/> characters; uniqueness is not guaranteed.</returns>
private static string RandomString(int length)
{
    char[] s = new char[length];

    lock (randomSourceLock)
    {
        for (int x = 0; x < length; x++)
        {
            s[x] = chars[randomSource.Next(chars.Length)];
        }
    }

    return new String(s);
}
Then just append the random string to the baseUrl:
string tinyURL = baseUrl + RandomString(5);
Remember this is a very simplified version of doing this and it's possible the RandomString method could create duplicate strings. In production you would want to take in account for duplicate strings to ensure you will always have a unique URL. I have some code that takes account for duplicate strings by querying a database table I could share if anyone is interested.
This is my initial thoughts, and more thinking can be done, or some simulation can be made to see if it works well or any improvement is needed:
My answer is to remember the long URL in the database, and use the ID 0 to 9999999999999999 (or however large the number is needed).
But the ID 0 to 9999999999999999 can be an issue, because
it can be shorter if we use hexadecimal, or even base62 or base64. (base64 just like YouTube using A-Z a-z 0-9 _ and -)
if it increases from 0 to 9999999999999999 uniformly, then hackers can visit them in that order and know what URLs people are sending each other, so it can be a privacy issue
We can do this:
have one server allocate 0 to 999 to one server, Server A, so now Server A has 1000 of such IDs. So if there are 20 or 200 servers constantly wanting new IDs, it doesn't have to keep asking for each new ID, but rather asking once for 1000 IDs
for the ID 1, for example, reverse the bits. So 000...00000001 becomes 10000...000, so that when converted to base64, it will be non-uniformly increasing IDs each time.
use XOR to flip the bits for the final IDs. For example, XOR with 0xD5AA96...2373 (like a secret key), and the some bits will be flipped. (whenever the secret key has the 1 bit on, it will flip the bit of the ID). This will make the IDs even harder to guess and appear more random
Following this scheme, the single server that allocates the IDs can form the IDs, and so can the 20 or 200 servers requesting the allocation of IDs. The allocating server has to use a lock / semaphore to prevent two requesting servers from getting the same batch (or if it is accepting one connection at a time, this already solves the problem). So we don't want the line (queue) to be too long for waiting to get an allocation. So that's why allocating 1000 or 10000 at a time can solve the issue.