There is a list of IP addresses, original = ["1.2.3.4", "1.2.4.1", "1.3.45.67", "1.3.67.89"]. This list has thousands of IP addresses.
I want to create groups which matches the first two octets.
Group1 = ["1.2.3.4", "1.2.4.1"]
Group2 = ["1.3.45.67", "1.3.67.89"]
How can I achieve this in nodejs?
The following code works, but would like to know much simpler way to achieve the same.
// Sample input: IPv4 addresses that get bucketed by their first two octets.
let initArray = [
  '172.25.17.42', '172.21.17.5', '172.22.45.2', '172.22.66.5', '172.25.5.2',
  '172.21.8.27', '172.25.13.30', '172.21.14.13', '172.21.17.5',
  '172.05.17.42', '172.06.17.5', '172.07.45.2', '172.06.66.5', '172.05.5.2',
  '172.07.8.27', '172.05.13.30', '172.06.14.13', '172.07.17.5',
  '172.11.17.42', '172.12.17.5', '172.11.45.2', '172.12.66.5', '172.33.5.2',
  '172.40.8.27', '172.33.13.30', '172.40.14.13', '172.50.17.5',
];
// In-place sort with the default comparator, i.e. plain string order.
initArray.sort();

// "a.b" prefix of every address, in the sorted order.
var resArray = initArray.map((address) => {
  const [first, second] = address.split('.');
  return `${first}.${second}`;
});

// How many addresses share each prefix.
const counts = {};
for (const prefix of resArray) {
  counts[prefix] = (counts[prefix] || 0) + 1;
}

let sum = 0;
var newMap = new Map(Object.entries(counts));
var resMap = new Map();
console.log(newMap);
let keyarray = [];
let j = 0;

// Walk the prefixes in order, collecting them until the running address count
// exceeds 5; then record the collected prefixes (comma-joined) with that count
// and start over. Prefixes collected after the last flush stay in `keyarray`
// and are not written to `resMap`.
for (const [prefix, count] of newMap) {
  sum += count;
  keyarray.push(prefix);
  if (sum <= 5) {
    continue;
  }
  console.log('keyarray:' + keyarray);
  resMap.set(keyarray.toString(), sum);
  keyarray.length = 0;
  sum = 0;
}
console.log(resMap)
ipv4 IP addresses are just 32-bit numbers. While it may be convenient for a human to view an IP as a four-octet string, it's generally easier to write IP logic using numbers. For example, in your code you sort the IPs, but the IPs are sorted alphanumerically which probably is not what you want. One approach, then, is to map all of your IP addresses to numbers and then apply your grouping logic.
Take the IP addresses 172.25.17.42 and 172.25.5.2. These respectively map to 0xAC19112A and 0xAC190502 in hex. You can see right away that the first two octets match (0xAC19).
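Given the numeric representations of the IPs, simply shifting each number right by 16 bits gives you the desired grouping. Alternatively, you can mask off the bits other than the two octets you want. Keep in mind that JavaScript numbers are 64-bit floats, but the bitwise operators work on 32-bit integers: `>>` and `&` return signed results, so use `>>>` when you want the unsigned value.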
I would follow this recipe:
Convert the IPs to numbers.
Shift each number right by 16 bits to get the first two octets.
Reduce the list of IPs down to an octet-to-IP map, or whatever other format is desired (e.g. octet group to count).
Here's some code.
const ips = ['172.25.17.42', '172.21.17.5', '172.22.45.2', '172.22.66.5',
  '172.25.5.2', '172.21.8.27', '172.25.13.30', '172.21.14.13', '172.21.17.5',
  '172.05.17.42', '172.06.17.5', '172.07.45.2', '172.06.66.5', '172.05.5.2',
  '172.07.8.27', '172.05.13.30', '172.06.14.13', '172.07.17.5', '172.11.17.42',
  '172.12.17.5', '172.11.45.2', '172.12.66.5', '172.33.5.2', '172.40.8.27',
  '172.33.13.30', '172.40.14.13', '172.50.17.5'];

// Reduce the array of IPs down to a Map keyed by the first two octets (as a
// number, e.g. 172.25.x.x -> 0xAC19) whose values are the matching IP strings.
const octetMap = ips.reduce((map, ip) => {
  // `>>>` rather than `>>`: ip2int returns an unsigned value, but `>>` first
  // reinterprets it as a signed 32-bit integer, so every address at or above
  // 128.0.0.0 would produce a negative key.
  const firstTwo = ip2int(ip) >>> 16;
  // Alternative: keep the octets in place by masking instead of shifting,
  // i.e. (ip2int(ip) & 0xFFFF0000) >>> 0.
  // First time seeing firstTwo: initialise its bucket, then add the IP.
  if (!map.has(firstTwo)) {
    map.set(firstTwo, []);
  }
  map.get(firstTwo).push(ip);
  return map;
}, new Map());
console.log(octetMap);

/**
 * Converts a dotted IPv4 string to its unsigned 32-bit integer representation.
 * Each octet is parsed as base 10, so a leading zero ("05") is not octal.
 *
 * @param {string} ip - Address such as "172.25.17.42".
 * @returns {number} Unsigned 32-bit integer (the trailing `>>> 0` undoes the
 *   sign introduced by `<<`).
 */
function ip2int(ip) {
  return ip
    .split('.')
    .reduce((num, oct) => (num << 8) + parseInt(oct, 10), 0) >>> 0;
}
In a hybrid Android/Cordova game that I am creating I let users provide an identifier in the form of an Emoji + an alphanumeric - i.e. 0..9,A..Z,a..z - name. For example
🙋️Stackoverflow
Server-side, the user identifiers are stored with the Emoji and Name parts separated, with only the Name part required to be unique. From time to time the game displays a "league table" so the user can see how well they are performing compared to other players. For this purpose the server sends back a sequence of ten "high score" values, each consisting of Emoji, Name and Score.
This is then presented to the user in a table with three columns - one each for Emoji, Name and Score. And this is where I have hit a slight problem. Initially I had quite naively assumed that I could figure out the Emoji by simply looking at handle.codePointAt(0). When it dawned on me that an Emoji could in fact be a sequence of one or more 16 bit Unicode values I changed my code as follows
Part 1:Dissecting the user supplied "handle"
// Split a user handle into its leading emoji code points and the name.
// `i` must start at 0 and `len` must be handle.length (UTF-16 code units);
// otherwise the loop condition is false on entry and nothing is collected.
var i = 0,
    username,
    codepoints = [],
    handle = "🙋️StackOverflow",
    len = handle.length;
// Collect code points while they are above 255. The step of 2 assumes every
// emoji code point starts on an even code-unit index.
while ((i < len) && (255 < handle.codePointAt(i))) {
  codepoints.push(handle.codePointAt(i));
  i += 2;
}
username = handle.substring(codepoints.length + 1);
At this point I have the "dissected" handle with
codepoints = [128587, 8205, 65039];
username = 'Stackoverflow';
A note of explanation for the i += 2 and the use of handle.length above. This article suggests that
handle.codePointAt(n) will return the code point for the full surrogate pair if you hit the leading surrogate. In my case since the Emoji has to be first character the leading surrogates for the sequence of 16 bit Unicodes for the emoji are at 0,2,4....
From the same article I learnt that String.length in Javascript will return the number of 16 bit code units.
Part II - Re generating the Emojis for the "league table"
Suppose the league table data squirted back to the app by my servers has the entry {emoji: [128583, 8205, 65039],username:"Stackexchange",points:100} for the emoji character 🙇️. Now here is the bothersome thing. If I do
// Extract the code points of the original emoji string, stepping two UTF-16
// code units at a time while the code point read is above 255.
var origCP = [],
    i = 0,
    origEmoji = '🙇️',
    origLen = origEmoji.length;
while ((i < origLen) && (255 < origEmoji.codePointAt(i))) {
  origCP.push(origEmoji.codePointAt(i));
  i += 2;
}
I get
origLen = 5, origCP = [128583, 8205, 65039]
However, if I regenerate the emoji from the provided data
// Rebuild the emoji string from the code points supplied by the server and
// record its length in UTF-16 code units.
var reEmoji = String.fromCodePoint(128583, 8205, 65039),
    reEmojiLen = reEmoji.length;
I get
reEmoji = '🙇️'
reEmojiLen = 4;
So while reEmoji has the correct emoji its reported length has mysteriously shrunk down to 4 code units in place of the original 5.
If I then extract code points from the regenerated emoji
// Extract code points from the regenerated emoji with the same two-unit step
// used for the original string.
var reCP = [],
    i = 0;
while ((i < reEmojiLen) && (255 < reEmoji.codePointAt(i))) {
  reCP.push(reEmoji.codePointAt(i));
  i += 2;
}
which gives me
reCP = [128583, 8205];
Even curioser, origEmoji.codePointAt(3) gives the trailing surrogate pair value of 9794 while reEmoji.codePointAt(3) gives the value of the next full surrogate pair 65039.
I could at this point just say
Do I really care?
After all, I just want to show the league table emojis in a separate column so as long as I am getting the right emoji the niceties of what is happening under the hood do not matter. However, this might well be stocking up problems for the future.
Can anyone here shed any light on what is happening?
emojis are more complicated than just single chars, they come in "sequences", e.g. a zwj-sequence (combine multiple emojis into one image) or a presentation sequence (provide different variations of the same symbol) and some more, see tr51 for all the nasty details.
If you "dump" your string like this
// Dump every code point of the handle in hex. Spreading a string iterates by
// code point, so surrogate pairs are not split.
const str = "🙋️StackOverflow";
console.log(...[...str].map((x) => x.codePointAt(0).toString(16)));
you'll see that it's actually an (incorrectly formed) zwj-sequence wrapped in a presentation sequence.
So, to slice emojis accurately, you need to iterate the string as an array of codepoints (not units!) and extract plane 1 CPs (>0xffff) + ZWJ's + variation selectors. Example:
/**
 * Splits a string into its "emoji" characters and everything else.
 *
 * A code point goes into the emoji part when it is above U+0FFF, is the
 * zero-width joiner U+200D, or lies in U+FE00..U+FEFF. All other characters
 * go into the text part. Relative order within each part is preserved.
 *
 * @param {string} str - Input string, iterated by code point.
 * @returns {string[]} Two-element array: [emojiPart, textPart].
 */
function sliceEmoji(str) {
  let emojiPart = '';
  let textPart = '';
  for (const ch of str) {
    const cp = ch.codePointAt(0);
    const isJoiner = cp === 0x200d;
    const inSelectorRange = cp >= 0xfe00 && cp <= 0xfeff;
    if (cp > 0xfff || isJoiner || inSelectorRange) {
      emojiPart += ch;
    } else {
      textPart += ch;
    }
  }
  return [emojiPart, textPart];
}
/**
 * Lists the code points of a string as lower-case hexadecimal strings.
 *
 * @param {string} str - Input, iterated by code point.
 * @returns {string[]} One hex string per code point, in order.
 */
function hex(str) {
  const out = [];
  for (const ch of str) {
    out.push(ch.codePointAt(0).toString(16));
  }
  return out;
}
// Demo: split the handle, then show the code points of each part in hex.
const myStr = "🙋️StackOverflow";
console.log(sliceEmoji(myStr));
console.log(sliceEmoji(myStr).map(hex));
Given a uuid(v4) without dashes, how can I shorten it to a 15 or less than 15 characters string? I should also be able to go back to the original uuid from the 15 characters string.
I am trying to shorten it to send it in a flat file and the file format specifies this field to be a 15 characters alphanumeric field. Given that shortened uuid, I should be able to map it back to the original uuid.
Here is what I tried, but definitely not what I wanted.
/**
 * Builds a string of `length` characters, each picked at random (via
 * Math.random) from the dash-stripped UUID. The result is not reversible.
 *
 * @param uuidToShorten - UUID string, with or without dashes.
 * @param length - Number of characters to produce.
 * @returns The randomly sampled string.
 */
export function shortenUUID(uuidToShorten: string, length: number) {
  const source = uuidToShorten.replace(/-/g , '');
  const picked = [];
  while (picked.length < length) {
    const at = Math.floor(Math.random() * source.length);
    picked.push(source[at]);
  }
  return picked.join('');
}
As AuxTaco pointed out, if you actually mean "alphanumeric" as in it matches "/^[A-Za-z0-9]{0,15}/" (giving an alphabet of 26 + 26 + 10 = 62 characters), then it is really impossible. You can't fit 3 gallons of water in a gallon bucket without losing something. A UUID is 128 bits, so to represent it with an alphabet of 62 characters you'd need at least 22 characters (log base 62 of 2^128 ≈ 21.5, rounded up to 22).
If you are more flexible on your charset and just need it 15 unicode characters you can put in a text document, then my answer will help.
Note: First part of this answer, I thought it said length of 16, not 15. The simpler answer won't work. The more complex version below still will.
In order to do so, you'd to use some kind of two-way compression algorithm (similar to an algorithm that is used for zipping files).
However, the problem with trying to compress something like a UUID is you'd probably have lots of collisions.
A UUID v4 is 32 characters long (without dashes). It's hexadecimal, so it's character space is 16 characters (0123456789ABCDEF)
That gives you a number of possible combinations of 16^32, approximately 3.4028237e+38 or 340,282,370,000,000,000,000,000,000,000,000,000,000. To make it recoverable after compression, you'd have to make sure you don't have any collisions (i.e., no 2 UUIDs turn into the same value). That's a lot of possible values (which is exactly why we use that many for UUID, the chance of 2 random UUIDs is only 1 out of that number big number).
To crunch that many possibilities to 16 characters, you'd have to have at least as many possible values. With 16 characters, you'd need a character set of 256 characters (the 16th root of that big number: 256^16 == 16^32). That's assuming you have an algorithm that'd never create a collision.
One way to ensure you never have collisions would be to convert it from a base-16 number to a base-256 number. That would give you a 1-to-1 relation, ensuring no collisions and making it perfectly reversible. Normally, switching bases is easy in JavaScript: parseInt(someStr, radix).toString(otherRadix) (e.g., parseInt('00FF', 16).toString(20)). Unfortunately, JavaScript only does up to a radix of 36, so we'll have to do the conversion ourselves.
The catch with such a large base is representing it. You could arbitrarily pick 256 different characters, throw them in a string, and use that for a manual conversion. However, I don't think there are 256 different symbols on a standard US keyboard, even if you treat upper and lowercase as different glyphs.
A simpler solution would be to just use arbitrary character codes from 0 to 255 with String.fromCharCode().
Another small catch is if we tried to treat that all as one big number, we'd have issues because it's a really big number and JavaScript can't properly represent it exactly.
Instead of that, since we already have hexadecimal, we can just split it into pairs of hex digits (each pair is one byte, 0-255), convert those, then spit them out. 32 hexadecimal digits = 16 pairs, so that'll (coincidentally) be perfect. (If you had to solve this for an arbitrary size, you'd have to do some extra math and converting to split the number into pieces, convert, then reassemble.)
// Turn each pair of hex digits (one byte) into the character with that code.
const uuid = '1234567890ABCDEF1234567890ABCDEF';
const letters = [];
for (const pair of uuid.match(/.{2}/g)) {
  letters.push(String.fromCharCode(parseInt(pair, 16)));
}
const str = letters.join('');
console.log(str);
Note that there are some random characters in there, because not every char code maps to a "normal" symbol. If what you are sending to can't handle them, you'll instead need to go with the array approach: find 256 characters it can handle, make an array of them, and instead of String.fromCharCode(num), use charset[num].
To convert it back, you would just do the reverse: get the char code, convert to hex, add them together:
const uuid = '1234567890ABCDEF1234567890ABCDEF';

// Hex string -> one character per pair of hex digits (char code 0-255).
const compress = (uuid) => {
  let packed = '';
  for (const pair of uuid.match(/.{2}/g)) {
    packed += String.fromCharCode(parseInt(pair, 16));
  }
  return packed;
};

// Inverse of compress: each UTF-16 code unit becomes two lower-case hex
// digits (zero-padded; only the last two digits of the code are kept).
const expand = (str) => {
  let hexDigits = '';
  for (let at = 0; at < str.length; at += 1) {
    hexDigits += ('0' + str.charCodeAt(at).toString(16)).slice(-2);
  }
  return hexDigits;
};

const str = compress(uuid);
const original = expand(str);
console.log(str, original, original.toUpperCase() === uuid.toUpperCase());
For fun, here is how you could do it for any arbitrary input base and output base.
This code is a bit messy because it is really expanded to make it more self-explanatory, but it basically does what I described above.
Since JavaScript doesn't have an infinite level of precision, if you end up converting a really big number, (one that looks like 2.00000000e+10), every number not shown after that e was essentially chopped off and replaced with a zero. To account for that, you'll have to break it up in some way.
In the code below, there is a "simple" way which doesn't account for this, so it only works for smaller strings, and then a proper way which breaks it up. I chose a simple, yet somewhat inefficient, approach of just breaking up the string based on how many digits it gets turned into. This isn't the best way (since math doesn't really work like that), but it does the trick (at the cost of needing a larger charset).
You could employ a smarter splitting mechanism if you really needed to keep your charset size to a minimum.
const smallStr = '1234';
const str = '1234567890ABCDEF1234567890ABCDEF';
const hexCharset = '0123456789ABCDEF'; // could also be an array
const compressedLength = 16;
const maxDigits = 16; // this may be a bit browser specific. You can make it smaller to be safer.

/** Logarithm of `num` in base `n`. */
const logBaseN = (num, n) => Math.log(num) / Math.log(n);

/** The `n`-th root of `num`. */
const nthRoot = (num, n) => Math.pow(num, 1 / n);

/**
 * Number of decimal digits in the integer part of `num`; 0 for values below 1
 * or non-finite values.
 *
 * Uses Math.log10 and Math.floor rather than Math.log(num) * Math.LOG10E,
 * whose rounding error under-counts at powers of ten (1000 came out as 3).
 */
const digitsInNumber = (num) =>
  (Number.isFinite(num) && num >= 1 ? Math.floor(Math.log10(num)) + 1 : 0);

/**
 * Cuts `str` into `numPartitions` consecutive pieces of
 * ceil(str.length / numPartitions) characters each. Trailing pieces may be
 * shorter or empty.
 *
 * @param {string} str - String to cut up.
 * @param {number} numPartitions - Number of pieces to return.
 * @returns {string[]} The pieces, in order.
 */
const partitionString = (str, numPartitions) => {
  const partsSize = Math.ceil(str.length / numPartitions);
  const partitions = [];
  for (let i = 0; i < numPartitions; i++) {
    const start = i * partsSize;
    partitions.push(str.slice(start, start + partsSize));
  }
  return partitions;
};

console.log('logBaseN test:', logBaseN(256, 16) === 2);
console.log('nthRoot test:', nthRoot(256, 2) === 16);
console.log('partitionString test:', partitionString('ABCDEFG', 3));
/**
 * Interprets `str` as a number written with the digits in `charset`
 * (charset.length is the radix; a symbol's index is its digit value).
 *
 * Terms are added from the least significant digit upwards. A character
 * missing from `charset` contributes a digit value of -1.
 *
 * @param {string} str - The digits, most significant first.
 * @param {string|string[]} charset - Ordered digit symbols.
 * @returns {number} The value as a Number (inexact for very large inputs).
 */
const toDecimalFromCharset = (str, charset) => {
  let total = 0;
  for (let place = 0; place < str.length; place += 1) {
    const digit = charset.indexOf(str[str.length - 1 - place]);
    total += digit * Math.pow(charset.length, place);
  }
  return total;
};
/**
 * Writes a non-negative integer using the digits in `charset`
 * (charset.length is the radix; the symbol at index d is digit d).
 *
 * Digits come from repeated division instead of a digit count derived from a
 * floating-point logarithm. The logarithm could overshoot by one (124 in
 * base 5 came out as "0444") and could produce out-of-range digit indexes.
 *
 * @param {number} dec - Value to convert; any fractional part is dropped.
 * @param {string|string[]} charset - Ordered digit symbols, at least two.
 * @returns {string} Digits, most significant first. Zero, negative, NaN and
 *   infinite inputs give the empty string.
 * @throws {RangeError} If `charset` has fewer than two symbols.
 */
const fromDecimalToCharset = (dec, charset) => {
  const radix = charset.length;
  if (!(radix >= 2)) {
    throw new RangeError('charset must contain at least two symbols');
  }
  let remaining = Math.floor(dec);
  if (!Number.isFinite(remaining) || remaining <= 0) {
    return '';
  }
  let str = '';
  while (remaining > 0) {
    str = charset[remaining % radix] + str;
    remaining = Math.floor(remaining / radix);
  }
  return str;
};
// Quick self-checks for the two converters: binary and hex strings to a
// number, and a number back to hex. Each comparison is expected to log `true`.
console.log('toDecimalFromCharset test 1:', toDecimalFromCharset('01000101', '01') === 69);
console.log('toDecimalFromCharset test 2:', toDecimalFromCharset('FF', hexCharset) === 255);
console.log('fromDecimalToCharset test:', fromDecimalToCharset(255, hexCharset) === 'FF');
/**
 * Builds a charset of `length` symbols: the characters with codes 0, 1, 2, ...
 *
 * `length` is rounded up because the radix helpers compute it with
 * floating-point roots and may pass a non-integer, which `new Array(length)`
 * rejects with a RangeError. Rounding up keeps the charset at least as large
 * as requested.
 *
 * @param {number} length - Number of symbols wanted.
 * @returns {string[]} Single-character strings, index i holding char code i.
 */
const arbitraryCharset = (length) =>
  Array.from({ length: Math.ceil(length) }, (_, code) => String.fromCharCode(code));
// Radix needed to write every possible original string in `compressedLength`
// symbols. Math.pow(...) is the number of possible values of the original.
const simpleDetermineRadix = (strLength, originalCharsetSize, compressedLength) => {
  const possibleValues = Math.pow(originalCharsetSize, strLength);
  return nthRoot(possibleValues, compressedLength);
};

// The "simple" variants go through a single Number, so they only work while
// the decimal value is small enough that lack of precision doesn't mess
// things up.
// compressedCharset.length must be >= compressedLength
const simpleCompress = (str, originalCharset, compressedCharset, compressedLength) => {
  const asDecimal = toDecimalFromCharset(str, originalCharset);
  return fromDecimalToCharset(asDecimal, compressedCharset);
};

// Reverse of simpleCompress: compressed digits -> Number -> original digits.
const simpleExpand = (compressedStr, originalCharset, compressedCharset) => {
  const asDecimal = toDecimalFromCharset(compressedStr, compressedCharset);
  return fromDecimalToCharset(asDecimal, originalCharset);
};
// Demo of the "simple" single-number path: work out the radix for the full
// 32-digit hex string, build a charset of that size, then compress and expand.
const simpleNeededRadix = simpleDetermineRadix(str.length, hexCharset.length, compressedLength);
const simpleCompressedCharset = arbitraryCharset(simpleNeededRadix);
const simpleCompressed = simpleCompress(str, hexCharset, simpleCompressedCharset, compressedLength);
const simpleExpanded = simpleExpand(simpleCompressed, hexCharset, simpleCompressedCharset);
// Notice, it gets a little confused because of a lack of precision in the really big number.
console.log('Original string:', str, toDecimalFromCharset(str, hexCharset));
console.log('Simple Compressed:', simpleCompressed, toDecimalFromCharset(simpleCompressed, simpleCompressedCharset));
console.log('Simple Expanded:', simpleExpanded, toDecimalFromCharset(simpleExpanded, hexCharset));
console.log('Simple test:', simpleExpanded === str);
// Notice it works fine for smaller strings and/or charsets
// (same charset as above, applied to the 4-digit smallStr).
const smallCompressed = simpleCompress(smallStr, hexCharset, simpleCompressedCharset, compressedLength);
const smallExpanded = simpleExpand(smallCompressed, hexCharset, simpleCompressedCharset);
console.log('Small string:', smallStr, toDecimalFromCharset(smallStr, hexCharset));
console.log('Small simple compressed:', smallCompressed, toDecimalFromCharset(smallCompressed, simpleCompressedCharset));
console.log('Small expaned:', smallExpanded, toDecimalFromCharset(smallExpanded, hexCharset));
console.log('Small test:', smallExpanded === smallStr);
// These helpers break the input into smaller pieces so that each piece's
// decimal value has at most roughly maxDigits digits. Where precision is lost
// is a bit engine specific, so a smaller maxDigits may make it safer.
//
// note: the charset may need to be a little bit bigger than what
// determineRadix decides, since we're breaking the string up.
// also note: the string is partitioned based on the number of digits of its
// decimal value. Each individual part's decimal length ends up smaller than
// that, which is okay. If the charset is just barely big enough because of
// other constraints, this needs to be made more exact.
//
// NOTE(review): compress() joins the parts with the LAST charset symbol, which
// fromDecimalToCharset may also emit as a digit; confirm this cannot collide.
// NOTE(review): partitionedPartSize works out to floor((length - 1) / parts),
// so parts plus separators can exceed the target by one (16 with 3 parts
// gives 17); confirm the intended bound.

// Splits `str` into as many pieces as its decimal digit count requires.
const partitionStringForCompress = (str, originalCharset) => {
  const numDigits = digitsInNumber(toDecimalFromCharset(str, originalCharset));
  const numParts = Math.ceil(numDigits / maxDigits);
  return partitionString(str, numParts);
};

// Number of compressed symbols available per piece for a given target length.
// `targetLength` falls back to the module-level compressedLength when omitted,
// which is what every call did before the parameter existed.
const partitionedPartSize = (str, originalCharset, targetLength = compressedLength) => {
  const parts = partitionStringForCompress(str, originalCharset);
  return Math.floor((targetLength - parts.length - 1) / parts.length) + 1;
};

// Radix (charset size) needed so each piece fits in its share of the target
// length. The third argument is now honoured instead of being ignored.
const determineRadix = (str, originalCharset, compressedLength) => {
  const parts = partitionStringForCompress(str, originalCharset);
  const partSize = partitionedPartSize(str, originalCharset, compressedLength);
  return Math.ceil(nthRoot(Math.pow(originalCharset.length, parts[0].length), partSize));
};

// Compresses each piece separately and joins them with the last charset symbol.
const compress = (str, originalCharset, compressedCharset, compressedLength) => {
  const parts = partitionStringForCompress(str, originalCharset);
  const partSize = partitionedPartSize(str, originalCharset, compressedLength);
  const separator = compressedCharset[compressedCharset.length - 1];
  return parts
    .map((part) => simpleCompress(part, originalCharset, compressedCharset, partSize))
    .join(separator);
};

// Reverse of compress: split on the separator, expand each piece, concatenate.
const expand = (compressedStr, originalCharset, compressedCharset) =>
  compressedStr.split(compressedCharset[compressedCharset.length - 1])
    .map((part) => simpleExpand(part, originalCharset, compressedCharset))
    .join('');
// Demo of the partitioned path: derive the radix, build a charset of that
// size, compress the 32-digit hex string, expand it again and compare.
const neededRadix = determineRadix(str, hexCharset, compressedLength);
const compressedCharset = arbitraryCharset(neededRadix);
const compressed = compress(str, hexCharset, compressedCharset, compressedLength);
const expanded = expand(compressed, hexCharset, compressedCharset);
console.log('String:', str, toDecimalFromCharset(str, hexCharset));
console.log('Neded radix size:', neededRadix); // bigger than normal because of how we're breaking it up... this could be improved if needed
console.log('Compressed:', compressed);
console.log('Expanded:', expanded);
console.log('Final test:', expanded === str);
To use the above specifically to answer the question, you would use:
const hexCharset = '0123456789ABCDEF';
// Size the charset for the same 15-character target used by compress below.
const compressedCharset = arbitraryCharset(determineRadix(uuid, hexCharset, 15));
// UUID to 15 characters
const compressed = compress(uuid, hexCharset, compressedCharset, 15);
// 15 characters to UUID
// (calls expand(); the original called the constant `expanded` being declared)
const expanded = expand(compressed, hexCharset, compressedCharset);
If there are problematic characters in the arbitrary, you'll have to do something to either filter those out, or hard-code a specific one. Just make sure all of the functions are deterministic (i.e., same result every time).
I have a situation where I have to search a grid if it contains a certain substring. I have a search bar where the user can type the string. The problem is that the grid contains mix of Japanese text and Unicode characters,
for example : MAGシンチ注 333MBq .
How can I compare for content equality the letter 'M' that I type from the keyboard and the letter "M" as in the example above? I am trying to do this using plain Javascript and not Jquery or other library. And I have to do this in Internet Explorer.
Thanks,
As mentioned in an insightful comment from #Rhymoid on the question, modern JavaScript (ES2015) includes support for normalization of Unicode. One mode of normalization is to map "compatible" letterforms from higher code pages down to their most basic representatives in lower code pages (to summarize, it's kind-of involved). The .normalize("NFKD") method will map the "M" from the Japanese code page down to the Latin equivalent. Thus
"MAGシンチ注 333MBq".normalize("NFKD")
will give
"MAGシンチ注 333MBq"
As of late 2016, .normalize() isn't supported by IE.
At a lower level, ES2015 also has .codePointAt() (mentioned in another good answer), which is like the older .charCodeAt() described below but which also understands UTF-16 pairs. However, .codePointAt() is (again, late 2016) not supported by Safari.
below is original answer for older browsers
You can use the .charCodeAt() method to examine the UTF-16 character codes in the string.
"M".charCodeAt(0)
is 77, while
"Ｍ".charCodeAt(0)
is 65325.
This approach is complicated by the fact that for some Unicode characters, the UTF-16 representation involves two separate character positions in the JavaScript string. The language does not provide native support for dealing with that, so you have to do it yourself. A character code between 55296 and 57343 (D800 and DFFF hex) is one half of a two-character surrogate pair: D800–DBFF is the leading half and DC00–DFFF the trailing half. The UTF-16 Wikipedia page has more information, and there are various other sources.
Building a dictionary should work in any browser, find the charCodes at the start of ranges you want to transform then move the characters in your favourite way, for example
/**
 * Replaces full-width digits and Latin letters with their ASCII equivalents
 * by subtracting 65248 from the character code:
 *   U+FF10..U+FF19 (65296..65305) -> "0".."9"
 *   U+FF21..U+FF3A (65313..65338) -> "A".."Z"
 *   U+FF41..U+FF5A (65345..65370) -> "a".."z"
 * All other characters are left untouched.
 *
 * The lower-case range starts at 65345. Starting it at 65313, the upper-case
 * start, overwrote the upper-case mappings and left full-width a-z unhandled.
 *
 * Written in ES5 syntax so it also runs in old browsers.
 *
 * @param {string} str - Text that may contain full-width characters.
 * @returns {string} Text with those characters replaced.
 */
function shift65248(str) {
    return str.replace(
        /[\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A]/g,
        function (m) {
            return String.fromCharCode(m.charCodeAt(0) - 65248);
        }
    );
}
shift65248('MAGシンチ注 333MBq'); // "MAGシンチ注 333MBq"
I tried just moving the whole range 65248..65375 onto 0..127 but it conflicted with the other characters :(
I am assuming that you have access to those strings, by reading the DOM for some other way.
If so, codePointAt will be your friend.
// Show the code point of a character, then what codePointAt returns for an
// index past the end of the string (undefined).
console.log("Test of values");
console.log("M".codePointAt(0));
console.log("M".codePointAt(0));
console.log("Determining end of string");
console.log("M".codePointAt(10));
var str = "MAGシンチ注 333MBq .";
var idx = 0;
// Declared explicitly: assigning to an undeclared name is an error in strict
// mode and in ES modules.
var point;
// Log the value at every index until codePointAt signals the end of the
// string by returning undefined (that final undefined is logged too).
do {
    point = str.codePointAt(idx);
    idx++;
    console.log(point);
} while (point !== undefined);
You could try building your own dictionary and compare function as follows:
// Lower-case typed character -> alternative forms that should also match.
// NOTE(review): presumably these lists were meant to hold the full-width
// variants (e.g. U+FF2D for "m"); as written they hold plain ASCII. Verify.
var compareDB = {
    'm' : ['M'],
    'b' : ['B']
};

/**
 * Case-insensitively checks whether `searchText` contains `inputChar` or one
 * of the alternative forms listed for it in compareDB.
 *
 * The direct lookup uses the lower-cased input, since the text it is compared
 * with is lower-cased too; an upper-case input could never match directly
 * before. The working variables are now locals, not implicit globals.
 *
 * @param {string} inputChar - Character (or substring) typed by the user.
 * @param {string} searchText - Text to search in.
 * @returns {boolean} true when a match is found.
 */
function doCompare(inputChar, searchText){
    var inputCharLower = inputChar.toLowerCase();
    var searchTextLower = searchText.toLowerCase();
    if (searchTextLower.indexOf(inputCharLower) > -1) {
        return true;
    }
    if (!Object.prototype.hasOwnProperty.call(compareDB, inputCharLower)) {
        return false;
    }
    var variants = compareDB[inputCharLower];
    for (var i = 0; i < variants.length; i++) {
        if (searchTextLower.indexOf(variants[i].toLowerCase()) > -1) {
            return true;
        }
    }
    return false;
}
console.log("searching with m");
console.log(doCompare('m', "searching text with M"));
console.log("searching with m");
console.log(doCompare('m', "searching text with B"));
console.log("searching with B");
console.log(doCompare('B', "searching text with B"));