I am caching longitude and latitude (plus a bit more info) for possibly 1000s of locations, currently using a JavaScript hash, a {}. e.g.
var cache = {};
cache['Boston, MA'] = { id: someid, latlon: [lat, long] };
cache['Someotherplace, TX'] = { id: someotherid, latlon: [itslat, itslong] };
Every time a new location comes up I do a geocode and add the results to the cache. I don't think Boston's latitude will change anytime soon...
Will lookups be reasonably fast? I don't need blazing fast, I'm not running Amazon, but as this data grows to, say, 2000 locations, will it bog down? If so, what might be a good alternative?
Much of the performance of an entire JavaScript engine rests on object property lookups, so I'm quite sure significant effort has gone into making them fast in every mainstream JS engine.
But, as with all things performance-related, you should measure for yourself. It would only take a few minutes to build a test harness on jsPerf and either compare an object against an alternative or just confirm that plain JS object lookups are fast enough for you.
Here's a little test harness that shows more than 20,000 key lookups per ms on my computer. I would think that's fast enough for you.
function log(args) {
    var str = "";
    for (var i = 0; i < arguments.length; i++) {
        if (typeof arguments[i] === "object") {
            str += JSON.stringify(arguments[i]);
        } else {
            str += arguments[i];
        }
    }
    var div = document.createElement("div");
    div.innerHTML = str;
    document.body.appendChild(div);
}
function addCommas(str) {
    var amount = str + "";
    var parts = amount.split(".");
    amount = parts[0].split("").reverse();
    var output = "";
    for (var i = 0; i < amount.length; i++) {
        output = amount[i] + output;
        if ((i + 1) % 3 == 0 && (amount.length - 1) !== i) {
            output = ',' + output;
        }
    }
    if (parts.length > 1) {
        output += "." + parts[1];
    }
    return output;
}

function now() {
    return new Date().getTime();
}
// now fill the cache with a random set of keys
// the keys will vary in length between minKeyLen and maxKeyLen
function createRandomKeys(num, minKeyLen, maxKeyLen, obj) {
    function rand(min, max) {
        return Math.floor(Math.random() * (max - min)) + min;
    }
    var chars = "abcdefghijklmnopqrstuvwxyz";
    var len, key, numKeys = 0;
    while (numKeys < num) {
        // generate random key length
        len = rand(minKeyLen, maxKeyLen);
        key = "";
        // now select len random chars and combine into a string
        for (var j = 0; j < len; j++) {
            key += chars.charAt(rand(0, chars.length));
        }
        // put this key into our object, only count it if it's not already there
        if (!Object.prototype.hasOwnProperty.call(obj, key)) {
            ++numKeys;
            obj[key] = true;
        }
    }
}
var cache = {};
// put all the keys into our object
createRandomKeys(200000, 3, 15, cache);
// now get the list of keys, just so we know what to fetch in our test
var keys = Object.keys(cache);
// now time getting every key
var total = 0;
var start = now();
for (var i = 0; i < keys.length; i++) {
    if (cache[keys[i]]) {
        ++total;
    }
}
var end = now();
var elapsed = end - start;
log("Elapsed time = " + elapsed + "ms for " + addCommas(keys.length) + " key lookups - found " + addCommas(total));
log(elapsed/keys.length + "ms per lookup");
log(addCommas((keys.length / elapsed).toFixed(2)) + " key lookups per ms");
// show some sample keys
log("<hr>Sample keys (first 100 keys):<br>");
log(keys.slice(0, 100).join(", "));
Related
I am doing a modified version of collecting word co-occurrences, so I wrote my own JavaScript and am tracking the occurrences in three objects. However, once the objects get large (~8 million, 3 million, and 172,000 keys), a function that took 5 seconds per 100,000 sentences now takes minutes to process one sentence with 30 words (30 tokens). I am nowhere near my RAM cap (I have 12 more GB of RAM it could be using; the program is only using 2.2 GB). Using Node.js v17.3.1.
Why does my function take so long when the objects get bigger (even though the sentences remain the same length)? Should I be using a different data structure besides JavaScript's default object, or is there a way to improve the speed of accessing and setting these objects when they are so big?
Code:
let posCounts = {};
let negCounts = {};
// the number of times each word occurs
let wordCounts = {};
let actual_tok;
let tokens = // some function that gets tokens;

for (let k = 0; k < tokens.length; k++) {
    // count word occurrences
    if (tokens[k] in wordCounts) {
        wordCounts[tokens[k]] += 1;
    } else {
        wordCounts[tokens[k]] = 1;
    }
    for (let tok = k + 1; tok < tokens.length; tok++) {
        if (tok == k) {
            // avoid word-to-self cooccurrence
            // should no longer be possible
            continue;
        } else {
            // check which form of the cooccurrence exists already in either count
            actual_tok = (tokens[k] + "-" + tokens[tok]);
            if (actual_tok in posCounts || actual_tok in negCounts) {
                // no-op
            } else {
                actual_tok = (tokens[tok] + "-" + tokens[k]);
            }
            // condition set before this block of code
            if (condition) {
                if (actual_tok in posCounts) {
                    posCounts[actual_tok] += 1;
                } else {
                    posCounts[actual_tok] = 1;
                }
            } else {
                if (actual_tok in negCounts) {
                    negCounts[actual_tok] += 1;
                } else {
                    negCounts[actual_tok] = 1;
                }
            }
        }
    }
}
Update: I've tried increasing the heap size via node train_matrices.js --max-old-space-size=12288 and node train_matrices.js --max_old_space_size=12288 (underscores instead of dashes), and that didn't work either.
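(Worth noting: Node expects V8 flags to come before the script name, as in node --max-old-space-size=12288 train_matrices.js; anything placed after the script name is handed to the script as an argument rather than to the runtime.)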
Probably not the main issue in your code, but you can reduce the number of lookups by changing this structure from this:
if (tokens[k] in wordCounts) {
    wordCounts[tokens[k]] += 1;
} else {
    wordCounts[tokens[k]] = 1;
}
to this:
let token = tokens[k];
let cnt = wordCounts[token] || 0;
wordCounts[token] = cnt + 1;
And, as I said in a comment, I've read that a Map object with .get() and .set() is better suited when there are lots of dynamically created keys, whereas plain objects are better suited when you have many objects that all share the same keys (the JS compiler can sometimes make a C-like struct for them) - but that optimization isn't possible when you're regularly adding new keys.
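For illustration, here's a minimal sketch of the same counting idiom written with a Map (assuming tokens is the token array from the question):

const wordCounts = new Map();
for (const token of tokens) {
    // one .get() and one .set() per token, no separate existence check needed
    wordCounts.set(token, (wordCounts.get(token) || 0) + 1);
}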
The answer was to both use the increased-memory flag (node <YOUR_FILE_NAME>.js --max-old-space-size=12288) and change to using a Map instead of an object - thanks to @jfriend00 and @Norman Breau for the suggestions. That said, Maps have a max capacity of 2^24 items or 1 GB, so I ended up using a modified version of the BigMap from this Stack Overflow answer (modified to still limit the total number of items - without that I ended up running completely out of RAM).
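The BigMap itself isn't shown here, but the idea is roughly a wrapper that spreads keys across several Maps, starting a fresh one whenever the current one approaches the per-Map capacity. A minimal sketch of that idea (an illustration, not the exact implementation from the linked answer):

class BigMap {
    constructor(limit = Math.pow(2, 24) - 1) {
        this.limit = limit;      // max entries per underlying Map
        this.maps = [new Map()]; // list of underlying Maps
    }
    has(key) {
        return this.maps.some((m) => m.has(key));
    }
    get(key) {
        for (const m of this.maps) {
            if (m.has(key)) return m.get(key);
        }
        return undefined;
    }
    set(key, value) {
        // update in place if the key already lives in some Map
        for (const m of this.maps) {
            if (m.has(key)) {
                m.set(key, value);
                return this;
            }
        }
        let last = this.maps[this.maps.length - 1];
        if (last.size >= this.limit) { // current Map is full, spill into a new one
            last = new Map();
            this.maps.push(last);
        }
        last.set(key, value);
        return this;
    }
}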
Modified code (you can replace BigMap with Map if you want):
let posCounts = new BigMap();
let negCounts = new BigMap();
let wordCounts = new BigMap();
let actual_tok;

tokens = // some code

// mark every cooccurrence
for (let k = 0; k < tokens.length; k++) {
    // count word occurrences
    if (wordCounts.has(tokens[k])) {
        wordCounts.set(tokens[k], wordCounts.get(tokens[k]) + 1);
    } else {
        wordCounts.set(tokens[k], 1);
    }
    for (let tok = k + 1; tok < tokens.length; tok++) {
        if (tok == k) {
            // avoid word-to-self cooccurrence
            // should no longer be possible
            continue;
        } else {
            // check which form of the cooccurrence exists already in either count
            actual_tok = (tokens[k] + "-" + tokens[tok]);
            if (posCounts.has(actual_tok) || negCounts.has(actual_tok)) {
                // no-op
            } else {
                actual_tok = (tokens[tok] + "-" + tokens[k]);
            }
            if (condition) {
                if (posCounts.has(actual_tok)) {
                    posCounts.set(actual_tok, posCounts.get(actual_tok) + 1);
                } else {
                    posCounts.set(actual_tok, 1);
                }
            } else {
                if (negCounts.has(actual_tok)) {
                    negCounts.set(actual_tok, negCounts.get(actual_tok) + 1);
                } else {
                    negCounts.set(actual_tok, 1);
                }
            }
        }
    }
}
I'm trying to shorten a text if it is longer than a specified number of characters, cutting at sentence boundaries. So shorten_text_easy(text, 30) should return "We believe. In the future".
The first loop is working and displays the correct total length of the strings in the array, but the second is where the error is, and I can't seem to find the issue.
var text = "We believe. In the future. The future is here. This is a test. We are testing.";
function shorten_text_easy(text, number) {
var text_array = text.split('. ').join('.///').split('///'); // Splitting the text into an array
var text_array_length = text_array.length;
var total_text_array_length = 0; // Predefining value to zero; The total length of all strings in the array
for (var i = 0; i < text_array_length; i++) { // To run while i is short than the length of the array
total_text_array_length += text_array[i].length; //
}
total_text_array_length = total_text_array_length + text_array_length - 1; // To exclude spaces which are omitted in array
console.log(total_text_array_length); // To show in console the first value of total_array_length
for (total_text_array_length; total_text_array_length > number; text_array.pop(-1), text_array_length--) { // Trying to remove the last item of an array if the total length of all strings in the array is larger than the number
for (var i = 0; i < text_array_length; i++) {
total_text_array_length += text_array[i].length;
console.log(total_text_array_length);
}
total_text_array_length = total_text_array_length + text_array_length - 1;
}
return text_array // This should return the end text array, when the total length of all strings is lower than 'number'
};
console.log(
shorten_text_easy(text, 30)
);
Slicing on the last full stop that fits:
const shorten_text_easy = (text, number) => {
    let res = text.slice(0, number);
    const pos = res.lastIndexOf(".");
    return pos > 0 ? res.slice(0, pos + 1) : res;
}

var text = "We believe. In the future. The future is here. This is a test. We are testing.";

console.log(
    shorten_text_easy(text, 30)
);
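To see why this produces the requested output: text.slice(0, 30) is "We believe. In the future. The", the last full stop in that slice is at index 25, so slicing to pos + 1 returns "We believe. In the future.".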
I ended up doing this:
function shortenTextNextDot(text, length) {
    var firstDot = text.indexOf(".") + 1;
    if (length <= firstDot) {
        return text.slice(0, firstDot);
    } else {
        var textLength = text.slice(0, length);
        var restText = text.slice(length);
        var nextDot = restText.indexOf(".") + 1;
        var finalSentence = textLength + restText.slice(0, nextDot);
        return finalSentence;
    }
}

function shortenTextPrevDot(text, length) {
    var firstDot = text.indexOf(".") + 1;
    if (length <= firstDot) {
        return text.slice(0, firstDot);
    } else {
        var textLength = text.slice(0, length);
        var lastDot = textLength.lastIndexOf(".") + 1;
        var finalSentence = textLength.slice(0, lastDot);
        return finalSentence;
    }
}
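A quick usage example with the question's sample text (outputs worked out by hand, so treat them as illustrative):

var text = "We believe. In the future. The future is here. This is a test. We are testing.";

// cuts back to the last full stop within the limit
console.log(shortenTextPrevDot(text, 30)); // "We believe. In the future."

// runs forward to the next full stop past the limit
console.log(shortenTextNextDot(text, 30)); // "We believe. In the future. The future is here."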
I'm trying to build a collaborative doc editor and implement operational transformation. Imagine we have a string that is manipulated simultaneously by 2 users. They can only add characters, not remove them. We want to incorporate both of their changes.
The original string is: catspider
The first user does this: cat<span id>spider</span>
The second user does this: c<span id>atspi</span>der
I'm trying to write a function that will produce: c<span id>at<span id>spi</span>der</span>
The function I've written is close, but it produces c<span id>at<span i</span>d>spider</span> (codepen here).
String.prototype.splice = function(start, newSubStr) {
    return this.slice(0, start) + newSubStr + this.slice(start);
};

function merge(saved, working, requested) {
    if (!saved || !working || !requested) {
        return false;
    }
    var diffSavedWorking = createDiff(working, saved);
    var diffRequestedWorking = createDiff(working, requested);
    var newStr = working;
    for (var i = 0; i < Math.max(diffRequestedWorking.length, diffSavedWorking.length); i++) {
        // splice does an insert `before` -- we will assume that the saved document characters
        // should always appear before the requested document characters in this merge operation,
        // so we first insert requested and then saved, which means that the final string will
        // have the original characters first.
        if (diffRequestedWorking[i]) {
            newStr = newStr.splice(i, diffRequestedWorking[i]);
            // we need to shift the merge arrays by the number of inserted characters
            var length = diffRequestedWorking[i].length;
            insertNatX(diffSavedWorking, length, i + 1);
            insertNatX(diffRequestedWorking, length, i + 1);
        }
        if (diffSavedWorking[i]) {
            newStr = newStr.splice(i, diffSavedWorking[i]);
            // we need to shift the merge arrays by the number of inserted characters
            var length = diffSavedWorking[i].length;
            insertNatX(diffSavedWorking, length, i + 1);
            insertNatX(diffRequestedWorking, length, i + 1);
        }
    }
    return newStr;
}

// arr1 should be the shorter string.
// returns inserted characters at their insertion index.
function createDiff(arr1, arr2) {
    var diff = [];
    var j = 0;
    for (var i = 0; i < arr1.length; i++) {
        diff[i] = "";
        while (arr2[j] !== arr1[i]) {
            diff[i] += arr2[j];
            j++;
        }
        j++;
    }
    var remainder = arr2.substr(j);
    if (remainder) diff[i] = remainder;
    return diff;
}

function insertNatX(arr, length, pos) {
    for (var j = 0; j < length; j++) {
        arr.splice(pos, 0, "");
    }
}

var saved = 'cat<span id>spider</span>';
var working = 'catspider';
var requested = 'c<span id>atspi</span>der';

console.log(merge(saved, working, requested));
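For reference, tracing createDiff by hand on these inputs shows where the merge goes astray (hand-computed, so treat as illustrative):

// createDiff("catspider", "cat<span id>spider</span>") produces
// ["", "", "", "<", "", "an ", "", ">spid", "", "</span>"]
// The naive character matcher pairs the letters inside "<span id>" ('s', 'a', 'n', 'i', 'd')
// with matching characters of "catspider", so the tag gets smeared across several
// insertion points instead of staying whole.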
Would appreciate any thoughts on a better / simpler way to achieve this.
I need to organize an array of strings of random lengths into the fewest possible new strings with a max size. Is there a function in JavaScript, or an algorithm that can be translated to JavaScript, that will do this?
For example, the new strings might have max lengths of 1000 characters. The array might have strings of lengths 100, 48, 29, etc. I would want to combine those strings into as few new strings as possible.
edit: Sorry if this doesn't make sense, I tried my best.
There's no standard method in JavaScript, but plenty of theoretical work has been done on this (it's known as the bin packing problem).
http://en.wikipedia.org/wiki/Bin_packing_problem
There's some sample pseudocode in the link - it should be trivial to translate to JavaScript.
The algorithm shown isn't going to be optimal in every case. To find the optimal solution to your example you would need to iterate over every possibility, which might not be that bad depending on how many strings you have.
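For illustration, here is a minimal first-fit sketch along the lines of the pseudocode in that article (this assumes no single string exceeds maxLen; the names are my own):

function firstFit(strings, maxLen) {
    var bins = [];
    strings.forEach(function (s) {
        // put the string into the first bin that still has room for it
        for (var i = 0; i < bins.length; i++) {
            if (bins[i].length + s.length <= maxLen) {
                bins[i] += s;
                return;
            }
        }
        // no existing bin fits, so start a new one
        bins.push(s);
    });
    return bins;
}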
For my own entertainment, I wrote a simple bin packing algorithm. The approach: sort the input strings by length, create a new bin, put the first (longest remaining) string into the bin, then keep filling it with the longest remaining strings that fit until no more fit. Create a new bin and repeat. To test it, I allocate an array of strings of random lengths and use that as input. You can see the output visually here: http://jsfiddle.net/jfriend00/FqPKe/.
Running it a bunch of times, it gets a fill percentage between 91% and 98%, usually around 96%. Obviously the fill percentage is higher if there are more short strings to fill with.
Here's the code:
function generateRandomLengthStringArrays(num, maxLen) {
    var sourceChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";
    var sourceIndex = 0;
    var result = [];
    var len, temp, fill;

    function getNextSourceChar() {
        var ch = sourceChars.charAt(sourceIndex++);
        if (sourceIndex >= sourceChars.length) {
            sourceIndex = 0;
        }
        return ch;
    }

    for (var i = 0; i < num; i++) {
        len = Math.floor(Math.random() * maxLen);
        temp = "";
        fill = getNextSourceChar();
        // create string
        for (var j = 0; j < len; j++) {
            temp += fill;
        }
        result.push(temp);
    }
    return result;
}

function packIntoFewestBins(input, maxLen) {
    // we assume that none of the strings in input are longer than maxLen (they wouldn't fit in any bin)
    var result = [];
    // algorithm here is to put the longest string into a bin and
    // then find the next longest one that will fit into that bin with it;
    // repeat until nothing more fits in the bin, put the next longest into a new bin,
    // rinse, lather, repeat
    var bin, i, tryAgain, binLen;
    // sort the input strings by length (longest first)
    input.sort(function(a, b) { return b.length - a.length; });
    while (input.length > 0) {
        bin = "";             // create new bin
        bin += input.shift(); // put first one in (longest we have left) and remove it
        tryAgain = true;
        while (bin.length < maxLen && tryAgain) {
            tryAgain = false;    // if we don't find any more that fit, we'll stop after this iteration
            binLen = bin.length; // save locally for speed/convenience
            // find the longest string left that will fit in the bin
            for (i = 0; i < input.length; i++) {
                if (input[i].length + binLen <= maxLen) {
                    bin += input[i];
                    input.splice(i, 1); // remove this item from the array
                    tryAgain = true;    // try one more time
                    break;              // break out of for loop
                }
            }
        }
        result.push(bin);
    }
    return result;
}

var binLength = 60;
var numStrings = 100;
var list = generateRandomLengthStringArrays(numStrings, binLength);
var result = packIntoFewestBins(list, binLength);
var capacity = result.length * binLength;
var fillage = 0;
for (var i = 0; i < result.length; i++) {
    fillage += result[i].length;
    $("#result").append(result[i] + "<br>");
}
$("#summary").html(
    "Fill percentage: " + ((fillage / capacity) * 100).toFixed(1) + "%<br>" +
    "Number of Input Strings: " + numStrings + "<br>" +
    "Number of Output Bins: " + result.length + "<br>" +
    "Bin Length: " + binLength + "<br>"
);
I want to create an array of random/pseudo-random numbers using a seed. I want the very same array to be created when the same seed is used and I want to have little or no visible pattern in the array. I'm working in JavaScript.
This is the random function I'm using, which I'm quite happy with (sorry, I forgot who the original author is):
function random(seed) {
    if (!seed) seed = new Date().getTime();
    seed = (seed * 9301 + 49297) % 233280;
    return seed / 233280.0;
}
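Note that this function performs a single LCG step and keeps no state of its own. As a minimal sketch (not part of the original question), the usual way to get a deterministic sequence out of such a step is to feed each result back in as the next seed:

var seed = 12345; // any fixed seed reproduces the same sequence
var sequence = [];
for (var i = 0; i < 100; i++) {
    seed = (seed * 9301 + 49297) % 233280;
    sequence.push(seed / 233280);
}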
This is the array generation:
var superSeed = random();
var nRandom = 100;
var randomArray = new Array();
for (var i = 0 ; i < nRandom ; i++){
randomArray.push(random((superSeed*10)+ (i)));
}
Somehow the pattern seems to be quite similar, no matter how often I run it. This question seems to be similar, but since it's about matrices, I don't understand what's being said.
Thanks!
Having worked on similar things before, I think we can use a fairly simple series that takes two initial values and then generates many more from them.
var a1, b1;
var lastrand;

function InitSequence(v1, v2) {
    a1 = Math.pow(v1, 5) / Math.pow(v1, 3);
    b1 = Math.pow(v2, 8);
    lastrand = (a1 + b1) & 0x7fffffff;
}

function SequenceNext() {
    var alast = a1;
    var nextVal = (a1 + b1) & 0x7fffffff;
    b1 = alast;
    a1 = nextVal;
    return nextVal;
}
Then use it like this:
InitSequence(75, 21);
for (var i = 0; i < 99; i++) {
    v = SequenceNext();
}
I tested it like this:
var used = new Array();
InitSequence(75, 21); // any two values will do.

// fill 10k into array.
for (var i = 0; i < 9999; i++) {
    v = SequenceNext();
    if (undefined != used[v]) {
        used[v]++;
    } else {
        used[v] = 1;
    }
    //document.write(i + ": " + v + "<br>");
}

// see if there are any duplicates.
var tdup = 0;
for (xx in used) {
    ndup = used[xx];
    if (ndup > 1) {
        document.write("duplicated " + xx + " :" + ndup + "<br>");
        tdup += ndup;
    }
}
document.write("Total dups " + tdup + "<br>");
This is using the Fibonacci series which, in mathematical terms, is defined by the recurrence relation F(n) = F(n-1) + F(n-2). I'm starting with different values - (v1^5) / (v1^3) and v2^8 - otherwise the sequence would only ever be identical.
I like the "Super 7" PRNG. It is simple, fast (although the other answer with the fib. sequence is fast as well), and has the interesting property:
In the entire range -- albeit of a meager of 32k -- there are no duplicates using the "Super 7" PRNG.
Multiple 7's can be joined to increase the number of bits and/or provide multiple seeds. This non-duplication property can exposed or folded.
(The sequence of a PRNG is always the same given a starting seed: it's the distribution and cycle lengths that are interesting -- it is these properties that may make them ideal in different cases where "true randomness" isn't desired).
Happy coding.
Maybe you should try this:
function s_random() {
    s_random.m = 71402523; s_random.a = 409647; s_random.c = 1508892;
    s_random.seed = (s_random.seed * s_random.a + s_random.c) % s_random.m;
    return s_random.seed / s_random.m;
}

/*
  generate IV:
  s_random.seed = Math.floor((new Date).getTime() / 10000);
*/
s_random.seed = 130324232;
var CheckRandom = 4999999;
var PrintSamples = 100;
var used = new Array();

for (var i = 0; i < CheckRandom; i++) {
    v = (Math.ceil(Math.sqrt(s_random()) * 1000000000) * 8);
    if (undefined != used[v]) {
        used[v]++;
    } else {
        used[v] = 1;
    }
    if (i < PrintSamples) document.write(i + ": " + v + "<br>");
}

/* see if there are any duplicates. */
var tdup = 0;
for (xx in used) {
    ndup = used[xx];
    if (ndup > 1) {
        if (ndup < PrintSamples) document.write("duplicated " + xx + " :" + ndup + "<br>");
        tdup += ndup;
    }
}
document.write("Total generated " + CheckRandom + "<br>");
document.write("Total duplicates " + tdup + "<br>");
Just got 5 million seeded, repeatable random numbers and no duplicates. Tested several times on Mac OS X with Safari.
Cheers,
Karl