I am doing a modified version of collecting word co-occurrences, so I wrote my own javascript, and I am tracking the occurrences in three objects. However, once the objects get large (~8 million, 3 million, and 172000) a function that took 5 seconds per 100000 sentences now takes minutes to do one sentence with 30 words (30 tokens). I am nowhere near my RAM cap (I have 12 more GBs of RAM it could be using, and the program is only using 2.2GB). Using Node.js v17.3.1.
Why does my function take so long when the objects get bigger (even though the sentences remain the same length)? Should I be using a different data structure instead of JavaScript's default object, or is there a way to improve the speed of reading and writing these objects when they are so big?
Code:
// Counts, for one list of tokens, how often each token occurs and how often
// each unordered pair of token positions co-occurs. Pair counts are stored in
// posCounts or negCounts depending on `condition`, which is set outside this excerpt.
let posCounts = {};
let negCounts = {};
// the number of times each word occurs
let wordCounts = {};
let tokens = // some function that gets tokens;
for (let k = 0; k < tokens.length; k++) {
// count word occurrences
if (tokens[k] in wordCounts) {
wordCounts[tokens[k]] += 1;
} else {
wordCounts[tokens[k]] = 1;
}
// tok starts at k + 1, so every pair of positions is visited exactly once
for(let tok = k + 1; tok < tokens.length; tok++) {
if (tok == k) {
// avoid word-to-self co-occurrence
// cannot happen: tok is always greater than k in this loop
continue;
} else {
// check which form of the co-occurrence exists already in either count:
// keep "k-tok" when it is already a key, otherwise use "tok-k"
// NOTE(review): actual_tok is not declared in this excerpt; unless it is
// declared elsewhere, this assignment creates an implicit global
actual_tok = (tokens[k] + "-" + tokens[tok]);
if(actual_tok in posCounts || actual_tok in negCounts) {
// no-op
} else {
actual_tok = (tokens[tok] + "-" + tokens[k]);
}
// condition set before this block of code
if(condition) {
if (actual_tok in posCounts) {
posCounts[actual_tok] += 1;
} else {
posCounts[actual_tok] = 1;
}
} else {
if (actual_tok in negCounts) {
negCounts[actual_tok] += 1;
} else {
negCounts[actual_tok] = 1;
}
}
}
}
}
Update: I've tried increasing the heap size via node train_matrices.js --max-old-space-size=12288 and node train_matrices.js --max_old_space_size=12288 (underscores instead of dashes), and that didn't work either.
Probably not the main issue in your code, but you can reduce the number of lookups by changing this structure from this:
// Original form: an `in` membership test, followed by a read and a write (+=) when the key exists.
if (tokens[k] in wordCounts) {
wordCounts[tokens[k]] += 1;
} else {
wordCounts[tokens[k]] = 1;
}
to this:
// Read the current count once (a missing key counts as 0), then store the incremented value.
const token = tokens[k];
wordCounts[token] = (wordCounts[token] || 0) + 1;
And, as I said in a comment, I've read that a Map object with .get() and .set() is better suited when there are lots of dynamically created keys whereas plain objects are better suited when you have lots of objects with all the same keys (as the JS compiler can sometimes make a C-like struct for it), but this can't be done when you're regularly adding new keys.
The answer was to both use the increase memory flag node <YOUR_FILE_NAME>.js --max-old-space-size=12288 and change to using a Map instead of an object - thanks to #jfriend00 and #Norman Breau for the suggestions. That said, maps have a max capacity of 2^24 items or 1 GB, so I ended up using a modified version of the BigMap from this stackoverflow (modified to limit the total number of items still - ended up running completely out of RAM).
Modified code (you can replace BigMap with Map if you want):
let posCounts = new BigMap();
let negCounts = new BigMap();
let wordCounts = new BigMap();
let actual_tok;
tokens = // some code
// mark every cooccurrence
for (let k = 0; k < tokens.length; k++) {
// count word occurences
if (wordCounts.has(tokens[k])) {
wordCounts.set(tokens[k], wordCounts.get(tokens[k]) + 1);
} else {
wordCounts.set(tokens[k], 1);
}
for(let tok = k + 1; tok < tokens.length; tok++) {
if (tok == k) {
// avoid word to self cooccurrence
// should no longer be possible
continue;
} else {
// check which form of the cooccurence exists already in either count
actual_tok = (tokens[k] + "-" + tokens[tok]);
if(posCounts.has(actual_tok) || negCounts.has(actual_tok)) {
// no-op
} else {
actual_tok = (tokens[tok] + "-" + tokens[k]);
}
if(condition) {
if (posCounts.has(actual_tok)) {
posCounts.set(actual_tok, posCounts.get(actual_tok) + 1);
} else {
posCounts.set(actual_tok, 1);
}
} else {
if (negCounts.has(actual_tok)) {
negCounts.set(actual_tok, negCounts.get(actual_tok) + 1);
} else {
negCounts.set(actual_tok, 1);
}
}
}
}
}
}
I have to create a function that checks if a word is palindrome or not. My reasoning is to break down the word letter by letter, normal and reverse, and then compare the two results to determine if a word is a palindrome or not. With an if - else I give the user an alert that tells perfectly the result. Now, I've learned that most of the functions must have a return with a variable that contains that desired result.
In this case I really can't have this, I think it can work perfectly this way. I tried with
// Turns the comparison into the string 'true' or 'false' (not a boolean),
// logs it, and returns it to the caller.
var palindromeResult = (leftToRightWord == rightToLeftWord) ? 'true':'false';
console.log(palindromeResult);
return palindromeResult;
but it works only for the developer if he reads the console.log in the console, but it's a bad solution for me... Have you got any better idea than this? Below my full function code
// Alerts whether wordToCheck is judged a palindrome.
// NOTE(review): the function's closing brace is missing from this excerpt.
function isPalindrome(wordToCheck) {
// Each pass overwrites leftToRightWord, so after the loop it holds only the
// last character of the word.
for(var i = 0; i < wordToCheck.length; i++) {
var leftToRightWord = wordToCheck[i];
console.log('Left ' + leftToRightWord);
}
// Likewise, rightToLeftWord ends up holding only the character at index 0.
for(var j = wordToCheck.length - 1; j >= 0; j--) {
var rightToLeftWord = wordToCheck[j];
console.log('Right ' + rightToLeftWord);
}
// So this compares the last character with the first one, not the whole word.
if ( leftToRightWord === rightToLeftWord) {
// "The word is a palindrome"
alert('La parola è palindroma');
} else {
// "The word is not a palindrome"
alert('La parola non è palindroma');
}
Edit: At the end I changed a lot my code for a better legibility.
// Data: the word typed by the user (null when the prompt is cancelled).
var userWord;
// Keep asking while the user submits an empty string. prompt() returns null
// on Cancel, so the loop condition must not read .length from the answer.
do {
  userWord = prompt("Dimmi una parola");
} while (userWord === "");
// The result of the function is saved here so it can be reused;
// it stays null when the user gave no word.
var functionResult = userWord === null ? null : isPalindrome(userWord);
console.log(functionResult);
// Tells whether the given word is a palindrome.
// The word is rebuilt back to front and compared with the original spelling.
// The reversed word is logged, the outcome is shown in a pop-up, and the same
// value is returned so the caller can store it, log it or reuse it.
function isPalindrome(wordToCheck) {
  let reversed = '';
  let index = wordToCheck.length;
  // walk from the last character down to the first
  while (index-- > 0) {
    reversed += wordToCheck[index];
  }
  console.log(reversed);
  const matches = wordToCheck == reversed;
  alert(matches);
  return matches;
}
Now the for loop no longer has the problem you pointed out to me, and it correctly recognizes whether a word is a palindrome or not.
See the snippet. The function returns the answer, and then you can alert it, assign it to a variable, echo it on the page and so on. I'm not so sure about your algorithm, though, since it tells me that 'abracadabra' is a palindrome, but it is not. Read right to left, the sequence of the letters is different!
// Same comparison as in the question, but the verdict is returned as a string
// instead of being alerted inside the function; the caller decides what to do with it.
function isPalindrome(wordToCheck) {
// Each pass overwrites leftToRightWord; after the loop it holds the last character.
for (var i = 0; i < wordToCheck.length; i++) {
var leftToRightWord = wordToCheck[i];
console.log('Left ' + leftToRightWord);
}
// Each pass overwrites rightToLeftWord; after the loop it holds the character at index 0.
for (var j = wordToCheck.length - 1; j >= 0; j--) {
var rightToLeftWord = wordToCheck[j];
console.log('Right ' + rightToLeftWord);
}
// Only the last and first characters are compared here, which is why a word
// such as 'abracadabra' (starts and ends with 'a') is reported as a palindrome.
if (leftToRightWord === rightToLeftWord) {
// "The word is a palindrome"
return 'La parola è palindroma';
} else {
// "The word is not a palindrome"
return 'La parola non è palindroma';
}
}
alert(isPalindrome('abracadabra'));
I have a complex JSON file that needs parsing and my loop skills (or more precisely, the lack thereof) are really failing me.
I have the following xml file, and I am trying to get all elements on one row. In my perfect world (in no particular order)...
sku #, length, width, image, description, attribute value 1, attribute value 2, attribute value 3, etc.
The JSON file is as follows:
var json = {
"product":[
{
"shipdata":{
"_length":"2in",
"_width":"2in",
},
"sku":"90245",
"brand":"Brandy",
"image":"shirt.jpg",
"description":"description",
"attributes":{
"attribute":[
{
"_name":"Color",
"_value":"Black",
},
{
"_name":"Gender",
"_value":"Mens",
},
{
"_name":"Size",
"_value":"L",
},...
So, my intended result is:
90245, Brandy, Black, Men's, L, shirt.jpg, 2in, 2in
But when I loop like the following, I only get the first result for "name". Admittedly, I'm a newb, but if anyone can push me in the right direction or show a proof of concept, it would be so so appreciated. Thanks in advance / feel horrible to even ask such a low level question.
// NOTE(review): `i` (the product index) is not defined in this excerpt — presumably set by an outer loop.
for(var l = 0; l < json.product[i].attributes.attribute.length; l++) {
// xxx is reassigned on every pass, so the names read on earlier passes are discarded
var xxx = (json.product[i].attributes.attribute[l]['_name']);
}
// Runs once, after the loop, with whatever xxx held on the final pass.
$('body').append(xxx);
if you don't mind using lodash, this should help you:
// Collects the brand, sku and every attribute value of all products into one
// flat list, then prints the list as a single comma-separated line.
var res = [];
_.each(json.product, function (product) {
  res.push(product.brand, product.sku);
  _.each(product.attributes.attribute, function (attribute) {
    res.push(attribute._value);
  });
});
console.log(res.join(','));
//Brandy,90245,Black,Mens,L
working fiddle
EDIT: My solution is obviously not as good as scottjustin5000 's. I'm trying to explain the detailed steps on analyzing this problem.
You want to output a string from the JSON data. So we should break the parts of the string and process one by one.
90245, Brandy, Black, Men's, L, shirt.jpg, 2in, 2in
"sku", "brand", attribute, attribute, attribute, "image", "_length", "_width"
Let's start.
// Builds the output line for one product: sku, brand, the attribute values,
// image, then the shipping length and width, separated by ", ".
function parseJSONToLine(product) {
  // getAllAttributes() already ends its text with ", ", so no separator is added after it
  return (
    product.sku + ", " +
    product.brand + ", " +
    getAllAttributes(product) +
    product.image + ", " +
    product.shipdata._length + ", " +
    product.shipdata._width
  );
}
// Print one line per product.
// `products` is declared explicitly: assigning to an undeclared name creates an
// implicit global and throws a ReferenceError in strict mode and in ES modules.
var products = json["product"];
for (var i = 0; i < products.length; i++) {
  console.log(parseJSONToLine(products[i]));
}
This part just assembles the line you want, piece by piece. For the attributes, we need another loop:
// Returns the product's Color, Gender and Size values, in that order, each
// followed by ", ". A name the product does not carry appears as "undefined".
function getAllAttributes(product) {
  const orderedNames = ["Color", "Gender", "Size"];
  const valueByName = {};
  // index every attribute value by its name
  for (const attribute of product["attributes"]["attribute"]) {
    valueByName[attribute["_name"]] = attribute["_value"];
  }
  // emit the values in the fixed order above
  return orderedNames.reduce(function (text, name) {
    return text + valueByName[name] + ", ";
  }, "");
}
The last part is to put the line produced into your HTML. Just replace the $('body') line with:
// .text() inserts the line as plain text, so markup characters coming from the
// data are shown literally instead of being parsed as HTML.
$('body').append($('<p>').text(line));
That's it. The key to solving this problem is knowing what the line consists of. Then try to get the values out of the JSON object one by one. When something seems complicated, just try writing out the code and adjust it according to the output. console.log() is very helpful for this.
The reason why your code doesn't work is that your JSON data contains not only arrays but also objects. You have to take them apart.
If you need further explanation on the snippet, comment me.
JSFiddle: https://jsfiddle.net/aresowj/g9wuLg28/
According to your JSON structure and the output you want, I'll suggest to do the following:
// Builds one comma-separated line per product, in the order asked for:
// sku, brand, attribute values, image, length, width.
var output = Array(json.product.length); // will be an array of strings
for (var i = 0; i < json.product.length; i++) { // loop over each product
  const product = json.product[i];
  // sku and brand come first
  output[i] = product.sku + ', ' + product.brand;
  // then each attribute's value (not its name), in document order
  for (var j = 0; j < product.attributes.attribute.length; j++) {
    output[i] += ', ' + product.attributes.attribute[j]._value;
  }
  // last, the image followed by the length and width data
  output[i] += ', ' + product.image + ', ' + product.shipdata._length + ', ' + product.shipdata._width;
}
$('body').append(output);
// Sample data: a single product with shipping data and three attributes.
var json = {
  "product": [
    {
      "shipdata": {
        "_length": "2in",
        "_width": "2in",
      },
      "sku": "90245",
      "brand": "Brandy",
      "image": "shirt.jpg",
      "description": "description",
      "attributes": {
        "attribute": [
          {
            "_name": "Color",
            "_value": "Black",
          },
          {
            "_name": "Gender",
            "_value": "Mens",
          },
          {
            "_name": "Size",
            "_value": "L",
          }
        ]
      }
    }
  ]
};
// Builds one comma-separated line per product, in the order asked for:
// sku, brand, attribute values, image, length, width.
var output = Array(json.product.length); // will be an array of strings
for (var i = 0; i < json.product.length; i++) { // loop over each product
  const product = json.product[i];
  // sku and brand come first
  output[i] = product.sku + ', ' + product.brand;
  // then each attribute's value (not its name), in document order
  for (var j = 0; j < product.attributes.attribute.length; j++) {
    output[i] += ', ' + product.attributes.attribute[j]._value;
  }
  // last, the image followed by the length and width data
  output[i] += ', ' + product.image + ', ' + product.shipdata._length + ', ' + product.shipdata._width;
}
$('body').append(output);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
A solution using Array.map:
// One row per product: sku, brand, then every attribute value.
var res = json.product.map((product) => {
  const row = [product.sku, product.brand];
  return row.concat(product.attributes.attribute.map((attribute) => attribute._value));
});
// Print each row as a comma-separated line.
for (const row of res) {
  console.log(row.join(', '));
}
https://jsfiddle.net/xve4agp6/1/
I'm currently learning about break and continue statements. It prints the 1st array element, the 2nd one triggers the alert like it's supposed to, but the third one doesn't run when I use the continue statement. Maybe I'm not doing it right? Some guidance for a newbie would be nice.
Im using JSBin to run this.
P.S. I'm learning from the "Beginning JavaScript" book
Thanks
// Writes the numeric entries of n to the page and alerts for the non-numeric ones.
var n = [233, "john", 432];
var nIndex;
// for...in visits the array's enumerable property keys (the index strings), not the elements themselves
for (nIndex in n) {
// isNaN() converts its argument to a number first, so "john" counts as not-a-number
if (isNaN(n[nIndex])) {
alert(n[nIndex] + " is not a number");
// skip the write below for this entry and go on to the next key
continue;
}
document.write(n[nIndex] + " ");
}
Continue does not work in :
for(i in array) {}
it works for for(i=0; i<n; i++){}
This is how you iterate over the elements of an array:
// Index-based loop over the array: alert for entries that are not numbers,
// write the others to the page.
var data = [233, "john", 432];
for (var i = 0; i < data.length; ++i) {
if (isNaN(data[i])) {
alert(data[i] + " is not a number");
// jump straight to the next index; the write below is skipped for this entry
continue;
}
document.write(data[i] + " ");
}
By the way, you can remove the continue statement and instead use else on the alternate instructions:
// Same loop without continue: the two outcomes are the branches of one if/else.
var data = [233, "john", 432];
for (var i = 0; i < data.length; ++i) {
if (isNaN(data[i])) {
alert(data[i] + " is not a number");
} else {
// reached only for entries that convert to a number
document.write(data[i] + " ");
}
}
That's logically equivalent and you may find it easier to read.
I am a beginner and I've found some useful examples for what I want to do. The problem is the examples that I've found don't have enough comments for me to understand what is going on. So, I hope someone can help me implement the code I've found into the code that I already have. I'm making a text manipulation area to use to play with cipher text. It's all being done inside a single HTML text area. I've got a function called "G_Group(size, count)" that breaks the text into rows and columns of choice, and it is working great. The next tool that I want to add will transpose this matrix from (x,y) to (y,x). Because I have the G_Group function, I don't believe I need to slice anything. I found a bit of JavaScript transposition code at http://rosettacode.org/wiki/Matrix_transposition#JavaScript but I don't know how to change the values to add it to what I've got already.
Function G_Group(size, count) is called like this.
<input type= button value="Grouping" onclick = "return G_Group(0, 0)" title="grouping" />
And here is how I break the text up into rows and columns:
// Re-flows the text in document.encoder.text into groups of `size` characters
// separated by spaces, starting a new line after every `count` groups.
//
// size  - characters per group; a value <= 0 means "read it from the
//         group_size form field".
// count - groups per line; a value <= 0 means "read it from the group_count
//         form field".
//
// Always returns false (the button in the page uses it as
// onclick="return G_Group(0, 0)"). Shows an alert and leaves the text
// untouched when the size or count is not usable.
function G_Group(size, count)
{
    if (size <= 0)
    {
        size = document.encoder.group_size.value;
    }
    // Form values are strings, and slice() truncates its arguments to integers.
    // A non-numeric or fractional (< 1) size would therefore cut off nothing
    // and the loop below would never end, so normalise and validate it first.
    size = Math.floor(Number(size));
    if (!(size >= 1))
    {
        alert('Invalid group size');
        return false;
    }

    if (count <= 0)
    {
        count = document.encoder.group_count.value;
    }
    // Reject a count that is not a positive number instead of silently never
    // breaking the line.
    count = Number(count);
    if (!(count > 0))
    {
        alert('Invalid group count');
        return false;
    }

    var t = document.encoder.text.value;
    var o = '', groups = 0;

    // Tr is defined elsewhere; presumably it strips the listed whitespace
    // characters from t — confirm against its definition.
    t = Tr(t, " \r\n\t");

    while (t.length > 0)
    {
        // separate this group from the previous one
        if (o.length > 0)
        {
            o += ' ';
        }
        // the current line is full: start a new one
        if (groups >= count)
        {
            o += "\n";
            groups = 0;
        }
        groups ++;
        // move the next `size` characters from the input to the output
        o += t.slice(0, size);
        t = t.slice(size, t.length);
    }

    document.encoder.text.value = o;

    return false;
}
And this is the code that I want to modify to transpose the array.
// Wraps a two-dimensional array (an array of row arrays).
// height is the number of rows; width is the length of the first row, or 0
// when there are no rows at all.
function Matrix(ary) {
    this.mtx = ary;
    this.height = ary.length;
    // an empty matrix has no first row to measure
    this.width = ary.length > 0 ? ary[0].length : 0;
}

// Renders one line per row, with the cells of a row separated by commas.
Matrix.prototype.toString = function() {
    var s = [];
    for (var i = 0; i < this.mtx.length; i++) {
        s.push( this.mtx[i].join(",") );
    }
    return s.join("\n");
};

// Returns a new matrix whose rows are this matrix's columns;
// the receiver is left unchanged.
Matrix.prototype.transpose = function() {
    var transposed = [];
    for (var i = 0; i < this.width; i++) {
        transposed[i] = [];
        for (var j = 0; j < this.height; j++) {
            // cell (row j, column i) moves to (row i, column j)
            transposed[i][j] = this.mtx[j][i];
        }
    }
    return new Matrix(transposed);
};
I am aware that I may be approaching this all wrong. And I'm aware that the questions I have are very basic, I'm a little embarrassed to ask these simple questions. Please excuse me. I'm 43 years old and had c programming in college 20 years ago. I'm pretty good with HTML and CSS but I'm lacking in a lot of areas. Hope someone can help me with this. Thanks.