Data structure for ngrams - javascript

I have built an ngram model implementation in Javascript, which works fine. However, I am looking to change my data structure so that I do not have to iterate through all the history each time a new word/character is observed.
Here, I take a seedtext and use it to build ngrams with an order 2.
var ngrams = {};
var order = 2;
var seedtext = "adadwsdawdsadawdsadadasdwdadaaasdsadsdadwdasdasd";
build();
function build(){
for (var i = 0; i < seedtext.length - order; i++) {
var gram = seedtext.substring(i, i + order);
var next = seedtext.charAt(i + order);
if (!ngrams.hasOwnProperty(gram)) {
ngrams[gram] = [];
}
ngrams[gram].push(next);
}
}
console.log(ngrams);
console.log(ngrams["wd"]);
I am looking to have a data structure that holds a record of each observed pattern (for a given order. Each observed pattern should have a next possible observation and its count.
For example, if you run the below code, an output such as this can be seen:
[object Object] {
aa: ["a", "s"],
ad: ["a", "w", "a", "a", "a", "a", "s", "w"],
as: ["d", "d", "d", "d"],
aw: ["d", "d"],
da: ["d", "w", "w", "d", "s", "d", "a", "d", "s", "s"],
ds: ["a", "a", "a", "d"],
dw: ["s", "d", "d"],
sa: ["d", "d", "d"],
sd: ["a", "w", "s", "a", "a"],
wd: ["s", "s", "a", "a"],
ws: ["d"]
}
["s", "s", "a", "a"]
Now, if we take "ad" for example: ngrams["ad"], we get back ["a", "w", "a", "a", "a", "a", "s", "w"].
Clearly, after ad we can either get a w,a or s.
I'd like to group the letters so that ngrams["ad"] returns something like:
{a: 5
w: 2
s :1}
Note that they are in order so that the most frequently occurring letter is at the top, with its count.
I'd like to be able to access the data like so (for example):
ngrams["ad"].a;
ngrams["ad"].w;
ngrams["ad"].s;
and get back 5 for a, 2 for w and 1 for s.
I also want to be able to increment the values as a previously seen pattern is observed again... I also want to be able to remove patterns.
Any ideas?

Here is a working version. Instead of an array, you add another object to store counts of next characters in it.
var ngrams = {};
var order = 2;
var seedtext = "adadwsdawdsadawdsadadasdwdadaaasdsadsdadwdasdasd";
build();
function build(){
for (var i = 0; i < seedtext.length - order; i++) {
var gram = seedtext.substring(i, i + order);
var next = seedtext.charAt(i + order);
if (!ngrams.hasOwnProperty(gram)) {
ngrams[gram] = {};
}
if (!ngrams[gram].hasOwnProperty(next)) {
ngrams[gram][next] = 0;
}
ngrams[gram][next] += 1;
}
}
console.log(ngrams);
console.log(ngrams["wd"]);

Related

How can I move items from an array from one position to another? [duplicate]

This question already has answers here:
How to merge two arrays in JavaScript and de-duplicate items
(89 answers)
Closed last month.
const elementeSelected = ["a", "b"];
const allElements = ["m", "s", "e", "r", "q", "d", "a", "b", "c"];
// result allElements = ["a", "b", "m", "s", "e", "r", "q", "d", "c"];
https://codesandbox.io/s/distracted-lederberg-yji13f?file=/src/index.js:0-175
I tried to reorder the items form allElement. I want the items from elementeSelected to be move from their position in allElements to the first position.
You need to concat the arrays together and remove the ones that were already used. You can do that with spread syntax and with filter. The filter uses includes to check if it is duplicated.
const elementeSelected = ["a", "b"];
const allElements = ["m", "s", "e", "r", "q", "d", "a", "b", "c"];
const result = [...elementeSelected, ...allElements.filter(n => !elementeSelected.includes(n))];
console.log(result);
var allelements = ["m", "s", "e", "r", "q", "d", "a", "b", "c"];
var x= 6;
var pos=0;
var temp=allelements[x];
var i;
for (i=x; i>=pos; i--)
{
allelements[i]=allelements[i-1];
}
allelements[pos]=temp;
var y= 7;
var pos2=1
<!-- begin snippet: js hide: false console: true babel: false -->
var temp2=allelements[y];
var i;
for (i=y; i>=pos2; i--)
{
allelements[i]=allelements[i-1];
}
alllements[pos2]=temp2;
console.log(allelements);

How to push to an array given that the character is not already present in the array in javascript

I'm trying to push to an initially empty array with the condition that the content at two different indexes of two different non-empty arrays don't have the same character and the initially empty array has not already pushed that character earlier
I've tried using the not operator, contains, includes, but nothing seems to work.
var pushToArray = [];
for (var i = 0; i < 5; i++) {
var characters = ["M", "U", "S", "I", "C"];
var moreCharacters = ["F", "R", "I", "E", "N", "D", "L", "Y"];
randomIndex = Math.floor(Math.random() * moreCharacters.length);
// && push to 'pushToArray' if the character is not in 'pushToArray'
if (characters[i] != moreCharacters[randomIndex] && !pushToArray.includes(moreCharacters[randomIndex])) {
pushToArray.push(moreCharacters[randomIndex]);
}
if(arrayContent1("I") == arrayContent2("I")) then don't push
Sample expected results for pushToArray:
["F", "R", "E", "D", "L"]
Sample actual results for pushToArray:
["I", "F", "R", "D", "Y"] I don't want that letter 'I' in there
The test
if (characters[i] != moreCharacters[randomIndex]
will fail only if characters[i] is the picked character - it sounds like you want to make sure that none of the characters match the picked character:
if (!characters.includes(moreCharacters[randomIndex])
If you're only conditionally pushing to the array, then change the for loop to
while (pushToArray.length < 5) {
var pushToArray = [];
while (pushToArray.length < 5) {
var characters = ["M", "U", "S", "I", "C"];
var moreCharacters = ["F", "R", "I", "E", "N", "D", "L", "Y"];
randomIndex = Math.floor(Math.random() * moreCharacters.length);
// && push to 'pushToArray' if the character is not already in there
if (!characters.includes(moreCharacters[randomIndex]) && !pushToArray.includes(moreCharacters[randomIndex])) {
pushToArray.push(moreCharacters[randomIndex]);
}
}
console.log(pushToArray);
But, the logic would be easier to follow if you filtered the characters out of moreCharacters beforehand:
const excludeChars = ["M", "U", "S", "I", "C"];
const inputChars = ["F", "R", "I", "E", "N", "D", "L", "Y"]
.filter(char => !excludeChars.includes(char));
const result = [];
for (let i = 0; i < 6; i++) {
const randIndex = Math.floor(Math.random() * inputChars.length);
const [char] = inputChars.splice(randIndex, 1);
result.push(char);
}
console.log(result);
This code does this:
I would like array3 to only have letters from array2 that are not in array1. I basically don't want any letters that exist in array1
var characters = ["M", "U", "S", "I", "C"];
var moreCharacters = ["F", "R", "I", "E", "N", "D", "L", "Y"];
var pushToArray = [];
var i, l = characters.length;
for (i = 0; i < l; i++) {
randomIndex = Math.floor(Math.random() * moreCharacters.length);
// && push to 'pushToArray' if the character is not in 'pushToArray'
if (!characters.includes(moreCharacters[randomIndex]) && !pushToArray.includes(moreCharacters[randomIndex])) {
pushToArray.push(moreCharacters[randomIndex]);
}
}
console.log(pushToArray);
This can be simply achieved by doing:
var characters = ["M", "U", "S", "I", "C"];
var moreCharacters = ["F", "R", "I", "E", "N", "D", "L", "Y"];
var pushedToArray = [...characters].filter(x => moreCharacters.indexOf(x) === -1);
var finalArray = pushedToArray.filter((item, index) => pushedToArray.indexOf(item) === index);

How to iterate a list reversely and stop with a condition with lodash?

I have a list with some items for example
["a", "b", "c", ..., "x", "y", "z"]
I would like to iterate it but from the end to beggining and push those items into a new variable, and stop when it has length == 3.
For that simple example I would like to have as result within my new var:
["z", "y", "x"]
I'm thinking of .reverse() my array and then iterate it with .each and push my items, but I believe there is a better way to do that with lodash, that I'm not finding.
Maybe I'm not knowing how to search.
Thanks in advance.
You can do it with the function "_.takeRightWhile" from lodash like the code below:
var arr = ["a", "b", "c", "x", "y", "z"];
var reverseArray = [];
_.takeRightWhile(arr, function(item){
reverseArray.push(item)
return reverseArray.length < 3
});
console.log(reverseArray);
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.4/lodash.min.js"></script>
In plain Javascript you could use Array#slice with a negative count for getting a new array from the end and use Array#reverse for a reversed array.
var array = ["a", "b", "c", "x", "y", "z"],
result = array.slice(-3).reverse();
console.log(result);
For processing items, you could use Array#reduceRight.
var array = ["a", "b", "c", "x", "y", "z"],
result = array.slice(-3).reduceRight((r, a) => r.concat(a), []);
console.log(result);
Another solution that iterates the original array:
var arr = ["a", "b", "c", "x", "y", "z"], res=[], count=3;
if (count <= arr.length)
for (var i=0; i<count; i++) res.push(arr[arr.length-1-i]);
console.log(res);

How to check if Strings are in array?

example:
var arr = ["a", "b", "c", "d", "e", "f"];
how to check if "a", "b" and "c" are in array?
i tried indexOf() but i cant check if more than 1 strings are in array...
You are use Array.protoype.every and Array.prototype.indexOf, like this
["a", "b", "c"].every(function(currentItem) {
return arr.indexOf(currentItem) !== -1;
});
This will return true, only if all the elements in ["a", "b", "c"] are present in arr.
try like this:
var arr = ["a", "b", "c", "d", "e", "f"];
var arr1=['a','b','c'];
for(i=0;i<arr1.length;i++){
var a1 = arr.indexOf(arr1[i]);
console.log(a1);
}
or
var a = arr.indexOf("a");
console.log(a);//0
var b = arr.indexOf("b");
console.log(b);//1
var c = arr.indexOf("c");
console.log(c);//2

How doI reduce an array to a specific amount in javascript/jquery/undescore?

I have an array like:
["a", "b", "c", "d", "e"]
Now I want to just have the first 3 items. How would I remove the last two dynamically so that I could also have a 20 letter array, but reduce that down to the first 3 as well.
var a = ["a", "b", "c", "d", "e"];
a.slice(0, 3); // ["a", "b", "c"]
var b = ["a", "b", "c", "d", "e", "f", "g", "h", "i"];
b.slice(0, 3); // ["a", "b", "c"]
How about Array.slice?
var firstThree = myArray.slice(0, 3);
The splice function seems to be what you're after. You could do something like:
myArray.splice(3);
This will remove all items after the third one.
To extract the first three values, use slice:
var my_arr = ["a", "b", "c", "d", "e"];
var new_arr = my_arr.slice(0, 3); // ["a", "b", "c"]
To remove the last values, use splice:
var removed = my_arr.splice(3, my_arr.length-3); // second parameter not required
// my_arr == ["a", "b", "c"]
In underscore.js we can use the first function
_.first(array, [n]) Alias: head
Returns the first element of an array. Passing n will return the first n elements of the array.
_.first(["a", "b", "c", "d", "e"],3);
=> ["a", "b", "c"]

Categories

Resources