Autocomplete using a trie - javascript

I am working on an autocompletion script and was thinking about using a trie. My problem is that I want everything that matches to be returned. For example, if I type the letter r, I want all entries starting with r to be returned, then all entries starting with re, and so on. Is this feasible with a trie, and how would it work? Also, if there is a better way, I am open to suggestions. The reason I ask is that it seems like it would be complicated, and a whole lot of processing, to return all of the nodes off of, say, the r branch.
And yes I may be reinventing the wheel, but I would like to learn how it works.

You can absolutely do it using a trie. Here is some code I threw together that can point you in the right direction:
/**
 * Builds a case-insensitive prefix tree (trie) from a collection of words.
 *
 * Every node has the shape { letter, children, parent, word, getWords }:
 *   - letter:   upper-cased character this node stands for (undefined on the root)
 *   - children: map from upper-cased character to child node
 *   - parent:   always null (kept for interface compatibility)
 *   - word:     the original word ending at this node, or null
 *   - getWords: function (prefix) -> array of stored words starting with prefix
 *
 * @param {Array<string>|Object} tokenArray words to index; own enumerable
 *        values are read, and falsy or non-string entries are skipped.
 * @returns {Object} the root node; call root.getWords(prefix) to autocomplete.
 */
var tokenTree = function (tokenArray) {
  var hasOwn = Object.prototype.hasOwnProperty;

  // Appends every word stored at or below `node` to `out`:
  // descendants first (in insertion order), then the node's own word.
  function collectWords(node, out) {
    var kids = node.children;
    for (var key in kids) {
      if (hasOwn.call(kids, key)) {
        collectWords(kids[key], out);
      }
    }
    if (node.word) {
      out.push(node.word);
    }
  }

  var createLetterObject = function (l) {
    var node = {
      letter: l,
      // A plain object is used as the character -> node map; an array would
      // grow a bogus length whenever a digit is used as a key.
      children: {},
      parent: null,
      word: null,
      getWords: function (wordPart) {
        // Walk down the trie one character at a time, starting at this node.
        var current = node;
        for (var ii = 0; ii < wordPart.length; ii++) {
          var key = wordPart[ii].toUpperCase();
          if (!hasOwn.call(current.children, key)) {
            // No stored word has this prefix: report "no matches" instead of
            // dereferencing a missing child.
            return [];
          }
          current = current.children[key];
        }
        // Collect from the prefix node itself so that a prefix which is also a
        // complete word (e.g. "test") is part of the result.
        var wordList = [];
        collectWords(current, wordList);
        return wordList;
      }
    };
    return node;
  };

  var startingPoint = createLetterObject();

  // Inserts one word, creating the missing nodes along its path.
  function insertWord(word) {
    var current = startingPoint;
    for (var ii = 0; ii < word.length; ii++) {
      var key = word[ii].toUpperCase();
      if (!hasOwn.call(current.children, key)) {
        current.children[key] = createLetterObject(key);
      }
      current = current.children[key];
    }
    current.word = word;
  }

  for (var counter in tokenArray) {
    if (hasOwn.call(tokenArray, counter)) {
      var word = tokenArray[counter];
      if (!word || typeof word !== 'string') {
        continue;
      }
      insertWord(word);
    }
  }
  return startingPoint;
};
Usage
// Build the trie once from the list of known tokens.
var tokens = ["Token", "words", "whohaa", "mommy", "test", "wicked"];
var tree = tokenTree(tokens);
// The text typed so far; the lookup upper-cases each character, so case does not matter.
var currentTokenSet = 'w';
var list = tree.getWords(currentTokenSet);
// For the prefix 'w' this returns words, whohaa and wicked.
console.log(list)
I'm not saying this is anywhere near the best or most efficient way, but it should at least get you pointed in the right direction.
Here is a jsfiddle showing it working: https://jsfiddle.net/es6xp8h9/

Regarding the time to discover items at a root node: if you're doing this for an autocomplete, it's likely you won't be returning too many results per 'match'. If you wanted to trade off space for speed, you could store references to the 'top n' items at each of the nodes. This, of course, would require more time on update.

Related

Couchdb searching via multiple queries

I have the following map function to index each specific alphanumeric word inside my document so I can search for it.
// Minimal map replacement: calls func with each element only (no index or
// array argument) and returns the results in a new array.
Array.prototype.map = function(func) {
  var mapped = [];
  var pos = 0;
  while (pos < this.length) {
    mapped[pos] = func(this[pos]);
    pos += 1;
  }
  return mapped;
};
// Left fold with the seed value FIRST and the combiner second (note: this is
// the reverse of the standard reduce argument order).
Array.prototype.reduce = function(val, func) {
  var acc = val;
  var pos = 0;
  while (pos < this.length) {
    acc = func(acc, this[pos]);
    pos += 1;
  }
  return acc;
};
// Returns a new array holding the first occurrence of each element, in order.
// Relies on the seed-first reduce(val, func) defined in this snippet.
Array.prototype.uniq = function() {
  var keepFirstSeen = function(seen, candidate) {
    return seen.indexOf(candidate) < 0 ? seen.concat([candidate]) : seen;
  };
  return this.reduce([], keepFirstSeen);
};
function(doc) {
var tokens;
if (doc.name) {
tokens = doc.name.toLowerCase().split(/[^A-Z0-9\-_]+/i).uniq();
tokens.map(function(token) {
emit(token, doc);
});
}
}
So if I do something like:
db.view('documents/keywords', { key: "hello" });
It will list every document with the word hello in it.
My question is: if I want to find something that has both the word hello and the word world, is there a more efficient way of doing this than running two queries against my view and then figuring out client-side which IDs show up in both results?
My worry is it will start being troublesome at scale when I have 1,000 hellos and completely different amount of worlds

splitting a string into a multidimensional array

I have a list of strings, I want to check if the string contains a specific word, and if it does split all the words in the string and add it to an associative array.
myString = ['RT #Arsenal: Waiting for the international', 'We’re hungry for revenge #_nachomonreal on Saturday\'s match and aiming for a strong finish']
wordtoFind = ['#Arsenal']
I want to loop through the wordtoFind and if it is in myString, split up myString into individual words and create an object like
newWord = {#Arsenal:[{RT:1},{Waiting:1},{for:1},{the:1},{international:1}]}
// Asker's attempt: for every search word, scan every sentence and split the ones that contain it.
// z and i are assigned without a declaration, so they become implicit globals (an error in strict mode).
for(z=0; z <wordtoFind.length; z++){
for ( i = 0 ; i < myString.length; i++) {
// wordtoFind[z] is a string, so `.key` is undefined; indexOf(undefined) searches for the text "undefined".
if (myString[i].indexOf(wordtoFind[z].key) > -1){
// The array returned by split() is not stored anywhere, so nothing is collected.
myString[i].split(" ")
}
}
}
I would say something like this would work; it also counts the number of occurrences of each word in a sentence. JavaScript does not have associative arrays like PHP, for instance; it just has objects and numbered arrays:
// Sentences to search and the words to look for.
var myString = ['RT #Arsenal: Waiting for the international', 'We’re hungry for revenge #_nachomonreal on Saturday\'s match and aiming for a strong finish'];
var wordtoFind = ['#Arsenal'];

// result maps each search word to an object of { otherWord: occurrenceCount }.
var result = {};
var hasOwn = Object.prototype.hasOwnProperty;
// Punctuation that may trail the search word inside a sentence ("#Arsenal:").
var TRAILING_PUNCTUATION = /[:.,;!?]+$/;

for (var i = 0, l = wordtoFind.length; i < l; i++) {
  var needle = wordtoFind[i];
  for (var ii = 0, ll = myString.length; ii < ll; ii++) {
    var sentence = myString[ii];
    if (sentence.indexOf(needle) === -1) {
      continue;
    }
    // Keep adding to the counts of earlier matching sentences instead of
    // overwriting them with the counts of the last one.
    var counts = hasOwn.call(result, needle) ? result[needle] : {};
    var words = sentence.split(' ');
    for (var iii = 0, lll = words.length; iii < lll; iii++) {
      var word = words[iii];
      // Skip the search word itself, even when punctuation is glued to it.
      if (word.replace(TRAILING_PUNCTUATION, '') === needle) {
        continue;
      }
      if (!hasOwn.call(counts, word)) {
        counts[word] = 0;
      }
      counts[word]++;
    }
    result[needle] = counts;
  }
}
console.log(result);
//{"#Arsenal":{"RT":1,"Waiting":1,"for":1,"the":1,"international":1}}
This method makes use of the forEach-function and callbacks.
The containsWord-function was left with a for-loop for now to reduce some callbacks, this can obviously be changed.
var myString = [
  'RT #Arsenal: Waiting for the international',
  'We’re hungry for revenge #_nachomonreal on Saturday\'s match and aiming for a strong finish',
  '#Arsenal: one two three four two four three four three four'
];
var wordtoFind = ['#Arsenal'];

// define the preprocessor that is used before the equality check
// (removes the first ':' so that "#Arsenal:" compares equal to "#Arsenal")
function preprocessor(word) {
  return word.replace(':', '');
}

/**
 * Counts, for every search word, the words of all strings that contain it.
 *
 * @param {string[]} array strings to search in
 * @param {string[]} search words to look for
 * @param {function(Object)} [callback] called exactly once with the result,
 *        an object of { searchWord: { word: count } }; also called (with an
 *        empty object) when either input is empty
 * @param {function(string): string} [preprocessor] applied to each word of a
 *        string before it is compared with the search word
 */
function findOccurences(array, search, callback, preprocessor) {
  var result = {};
  // iterate the search strings that should be matched
  search.forEach(function(needle) {
    // iterate the array of strings that should be searched in
    array.forEach(function(haystack) {
      if (!containsWord(haystack, needle, preprocessor)) {
        return;
      }
      // iterate every word to count the occurences and write them to the result
      haystack.split(' ').forEach(function(word) {
        countOccurence(result, needle, word);
      });
    });
  });
  // Everything above runs synchronously, so no iteration counter is needed;
  // reporting here also covers empty inputs.
  if (callback) {
    callback(result);
  }
}

/**
 * Tells whether one of the space-separated words of haystack equals needle
 * (after the optional preprocessor has been applied to the word).
 */
function containsWord(haystack, needle, preprocessor) {
  var words = haystack.split(' ');
  for (var i = 0; i < words.length; i++) {
    var word = words[i];
    // preprocess a word before it's compared
    if (preprocessor) {
      word = preprocessor(word);
    }
    // if it matches return true
    if (word === needle) {
      return true;
    }
  }
  return false;
}

/**
 * Increments result[key][word], creating the containers on first use.
 */
function countOccurence(result, key, word) {
  var hasOwn = Object.prototype.hasOwnProperty;
  // add a plain object (not an array: words such as "length" or "3" would
  // collide with array internals) if it doesn't exist yet
  if (!hasOwn.call(result, key)) {
    result[key] = {};
  }
  var entry = result[key];
  // set the count to 0 if it doesn't exist yet
  if (!hasOwn.call(entry, word)) {
    entry[word] = 0;
  }
  entry[word]++;
}

// call our function to find the occurences
findOccurences(myString, wordtoFind, function(result) {
  // do something with the result
  console.log(result);
}, preprocessor);
// output:
/*
{ '#Arsenal':
   { RT: 1,
     '#Arsenal:': 2,
     Waiting: 1,
     for: 1,
     the: 1,
     international: 1,
     one: 1,
     two: 2,
     three: 3,
     four: 4 } }
*/
Feel free to ask any questions, if the answer needs clarification.
I hope this fits your needs.
You're on the right track. You just need to store the split string into the associative array variable.
// Collects, in order, every word of every string that contains one of the search words.
var assocArr = [];
// z and i are declared here so they do not leak as implicit globals
// (assigning to an undeclared name throws in strict mode).
for (var z = 0; z < wordtoFind.length; z++) {
  for (var i = 0; i < myString.length; i++) {
    if (myString[i].indexOf(wordtoFind[z]) > -1) {
      myString[i].split(" ").forEach(function (word) {
        assocArr.push(word);
      });
    }
  }
}
I think the key problem that stuck you is the data structure. The optimal structure should be something like this:
{
#Arsenal:[
{RT:1, Waiting:1, for:1, the:1, international:1},
{xxx:1, yyy:1, zzz:3}, //for there are multiple ones in 'myString' that contain the same '#Arsenal'
{slkj:1, sldjfl:2, lsdkjf:1} //maybe more
]
someOtherWord:[
{},
{},
....
]
}
And the code:
var result = {};
/**
 * Counts how often each space-separated word occurs in `string`, ignoring
 * words that are exactly equal to `key`.
 *
 * @param {string} string sentence to analyse
 * @param {string} key word to leave out of the counts
 * @returns {Object} e.g. {RT:1, Waiting:1, for:1, the:1, international:1}
 */
function calculateCount(string, key) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var wordCounts = {};
  string.split(" ").forEach(function (word) {
    if (word === key) {
      return;
    }
    // Test for an OWN property: words like "constructor" or "toString" exist
    // on every object through the prototype and are never `undefined`.
    if (hasOwn.call(wordCounts, word)) {
      wordCounts[word]++;
    } else {
      wordCounts[word] = 1;
    }
  });
  return wordCounts;
}
// For each word to find, store an array with one word-count object (as
// returned by calculateCount) per string that contains that word.
// The search list is named `wordtoFind` (lower-case "to") in this thread.
wordtoFind.forEach(function (word) {
  var current = result[word] = [];
  myString.forEach(function (str) {
    if (str.indexOf(word) > -1) {
      current.push(calculateCount(str, word));
    }
  });
});

Javascript function to convert list of arrays into a tree data structure or a graph

I have a list of arrays of strings :
[FT, LW ,VN ]
[FT ,LW ,NV ]
[FT ,LL ,VN ]
[FT ,LL ,NV ]
[EM ,FT ,LW ]
[EM ,FT ,LL ]
Can someone please help me with a javascript function which processes these arrays and create a tree or graph like structure.
The final output that should look like is :
FT
LW LL
NV,VN,EM VN,NV,EM
I am trying these functions but there seems to be an issue. I get 2 paths to start the tree. FT and EM.
/**
 * Turns a list of paths (arrays of ids) into a forest: every id that never
 * appears after another id in any path becomes a root node.
 *
 * @param {Array<Array<string>>} arry paths, each listing ids from parent to child
 * @returns {Array<Object>} root nodes as produced by buildNodeRecursive
 */
function convertToHierarchy(arry/* array of array of strings */) {
  var childMap = {};     // id -> set (object) of its direct child ids
  var seenAsChild = {};  // ids that have at least one parent

  // Pass 1: record parent/child links; duplicates collapse automatically.
  for (var pathIndex = 0; pathIndex < arry.length; pathIndex += 1) {
    var steps = arry[pathIndex];
    var previous = null;
    for (var stepIndex = 0; stepIndex < steps.length; stepIndex += 1) {
      var current = steps[stepIndex];
      if (!childMap[current]) {
        childMap[current] = {};
      }
      if (previous) {
        childMap[previous][current] = true; /* dummy value */
        seenAsChild[current] = true;
      }
      previous = current;
    }
  }

  // Pass 2: expand every parentless id into a full subtree.
  var roots = [];
  for (var name in childMap) {
    if (seenAsChild[name]) {
      continue;
    }
    roots.push(buildNodeRecursive(name, childMap));
  }
  return roots;
}
/**
 * Builds the node { id, children } for `item`, expanding each of its direct
 * children (the keys of children[item]) recursively.
 */
function buildNodeRecursive(item, children) {
  var subtrees = [];
  var directChildren = children[item];
  for (var childId in directChildren) {
    subtrees.push(buildNodeRecursive(childId, children));
  }
  return {
    id : item,
    children : subtrees
  };
}
Will appreciate any help on this.
I guess I can manipulate the list to sort them based on the weight of count. And then use the initial function that I mentioned. Thanks for your help folks.

Best way to group elements in an array with least complexity

I have a JSON array which looks like this:
// Sample input: one record per listing (a comma was missing after the fourth record).
var map_results = [{"Type":"Flat","Price":100.9},
                   {"Type":"Room","Price":23.5},
                   {"Type":"Flat","Price":67.5},
                   {"Type":"Flat","Price":100.9},
                   {"Type":"Plot","Price":89.8}];
This array contains about 100,000 records. I want the output to be grouped by "Type" and "Price". It should look like this:
// Desired result: one entry per Type, each listing its distinct prices with a count.
var expected_output = [{"Type":"Flat", "Data":[{"Price":100.9, "Total":2},
                                               {"Price":67.5, "Total":1}] },
                       {"Type":"Room","Data":[{"Price":23.5,"Total":1}]},
                       {"Type":"Plot","Data":[{"Price":89.8, "Total":1}]}];
This has to be done in pure JavaScript, and I cannot use libraries like underscore.js. I tried solving the problem, but my solution had three nested for loops, which made the complexity something like n^4. What could be a better solution for this problem?
The function I have looks like this:
/**
 * Groups records by Type and, inside each type, by Price, counting duplicates.
 *
 * @param {Array<{Type: *, Price: *}>} map_results input records
 * @returns {Array<{Type: *, Data: Array<{Price: *, Total: number}>}>} groups in
 *          first-seen order, prices in first-seen order within each group
 */
var reduce = function (map_results) {
  var results = [];
  // Index loops instead of for...in: for...in on an array also visits
  // enumerable properties added to Array.prototype and yields string keys.
  for (var i = 0; i < map_results.length; i++) {
    var record = map_results[i];

    // Find the group of this record's type; stop at the first hit.
    var group = null;
    for (var g = 0; g < results.length; g++) {
      // Loose equality is kept on purpose so "1" and 1 still group together.
      if (record["Type"] == results[g]["Type"]) {
        group = results[g];
        break;
      }
    }
    if (group === null) {
      results.push({"Type": record["Type"], "Data": [{"Price": record["Price"], "Total": 1}]});
      continue;
    }

    // Find the row of this record's price inside the group.
    var row = null;
    for (var d = 0; d < group["Data"].length; d++) {
      if (record["Price"] == group["Data"][d]["Price"]) {
        row = group["Data"][d];
        break;
      }
    }
    if (row === null) {
      group["Data"].push({"Price": record["Price"], "Total": 1});
    } else {
      row["Total"] += 1;
    }
  }
  return results;
};
I have a short function that handles the first part of the requested functionality: It maps the map_results to the desired format:
// Sample records.
var map_results = [{"Type":"Flat","Price":100.9},
                   {"Type":"Room","Price":23.5},
                   {"Type":"Flat","Price":67.5},
                   {"Type":"Flat","Price":100.9},
                   {"Type":"Plot","Price":89.8}];

// First pass: bucket the records by Type. Every record contributes one
// {Price, Total: 1} row; equal prices are not merged at this stage.
var expected_output = map_results.reduce(function(byType, record){
  var typeName = record.Type;
  if(!byType[typeName]){
    byType[typeName] = {'Type': typeName, 'Data': []};
  }
  byType[typeName].Data.push({'Price': record.Price, 'Total': 1});
  return byType;
},{});
Then this piece of code is required to calculate the totals, I'm afraid:
// Second pass: inside every type bucket, merge rows with the same price and
// replace Data with one {Price, Total} row per distinct price.
// Prices pass through object keys, so they come back as strings.
for(var type in expected_output){
  var bucket = expected_output[type];
  var tally = {};
  for(var rowKey in bucket.Data){
    var priceKey = bucket.Data[rowKey].Price;
    tally[priceKey] = (tally[priceKey] || 0) + 1;
  }
  bucket.Data = [];
  for(var price in tally){
    bucket.Data.push({
      'Price': price,
      'Total': tally[price]
    });
  }
}
Output:
{
"Flat":{
"Type":"Flat",
"Data":[{"Price":"100.9","Total":2},
{"Price":"67.5","Total":1}]
},
"Room":{
"Type":"Room",
"Data":[{"Price":"23.5","Total":1}]
},
"Plot":{
"Type":"Plot",
"Data":[{"Price":"89.8","Total":1}]
}
}
As the Types and the Prices are unique after grouping I think a structure like {"Flat": {"100.9":2,"67.5":1}, {"Room": {"23.5": 1}}} would be easier to handle. So could do the grouping the following way:
// Builds {Type: {Price: count}}, e.g. {"Flat": {"100.9": 2, "67.5": 1}, "Room": {"23.5": 1}},
// in a single pass over the records.
var output = {};
map_results.forEach(function(el) {
  var type = el["Type"];
  var price = el["Price"];
  // A plain object (not an array) holds the per-price counters.
  output[type] = output[type] || {};
  // Start at 0 when the price has not been seen yet, then add one.
  output[type][price] = (output[type][price] || 0) + 1;
});
If you can not handle this structure you could do another mapping to your structure.
As you are iterating the Array one time this should have a complexity of n.
Look here for a working fiddle.
EDIT: So remap everything to your structure. The cost of the remapping is far less than that of the first mapping, because the grouping is already done.
// Converts {Type: {Price: count}} into [{Type, Data: [{Price, Total}]}].
var expected_output = [];
// `type` and `price` are declared so they do not leak as implicit globals.
for (var type in output) {
  var prices = [];
  for (var price in output[type]) {
    // Object keys are strings; the prices were numbers before grouping, so
    // convert them back.
    prices.push({"Price": Number(price), "Total": output[type][price]});
  }
  expected_output.push({"Type": type, "Data": prices});
}
Below is yet another effort. Here's a FIDDLE
For performance testing, I also mocked up a JSPerf test with 163840 elements. On Chrome(OSX) original solution is 90% slower than this one.
Few notes:
Feel free to optimize for your case (e.g. take out the hasOwnProperty check on object cloning).
Also, if you need the latest Total as the first element, use unshift instead of push to add the obj to the beginning of the array.
/**
 * Groups records by the value of `key` and, inside each group, by the value
 * of `key2`, counting how many records share that pair.
 *
 * @param {Object[]} arr records to group; records without an own `key`
 *        property are ignored
 * @param {string} key property to group by (e.g. "Type")
 * @param {string} key2 property to count within a group (e.g. "Price");
 *        records without it create the group but add no row
 * @returns {Object[]} one object per distinct `key` value, in first-seen
 *          order: { [key]: value, Data: [ { ...other props, Total: n } ] }.
 *          Each row copies the remaining own properties of the first record
 *          seen with that `key2` value.
 */
function groupBy(arr, key, key2) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var retArr = [];
  var groups = {}; // key value -> { node: output object, rows: key2 value -> row }

  arr.forEach(function(item) {
    if (!hasOwn.call(item, key)) {
      return;
    }
    var kVal = item[key];
    if (!hasOwn.call(groups, kVal)) {
      var node = {};
      node[key] = kVal;
      node["Data"] = [];
      groups[kVal] = { node: node, rows: {} };
      retArr.push(node);
    }
    var group = groups[kVal];

    if (!hasOwn.call(item, key2)) {
      return;
    }
    var k2Val = item[key2];
    if (hasOwn.call(group.rows, k2Val)) {
      // Same key/key2 pair seen before: count it instead of adding a new row.
      group.rows[k2Val]["Total"] += 1;
      return;
    }
    var row = {};
    for (var k in item) {
      if (hasOwn.call(item, k) && k != key) {
        row[k] = item[k];
      }
    }
    row["Total"] = 1;
    group.rows[k2Val] = row;
    group.node["Data"].push(row);
  });
  return retArr;
}

var map_results = [{"Type":"Flat","Price":100.9},
                   {"Type":"Room","Price":23.5},
                   {"Type":"Flat","Price":67.5},
                   {"Type":"Flat","Price":100.9},
                   {"Type":"Plot","Price":89.8}];
var expected_output = groupBy(map_results, "Type", "Price");
console.dir(expected_output);
Tried something like this:
/**
 * Reducer for Array.prototype.reduce: folds one {Type, Price} record into the
 * accumulator, an array of {Type, Data: [{Price, Total}]} groups, and returns
 * that same (mutated) accumulator.
 */
var reduce_func = function (previous, current) {
  var matchedType = false;
  for (var groupKey in previous) {
    var group = previous[groupKey];
    if (current.Type != group.Type) {
      continue;
    }
    matchedType = true;

    // Bump the counter of the matching price row, if there is one.
    var matchedPrice = false;
    for (var rowKey in group.Data) {
      var row = group.Data[rowKey];
      if (current.Price == row.Price) {
        matchedPrice = true;
        row.Total += 1;
      }
    }
    if (!matchedPrice) {
      group.Data.push({Price: current.Price, Total: 1});
    }
  }
  // First record of this type (this also covers an empty accumulator).
  if (!matchedType) {
    previous.push({Type: current.Type, Data: [{Price: current.Price, Total: 1}]});
  }
  return previous;
};
map_results.reduce(reduce_func,[]);

Javascript: Improve four nested loops?

I have a complex array of objects with nested arrays. The following works to extract certain objects, but it's one of the ugliest things I've written.
Is there some javascript dark magic to do this elegantly?
/**
 * Collects, in document order, every block found under a layer whose
 * className is 'EighthPageLayer' in Lifestyle.Pagination.structure
 * (pages -> layers -> rows -> blocks, each level reached through `children`).
 *
 * @returns {Array} the matching blocks
 */
function getEighthInsertionBlocks() {
  var struct = Lifestyle.Pagination.structure;
  var blocks = [];
  for (var i = 0; i < struct.length; i++) {
    var page = struct[i];
    var layers = page.children;
    for (var j = 0; j < layers.length; j++) {
      var layer = layers[j];
      if (layer.className === 'EighthPageLayer') {
        var rows = layer.children;
        for (var k = 0; k < rows.length; k++) {
          var row = rows[k];
          // Declared locally: assigning to an undeclared `eBlocks` creates a
          // global in sloppy mode and throws in strict mode.
          var eBlocks = row.children;
          for (var l = 0; l < eBlocks.length; l++) {
            blocks.push(eBlocks[l]);
          }
        }
      }
    }
  }
  return blocks;
}
Not that I'm a big fan of code golf, but ... this is horrible.
You could write a generic iterator, which would reduce the code into sequential blocks:
/**
 * Calls callback(item, index) for every item of collection and concatenates
 * the truthy return values into one flat array (a returned array contributes
 * its elements, any other truthy value contributes itself).
 *
 * @param {Array} collection items to visit
 * @param {function(*, number): *} callback return an array or value to
 *        collect, or a falsy value to skip the item
 * @returns {Array} the collected values
 */
var iterator = function(collection, callback){
  var results = [];
  for (var i = 0; i < collection.length; i++){
    var result = callback(collection[i], i);
    if (result){
      results = results.concat(result);
    }
  }
  return results;
};
/**
 * Same result as the nested-loop version, written as three sequential passes
 * with the generic `iterator` helper: pages -> layers -> rows -> blocks.
 *
 * @returns {Array} blocks below every 'EighthPageLayer' layer
 */
function getEighthInsertionBlocks() {
  var struct = Lifestyle.Pagination.structure;
  var layers = iterator(struct, function(page){ return page.children; });
  var rows = iterator(layers, function(layer){
    return layer.className === 'EighthPageLayer' ? layer.children : null;
  });
  // Each row contributes all of its children (the blocks).
  return iterator(rows, function(row){ return row.children; });
}
I usually tend to like using forEach for the readability but this is subjective.
// Predicate: true for layers whose className is "EighthPageLayer".
function isEighthPageLayer(layer){
  return layer.className === "EighthPageLayer";
}

/**
 * forEach-based version: collects every block below an EighthPageLayer layer.
 *
 * @param {Array} struct pages; each page lists its layers in `children`,
 *        each layer its rows in `children`, each row its blocks in `children`
 * @returns {Array} the blocks, in document order
 */
function getEighthInsertionBlocks(struct) {
  var blocks = [];
  struct.forEach(function(page){
    // Layers live in page.children, as in the original nested-loop version.
    page.children
      .filter(isEighthPageLayer)
      .forEach(function(layer) {
        layer.children.forEach(function(row){
          row.children.forEach(function(eBlock){
            blocks.push(eBlock);
          });
        });
      });
  });
  return blocks;
}
This is an interesting challenge. To avoid deep nesting, you need a generic iterator that you can use recursively, yet there are a few special cases in your iteration. So, I tried to create a generic iterator that you can pass an options object to in order to specify the special conditions. Here's what I came up with. Since I don't have a sample data set, this is untested, but hopefully you see the idea:
/**
 * Walks a tree level by level through each node's `children` and appends the
 * nodes found at options.endLevel to `output`.
 *
 * @param {Array} data nodes of the current level; a missing list (node
 *        without `children`) is treated as empty
 * @param {Object} options `endLevel` is the level whose nodes are collected;
 *        options[level].fn, when present, is a filter for that level and a
 *        node is only followed/collected when it returns exactly true
 * @param {number} level depth of `data` (0 for the initial call)
 * @param {Array} output array that receives the collected nodes
 */
function iterateLevel(data, options, level, output) {
  if (!data) {
    // A node above endLevel had no children: nothing to visit on this branch.
    return;
  }
  var fn = options[level] && options[level].fn;
  for (var i = 0; i < data.length; i++) {
    if (!fn || (fn(data[i]) === true)) {
      if (level === options.endLevel) {
        output.push(data[i]);
      } else {
        iterateLevel(data[i].children, options, level + 1, output);
      }
    }
  }
}
// Options for iterateLevel: keys that are level numbers hold an optional filter
// for that level; "endLevel" is the level whose items are collected.
var iterateOptions = {
// Level 1 (the children of the top-level items): only follow EighthPageLayer entries.
"1": {
fn: function(arg) {return arg.className === 'EighthPageLayer'}
},
// Items found at level 3 are pushed to the output array.
"endLevel": 3
}
// Receives the collected items.
var blocks = [];
// Start at level 0 with the top-level structure.
iterateLevel(Lifestyle.Pagination.structure, iterateOptions, 0, blocks);
The idea is that the options object can have an optional filter function for each level and it tells you how many levels to go down.
Working demo: http://jsfiddle.net/jfriend00/aQs6h/

Categories

Resources