Order string match results by max match - javascript

I want to find all matches of a search phrase in a list of strings and return the results ordered by the number of matched words, highest first. Let's say I have some strings:
var strArray = [
"This is my number one string",
"Another string that contains number",
"Just for example string"
];
// Results of search "another number" should be:
var resultArrayOfIndexes = [1, 0];
So far I can search the strings, but my code returns every index where there is at least one match; I want the result array to be sorted by the number of matches, highest first.
My code:
/**
 * Returns the entries of `list` whose extracted value matches at least one
 * word of `phrase`. Each entry is added at most once, in list order.
 * When matching is disabled in the config, `list` itself is returned.
 *
 * Depends on `config`, `$` and `match`, which are defined outside this snippet.
 */
function findMatch(list, phrase) {
  if (!config.get("list").match.enabled) {
    return list;
  }
  var hits = [];
  var total = list.length;
  var pos = 0;
  while (pos < total) {
    var item = list[pos];
    var text = config.get("getValue")(item);
    var alreadyAdded = false;
    $.each(phrase.split(' '), function(unusedIdx, rawWord) {
      // match on alphaNum chars only
      var cleaned = rawWord.replace(/[\W_]+/g, "");
      if (match(text, cleaned) && !alreadyAdded) {
        hits.push(item);
        alreadyAdded = true;
      }
    });
    pos += 1;
  }
  return hits;
}

I'm assuming a case-insensitive search is required.
The following code changes the phrase into an array of individual words, then maps the list to get back an array of objects in the form {index: 0, matches:1}, then filters out the ones where there were no matches, then sorts, then maps again to get just the indices.
/**
 * Ranks the strings in `list` by how many search terms of `phrase` they
 * contain (case-insensitive substring match).
 *
 * @param {string[]} list   Strings to search.
 * @param {string}   phrase Whitespace-separated search terms.
 * @returns {number[]} Indexes into `list` of the entries with at least one
 *   match, ordered by descending match count.
 */
function findMatch(list, phrase) {
  // Drop empty terms: leading/trailing whitespace (or an empty phrase) would
  // otherwise produce "", which indexOf() finds in every string.
  var searchTerms = phrase.toLowerCase().split(/\s+/).filter(function(term) {
    return term !== "";
  });
  return list.map(function(v, i) {
      v = v.toLowerCase();
      return {
        index: i,
        matches: searchTerms.reduce(function(a, c) {
          return a + (v.indexOf(c) != -1 ? 1 : 0);
        }, 0)
      };
    })
    .filter(function(v) { return v.matches > 0; })
    .sort(function(a, b) { return b.matches - a.matches; })
    .map(function(v) { return v.index; });
}
var strArray = [
"This is my number one string", "Another string that contains number","Just for example string"
];
console.log(findMatch(strArray, "another number"));
Or expand the following for basically the same thing with ES6 features:
/**
 * ES6 variant: ranks the strings in `list` by how many search terms of
 * `phrase` they contain (case-insensitive substring match).
 *
 * @param {string[]} list   Strings to search.
 * @param {string}   phrase Whitespace-separated search terms.
 * @returns {number[]} Indexes of entries with at least one match, ordered by
 *   descending match count.
 */
function findMatch(list, phrase) {
  // filter(Boolean) removes the "" terms produced by surrounding whitespace
  // or an empty phrase; "" is included in every string and would match all.
  const searchTerms = phrase.toLowerCase().split(/\s+/).filter(Boolean);
  return list
    .map((text, index) => {
      const haystack = text.toLowerCase();
      const matches = searchTerms.reduce(
        (sum, term) => sum + (haystack.includes(term) ? 1 : 0),
        0
      );
      return { index, matches };
    })
    .filter(entry => entry.matches > 0)
    .sort((a, b) => b.matches - a.matches)
    .map(entry => entry.index);
}
var strArray = [
"This is my number one string", "Another string that contains number","Just for example string"
];
console.log(findMatch(strArray, "another number"));

You can use regex to match your phrase as well as count how many words are matches in your string if you are familiar with regex.
Assume that you want to know how many words were matched as well, you can store it as an array of objects where each object store the count number and the target string.
var strArray = [
"This is my number one string", "Another string that contains number", "Just for example string"
];
/**
 * Counts, per string, how many times any word of `phrase` occurs
 * (case-insensitive), logs the matching strings sorted by descending count,
 * and returns that same array.
 *
 * @param {string[]} list   Strings to search.
 * @param {string}   phrase Whitespace-separated words to look for.
 * @returns {{count: number, string: string}[]} Entries with count > 0.
 */
function findMatch(list, phrase){
  // Escape regex metacharacters so words are matched literally, and drop
  // empty words (an empty alternative matches at every position).
  var words = phrase.split(/\s+/).filter(Boolean).map(function(word){
    return word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  });
  var counts = [];
  if (words.length > 0) {
    // "i" makes the search case-insensitive on both the phrase and the list.
    var re = new RegExp(words.join("|"), "gi");
    for (var i = 0; i < list.length; i++) {
      var count = (list[i].match(re) || []).length;
      // add to array if matched
      if (count > 0) {
        counts.push({count: count, string: list[i]});
      }
    }
    // sort by max match
    counts.sort(function(a, b){
      return b.count - a.count;
    });
  }
  console.log(counts);
  return counts;
}
findMatch(strArray, "another number");
The result will look something like this (strings without any match are filtered out):
[ { count: 2, string: 'Another string that contains number' },
{ count: 1, string: 'This is my number one string' } ]

Related

How can I find the EXACT amount of matching words in a string?

for a project I've written a function which includes the following:
// Records which filtering words occur in `value`.
// `value` and `user_report` are not defined in this snippet; they come from
// the surrounding function.
var filtering_words = ['alpha', 'beta', 'gamma'];
//finding matching words
var prohibited_words = filtering_words;
for (var i = 0; i < prohibited_words.length; i++) {
// indexOf only tells whether the word occurs at all, not how many times
if (value.indexOf(prohibited_words[i]) > -1) {
user_report['matching_words'].push(prohibited_words[i]);
// number of distinct matching words found so far
user_report['matching_words_amount'] = user_report['matching_words'].length;
}
}
String: 'alpha beta beta gamma'
For now I just get all the matching words.
So my result would look like that: ['alpha'], ['beta'], ['gamma']
But I would also like to know how often a "filtering_word" is in my string. In this case I would want to know that there are actually 2 betas...
Any idea?
Cheers
Store the results in an Object instead of an Array, so that you could map the filtered word to the number of occurrences.
To find the number of occurrences, use a RegExp with the g flag to get an array of all occurrences (and i flag for a case insensitive search), then get the resulting array length.
// Counts, case-insensitively, how often each prohibited word occurs in `value`
// and stores the non-zero counts under user_report.matching_words.
var user_report = { matching_words: {} }
var value = 'lambdabetaalphabeta'
var filtering_words = ['alpha', 'beta', 'gamma'];
var prohibited_words = filtering_words;
prohibited_words.forEach(function (term) {
  // match() with the g flag returns every occurrence, or null when none
  var found = value.match(new RegExp(term, 'ig'));
  var hits = found === null ? 0 : found.length;
  if (hits) {
    var bucket = user_report['matching_words'] || {};
    bucket[term] = hits;
  }
});
// number of distinct prohibited words that were found
user_report['matching_words_amount'] = Object.keys(user_report['matching_words']).length
console.log(user_report)
This code gives you the 'unnecessary' words:
// Logs each word that occurs more than once and removes it from `arr`.
let arr = ['alpha', 'beta', 'gamma', 'beta'];
for(let i = 0;i < arr.length;i++){
for(let j = 0;j < arr.length;j++){
// compare every pair of distinct positions
if(i !== j){
if(arr[i] === arr[j]){
// remove the element at i (the array shrinks while it is being iterated) and log it
let [sameWords] = arr.splice(i, 1)
console.log(sameWords)
}
}
}
}
You can use reduce() to populate a dictionary with prohibited words and their frequencies:
var str = "alpha beta yo alpha beta lorem ipsum gamma alpha",
  filtering_words = ['alpha', 'beta', 'beta', 'gamma'];
// Walk the words of str and tally only those listed in filtering_words.
var result = {};
str.split(' ').forEach(function (token) {
  if (filtering_words.indexOf(token) < 0) {
    return; // not a prohibited word
  }
  result[token] = (result[token] || 0) + 1;
});
console.log(result)
// {alpha: 3, beta: 2, gamma: 1}

Alibaba interview: print a sentence with min spaces

I saw this interview question and gave a go. I got stuck. The interview question is:
Given a string
var s = "ilikealibaba";
and a dictionary
var d = ["i", "like", "ali", "liba", "baba", "alibaba"];
try to give the s with min space
The output may be
i like alibaba (2 spaces)
i like ali baba (3 spaces)
but pick no.1
I have some code, but got stuck in the printing.
If you have better way to do this question, let me know.
// True when string `s` begins with `part`.
function isStartSub(part, s) {
  return s.startsWith(part);
}
// Returns what is left of `s` after dropping its first `part.length` characters.
function getRestStr(part, s) {
  return s.slice(part.length);
}
// Empties `arr` from the back and returns its elements concatenated in that
// (reversed) order; non-string elements are coerced by string concatenation.
function recPrint(arr) {
  var joined = '';
  while (arr.length != 0) {
    joined += arr.pop();
  }
  return joined;
}
// NOTE: have trouble to print
// Or if you have better ways to do this interview question, please let me know
// Thin wrapper: delegates to recPrint, which consumes `arr` and returns the
// concatenation of its elements.
function myPrint(arr) {
return recPrint(arr);
}
// Returns the element of `arr` with the smallest `length`; the first one wins
// on ties, and arr[0] (undefined for an empty array) is returned when nothing
// is shorter than Number.MAX_SAFE_INTEGER.
function getMinArr(arr) {
  var shortestLen = Number.MAX_SAFE_INTEGER;
  var shortestIdx = 0;
  for (const [idx, candidate] of arr.entries()) {
    if (candidate.length < shortestLen) {
      shortestLen = candidate.length;
      shortestIdx = idx;
    }
  }
  return arr[shortestIdx];
}
// Builds, into `buf`, a nested-array tree of every way to consume `s` from the
// left with words of dictionary `d`. Each branch is [word, ...subBranches];
// branches are prepended, so later dictionary words come first in `buf`.
function rec(s, d, buf) {
  // Base case: nothing left to split
  if (s.length == 0) {
    return;
  }
  d.forEach(function (word) {
    if (!isStartSub(word, s)) {
      return;
    }
    var branch = [];
    // recurse on the remainder of the string after this word
    rec(getRestStr(word, s), d, branch);
    branch.unshift(word);
    buf.unshift(branch);
  });
}
// Entry point: builds the tree of possible splits of `s` using dictionary `d`
// via rec(), dumps the tree to the console, and returns myPrint()'s rendering of it.
function myfunc(s, d) {
var buf = [];
rec(s, d, buf);
console.log('-- test --');
// depth:null prints the nested arrays in full
console.dir(buf, {depth:null});
return myPrint(buf);
}
// Output will be
// 1. i like alibaba (with 2 spaces)
// 2. i like ali baba (with 3 spaces)
// we pick no.1, as it needs less spaces
var s = "ilikealibaba";
var d = ["i", "like", "ali", "liba", "baba", "alibaba"];
var out = myfunc(s, d);
console.log(out);
Basically, my output is, not sure how to print it....
[ [ 'i', [ 'like', [ 'alibaba' ], [ 'ali', [ 'baba' ] ] ] ] ]
This problem is best suited for a dynamic programming approach. The subproblem is, "what is the best way to create a prefix of s". Then, for a given prefix of s, we consider all words that match the end of the prefix, and choose the best one using the results from the earlier prefixes.
Here is an implementation:
var s = "ilikealibaba";
var arr = ["i", "like", "ali", "liba", "baba", "alibaba"];
// dp[n] is the shortest spaced sentence for the first n characters of s,
// or null when that prefix cannot be built from the dictionary.
var dp = [""];
for (var end = 1; end <= s.length; end++) {
  var best = null; // best sentence found so far for s.substring(0, end)
  for (const word of arr) {
    // only words that sit at the end of the current prefix are candidates
    if (!s.endsWith(word, end)) {
      continue;
    }
    if (word.length == end) {
      // the whole prefix is one word: no spaces needed, cannot be beaten
      best = word;
      break;
    }
    var prev = dp[end - word.length];
    if (prev === null) {
      continue; // the part before this word cannot be made at all
    }
    var isShorter = best === null || prev.length + word.length + 1 < best.length;
    if (isShorter) {
      best = prev + " " + word;
    }
  }
  dp.push(best);
}
console.log(dp[s.length]);
pkpnd's answer is along the right track. But word dictionaries tend to be quite large sets, and iterating over the entire dictionary at every character of the string is going to be inefficient. (Also, saving the entire sequence for each dp cell may consume a large amount of space.) Rather, we can frame the question, as we iterate over the string, as: given all the previous indexes of the string that had dictionary matches extending back (either to the start or to another match), which one is both a dictionary match when we include the current character, and has a smaller length in total. Generally:
f(i) = min(
f(j) + length(i - j) + (1 if j is after the start of the string)
)
for all j < i, where string[j] ended a dictionary match
and string[j+1..i] is in the dictionary
Since we only add another j when there is a match and a new match can only extend back to a previous match or to the start of the string, our data structure could be an array of tuples, (best index this match extends back to, total length up to here). We add another tuple if the current character can extend a dictionary match back to another record we already have. We can also optimize by exiting early from the backwards search once the matched substring would be greater than the longest word in the dictionary, and building the substring to compare against the dictionary as we iterate backwards.
JavaScript code:
/**
 * Builds the table of dictionary-match records for `str`.
 * Each record is [endIndex, indexOfPreviousRecordInTable, totalLength]; the
 * table starts with the sentinel [-1, -1, -1]. For every character position
 * the records are scanned backwards, growing the candidate substring one
 * record at a time, and the match with the smallest total length is appended.
 *
 * @param {string} str  Text without spaces.
 * @param {Set<string>} dict Dictionary (anything with a has() method).
 * @returns {number[][]} The record table.
 */
function f(str, dict){
  const m = [[-1, -1, -1]];
  for (let i = 0; i < str.length; i++) {
    let bestEnd = null;
    let bestPrev = null;
    let bestTotal = Infinity;
    let tail = '';
    let right = i; // right edge of the piece not yet prepended to tail
    for (let j = m.length - 1; j >= 0; j--) {
      const left = m[j][0];
      const prevTotal = m[j][2];
      // extend the candidate back to just after record j's end index
      tail = str.slice(left + 1, right + 1) + tail;
      right = left;
      if (!dict.has(tail)) {
        continue;
      }
      const total = prevTotal + 1 + i - left;
      if (total < bestTotal) {
        bestEnd = i;
        bestPrev = j;
        bestTotal = total;
      }
    }
    if (bestEnd !== null) {
      m.push([bestEnd, bestPrev, bestTotal]);
    }
  }
  return m;
}
var s = "ilikealibaba";
var d = new Set(["i", "like", "ali", "liba", "baba", "alibaba"]);
console.log(JSON.stringify(f(s,d)));
We can track back our result:
[[-1,-1,-1],[0,0,1],[4,1,6],[7,2,10],[11,2,14]]
[11, 2, 14] means a total length of 14,
where the previous index in m is 2 and the right index
of the substr is 11
=> follow it back to m[2] = [4, 1, 6]
this substr ended at index 4 (which means the
first was "alibaba"), and followed m[1]
=> [0, 0, 1], means this substr ended at index 0
so the previous one was "like"
And there you have it: "i like alibaba"
As you're asked to find a shortest answer probably Breadth-First Search would be a possible solution. Or you could look into A* Search.
Here is a working example with A* (because it's less boring to do than BFS :)), basically just copied from the Wikipedia article. All the "turning a string into a graph" magic happens in the getNeighbors function.
https://jsfiddle.net/yLeps4v5/4/
var str = 'ilikealibaba'
var dictionary = ['i', 'like', 'ali', 'baba', 'alibaba']
var START = -1
var FINISH = str.length - 1
// Returns all the positions in the string that we can "jump" to from position i
// For position i (index of the last consumed character), returns the positions
// reachable by consuming one dictionary word starting at i + 1.
function getNeighbors(i) {
  const from = i + 1;
  return dictionary.reduce((reachable, word) => {
    if (str.slice(from, from + word.length) == word) {
      reachable.push(i + word.length);
    }
    return reachable;
  }, []);
}
// Searches for a path of dictionary-word "jumps" from position `start` to
// position `goal`, where every jump costs 1. Returns the words along the path
// (via reconstruct_path) or throws when the goal cannot be reached.
// Nodes are taken from the front of openSet in insertion order; no heuristic
// or priority ordering is applied in this code.
function aStar(start, goal) {
// The set of nodes already evaluated
const closedSet = {};
// The set of currently discovered nodes that are not evaluated yet.
// Initially, only the start node is known.
const openSet = [start];
// For each node, which node it can most efficiently be reached from.
// If a node can be reached from many nodes, cameFrom will eventually contain the
// most efficient previous step.
var cameFrom = {};
// For each node, the cost of getting from the start node to that node.
// NOTE(review): the keys initialised here are dictionary words, while lookups
// below use positions, so gScore[neighbor] is undefined for an undiscovered
// node and the `>=` comparison against it is false.
const gScore = dictionary.reduce((acc, word) => { acc[word] = Infinity; return acc }, {})
// The cost of going from start to start is zero.
gScore[start] = 0
while (openSet.length > 0) {
var current = openSet.shift()
if (current == goal) {
return reconstruct_path(cameFrom, current)
}
closedSet[current] = true;
getNeighbors(current).forEach(neighbor => {
if (closedSet[neighbor]) {
return // Ignore the neighbor which is already evaluated.
}
if (openSet.indexOf(neighbor) == -1) { // Discover a new node
openSet.push(neighbor)
}
// The distance from start to a neighbor
var tentative_gScore = gScore[current] + 1
if (tentative_gScore >= gScore[neighbor]) {
return // This is not a better path.
}
// This path is the best until now. Record it!
cameFrom[neighbor] = current
gScore[neighbor] = tentative_gScore
})
}
throw new Error('path not found')
}
// Follows the cameFrom links back from `current` and returns the slices of
// `str` covered by each step, in start-to-goal order.
function reconstruct_path(cameFrom, current) {
  const pieces = [];
  let previous = cameFrom[current];
  // position 0 is a valid predecessor, hence the explicit == 0 check
  while (previous || previous == 0) {
    pieces.unshift(str.slice(previous + 1, current + 1));
    current = previous;
    previous = cameFrom[current];
  }
  return pieces;
}
console.log(aStar(START, FINISH));
You could collect all possible combinations of the string by checking the starting string and render then the result.
If more than one result has the minimum length, all results are taken.
It might not work for extreme cases where the string just repeats the same base string, like 'abcabc' with the word 'abc'. In this case I suggest using the shortest string and updating each partial result by iterating to find longer strings and replacing where possible.
// Depth-first enumeration of every way to split `string` into entries of the
// outer `words` list. `array` carries the words chosen so far; each complete
// split is appended to the outer `result` array.
function getWords(string, array = []) {
  const candidates = words.filter(w => string.startsWith(w));
  for (const word of candidates) {
    const remainder = string.slice(word.length);
    const path = array.concat(word);
    if (remainder) {
      getWords(remainder, path);
    } else {
      result.push(path);
    }
  }
}
var string = "ilikealibaba",
words = ["i", "like", "ali", "liba", "baba", "alibaba"],
result = [];
getWords(string);
console.log('all possible combinations:', result);
console.log('result:', result.reduce((r, a) => {
if (!r || r[0].length > a.length) {
return [a];
}
if (r[0].length === a.length) {
r.push(a);
}
return r;
}, undefined))
Use trie data structure
Construct a trie data structure based on the dictionary data
Search the sentence for all possible slices and build a solution tree
Deep traverse the solution tree and sort the final combinations
const sentence = 'ilikealibaba';
const words = ['i', 'like', 'ali', 'liba', 'baba', 'alibaba',];
// Trie whose children are stored as own properties keyed by character.
// A child under the key '_' marks the end of a dictionary word.
class TrieNode {
  constructor() { }

  // Returns the child stored under key `a`, creating it when missing.
  set(a) {
    const child = this[a] || new TrieNode();
    this[a] = child;
    return child;
  }

  // Walks `word` (a string or an array of characters; arrays are consumed)
  // and pushes onto `marks` every depth at which a dictionary word ends.
  // Returns 0 when this node has no child for the next character.
  search(word, marks, depth = 1) {
    const chars = Array.isArray(word) ? word : word.split('');
    const head = chars.shift();
    const child = this[head];
    if (!child) {
      return 0;
    }
    if (child._) {
      marks.push(depth);
    }
    child.search(chars, marks, depth + 1);
  }
}

// Builds a trie containing every entry of `words`.
TrieNode.createTree = words => {
  const root = new TrieNode();
  words.forEach(word => {
    let node = root;
    let pos = 0;
    while (pos < word.length) {
      node = node.set(word[pos]);
      pos += 1;
    }
    node.set('_'); // end-of-word marker
  });
  return root;
};
const t = TrieNode.createTree(words);
// Recursively builds the solution tree for `sentence`: each key is the length
// of a dictionary word found at the start (as reported by the outer trie `t`),
// and each value is the solution tree for the remainder after that word.
function searchSentence(sentence) {
  const marks = [];
  t.search(sentence, marks);
  return marks.reduce((tree, mark) => {
    tree[mark] = searchSentence(sentence.slice(mark));
    return tree;
  }, {});
}
const solutionTree = searchSentence(sentence);
// Depth-first walk over the solution tree. Keys along a path are word lengths;
// whenever they add up to `targetLen` the path is turned into the matching
// list of words cut from `sentence`. Returns all such word lists.
function deepTraverse(tree, sentence, targetLen = sentence.length) {
  const found = [];
  const path = [];

  const cutWords = () => {
    const pieces = [];
    let remaining = sentence;
    for (const len of path) {
      pieces.push(remaining.slice(0, len));
      remaining = remaining.slice(len);
    }
    return pieces;
  };

  const walk = node => {
    for (const key of Object.keys(node)) {
      path.push(+key);
      const covered = path.reduce((total, len) => total + len, 0);
      if (covered === targetLen) {
        found.push(cutWords());
      }
      if (node[key]) {
        walk(node[key]);
      }
      path.pop();
    }
  };

  walk(tree);
  return found;
}
const solutions = deepTraverse(solutionTree, sentence);
solutions.sort((s1, s2) => s1.length - s2.length).forEach((s, i) => {
console.log(`${i + 1}. ${s.join(' ')} (${s.length - 1} spaces)`);
});
console.log('pick no.1');

Counting words in javascript and push it into an object

I want to write a JavaScript program that counts the words in a string and returns each word together with the number of times it appears, e.g. {hello : 2, "#hello":1, world : 1, toString:1}
below is my code but i only get the total number of words
/**
 * Returns the total number of space-separated words in `str`.
 * (The unused `app = {}` assignment was removed: it created an implicit
 * global and throws a ReferenceError in strict mode / ES modules.)
 *
 * @param {string} str Text to split on single spaces.
 * @returns {number} Number of pieces produced by the split.
 */
function words(str) {
  return str.split(" ").length;
}
console.log(words("hello world"));
Use reduce to iterate the words array, and count the instances:
/**
 * Counts how often each space-separated word occurs in `str`.
 *
 * @param {string} str Text to split on single spaces.
 * @returns {Object<string, number>} Map of word to number of occurrences.
 */
function words(str) {
  // Borrow hasOwnProperty from the prototype: once the word "hasOwnProperty"
  // itself has been counted, count.hasOwnProperty is a number, not a function.
  var hasOwn = Object.prototype.hasOwnProperty;
  return str.split(" ").reduce(function(count, word) {
    count[word] = hasOwn.call(count, word) ? count[word] + 1 : 1;
    return count;
  }, {});
}
console.log(words("reserved words like prototype and toString ok? Yes toString is fine"));
An ES6 approach that reduces over an array of items such as strings and returns the count:
/**
 * Counts how often each string occurs in `stringArr`.
 *
 * @param {string[]} stringArr Items to tally.
 * @returns {Object<string, number>} Map of item to number of occurrences.
 */
const strFrequency = function (stringArr) {
  return stringArr.reduce((count, word) => {
    // Only own properties hold counts; inherited ones such as toString or
    // constructor are functions and would be concatenated with 1 otherwise.
    const seen = Object.prototype.hasOwnProperty.call(count, word);
    count[word] = (seen ? count[word] : 0) + 1;
    return count;
  }, {});
};
let names = ["Bob", "Bill", "Bo", "Ben", "Bob", "Brett", "Ben", "Bill", "Bo", "Ben", "Bob", "Ben"];
console.log(strFrequency(names));
// => {Bob: 3, Bill: 2, Bo: 2, Ben: 4, Brett: 1}
/**
 * Tallies the space-separated words of `str`.
 * Returns an array of {word, occurences} records in order of first appearance.
 */
function words(str){
  var tally = [];
  // word -> its record in tally, so a repeated word is found without rescanning
  var recordByWord = new Map();
  str.split(" ").forEach(function(token){
    var record = recordByWord.get(token);
    if (record === undefined) {
      record = {"word": token, "occurences": 1};
      recordByWord.set(token, record);
      tally.push(record);
    } else {
      record.occurences += 1;
    }
  });
  return tally;
}
This is the code and what I passed through #ori
// Normalises newlines, tabs and runs of whitespace to single spaces, then
// tallies the words in a plain object. Because the tally inherits from
// Object.prototype, a word such as "toString" picks up the inherited function
// instead of starting from 0.
function words(str) {
  var tally = {};
  str.replace(/\n|\t|\s\s+/g, ' ').split(' ').forEach(function(token) {
    tally[token] = (tally[token] || 0) + 1;
  });
  return tally;
}
console.log(words("reserved words like prototype and toString ok?"));
it consoled out
{toString: "function toString() { [native code] }1"}
To allow reserved words as keys without using hasOwnProperty, you could create the object with a null prototype.
eg.
// Normalises newlines, tabs and runs of whitespace to single spaces, then
// tallies the words in an object created with a null prototype, so no
// inherited property can collide with a word.
function words(str) {
  var tally = Object.create(null);
  str.replace(/\n|\t|\s\s+/g, ' ').split(' ').forEach(function(token) {
    tally[token] = (tally[token] || 0) + 1;
  });
  return tally;
}
console.log(words("reserved words like prototype and toString ok?"));

Algorithm for counting occurrences of words in string

I am trying to build an algorithm to see, which words occurr the most in comments.
Therefore I came up with this (in Javascript):
// Counts (case-insensitively) how often each word occurs across all
// comment.message strings and returns the {name, count} records ordered by
// descending count using lodash's orderBy.
var analyze = function(comments){
  var seen = [];     // words in order of first appearance
  var tallies = {};  // position in `seen` -> {name, count}
  comments.forEach(function(comment){
    comment.message.split(" ").forEach(function(raw){
      var word = raw.toLowerCase();
      if (word === "") {
        return;
      }
      var slot = seen.indexOf(word);
      if (slot === -1) {
        seen.push(word);
        tallies[seen.length - 1] = {"name":word,"count":1};
      } else {
        tallies[slot].count++;
      }
    });
  });
  return _.orderBy(tallies, ['count'], ['desc']);
}
Can the algorithm be optimized further (e.g. by moving toLowerCase() outside the inner loop)?
In next step I would define a "blacklist" or words that are not interesting like "the, is, I, am, are,..."
You could use a hash table for reference to the array item for a faster access to the count object. result is now an array, which is now sortable.
/**
 * Counts (case-insensitively) how often each word occurs across all comments.
 *
 * @param {{message: string}[]} comments Comments whose messages are analysed.
 * @returns {{name: string, count: number}[]} One record per distinct word,
 *   sorted by descending count.
 */
var analyze = function (comments) {
  var result = [];
  // A Map instead of a plain object: with `{}` a word such as "constructor"
  // resolves to an inherited property, is never registered, and the counter
  // increment lands on the global Object function.
  var entryByWord = new Map();
  comments.forEach(function (comment) {
    comment.message.split(" ").forEach(function (word) {
      word = word.toLowerCase();
      if (word === "") {
        return; // consecutive spaces produce empty pieces
      }
      var entry = entryByWord.get(word);
      if (!entry) {
        entry = { name: word, count: 0 };
        entryByWord.set(word, entry);
        result.push(entry);
      }
      entry.count++;
    });
  });
  return result.sort(function (a, b) { return b.count - a.count; });
};
console.log(analyze([{ message: 'a b c d a v d e f g q' }]));

splitting a string into a multidimensional array

I have a list of strings, I want to check if the string contains a specific word, and if it does split all the words in the string and add it to an associative array.
myString = ['RT #Arsenal: Waiting for the international', 'We’re hungry for revenge #_nachomonreal on Saturday\'s match and aiming for a strong finish']
wordtoFind = ['#Arsenal']
I want to loop through the wordtoFind and if it is in myString, split up myString into individual words and create an object like
newWord = {#Arsenal:[{RT:1},{Waiting:1},{for:1},{the:1},{international:1}]}
// Attempt from the question: for every search word, look at every string.
// z and i are assigned without a declaration.
for(z=0; z <wordtoFind.length; z++){
for ( i = 0 ; i < myString.length; i++) {
// NOTE(review): in the sample data wordtoFind holds plain strings, so `.key` is undefined here
if (myString[i].indexOf(wordtoFind[z].key) > -1){
// the array returned by split() is not stored anywhere
myString[i].split(" ")
}
}
}
I would say something like this would work; it also counts the number of occurrences of each word in a sentence. JavaScript does not have associative arrays like PHP, for instance. It just has objects and numerically indexed arrays:
var myString = ['RT #Arsenal: Waiting for the international', 'We’re hungry for revenge #_nachomonreal on Saturday\'s match and aiming for a strong finish'];
var wordtoFind = ['#Arsenal'];
var result = {};

// Removes trailing punctuation so that a token like "#Arsenal:" is recognised
// as the search word itself and left out of the counts.
function stripTrailingPunctuation(token) {
  return token.replace(/[:;,.!?]+$/, '');
}

// For every search word, find the sentences containing it and count the other
// words of that sentence. If several sentences contain the same search word,
// the counts of the last one are kept.
wordtoFind.forEach(function (needle) {
  myString.forEach(function (sentence) {
    if (sentence.indexOf(needle) === -1) {
      return;
    }
    var counts = {};
    sentence.split(' ').forEach(function (token) {
      if (stripTrailingPunctuation(token) === needle) {
        return;
      }
      // own-property check so words like "constructor" start from 0
      var seen = Object.prototype.hasOwnProperty.call(counts, token);
      counts[token] = (seen ? counts[token] : 0) + 1;
    });
    result[needle] = counts;
  });
});
console.log(result);
//{"#Arsenal":{"RT":1,"Waiting":1,"for":1,"the":1,"international":1}}
This method makes use of the forEach-function and callbacks.
The containsWord-function was left with a for-loop for now to reduce some callbacks, this can obviously be changed.
var myString = [
'RT #Arsenal: Waiting for the international',
'We’re hungry for revenge #_nachomonreal on Saturday\'s match and aiming for a strong finish',
'#Arsenal: one two three four two four three four three four'
];
var wordtoFind = ['#Arsenal'];
// define the preprocessor that is used before the equality check
// Removes the first colon from `word` (later colons are left untouched).
function preprocessor(word) {
  var colonAt = word.indexOf(':');
  if (colonAt === -1) {
    return word;
  }
  return word.slice(0, colonAt) + word.slice(colonAt + 1);
}
/**
 * For every search word, counts the words of each string that contains it and
 * hands the accumulated result to `callback`.
 *
 * @param {string[]} array        Strings to search in.
 * @param {string[]} search       Words to look for.
 * @param {function(Object)=} callback Receives the result once all strings
 *   have been processed; also called (with an empty result) when `array` or
 *   `search` is empty.
 * @param {function(string): string=} preprocessor Applied to each word before
 *   it is compared with a search word.
 */
function findOccurences(array, search, callback, preprocessor) {
  var result = {};
  // iterate the search strings that should be matched
  search.forEach(function (needle) {
    // iterate the array of strings that should be searched in
    array.forEach(function (haystack) {
      if (!containsWord(haystack, needle, preprocessor)) {
        return;
      }
      // count every word of the matching string under the search word
      haystack.split(' ').forEach(function (word) {
        countOccurence(result, needle, word);
      });
    });
  });
  // Everything above is synchronous, so the result is complete here; calling
  // the callback after the loops also covers empty inputs.
  if (callback) {
    callback(result);
  }
}
// True when one of the space-separated words of `haystack` equals `needle`,
// after running the word through `preprocessor` if one is given.
function containsWord(haystack, needle, preprocessor) {
  return haystack.split(' ').some(function (token) {
    var candidate = preprocessor ? preprocessor(token) : token;
    return candidate === needle;
  });
}
/**
 * Increments the counter for `word` under `result[key]`, creating the
 * per-key dictionary and the counter on first use.
 *
 * @param {Object} result Accumulator, mutated in place.
 * @param {string} key    Search word the count belongs to.
 * @param {string} word   Word to count.
 */
function countOccurence(result, key, word) {
  var hasOwn = Object.prototype.hasOwnProperty;
  // Use a plain object as the dictionary: named properties on an array are
  // dropped by JSON.stringify, and "length" collides with the array's own length.
  if (!hasOwn.call(result, key)) {
    result[key] = {};
  }
  var entry = result[key];
  // set the count to 0 if it doesn't exist yet
  if (!hasOwn.call(entry, word)) {
    entry[word] = 0;
  }
  entry[word]++;
}
// call our function to find the occurences
findOccurences(myString, wordtoFind, function(result) {
// do something with the result
console.log(result);
}, preprocessor);
// output:
/*
{ '#Arsenal':
[ RT: 1,
'#Arsenal:': 2,
Waiting: 1,
for: 1,
the: 1,
international: 1,
one: 1,
two: 2,
three: 3,
four: 4 ] }
*/
Feel free to ask any questions, if the answer needs clarification.
I hope this fits your needs.
You're on the right track. You just need to store the split string into the associative array variable.
// Collects, into one flat array, every word of each string in myString that
// contains one of the search words. myString and wordtoFind come from the
// question's sample data.
var assocArr = [];
// z and i are declared here; assigning them undeclared creates implicit
// globals and throws in strict mode.
for (var z = 0; z < wordtoFind.length; z++) {
  for (var i = 0; i < myString.length; i++) {
    if (myString[i].indexOf(wordtoFind[z]) > -1) {
      myString[i].split(" ").forEach(function (word) {
        assocArr.push(word);
      });
    }
  }
}
I think the key problem that stuck you is the data structure. The optimal structure should be something like this:
{
#Arsenal:[
{RT:1, Waiting:1, for:1, the:1, international:1},
{xxx:1, yyy:1, zzz:3}, //for there are multiple ones in 'myString' that contain the same '#Arsenal'
{slkj:1, sldjfl:2, lsdkjf:1} //maybe more
]
someOtherWord:[
{},
{},
....
]
}
And the code:
var result = {};
//This function will return an object like {RT:1, Waiting:1, for:1, the:1, international:1}.
/**
 * Counts the space-separated words of `string`, skipping the word `key`.
 *
 * @param {string} string Sentence to analyse.
 * @param {string} key    Word to leave out of the counts.
 * @returns {Object<string, number>} Map of word to number of occurrences.
 */
function calculateCount(string, key) {
  var wordCounts = {};
  string.split(" ").forEach(function (word) {
    if (word === key) {
      return;
    }
    // An own-property check is needed: for words like "toString" the lookup
    // wordCounts[word] finds an inherited function, not undefined.
    var seen = Object.prototype.hasOwnProperty.call(wordCounts, word);
    wordCounts[word] = (seen ? wordCounts[word] : 0) + 1;
  });
  return wordCounts;
}
// For each search word, collect one count object (as returned by
// calculateCount, e.g. {RT:1, Waiting:1, for:1, the:1, international:1}) per
// string that contains it.
// The variable is spelled `wordtoFind`, matching the question's sample data.
wordtoFind.forEach(function (word) {
  var current = result[word] = [];
  myString.forEach(function (str) {
    if (str.indexOf(word) > -1) {
      current.push(calculateCount(str, word));
    }
  });
});

Categories

Resources