Find all occurrences of word in array - javascript

I'm trying to find the number of total occurrences of a word in an array.
Here I found a solution and altered it a bit:
var dataset = ["word", "a word", "another word"];
var search = "word";
var count = dataset.reduce(function(n, val) {
return n + (val === search);
}, 0);
Here is the fiddle.
But, instead of 3 I only get 1. So it only finds the first element, which is just word. But how to find all elements, containing word?

Try this:
var dataset = ["word", "a word", "another word"];
var search = "word";
count = 0;
jQuery(dataset).each(function(i, v){ if(v.indexOf(search) != -1) {count ++} });
Here, count will be 3.

You have to use String.prototype.indexOf() that return the index of the first occurence of the substring in the string, or -1 if not found:
var dataset = ["word", "a word", "another word"];
var search = "word";
var count = dataset.reduce(function(n, val) {
return n + (val.indexOf(search) > -1 ? 1 : 0);
}, 0);

The operator === means: equal object and equal type.
If you are looking for each array element containing the 'search' word you need to use a different operator.
So, another possible approach, using filter and indexOf, is:
var dataset = ["word", "a word", "another word"];
var search = "word";
var count = dataset.filter(function(val) {
return val.indexOf(search) != -1;
}).length;
document.write('N. words: ' + count)

You could use String#indexOf.
var dataset = ["word", "a word", "another word"],
search = "word",
count = dataset.reduce(function(r, a) {
return r + !!~a.indexOf(search);
}, 0);
document.write(count);

Here is a one-line ES6 Arrow Function.
const countOccurrence = (arr,elem) => arr.filter((v) => v===elem).length;
Testing
const countOccurrence = (arr,elem) => arr.filter((v) => v===elem).length;
//Testing
console.log(countOccurrence([1,2,3,4,5,6,1,8,9,1,1],1));
console.log(countOccurrence(['w','o','w'],'w'));
console.log(countOccurrence(['hello','world','hello'],'hello'));

Related

Javascript Search for a match and output the whole string that the matching word is apart of

how would you go about outputting the found output, including the rest of the string its apart of? The array is just full of strings. Thanks
var searchingfor = document.getElementById('searchfield').value;
var searchingforinlowerCase = searchingfor.toLowerCase();
var searchDiv = document.getElementById('searchDiv');
var convertarraytoString = appointmentArr.toString();
var arraytolowerCase = convertarraytoString.toLowerCase();
var splitarrayString = arraytolowerCase.split(',')
if(search(searchingforinlowerCase, splitarrayString) == true) {
alert( searchingforinlowerCase + ' was found at index' + searchLocation(searchingforinlowerCase,splitarrayString) + ' Amount of times found = ' +searchCount(searchingforinlowerCase,splitarrayString));
function search(target, arrayToSearchIn) {
var i;
for (i=0; i<arrayToSearchIn.length; i++)
{ if (arrayToSearchIn[i] == target && target !=="")
return true;
}
Try this
if(search(searchingforinlowerCase, appointmentArr) == true) {
alert( searchingforinlowerCase + ' was found at index' + searchLocation(searchingforinlowerCase,splitarrayString) + ' Amount of times found = ' +searchCount(searchingforinlowerCase,splitarrayString));
function search(target, arrayToSearchIn) {
var i;
for (i=0; i<arrayToSearchIn.length; i++)
{ if (arrayToSearchIn[i].indexOf(target >= 0))
return true;
}
return false;
}
This code will help you find that a match is present. You can update code to display full text where match was found. Original posted code was comparing entire string rather than partial match.
You can do like this
var test = 'Hello World';
if (test.indexOf('Wor') >= 0)
{
/* found substring Wor */
}
In your posted code you are converting Array to string and then again converting it back to Array using split(). That is unnecessary. search can be invoked as
search(searchingforinlowerCase, appointmentArr);
Try utilizing Array.prototype.filter() , String.prototype.indexOf()
// input string
var str = "america";
// array of strings
var arr = ["First Name: John, Location:'america'", "First Name: Jane, Location:'antarctica'"];
// filter array of strings
var res = arr.filter(function(value) {
// if `str` found in `value` , return string from `arr`
return value.toLowerCase().indexOf(str.toLowerCase()) !== -1
});
// do stuff with returned single , or strings from `arr`
console.log(res, res[0])
The following will look for a word in an array of strings and return all the strings that match the word. Is this something you are looking for?
var a = ["one word", "two sentence", "three paragraph", "four page", "five chapter", "six section", "seven book", "one, two word", "two,two sentence", "three, two paragraph", "four, two page", "five, two chapter",];
function search(needle, haystack){
var results = [];
haystack.forEach(function(str){
if(str.indexOf(needle) > -1){
results.push(str);
}
});
return results.length ? results : '';
};
var b = search("word", a);
console.log(b);
Here's the fiddle to try.

Tokenize in JavaScript

If I have a string, how can I split it into an array of words and filter out some stopwords? I only want words of length 2 or greater.
If my string is
var text = "This is a short text about StackOverflow.";
I can split it with
var words = text.split(/\W+/);
But using split(/\W+/), I get all words. I could check if the words have a length of at least 2 with
function validate(token) {
return /\w{2,}/.test(token);
}
but I guess I could do this smarter/faster with regexp.
I also have an array var stopwords = ['has', 'have', ...] which shouldn't be allowed in the array.
Actually, if I can find a way to filter out stopwords, I could just add all letters a, b, c, ..., z to the stopwords array to only accept words with at least 2 characters.
I would do what you started: split by /W+/ and then validate each token (length and stopwords) in the array by using .filter().
var text = "This is a short text about StackOverflow.";
var stopwords = ['this'];
var words = text.split(/\W+/).filter(function(token) {
token = token.toLowerCase();
return token.length >= 2 && stopwords.indexOf(token) == -1;
});
console.log(words); // ["is", "short", "text", "about", "StackOverflow"]
You could easily tweak a regex to look for words >= 2 characters, but there's no point if you're already going to need to post-process to remove stopwords (token.length will be faster than any fancy regex you write).
Easy with Ramda:
var text = "This is a short text about how StackOverflow has gas.";
var stopWords = ['have', 'has'];
var isLongWord = R.compose(R.gt(R.__, 2), R.length);
var isGoWord = R.compose(R.not, R.contains(R.__, stopWords));
var tokenize = R.compose(R.filter(isGoWord), R.filter(isLongWord), R.split(' '));
tokenize(text); // ["This", "short", "text", "about", "how", "StackOverflow", "gas."]
http://bit.ly/1V5bVrP
What about splitting on something like this if you want to use a pure regex approach:
\W+|\b\w{1,2}\b
https://regex101.com/r/rB4cJ4/1
Something like this?
function filterArray(a, num_words, stop_words) {
b = [];
for (var ct = 0; ct <= a.length - 1; ct++) {
if (!(a[ct] <= num_words) && !ArrayContains[a[ct], stop_words) {
b.push(a[ct]);
}
}
return b
}
function ArrayContains(word, a) {
for (var ct = 0; ct <= a.length - 1; ct++) {
if (word == a[ct]) {
return true
}
return false
}
}
var words = "He walks the dog";
var stops = ["dog"]
var a = words.split(" ");
var f = filterArray(a, 2, stops);
This should be help
(?:\b\W*\w\W*\b)+|\W+
output:
ThisisashorttextaboutStackOverflow. A..Zabc..xyz.
where is matched string.

Count occurrence times of each character in string

I have a string like this:
(apple,apple,orange,banana,strawberry,strawberry,strawberry). I want to count the number of occurrences for each of the characters, e.g. banana (1) apple(2) and strawberry(3). how can I do this?
The closest i could find was something like, which i dont know how to adapt for my needs:
function countOcurrences(str, value){
var regExp = new RegExp(value, "gi");
return str.match(regExp) ? str.match(regExp).length : 0;
}
Here is the easiest way to achieve that by using arrays.. without any expressions or stuff. Code is fairly simple and self explanatory along with comments:
var str = "apple,apple,orange,banana,strawberry,strawberry,strawberry";
var arr = str.split(','); //getting the array of all fruits
var counts = {}; //this array will contain count of each element at it's specific position, counts['apples']
arr.forEach(function(x) { counts[x] = (counts[x] || 0)+1; }); //checking and addition logic.. e.g. counts['apples']+1
alert("Apples: " + counts['apple']);
alert("Oranges: " + counts['orange']);
alert("Banana: " + counts['banana']);
alert("Strawberry: " + counts['strawberry']);
See the DEMO here
You can try
var wordCounts = str.split(",").reduce(function(result, word){
result[word] = (result[word] || 0) + 1;
return result;
}, {});
wordCounts will be a hash {"apple":2, "orange":1, ...}
You can print it as the format you like.
See the DEMO http://repl.it/YCO/10
You can use split also:
function getCount(str,d) {
return str.split(d).length - 1;
}
getCount("fat math cat", "at"); // return 3

Javascript: find all occurrences of word in text document

I'm trying to write a Javascript function to find indices of all occurrences of a word in a text document. Currently this is what I have--
//function that finds all occurrences of string 'needle' in string 'haystack'
function getMatches(haystack, needle) {
if(needle && haystack){
var matches=[], ind=0, l=needle.length;
var t = haystack.toLowerCase();
var n = needle.toLowerCase();
while (true) {
ind = t.indexOf(n, ind);
if (ind == -1) break;
matches.push(ind);
ind += l;
}
return matches;
}
However, this gives me a problem since this matches the occurrences of the word even when it's part of a string. For example, if the needle is "book" and haystack is "Tom wrote a book. The book's name is Facebook for dummies", the result is the index of 'book', 'book's' and 'Facebook', when I want only the index of 'book'. How can I accomplish this? Any help is appreciated.
Here's the regex I propose:
/\bbook\b((?!\W(?=\w))|(?=\s))/gi
To fix your problem. Try it with the exec() method. The regexp I provided will also consider words like "booklet" that occur in the example sentence you provided:
function getMatches(needle, haystack) {
var myRe = new RegExp("\\b" + needle + "\\b((?!\\W(?=\\w))|(?=\\s))", "gi"),
myArray, myResult = [];
while ((myArray = myRe.exec(haystack)) !== null) {
myResult.push(myArray.index);
}
return myResult;
}
Edit
I've edited the regexp to account for words like "booklet" as well. I've also reformatted my answer to be similar to your function.
You can do some testing here
Try this:
function getMatches(searchStr, str) {
var ind = 0, searchStrL = searchStr.length;
var index, matches = [];
str = str.toLowerCase();
searchStr = searchStr.toLowerCase();
while ((index = str.indexOf(searchStr, ind)) > -1) {
matches.push(index);
ind = index + searchStrL;
}
return matches;
}
indexOf returns the position of the first occurrence of book.
var str = "Tom wrote a book. The book's name is Facebook for dummies";
var n = str.indexOf("book");
I don't know what is going on there but I can offer a better solution using a regex.
function getMatches(haystack, needle) {
var regex = new RegExp(needle.toLowerCase(), 'g'),
result = [];
haystack = haystack.toLowerCase();
while ((match = regex.exec(haystack)) != null) {
result.push(match.index);
}
return result;
}
Usage:
getMatches('hello hi hello hi hi hi hello hi hello john hi hi', 'hi');
Result => [6, 15, 18, 21, 30, 44, 47]
Conserning your book vs books problem, you just need to provide "book " with a space.
Or in the function you could do.
needle = ' ' + needle + ' ';
The easiest way might be using text.match(RegX) function. For example you can write something like this for a case insensitive search:
"This is a test. This is a Test.".match(/test/gi)
Result:
(2) ['test', 'Test']
Or this one for case sensitive scenarios:
"This is a test. This is a Test.".match(/test/g)
Result:
['test']
let myControlValue=document.getElementById('myControl').innerText;
document.getElementById('searchResult').innerText=myControlValue.match(/test/gi)
<p id='myControl'>This is a test. Just a Test
</p>
<span><b>Search Result:</b></span>
<div id='searchResult'></div>

Javascript and regex: split string and keep the separator

I have a string:
var string = "aaaaaa<br />† bbbb<br />‡ cccc"
And I would like to split this string with the delimiter <br /> followed by a special character.
To do that, I am using this:
string.split(/<br \/>&#?[a-zA-Z0-9]+;/g);
I am getting what I need, except that I am losing the delimiter.
Here is the example: http://jsfiddle.net/JwrZ6/1/
How can I keep the delimiter?
I was having similar but slight different problem. Anyway, here are examples of three different scenarios for where to keep the deliminator.
"1、2、3".split("、") == ["1", "2", "3"]
"1、2、3".split(/(、)/g) == ["1", "、", "2", "、", "3"]
"1、2、3".split(/(?=、)/g) == ["1", "、2", "、3"]
"1、2、3".split(/(?!、)/g) == ["1、", "2、", "3"]
"1、2、3".split(/(.*?、)/g) == ["", "1、", "", "2、", "3"]
Warning: The fourth will only work to split single characters. ConnorsFan presents an alternative:
// Split a path, but keep the slashes that follow directories
var str = 'Animation/rawr/javascript.js';
var tokens = str.match(/[^\/]+\/?|\//g);
Use (positive) lookahead so that the regular expression asserts that the special character exists, but does not actually match it:
string.split(/<br \/>(?=&#?[a-zA-Z0-9]+;)/g);
See it in action:
var string = "aaaaaa<br />† bbbb<br />‡ cccc";
console.log(string.split(/<br \/>(?=&#?[a-zA-Z0-9]+;)/g));
If you wrap the delimiter in parantheses it will be part of the returned array.
string.split(/(<br \/>&#?[a-zA-Z0-9]+);/g);
// returns ["aaaaaa", "<br />†", "bbbb", "<br />‡", "cccc"]
Depending on which part you want to keep change which subgroup you match
string.split(/(<br \/>)&#?[a-zA-Z0-9]+;/g);
// returns ["aaaaaa", "<br />", "bbbb", "<br />", "cccc"]
You could improve the expression by ignoring the case of letters
string.split(/()&#?[a-z0-9]+;/gi);
And you can match for predefined groups like this: \d equals [0-9] and \w equals [a-zA-Z0-9_]. This means your expression could look like this.
string.split(/<br \/>(&#?[a-z\d]+;)/gi);
There is a good Regular Expression Reference on JavaScriptKit.
If you group the split pattern, its match will be kept in the output and it is by design:
If separator is a regular expression with capturing parentheses, then
each time separator matches, the results (including any undefined
results) of the capturing parentheses are spliced into the output
array.
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split#description
You don't need a lookahead or global flag unless your search pattern uses one.
const str = `How much wood would a woodchuck chuck, if a woodchuck could chuck wood?`
const result = str.split(/(\s+)/);
console.log(result);
// We can verify the result
const isSame = result.join('') === str;
console.log({ isSame });
You can use multiple groups. You can be as creative as you like and what remains outside the groups will be removed:
const str = `How much wood would a woodchuck chuck, if a woodchuck could chuck wood?`
const result = str.split(/(\s+)(\w{1,2})\w+/);
console.log(result, result.join(''));
answered it here also JavaScript Split Regular Expression keep the delimiter
use the (?=pattern) lookahead pattern in the regex
example
var string = '500x500-11*90~1+1';
string = string.replace(/(?=[$-/:-?{-~!"^_`\[\]])/gi, ",");
string = string.split(",");
this will give you the following result.
[ '500x500', '-11', '*90', '~1', '+1' ]
Can also be directly split
string = string.split(/(?=[$-/:-?{-~!"^_`\[\]])/gi);
giving the same result
[ '500x500', '-11', '*90', '~1', '+1' ]
I made a modification to jichi's answer, and put it in a function which also supports multiple letters.
String.prototype.splitAndKeep = function(separator, method='seperate'){
var str = this;
if(method == 'seperate'){
str = str.split(new RegExp(`(${separator})`, 'g'));
}else if(method == 'infront'){
str = str.split(new RegExp(`(?=${separator})`, 'g'));
}else if(method == 'behind'){
str = str.split(new RegExp(`(.*?${separator})`, 'g'));
str = str.filter(function(el){return el !== "";});
}
return str;
};
jichi's answers 3rd method would not work in this function, so I took the 4th method, and removed the empty spaces to get the same result.
edit:
second method which excepts an array to split char1 or char2
String.prototype.splitAndKeep = function(separator, method='seperate'){
var str = this;
function splitAndKeep(str, separator, method='seperate'){
if(method == 'seperate'){
str = str.split(new RegExp(`(${separator})`, 'g'));
}else if(method == 'infront'){
str = str.split(new RegExp(`(?=${separator})`, 'g'));
}else if(method == 'behind'){
str = str.split(new RegExp(`(.*?${separator})`, 'g'));
str = str.filter(function(el){return el !== "";});
}
return str;
}
if(Array.isArray(separator)){
var parts = splitAndKeep(str, separator[0], method);
for(var i = 1; i < separator.length; i++){
var partsTemp = parts;
parts = [];
for(var p = 0; p < partsTemp.length; p++){
parts = parts.concat(splitAndKeep(partsTemp[p], separator[i], method));
}
}
return parts;
}else{
return splitAndKeep(str, separator, method);
}
};
usage:
str = "first1-second2-third3-last";
str.splitAndKeep(["1", "2", "3"]) == ["first", "1", "-second", "2", "-third", "3", "-last"];
str.splitAndKeep("-") == ["first1", "-", "second2", "-", "third3", "-", "last"];
An extension function splits string with substring or RegEx and the delimiter is putted according to second parameter ahead or behind.
String.prototype.splitKeep = function (splitter, ahead) {
var self = this;
var result = [];
if (splitter != '') {
var matches = [];
// Getting mached value and its index
var replaceName = splitter instanceof RegExp ? "replace" : "replaceAll";
var r = self[replaceName](splitter, function (m, i, e) {
matches.push({ value: m, index: i });
return getSubst(m);
});
// Finds split substrings
var lastIndex = 0;
for (var i = 0; i < matches.length; i++) {
var m = matches[i];
var nextIndex = ahead == true ? m.index : m.index + m.value.length;
if (nextIndex != lastIndex) {
var part = self.substring(lastIndex, nextIndex);
result.push(part);
lastIndex = nextIndex;
}
};
if (lastIndex < self.length) {
var part = self.substring(lastIndex, self.length);
result.push(part);
};
// Substitution of matched string
function getSubst(value) {
var substChar = value[0] == '0' ? '1' : '0';
var subst = '';
for (var i = 0; i < value.length; i++) {
subst += substChar;
}
return subst;
};
}
else {
result.add(self);
};
return result;
};
The test:
test('splitKeep', function () {
// String
deepEqual("1231451".splitKeep('1'), ["1", "231", "451"]);
deepEqual("123145".splitKeep('1', true), ["123", "145"]);
deepEqual("1231451".splitKeep('1', true), ["123", "145", "1"]);
deepEqual("hello man how are you!".splitKeep(' '), ["hello ", "man ", "how ", "are ", "you!"]);
deepEqual("hello man how are you!".splitKeep(' ', true), ["hello", " man", " how", " are", " you!"]);
// Regex
deepEqual("mhellommhellommmhello".splitKeep(/m+/g), ["m", "hellomm", "hellommm", "hello"]);
deepEqual("mhellommhellommmhello".splitKeep(/m+/g, true), ["mhello", "mmhello", "mmmhello"]);
});
I've been using this:
String.prototype.splitBy = function (delimiter) {
var
delimiterPATTERN = '(' + delimiter + ')',
delimiterRE = new RegExp(delimiterPATTERN, 'g');
return this.split(delimiterRE).reduce((chunks, item) => {
if (item.match(delimiterRE)){
chunks.push(item)
} else {
chunks[chunks.length - 1] += item
};
return chunks
}, [])
}
Except that you shouldn't mess with String.prototype, so here's a function version:
var splitBy = function (text, delimiter) {
var
delimiterPATTERN = '(' + delimiter + ')',
delimiterRE = new RegExp(delimiterPATTERN, 'g');
return text.split(delimiterRE).reduce(function(chunks, item){
if (item.match(delimiterRE)){
chunks.push(item)
} else {
chunks[chunks.length - 1] += item
};
return chunks
}, [])
}
So you could do:
var haystack = "aaaaaa<br />† bbbb<br />‡ cccc"
var needle = '<br \/>&#?[a-zA-Z0-9]+;';
var result = splitBy(haystack , needle)
console.log( JSON.stringify( result, null, 2) )
And you'll end up with:
[
"<br />† bbbb",
"<br />‡ cccc"
]
Most of the existing answers predate the introduction of lookbehind assertions in JavaScript in 2018. You didn't specify how you wanted the delimiters to be included in the result. One typical use case would be sentences delimited by punctuation ([.?!]), where one would want the delimiters to be included at the ends of the resulting strings. This corresponds to the fourth case in the accepted answer, but as noted there, that solution only works for single characters. Arbitrary strings with the delimiters appended at the end can be formed with a lookbehind assertion:
'It is. Is it? It is!'.split(/(?<=[.?!])/)
/* [ 'It is.', ' Is it?', ' It is!' ] */
I know that this is a bit late but you could also use lookarounds
var string = "aaaaaa<br />† bbbb<br />‡ cccc";
var array = string.split(/(?<=<br \/>)/);
console.log(array);
I've also came up with this solution. No regex needed, very readable.
const str = "hello world what a great day today balbla"
const separatorIndex = str.indexOf("great")
const parsedString = str.slice(separatorIndex)
console.log(parsedString)

Categories

Resources