I was trying to count words in a text in this way:
/**
 * Attempts to count the words in `str` by counting its spaces.
 *
 * NOTE(review): this is the version being asked about; the individual
 * problems that stop it from producing a count are annotated inline.
 */
function WordCount(str) {
var totalSoFar = 0;
// NOTE(review): `WordCount.length` is the number of declared parameters of the
// function (1 here), not the length of `str`; `str.length` was presumably intended.
// The loop has no braces, so only the `if` statement below belongs to it.
for (var i = 0; i < WordCount.length; i++)
// NOTE(review): `str(i)` calls `str` as a function; a character is read with
// `str[i]` or `str.charAt(i)`.
if (str(i) === " ") { // if a space is found in str
// NOTE(review): `= +1` assigns the value 1 (unary plus); incrementing is `+= 1`.
totalSoFar = +1; // add 1 to total so far
}
// NOTE(review): `totalsoFar` (lowercase "s") is a different identifier from `totalSoFar`.
totalsoFar += 1; // add 1 to totalsoFar to account for extra space since 1 space = 2 words
// NOTE(review): there is no `return` statement, so no count is handed back to the caller.
}
console.log(WordCount("Random String"));
I think I have got this down pretty well, except I think that the if statement is wrong. The part that checks if str(i) contains a space and adds 1.
Edit:
I found out (thanks to Blender) that I can do this with a lot less code:
/**
 * Counts the pieces obtained by cutting `str` at every single space character.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of pieces; empty pieces produced by leading,
 *   trailing or repeated spaces are included, so "" yields 1.
 */
function WordCount(str) {
  const pieces = str.split(" ");
  return pieces.length;
}
console.log(WordCount("hello world"));
Use square brackets, not parentheses:
str[i] === " "
Or charAt:
str.charAt(i) === " "
You could also do it with .split():
return str.split(' ').length;
Try these before reinventing the wheel:
from Count number of words in string using JavaScript
/**
 * Counts whitespace-separated words in `str`.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of words; 0 for an empty or whitespace-only string.
 */
function countWords(str) {
  const trimmed = str.trim();
  // "".split(/\s+/) yields [""], which would be reported as one word,
  // so the empty case has to be answered before splitting.
  if (trimmed === "") {
    return 0;
  }
  return trimmed.split(/\s+/).length;
}
from http://www.mediacollege.com/internet/javascript/text/count-words.html
/**
 * Counts the non-empty, space-separated pieces of `s`.
 *
 * Only the space character separates words here: pieces joined by a bare
 * newline or tab are counted as one word.
 *
 * @param {string} s - Text to measure.
 * @returns {number} Number of non-empty pieces.
 */
function countWords(s){
  const trimmed = s.replace(/(^\s*)|(\s*$)/gi,""); // drop leading and trailing whitespace
  const squeezed = trimmed.replace(/[ ]{2,}/gi," "); // collapse runs of spaces into one
  const normalized = squeezed.replace(/\n /,"\n"); // first "newline + space" pair only (no g flag)
  let total = 0;
  for (const piece of normalized.split(' ')) {
    // Same effect as filtering with `.filter(String)`: empty pieces are skipped.
    if (piece !== "") {
      total += 1;
    }
  }
  return total;
}
from Use JavaScript to count words in a string, WITHOUT using a regex
- this will be the best approach
/**
 * Counts the non-empty pieces of `str` when it is cut at every space.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of non-empty, space-separated pieces.
 */
function WordCount(str) {
  let total = 0;
  for (const chunk of str.split(' ')) {
    if (chunk !== '') {
      total += 1;
    }
  }
  return total;
}
Notes From Author:
You can adapt this script to count words in whichever way you like.
The important part is s.split(' ').length — this counts the
spaces.
The script attempts to remove all extra spaces (double spaces etc) before counting.
If the text contains two words without a space between them, it will count them as one word, e.g. "First sentence
.Start of next sentence".
One more way to count words in a string. This code counts words that contain only alphanumeric characters and "_", "’", "-", "'" chars.
/**
 * Counts runs of word characters, digits, apostrophes (straight or curly)
 * and hyphens in `str`.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of such runs; 0 when there are none.
 */
function countWords(str) {
  var found = str.match(/[\w\d\’\'-]+/gi);
  if (!found) {
    // match() returns null, not an empty array, when nothing matches.
    return 0;
  }
  return found.length;
}
After cleaning the string, you can match non-whitespace characters or word-boundaries.
Here are two simple regular expressions to capture words in a string:
Sequence of non-white-space characters: /\S+/g
Valid characters between word boundaries: /\b[a-z\d]+\b/g
The example below shows how to retrieve the word count from a string, by using these capturing patterns.
/*Redirect console output to HTML.*/document.body.innerHTML='';console.log=function(s){document.body.innerHTML+=s+'\n';};
/*String format.*/String.format||(String.format=function(f){return function(a){return f.replace(/{(\d+)}/g,function(m,n){return"undefined"!=typeof a[n]?a[n]:m})}([].slice.call(arguments,1))});
// ^ IGNORE CODE ABOVE ^
// =================
// Clean and match sub-strings in a string.
/**
 * Normalizes `str` and returns every match of `regexp` in the result.
 *
 * Normalization removes underscores and every character that is neither a
 * word character nor whitespace, collapses whitespace runs into one space,
 * and lower-cases the text.
 *
 * @param {string} str - Raw text.
 * @param {RegExp} regexp - Pattern passed to String.prototype.match.
 * @returns {Array} The match result, or an empty array when nothing matches.
 */
function extractSubstr(str, regexp) {
  var withoutPunctuation = str.replace(/[^\w\s]|_/g, '');
  var singleSpaced = withoutPunctuation.replace(/\s+/g, ' ');
  var found = singleSpaced.toLowerCase().match(regexp);
  return found || [];
}
// Find words by searching for sequences of non-whitespace characters.
/**
 * Extracts words from `str` as runs of non-whitespace characters,
 * delegating cleaning and matching to extractSubstr.
 *
 * @param {string} str - Raw text.
 * @returns {Array} Whatever extractSubstr returns for the /\S+/g pattern.
 */
function getWordsByNonWhiteSpace(str) {
  var nonWhiteSpaceRun = /\S+/g;
  return extractSubstr(str, nonWhiteSpaceRun);
}
// Find words by searching for valid characters between word-boundaries.
/**
 * Extracts words from `str` as runs of lowercase letters and digits between
 * word boundaries, delegating cleaning and matching to extractSubstr.
 *
 * @param {string} str - Raw text.
 * @returns {Array} Whatever extractSubstr returns for the word-boundary pattern.
 */
function getWordsByWordBoundaries(str) {
  var boundedWord = /\b[a-z\d]+\b/g;
  return extractSubstr(str, boundedWord);
}
// Example of usage.
var edisonQuote = "I have not failed. I've just found 10,000 ways that won't work.";
var words1 = getWordsByNonWhiteSpace(edisonQuote);
var words2 = getWordsByWordBoundaries(edisonQuote);
console.log(String.format('"{0}" - Thomas Edison\n\nWord count via:\n', edisonQuote));
console.log(String.format(' - non-white-space: ({0}) [{1}]', words1.length, words1.join(', ')));
console.log(String.format(' - word-boundaries: ({0}) [{1}]', words2.length, words2.join(', ')));
body { font-family: monospace; white-space: pre; font-size: 11px; }
Finding Unique Words
You could also create a mapping of words to get unique counts.
/**
 * Normalizes text for word matching: removes underscores and every character
 * that is neither a word character nor whitespace, collapses each whitespace
 * run into a single space, and lower-cases the result.
 *
 * @param {string} str - Raw text.
 * @returns {string} The normalized text.
 */
function cleanString(str) {
  var withoutPunctuation = str.replace(/[^\w\s]|_/g, '');
  var singleSpaced = withoutPunctuation.replace(/\s+/g, ' ');
  return singleSpaced.toLowerCase();
}
/**
 * Returns every match of `regexp` in the cleaned form of `str`.
 *
 * @param {string} str - Raw text; passed through cleanString first.
 * @param {RegExp} regexp - Pattern passed to String.prototype.match.
 * @returns {Array} The match result, or an empty array when nothing matches.
 */
function extractSubstr(str, regexp) {
  var hits = cleanString(str).match(regexp);
  // match() yields null when there is no match; callers always get an array.
  return hits ? hits : [];
}
/**
 * Extracts words from `str` as runs of non-whitespace characters,
 * delegating cleaning and matching to extractSubstr.
 *
 * @param {string} str - Raw text.
 * @returns {Array} Whatever extractSubstr returns for the /\S+/g pattern.
 */
function getWordsByNonWhiteSpace(str) {
  var nonWhiteSpaceRun = /\S+/g;
  return extractSubstr(str, nonWhiteSpaceRun);
}
/**
 * Extracts words from `str` as runs of lowercase letters and digits between
 * word boundaries, delegating cleaning and matching to extractSubstr.
 *
 * @param {string} str - Raw text.
 * @returns {Array} Whatever extractSubstr returns for the word-boundary pattern.
 */
function getWordsByWordBoundaries(str) {
  var boundedWord = /\b[a-z\d]+\b/g;
  return extractSubstr(str, boundedWord);
}
/**
 * Builds a word -> occurrence-count mapping for `str`.
 *
 * @param {string} str - Raw text; words come from getWordsByWordBoundaries.
 * @returns {Object} Plain object whose own keys are the distinct words and
 *   whose values are how many times each word occurred.
 */
function wordMap(str) {
  var hasOwn = Object.prototype.hasOwnProperty;
  return getWordsByWordBoundaries(str).reduce(function(map, word) {
    // Only own properties count as "seen before". A plain lookup would find
    // inherited members such as `constructor`, and adding 1 to that function
    // produces a string instead of a count.
    var seen = hasOwn.call(map, word) ? map[word] : 0;
    map[word] = seen + 1;
    return map;
  }, {});
}
/**
 * Converts an object into an array of [key, value] pairs, in the order
 * reported by Object.keys.
 *
 * @param {Object} map - Object whose own enumerable keys are listed.
 * @returns {Array} One two-element array per key.
 */
function mapToTuples(map) {
  var keys = Object.keys(map);
  var tuples = [];
  for (var i = 0; i < keys.length; i++) {
    tuples.push([ keys[i], map[keys[i]] ]);
  }
  return tuples;
}
/**
 * Converts `map` to [key, value] tuples (via mapToTuples) and sorts them.
 *
 * @param {Object} map - Object to convert.
 * @param {Function} sortFn - Comparator called as sortFn(a, b, sortOrder);
 *   its return value is used as the Array.prototype.sort comparison result.
 * @param {*} sortOrder - Opaque value forwarded to every sortFn call.
 * @returns {Array} The sorted tuples.
 */
function mapToSortedTuples(map, sortFn, sortOrder) {
  var tuples = mapToTuples(map);
  tuples.sort(function(left, right) {
    return sortFn(left, right, sortOrder);
  });
  return tuples;
}
/**
 * Counts the words that getWordsByWordBoundaries finds in `str`.
 *
 * @param {string} str - Raw text.
 * @returns {number} Number of extracted words.
 */
function countWords(str) {
  var words = getWordsByWordBoundaries(str);
  return words.length;
}
/**
 * Returns [word, count] tuples for `str`, ordered by count from highest to
 * lowest, with ties ordered alphabetically by word.
 *
 * @param {string} str - Raw text; counted via wordMap.
 * @returns {Array} Sorted tuples produced by mapToSortedTuples.
 */
function wordFrequency(str) {
  // order[0] scales the alphabetical comparison, order[1] the count comparison.
  var compareTuples = function(left, right, order) {
    if (left[1] > right[1]) {
      return order[1] * 1;
    }
    if (right[1] > left[1]) {
      return order[1] * -1;
    }
    var byWord = 0;
    if (left[0] < right[0]) {
      byWord = -1;
    } else if (left[0] > right[0]) {
      byWord = 1;
    }
    return order[0] * byWord;
  };
  return mapToSortedTuples(wordMap(str), compareTuples, [1, -1]);
}
/**
 * Formats tuples as text, one "key -> value" line per tuple, with the key
 * passed through padStr (pad character ' ', width 12, direction 1).
 *
 * @param {Array} tuples - Array of [key, value] pairs.
 * @returns {string} The lines joined with '\n'; '' for an empty array.
 */
function printTuples(tuples) {
  var lines = [];
  for (var i = 0; i < tuples.length; i++) {
    var key = tuples[i][0];
    var value = tuples[i][1];
    lines.push(padStr(key, ' ', 12, 1) + ' -> ' + value);
  }
  return lines.join('\n');
}
/**
 * Pads or truncates `str` to exactly `width` characters.
 *
 * @param {string} str - Text to fit.
 * @param {string} ch - Padding text, repeated (and cut) as needed. When it is
 *   empty no padding can be produced and `str` is returned, truncated if needed.
 * @param {number} width - Target length; values below 0 are treated as 0.
 * @param {number} dir - Negative pads on the left, anything else on the right.
 * @returns {string} `str` cut to `width` when it is too long, otherwise
 *   `str` with padding added on the chosen side.
 */
function padStr(str, ch, width, dir) {
  var target = Math.max(0, width);
  var missing = target - str.length;
  // The empty-`ch` guard matters: padding with '' can never reach the width.
  if (missing <= 0 || ch === '') {
    return str.slice(0, target);
  }
  // Build exactly `missing` characters of fill, so a multi-character `ch`
  // can never push the original text out of the result.
  var copies = Math.ceil(missing / ch.length);
  var fill = new Array(copies + 1).join(ch).slice(0, missing);
  return dir < 0 ? fill + str : str + fill;
}
/**
 * Builds a jQuery <table> with one header row and one body row per data row.
 *
 * @param {Array} data - Array of rows; each row is an array of cell values
 *   that are set with .html().
 * @param {Array} headers - Header cell values, also set with .html().
 * @returns {jQuery} The detached table element.
 */
function toTable(data, headers) {
  var headerCells = headers.map(function(header) {
    return $('<th>').html(header);
  });
  var bodyRows = data.map(function(row) {
    var cells = row.map(function(cell) {
      return $('<td>').html(cell);
    });
    return $('<tr>').append(cells);
  });
  var thead = $('<thead>').append($('<tr>').append(headerCells));
  var tbody = $('<tbody>').append(bodyRows);
  return $('<table>').append(thead).append(tbody);
}
/**
 * Inserts rows at the top of a table's <tbody>.
 *
 * @param {jQuery} table - Table whose tbody receives the rows.
 * @param {Array} data - Array of rows; each row is an array of cell values
 *   that are set with .html().
 * @returns {jQuery} The same `table`, to allow chaining.
 */
function addRowsBefore(table, data) {
  var newRows = data.map(function(row) {
    var cells = row.map(function(cell) {
      return $('<td>').html(cell);
    });
    return $('<tr>').append(cells);
  });
  table.find('tbody').prepend(newRows);
  return table;
}
// Once the DOM is ready, wire the "Count Words" button: on click, read the
// textarea, compute the frequency table, and render it with TOTAL and UNIQUE
// summary rows placed above the per-word rows.
$(function() {
  $('#countWordsBtn').on('click', function() {
    var text = $('#wordsTxtAra').val();
    var frequencies = wordFrequency(text);
    var summaryRows = [
      [ 'TOTAL', countWords(text) ],
      [ 'UNIQUE', frequencies.length ]
    ];
    var freqTable = addRowsBefore(toTable(frequencies, ['Word', 'Frequency']), summaryRows);
    $('#wordFreq').html(freqTable);
  });
});
table {
border-collapse: collapse;
table-layout: fixed;
width: 200px;
font-family: monospace;
}
thead {
border-bottom: #000 3px double;;
}
table, td, th {
border: #000 1px solid;
}
td, th {
padding: 2px;
width: 100px;
overflow: hidden;
}
textarea, input[type="button"], table {
margin: 4px;
padding: 2px;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
<h1>Word Frequency</h1>
<textarea id="wordsTxtAra" cols="60" rows="8">Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal.
Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure. We are met on a great battle-field of that war. We have come to dedicate a portion of that field, as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.
But, in a larger sense, we can not dedicate -- we can not consecrate -- we can not hallow -- this ground. The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember what we say here, but it can never forget what they did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us -- that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion -- that we here highly resolve that these dead shall not have died in vain -- that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the people, for the people, shall not perish from the earth.</textarea><br />
<input type="button" id="countWordsBtn" value="Count Words" />
<div id="wordFreq"></div>
I think this method is more than you want
/**
 * Counts runs of non-whitespace characters in `v`.
 *
 * @param {string} v - Text to measure.
 * @returns {number} Number of runs; 0 when there are none.
 */
var getWordCount = function(v){
  var words = v.match(/\S+/g);
  if (words === null) {
    return 0;
  }
  return words.length;
};
String.prototype.match returns an array, we can then check the length,
I find this method to be most descriptive
var str = 'one two three four five';
str.match(/\w+/g).length;
The easiest way I've found so far is to use a regex with split.
/**
 * Reads the text of the element with id "input", counts its runs of
 * non-whitespace characters, and writes the count into the element with
 * id "count".
 */
var calculate = function() {
  var text = document.getElementById('input').value;
  // Splitting ON the words leaves one more piece than there are words.
  var pieces = text.split(/[^\s]+/);
  document.getElementById('count').innerHTML = pieces.length - 1;
};
<textarea id="input">My super text that does 7 words.</textarea>
<button onclick="calculate()">Calculate</button>
<span id="count">7</span> words
This will handle all of the cases and is as efficient as possible. (You don't want split(' ') unless you know beforehand that there are no spaces of greater length than one.):
var quote = `Of all the talents bestowed upon men,
none is so precious as the gift of oratory.
He who enjoys it wields a power more durable than that of a great king.
He is an independent force in the world.
Abandoned by his party, betrayed by his friends, stripped of his offices,
whoever can command this power is still formidable.`;
/**
 * Counts runs of non-whitespace characters in `text`.
 *
 * @param {string} [text=''] - Text to measure.
 * @returns {number} Number of runs; 0 for empty or whitespace-only text.
 */
function wordCount(text = '') {
  // Splitting ON the words leaves one more piece than there are words.
  const gaps = text.split(/\S+/);
  return gaps.length - 1;
}
console.log(wordCount(quote));//59
console.log(wordCount('f'));//1
console.log(wordCount(' f '));//1
console.log(wordCount(' '));//0
The answer given by #7-isnotbad is extremely close, but doesn't count single-word lines. Here's the fix, which seems to account for every possible combination of words, spaces and newlines.
/**
 * Counts words in `s`, treating any run of whitespace (spaces, newlines,
 * tabs) as a single separator.
 *
 * @param {string} s - Text to measure.
 * @returns {number} Number of words; 0 for an empty or whitespace-only string.
 */
function countWords(s){
  var trimmed = s.trim();
  // "".split(...) yields [""], which would otherwise be reported as one word.
  if (trimmed === '') {
    return 0;
  }
  return trimmed.split(/\s+/).length;
}
/**
 * Counts runs of word characters in `str`, where every non-ASCII character
 * is also treated as a word character.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of runs; 0 when there are none.
 */
function countWords(str) {
  var regEx = /([^\u0000-\u007F]|\w)+/g;
  var matches = str.match(regEx);
  // match() returns null when nothing matches, so guard before reading .length.
  return matches ? matches.length : 0;
}
Explanation:
/([^\u0000-\u007F]|\w) matches word characters - which is great -> regex does the heavy lifting for us. (This pattern is based on the following SO answer: https://stackoverflow.com/a/35743562/1806956 by #Landeeyo)
+ matches the whole string of the previously specified word characters - so we basically group word characters.
/g means it keeps looking till the end.
str.match(regEx) returns an array of the found words - so we count its length.
For those who want to use Lodash can use the _.words function:
var str = "Random String";
var wordCount = _.size(_.words(str));
console.log(wordCount);
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.11/lodash.min.js"></script>
Here's my approach, which simply splits a string by spaces, then for loops the array and increases the count if the array[i] matches a given regex pattern.
/**
 * Counts the space-separated pieces of `str` that contain at least one
 * ASCII letter.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of qualifying pieces.
 */
function wordCount(str) {
  var hasLetter = /[A-Za-z]/;
  var wordLike = str.split(' ').filter(function(token) {
    return hasLetter.test(token);
  });
  return wordLike.length;
}
Invoked like so:
var str = "testing strings here's a string --.. ? // ... random characters ,,, end of string";
wordCount(str)
(added extra characters & spaces to show accuracy of function)
The str above returns 10, which is correct!
Accuracy is also important.
What option 3 does is basically replace everything except whitespace with a +1, and then evaluate the result to add up the 1's, giving you the word count.
It's the most accurate and fastest method of the four that I've done here.
Please note it is slower than return str.split(" ").length; but it's accurate when compared to Microsoft Word.
See file ops/s and returned word count below.
Here's a link to run this bench test.
https://jsbench.me/ztk2t3q3w5/1
// This is the fastest at 111,037 ops/s ±2.86% fastest
var str = "All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy.";
// Benchmark variant 1: counts the pieces produced by splitting `str` on every
// single space character. Leading, trailing or repeated spaces produce empty
// pieces, and those are counted as well.
function WordCount(str) {
return str.split(" ").length;
}
console.log(WordCount(str));
// Returns 241 words. Not the same as Microsoft Word count, off by one.
// This is the 2nd fastest at 46,835 ops/s ±1.76% 57.82% slower
var str = "All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy.";
// Benchmark variant 2: uses each run of non-whitespace characters that begins
// at a word character as a separator (the `(?!\W)` lookahead refuses to start
// a match on a non-word character) and counts the pieces left between them.
// split() returns one more piece than there are separators, so the result is
// the number of matched runs plus one.
function WordCount(str) {
return str.split(/(?!\W)\S+/).length;
}
console.log(WordCount(str));
// Returns 241 words. Not the same as Microsoft Word count, off by one.
// This is the 3rd fastest at 37,121 ops/s ±1.18% 66.57% slower
var str = "All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy.";
// Benchmark variant 3: rewrites every run of non-whitespace characters as "+1"
// and evaluates the resulting text as a JavaScript expression, so "a b"
// becomes "+1 +1" and evaluates to 2.
// WARNING(review): eval() executes whatever text it is handed. This is only
// safe because the replace() above turns every non-whitespace character into
// "+1" first; any change to that pattern could let input text run as code.
// A match count such as (str.match(/\S+/g) || []).length gives the same number
// without eval.
// NOTE(review): when `str` contains no non-whitespace characters nothing is
// replaced, and evaluating empty/whitespace-only text yields undefined, not 0.
function countWords(str) {
var str = str.replace(/\S+/g,"\+1");
return eval(str);
}
console.log(countWords(str));
// Returns 240 words. Same as Microsoft Word count.
// This is the slowest at 89 ops/s 17,270 ops/s ±2.29% 84.45% slower
var str = "All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy.";
// Benchmark variant 4: replaces each run of non-whitespace characters that
// begins at a word character with the single character "1", deletes all
// whitespace, and returns the position of the last empty-string match, which
// is the length of the remaining text.
// NOTE(review): characters that were not replaced (for example a run made only
// of punctuation) stay in the text and add to the returned number.
function countWords(str) {
var str = str.replace(/(?!\W)\S+/g,"1").replace(/\s*/g,"");
return str.lastIndexOf("");
}
console.log(countWords(str));
// Returns 240 words. Same as Microsoft Word count.
This one-liner is pretty simple and counts words accurately even if there is more than one whitespace between them:
return string.split(/\s+/).length;
Regex Explanation
Part of Expression
Explanation
\s
Matches any whitespace character
+
Matches the previous token between one and unlimited times
There may be a more efficient way to do this, but this is what has worked for me.
/**
 * Counts words in `passedString`, treating whitespace and the punctuation
 * characters , ; / \ { } ( ) [ ] . as word separators.
 *
 * @param {string} passedString - Text to measure.
 * @returns {number} Number of non-empty words; 0 for empty input or input
 *   made only of separators.
 */
function countWords(passedString){
  var words = passedString
    // Turn every separator into a space BEFORE splitting, so trailing
    // punctuation ("Hello world.") cannot leave an empty extra "word".
    .replace(/[,;\/\\{}().\[\]]/g, ' ')
    .split(/\s+/)
    .filter(function(word) {
      return word !== '';
    });
  return words.length;
}
It's able to recognise all of the following as separate words:
abc,abc = 2 words,
abc/abc/abc = 3 words (works with forward and backward slashes),
abc.abc = 2 words,
abc[abc]abc = 3 words,
abc;abc = 2 words,
(some other suggestions I've tried count each example above as only 1 x word)
it also:
ignores all leading and trailing white spaces
counts a single-letter followed by a new line, as a word - which I've found some of the suggestions given on this page don't count, for example:
a
a
a
a
a
sometimes gets counted as 0 x words, and other functions only count it as 1 x word, instead of 5 x words)
If anyone has any ideas on how to improve it, or make it cleaner / more efficient - then please add your 2 cents!
Hope This Helps Someone out.
let leng = yourString.split(' ').filter(a => a.trim().length > 0).length
Here's a function that counts number of words in an HTML code:
$(this).val()
.replace(/(( )|(<[^>]*>))+/g, '') // remove html spaces and tags
.replace(/\s+/g, ' ') // merge multiple spaces into one
.trim() // trim ending and beginning spaces (yes, this is needed)
.match(/\s/g) // find all spaces by regex
.length // get amount of matches
I'm not sure if this has been said previously, or if it's what is needed here, but couldn't you make the string an array and then find the length?
let randomString = "Random String";
let stringWords = randomString.split(' ');
console.log(stringWords.length);
var str = "Lorem ipsum dolor sit amet consectetur adipisicing elit. Labore illum fuga magni exercitationem porro? Eaque tenetur tempora nesciunt laborum deleniti, quidem nemo consequuntur voluptate alias ad soluta, molestiae, voluptas libero!" ;
let count = (str.match(/\s/g) || []).length;
console.log(count + 1 );
/**
 * Counts runs of non-whitespace characters in `str`.
 *
 * Declared with `const` so that no implicit global is created (an undeclared
 * assignment throws in strict mode and in ES modules).
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of words; a single word counts as 1, and empty
 *   or whitespace-only text counts as 0.
 */
const countWords = (str) => {
  // Count the words themselves rather than the whitespace between them, so
  // repeated or surrounding whitespace cannot inflate the result.
  const words = str.match(/\S+/g) || [];
  return words.length;
};
/**
 * Counts words in `str`, ignoring surrounding whitespace and treating any
 * run of whitespace (spaces, newlines, tabs) as one separator.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of words; 0 for an empty or whitespace-only string.
 */
function countWords(str) {
  var trimmed = str.trim();
  // "".split(...) yields [""], which would otherwise be reported as one word.
  if (trimmed === "") {
    return 0;
  }
  return trimmed.split(/\s+/).length;
}
document.write(countWords(" this function remove extra space and count the real string lenth"));
I know it's late, but this regex should solve your problem. It will match and return the number of words in your string, rather than the one you marked as a solution, which would count space-space-word as 2 words even though it's really just 1 word.
/**
 * Counts runs of non-whitespace characters in `str`.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of runs; 0 when there are none.
 */
function countWords(str) {
  var words = str.match(/\S+/g);
  if (words === null) {
    return 0;
  }
  return words.length;
}
You got some mistakes in your code.
/**
 * Returns the number of space characters in `str` plus one.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Space count + 1 (so "" yields 1 and "a  b" yields 3).
 */
function WordCount(str) {
  var spaces = 0;
  var at = str.indexOf(" ");
  while (at !== -1) {
    spaces += 1;
    at = str.indexOf(" ", at + 1);
  }
  // n spaces separate n + 1 pieces.
  return spaces + 1;
}
console.log(WordCount("Random String"));
There is another easy way using regular expressions:
(text.split(/\b/).length - 1) / 2
The exact value can differ about 1 word, but it also counts word borders without space, for example "word-word.word". And it doesn't count words that don't contain letters or numbers.
I think this answer will give all the solutions for:
Number of characters in a given string
Number of words in a given string
Number of lines in a given string
/**
 * Logs the number of characters, words (runs of word characters) and lines
 * of a fixed sample text to the console.
 */
function NumberOf() {
  const sample = "Write a piece of code in any language of your choice that computes the total number of characters, words and lines in a given text. \n This is second line. \n This is third line.";
  const charCount = sample.length;
  const wordCount = sample.match(/\w+/g).length;
  // Accept Windows (\r\n), old Mac (\r) and Unix (\n) line endings.
  const lineCount = sample.split(/\r\n|\r|\n/).length;
  console.log('Number of characters:',charCount);
  console.log('Number of words:',wordCount);
  console.log('Number of lines:',lineCount);
}
NumberOf();
First you need to find length of the given string by string.length
Then you can find number of words by matching them with string string.match(/\w+/g).length
Finally, you can count the lines by splitting the string on line breaks: string.split(/\r\n|\r|\n/).length
I hope this can help those who are searching for these 3 answers.
If you want to count specific words
/**
 * Logs how many times `keyword` occurs in `text` as a whole word,
 * ignoring case.
 *
 * @param {string} text - Text to search.
 * @param {string} keyword - Word to look for; matched literally.
 * @returns {void} The result is written with console.log only.
 */
function countWholeWords(text, keyword) {
  // Escape regex metacharacters so a keyword such as "a.b" or "c++" is
  // searched for literally instead of being interpreted as a pattern.
  const escapedKeyword = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // NOTE(review): \b only marks a boundary next to a word character, so
  // keywords that begin or end with punctuation may not match as expected.
  const times = text.match(new RegExp(`\\b${escapedKeyword}\\b`, 'gi'));
  if (times) {
    console.log(`${keyword} occurs ${times.length} times`);
  } else {
    console.log(`${keyword} does not occurs`);
  }
}
const text = `
In a professional context it often happens that private or corporate clients corder a publication to be
made and presented with the actual content still not being ready. Think of a news blog that's
filled with content hourly on the day of going live. However, reviewers tend to be distracted
by comprehensible content, say, a random text copied from a newspaper or the internet.
`
const wordsYouAreLookingFor = ["random", "cat", "content", "reviewers", "dog", "with"]
wordsYouAreLookingFor.forEach((keyword) => countWholeWords(text, keyword));
// random occurs 1 times
// cat does not occurs
// content occurs 3 times
// reviewers occurs 1 times
// dog does not occurs
// with occurs 2 times
You can use this algorithm :
app.js :
// Live word counter: shows "<n> Word(s)" in #demo for the page's first <textarea>.
const TextArea = document.querySelector('textarea');
const CountContainer = document.querySelector('#demo');

// Recomputes the count from the textarea's current value. Any whitespace run
// separates words; hyphenated words are kept whole.
const updateWordCount = () => {
  const words = TextArea.value.split(/\s+/).filter((word) => word !== '');
  const label = words.length <= 1 ? 'Word' : 'Words';
  CountContainer.textContent = `${words.length} ${label}`;
};

// 'input' fires after every change to the value (typing, deleting, pasting).
// The deprecated 'keypress' event fires before the typed character is in the
// value and is not sent for keys such as Backspace.
TextArea.addEventListener('input', updateWordCount);
HTML index page for test:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document</title>
</head>
<body>
<textarea cols="30" rows="10"></textarea>
<div id="demo"></div>
<script src="app.js"></script>
</body>
</html>
Adapted from internals-in answer
It handles the edge case ' ' as well
/**
 * Counts whitespace-separated words in `str`.
 *
 * @param str - Text to measure.
 * @returns Number of words; 0 for an empty or whitespace-only string.
 */
export const countWords = (str: string) => {
  const trimmed = str.trim();
  // An empty string would split into [""] and be reported as one word.
  return trimmed.length === 0 ? 0 : trimmed.split(/\s+/).length;
}
Jest tests
test("countwords", () => {
expect(countWords(' ')).toBe(0)
expect(countWords('78 7 ')).toBe(2)
expect(countWords('78 7 ')).toBe(2)
expect(countWords('aa, , 7')).toBe(3)
expect(countWords('aa, , \n \n \t 7 \n 4')).toBe(4)
});
This snippet will compute how many words are in the sentence:
let para = "hello world I am javascript";
console.log(para.split(" ").filter((x) => x !== "").length)
<textarea name="myMessage" onkeyup="wordcount(this.value)"></textarea>
<script type="text/javascript">
// Most recent word count; updated on every call to wordcount().
var cnt;
/**
 * Counts the words in the given text and shows the number in the element
 * with id "w_count".
 *
 * @param {string} count - The text to measure (the textarea's current value).
 */
function wordcount(count) {
  // Split on whitespace runs and drop empty pieces, so empty input gives 0
  // and repeated or trailing spaces do not add phantom words.
  var words = count.split(/\s+/).filter(function(word) {
    return word !== '';
  });
  cnt = words.length;
  var ele = document.getElementById('w_count');
  ele.value = cnt;
}
document.write("<input type=text id=w_count size=4 readonly>");
</script>
/**
 * Counts the words of the fixed sentence "My life is happy" by counting its
 * spaces and adding one.
 *
 * @returns {number} Space count + 1.
 */
function totalWordCount() {
  const sentence = "My life is happy";
  let spaces = 0;
  for (const ch of sentence) {
    if (ch === " ") {
      spaces += 1;
    }
  }
  // n spaces separate n + 1 words.
  return spaces + 1;
}
console.log(totalWordCount());
/**
 * Counts the space-separated words in `str`.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of words; leading, trailing and repeated spaces
 *   are ignored, and an empty string yields 0.
 */
function WordCount(str) {
  var totalSoFar = 0;
  var insideWord = false;
  for (var i = 0; i < str.length; i++) {
    if (str[i] === " ") {
      insideWord = false;
    } else if (!insideWord) {
      // First character of a new word: count the word once.
      insideWord = true;
      totalSoFar++;
    }
  }
  return totalSoFar;
}
console.log(WordCount("hi my name is raj"));
Related
Okay, to start with I should mention this is a very small personal project, and I've only had a handful of coding classes, several years ago now. I can figure out a lot of the (very) basics, but have a hard time troubleshooting. I'm in a little bit over my head here, and need a dumbed down solution.
I'm trying to put together a VERY simple translator that takes in a word or sentence from the user via a text input box, puts each word of the string into an array, translates each word in order, then spits out each translated word in the order it was input. For example, typing "I like cats" would output "Ich mag Katze" in German.
I've got most of it, but I CAN'T get anything but the first array element to translate. It comes out like "Ich like cats".
I've used a loop, probably because I'm an amateur and don't know another way of doing this, and I'd rather not use any libraries or anything. This is a very small project I want to have a couple of friends utilize locally; and I know there has to be some very simple code that will just take a string, put it into an array, swap one word for another word, and then output the results, but I'm damned if I can make it work.
What I currently have is the closest I've gotten, but like I said, it doesn't work. I've jerry-rigged the loop and clearly that's the totally wrong approach, but I can't see the forest for the trees. If you can help me, please make it "Javascript for Babies" picture book levels of simple, I cannot stress enough how inexperienced I am. This is just supposed to be a fun little extra thing for my D&D group.
/**
 * Reads the text typed into #inputTextField, echoes it into #print and writes
 * a "translation" into #print3.
 *
 * NOTE(review): this is the version being asked about; the reasons only the
 * first word is translated are annotated inline.
 */
function checkForTranslation(input, outputDiv) {
// NOTE(review): both parameters are immediately re-declared and overwritten,
// so whatever the caller passes in is ignored.
var input = document.getElementById("inputTextField").value;
// NOTE(review): `outputDiv` is assigned here but not used again in this function.
var outputDiv = document.getElementById("translationOutputDiv");
input = input.toLowerCase();
//puts user input into an array and then outputs it word by word
const myArray = input.split(" "); //added .split, thank you James, still otherwise broken
let output = "";
let translation = "";
for (let i = 0; i < myArray.length; i++) {
output += myArray[i]; //up to here, this works perfectly to put each word in the string into an array
//prints all words but doesnt translate the second onwards
translation += myArray[i];
// NOTE(review): the comparisons below test `output`, which accumulates every
// word seen so far plus a trailing space per word. From the second iteration
// on it holds text such as "apple banana", so it can never equal a single
// dictionary word; comparing `myArray[i]` would test the current word.
if (output == "") {
//document.getElementById("print2").innerHTML = "Translation Here";
}
else if (output == "apple") {
// NOTE(review): each branch assigns with `=`, replacing everything collected
// in `translation` so far instead of appending to it.
translation = "x-ray";
}
else if (output == "banana") {
translation = "yak";
}
else {
translation = "???";
}
output += " "; //adds a space when displaying original user input
} // END FOR LOOP
document.getElementById("print").innerHTML = output; //this outputs the original user input to the screen
document.getElementById("print3").innerHTML = translation; //this should output the translated output to the screen
} // END FUNCTION CHECKFORTRANSLATION
What it looks like
P.S. I'm not worried about Best Practices here, this is supposed to be a quickie project that I can send to a couple friends and they can open the HTML doc, saved locally, in their browser when they want to mess around with it if they want their half-orc character to say "die by my hammer!" or something. If you have suggestions for making it neater great, but I'm not worried about a mess, no one is going to be reading this but me, and hopefully once it's fixed I'll never have to read it again either!
Since it is a manual simple translation, you should just create a "dictionary" and use it to get the translations.
var dictionary = {
"apple": "x-ray",
"banana": "yak"
}
/**
 * Translates the text typed into #inputTextField word by word using
 * `dictionary` and writes the result into #translationOutputDiv.
 * Words without a dictionary entry are shown as "???".
 */
function checkForTranslation() {
  var input = document.getElementById("inputTextField").value.toLowerCase();
  var words = input
    .split(' ') // split string to words
    .filter(function(word) { // remove empty words
      return word.length > 0;
    });
  var hasOwn = Object.prototype.hasOwnProperty;
  var translatedWords = words.map(function(word) {
    // Only the dictionary's own entries count. A plain lookup would also find
    // inherited members, so typing "constructor" would print a function body.
    var wordTranslation = hasOwn.call(dictionary, word) ? dictionary[word] : undefined;
    return wordTranslation || "???"; // "???" when the word is not in the dictionary
  });
  var translatedText = translatedWords.join(' ');
  document.getElementById("translationOutputDiv").innerHTML = translatedText;
}
document.getElementById('translate').addEventListener('click', function() {
checkForTranslation();
});
<input type="text" id="inputTextField" />
<button id="translate">translate</button>
<br/>
<hr />
<div id="translationOutputDiv"></div>
Or if you want it a little more organized, you could use
const dictionary = {
"apple": "x-ray",
"banana": "yak"
}
/**
 * Translates `string` word by word using `dictionary`.
 *
 * @param {string} string - Text to translate; matched case-insensitively.
 * @returns {string} The translated words joined by single spaces, with
 *   "???" in place of every word that has no dictionary entry.
 */
function getTranslation(string) {
  // Only the dictionary's own entries count. A plain lookup would also find
  // inherited members such as `constructor` and return a function.
  const lookup = (word) =>
    (Object.prototype.hasOwnProperty.call(dictionary, word) && dictionary[word]) || '???';
  return string
    .toLowerCase()
    .split(' ')
    .filter(word => word)
    .map(lookup)
    .join(' ');
}
/**
 * Translates the value of `inputEl` with getTranslation and shows the result
 * in `outputEl`.
 *
 * @param {{value: string}} inputEl - Element whose `value` is read.
 * @param {{innerHTML: string}} outputEl - Element whose `innerHTML` is set.
 */
function translate(inputEl, outputEl) {
  const translated = getTranslation(inputEl.value);
  outputEl.innerHTML = translated;
}
document.querySelector('#translate').addEventListener('click', function() {
const input = document.querySelector('#inputTextField');
const output = document.querySelector('#translationOutputDiv');
translate(input, output);
});
<input type="text" id="inputTextField" />
<button id="translate">translate</button>
<br/>
<hr />
<div id="translationOutputDiv"></div>
I have seen multiple posts on here finding and highlighting strings but none have worked as expected yet. Below is my script currently:
// Looks at every element with class "strings" and logs whether its HTML
// contains the search string.
var str = 'word';
var divs= document.getElementsByClassName('strings');
for (var i = 0, len = divs.length; i < len; ++i) {
if(divs[i].innerHTML.indexOf(str) !== -1) {
// something
console.log('YES');
// NOTE(review): replace() returns a new string and the result is discarded
// here, so nothing on the page changes. It is also applied to `str` (the
// search term) rather than to the div's content, and the pattern needs two
// space-separated words, which 'word' does not contain.
str.replace(/(\w+) (\w+)/, '<div class="strings">$1</div> <div class="strings">$2</div>');
}else{
console.log('NO');
}
}
HTML:
<div class="strings">word word words</div>
Ideally this would highlight every instance of the string in my div each time the js is run as a function.
The code you posted is on the right track and regex replace is convenient, but be very careful that, in addition to using the correct logic, you're not opening yourself up to XSS attacks or regex escaping problems by sanitizing your input field (although the XSS issue would be mainly problematic if the user supplies the target/source text).
Use the "gi" flags on the regex to make your search case-insensitive (I used a checkbox to toggle), and feel free to loop over multiple text areas you'd like to search when updating (I left it as one for simplicity). Add \b to the regex to enforce strict word boundaries (also toggleable in the below example). You can also use basically any element or styling on your highlighted element. <mark> seems most semantic.
Lastly, it's worth ensuring that the search term doesn't consist of an empty string, which would add a lot of garbage highlight tags between every character of the text.
/**
 * Escapes a string for insertion as HTML by round-tripping it through a
 * detached <textarea>: the value is written as plain text and read back as
 * the element's serialised markup.
 *
 * @param {string} html - Raw text to escape.
 * @returns {string} The textarea's `innerHTML` after the text was assigned.
 */
const escapeHTML = html =>
  Object.assign(document.createElement("textarea"), { textContent: html }).innerHTML;
/**
 * Backslash-escapes every character of `s` that the pattern below lists
 * (regex metacharacters, plus `,`, `#`, `-` and whitespace) so the result can
 * be embedded in a RegExp source and match literally.
 *
 * @param {string} s - Text to escape.
 * @returns {string} `s` with each listed character prefixed by a backslash.
 */
const escapeRegex = s => {
  const special = /[-[\]{}()*+?.,\\^$|#\s]/g;
  return s.replace(special, "\\$&");
};
/**
 * Wraps every occurrence of a search term in <mark> tags.
 *
 * @param {string} searchTerm - Regex source to look for; it is interpolated
 *   into the pattern as-is, so callers escape it beforehand.
 * @param {string} originalText - Text to search.
 * @param {boolean} caseIns - When truthy, match case-insensitively.
 * @param {boolean} boundaries - When truthy, require a word boundary on both
 *   sides of the term.
 * @returns {string} The text with matches wrapped, or the text unchanged when
 *   the search term is empty.
 */
const highlight = (searchTerm, originalText, caseIns, boundaries) => {
  // An empty term would match between every character, so leave the text alone.
  if (!searchTerm) {
    return originalText;
  }

  let source = `(${searchTerm})`;
  if (boundaries) {
    source = `(\\b${searchTerm}\\b)`;
  }

  let flags = "g";
  if (caseIns) {
    flags += "i";
  }

  return originalText.replace(RegExp(source, flags), "<mark>$1</mark>");
};
// The element whose text is searched, plus a snapshot of that text taken once
// at startup. Every re-render starts from this snapshot rather than from the
// markup produced by a previous highlight pass.
const output = document.querySelector("#output");
const originalText = output.innerText;
// Current search settings; each handler below updates one of them.
let caseIns = false;
let boundaries = false;
let searchTerm = "";
// "Ignore case?" checkbox: store the new setting and re-render.
document.querySelector("#ignore-case").addEventListener("change", e => {
caseIns = e.target.checked;
output.innerHTML = highlight(searchTerm, originalText, caseIns, boundaries);
});
// "Word boundaries?" checkbox: store the new setting and re-render.
document.querySelector("#word-boundaries").addEventListener("change", e => {
boundaries = e.target.checked;
output.innerHTML = highlight(searchTerm, originalText, caseIns, boundaries);
});
// Search box (the first <input> in the document): regex-escape the typed text,
// then HTML-escape it, store it as the search term and re-render.
document.querySelector("input").addEventListener("keyup", e => {
searchTerm = escapeHTML(escapeRegex(e.target.value));
output.innerHTML = highlight(searchTerm, originalText, caseIns, boundaries);
});
div:first-child {
display: flex;
align-items: center;
margin-bottom: 1em;
}
span {
margin-left: 1em;
}
mark { /* add styling here */
border-radius: 2px;
}
<div>
<input placeholder="search term" />
<span>Ignore case? <input type="checkbox" id="ignore-case" /></span>
<span>Word boundaries? <input type="checkbox" id="word-boundaries" /></span>
</div>
<div id="output">Fourscore and seven years ago our fathers brought forth, on this continent, a new nation, conceived in liberty, and dedicated to the proposition that all men are created equal. Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived, and so dedicated, can long endure. We are met on a great battle-field of that war. We have come to dedicate a portion of that field, as a final resting-place for those who here gave their lives, that that nation might live. It is altogether fitting and proper that we should do this. But, in a larger sense, we cannot dedicate, we cannot consecrate—we cannot hallow—this ground. The brave men, living and dead, who struggled here, have consecrated it far above our poor power to add or detract. The world will little note, nor long remember what we say here, but it can never forget what they did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us—that from these honored dead we take increased devotion to that cause for which they here gave the last full measure of devotion—that we here highly resolve that these dead shall not have died in vain—that this nation, under God, shall have a new birth of freedom, and that government of the people, by the people, for the people, shall not perish from the earth.</div>
You're using the replace() method on the needle and not on the haystack. You want to find the str in the innerHTML attribute and then replace the innerHTML attribute with a copy in which every occurrence of str is surrounded by `<span>` tags.
Because you're using a variable for the regex search you need to first make a regex object and inject the needle string into it. also give it the g flag so it matches every instance of the found regex:
// Build a global regex that matches the needle literally: regex metacharacters
// in the needle are escaped so that e.g. "a.b" does not also match "axb", and
// the flags are passed as a string, which is what the RegExp constructor expects.
var regex = new RegExp(String(str).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
Then you manipulate the innerHTML attribute of the div element:
// Rewrite the element's markup with every match wrapped in a highlighted span.
// (The attribute needs its "=": `class"highlighted"` is not a class attribute,
// so the .highlighted CSS rule would never apply.)
divs[i].innerHTML = divs[i].innerHTML.replace(regex, `<span class="highlighted">${str}</span>`);
Now the script will look for the word and wrap it in a span with the .highlighted class.
So now all that's left to do is fix the css to handle it:
.highlighted {
background-color: yellow;
}
I am new to JavaScript. I have tried a lot, but could not find any answer or information showing how to count the occurrences of multiple sub-strings in a long string in a single pass.
Further information: I need get the occurrence of these sub-string and if the number of their occurrence to much, I need replace them at one time,so I need get the occurrence at one time.
Here is an example:
The long string Text as below,
Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.
The sub-string is a question, but what I need is to count each word occurrence in this sub-string at one time. for example, the word "name","NFL","championship","game" and "is","the" in this string.
What is the name of NFL championship game?
One of problems is some sub-string is not in the text, and some have shown many times.(which I might replaced it)
The Code I have tried as below, it is wrong, I have tried many different ways but no good results.
$(".showMoreFeatures").click(function(){
var text= $(".article p").text(); // This is to get the text.
var textCount = new Array();
// Because I use match, so for the first word "what", will return null, so
this is to avoid this null. and I was plan to get the count number, if it is
more than 7 or even more, I will replace them.
var qus = item2.question; //This is to get the sub-string
var checkQus = qus.split(" "); // I split the question to words
var newCheckQus = new Array();
// This is the array I was plan put the sub-string which count number less than 7, which I really needed words.
var count = new Array();
// Because it is a question as sub-string and have many words, so I wan plan to get their number and put them in a array.
for(var k =0; k < checkQus.length; k++){
textCount = text.match(checkQus[k],"g")
if(textCount == null){
continue;
}
for(var j =0; j<checkQus.length;j++){
count[j] = textCount.length;
}
//count++;
}
I was tried many different ways, and searched a lot, but no good results. The above code just want to show what I have tried and my thinking(might totally wrong). But actually it is not working , if you know how to implement it,solve my problem, please just tell me, no need to correct my code.
Thanks very much.
If I have understood the question correctly then it seems you need to count the number of times the words in the question (que) appear in the text (txt)...
var txt = "Super Bowl 50 was an American ...etc... Arabic numerals 50.";
var que = "What is the name of NFL championship game?";
I'll go through this in vanilla JavaScript and you can transpose it for JQuery as required.
First of all, to focus on the text we can make things a little simpler by changing the strings to lowercase and removing some of the punctuation.
// Normalise case so that later comparisons ignore capitalisation.
txt = txt.toLowerCase();
que = que.toLowerCase();

// Punctuation marks to strip. Each entry is written with a doubled backslash so
// that the pattern handed to RegExp contains a single, escaping backslash.
var puncArray = ["\\,", "\\.", "\\(", "\\)", "\\!", "\\?"];
for (const mark of puncArray) {
  // One global pattern per mark, applied to both strings.
  const stripper = new RegExp(mark, "g");
  txt = txt.replace(stripper, '');
  que = que.replace(stripper, '');
}
Now we can create a cleaner array from txt and que as well as a hash table from que like so...
// Tokenise both strings on single spaces.
var txtArray = txt.split(" ");
var queArray = que.split(" ");

// Tally table: one key per question word, every count starting at zero.
var queObject = {};
for (const word of queArray) {
  queObject[word] = 0;
}
queObject will be used to hold the words counted. If you were to console.debug(queObject) at this point it would look something like this...
console.debug(queObject);
/* =>
queObject = {
what: 0,
is: 0,
the: 0,
name: 0,
of: 0,
nfl: 0,
championship: 0,
game: 0
}
*/
Now we want to test each element in txtArray to see if it contains any of the elements in queArray. If the test is true we'll add +1 to the equivalent queObject property, like this...
// For every question word, count the text tokens that contain it.
for (const word of queArray) {
  // Unanchored pattern: it matches the word anywhere inside a token.
  const probe = new RegExp(word);
  for (const token of txtArray) {
    if (probe.test(token)) {
      // One more token of the text contains this question word.
      queObject[word]++;
    }
  }
}
We use RegExp test method here rather than String match method because we just want to know if "is A in B == true". If it is true then we increase the corresponding queObject property by 1. This method will also find words inside words, such as 'is' in 'San Francisco' etc.
All being well, logging queObject to the console will show you how many times each word in the question appeared in the text.
console.debug(queObject);
/* =>
queObject = {
what: 0
is: 2
the: 17
name: 0
of: 2
nfl: 1
championship: 0
game: 4
}
*/
Hoped that helped. :)
See MDN for more information on:
Array.forEach()
Object.keys()
RegExp.test()
So, I'm new to programming, but I'm trying to learn JavaScript. Currently I'm working on a project where I'm trying to parse a large text file (the 154 sonnets of Shakespeare found here) into an object array, in the following data structure:
var obj = {
property 1: [ 'value 1',
'value 2',
],
property 2: [ 'value 1',
'value 2',
],
etc., where roman numerals represent object properties and each line of the sonnet represents a value in each property's array.
I must use regular expressions to parse through the text file. So far I've been searching for the correct regexp to demarcate the text, but I don't know if I'm going about this the right way. Ultimately I want to create a drop down menu where each value in the list is a sonnet.
Edit: I'm actually now taking the source text from this url: http://pizzaboys.biz/xxx/sonnets.php
and doing the same thing as above, but instead of doing a $get I've put the text into a variable...
I've tried this:
$(document).ready(function(){
var data = new SonnetizerArray();
});
function SonnetizerArray(){
this.data = [];
var rawText = "text from above link"
var rx = /^\\n[CDILVX]/$\\n/g;
var array_of_sonnets = rawText.exec(rx);
for (var i = 0; i < array_of_sonnets.length; i ++){
var s = $.split(array_of_sonnets[i]);
if (s.length > 0) this.data.push(s);
}
}
Description
This regex will parse the text into a roman numeral and body. The body can then be split on the new line \n.
^\s+\b([CDMLXVI]{1,12})\b(?:\r|\n|$).*?(?:^.*?)(^.*?)(?=^\s+\b([MLXVI]{1,12})\b(?:\r|\n|$)|\Z)
Capture Groups
Group 0 gets the entire matching section
Group 1 gets the roman numeral
Group 2 gets the body of the section, not including the roman numeral
Javascript Code Example:
Sample text pulled from your link
VII
Lo! in the orient when the gracious light
Lifts up his burning head, each under eye
Doth homage to his new-appearing sight,
VIII
Music to hear, why hear'st thou music sadly?
Sweets with sweets war not, joy delights in joy:
Why lov'st thou that which thou receiv'st not gladly,
Or else receiv'st with pleasure thine annoy?
IX
Is it for fear to wet a widow's eye,
That thou consum'st thy self in single life?
Ah! if thou issueless shalt hap to die,
The world will wail thee like a makeless wife;
Example code
<script type="text/javascript">
// Matches one sonnet per hit: group 1 is the roman-numeral heading, group 2 the
// body that follows it, and group 3 the heading of the next sonnet (if any).
// Flags:
//   g - exec() resumes after the previous match; without it the loop below
//       would find the first sonnet again on every iteration and never end;
//   m - ^ and $ match at line boundaries, not only at the ends of the input;
//   s - . also matches line breaks, so a body can span several lines.
// JavaScript has no \Z anchor (it would match a literal "Z"), so "end of
// input" is written as (?![\s\S]).
var re = /^\s+\b([MLXVI]{1,12})\b(?:\r|\n|$).*?(?:^.*?)(^.*?)(?=^\s+\b([MLXVI]{1,12})\b(?:\r|\n|$)|(?![\s\S]))/gms;
var sourcestring = "source string to match with pattern";
var results = [];
var i = 0;
// Collect every match (the full match array, groups included) and report each
// entry of each match.
for (var matches = re.exec(sourcestring); matches !== null; matches = re.exec(sourcestring)) {
  results[i] = matches;
  for (var j = 0; j < matches.length; j++) {
    alert("results[" + i + "][" + j + "] = " + results[i][j]);
  }
  i++;
}
</script>
Sample output
$matches Array:
(
[0] => Array
(
[0] => VII
Lo! in the orient when the gracious light
Lifts up his burning head, each under eye
Doth homage to his new-appearing sight,
[1] =>
VIII
Music to hear, why hear'st thou music sadly?
Sweets with sweets war not, joy delights in joy:
Why lov'st thou that which thou receiv'st not gladly,
Or else receiv'st with pleasure thine annoy?
[2] =>
IX
Is it for fear to wet a widow's eye,
That thou consum'st thy self in single life?
Ah! if thou issueless shalt hap to die,
The world will wail thee like a makeless wife;
)
[1] => Array
(
[0] => VII
[1] => VIII
[2] => IX
)
[2] => Array
(
[0] =>
Lo! in the orient when the gracious light
Lifts up his burning head, each under eye
Doth homage to his new-appearing sight,
[1] =>
Music to hear, why hear'st thou music sadly?
Sweets with sweets war not, joy delights in joy:
Why lov'st thou that which thou receiv'st not gladly,
Or else receiv'st with pleasure thine annoy?
[2] =>
Is it for fear to wet a widow's eye,
That thou consum'st thy self in single life?
Ah! if thou issueless shalt hap to die,
The world will wail thee like a makeless wife;
)
[3] => Array
(
[0] => VIII
[1] => IX
[2] =>
)
)
Roman numeral validation
The above expression only tests the roman numeral string is composed of roman numeral characters, it doesn't actually validate the number is valid. If you need to validate the roman numeral is correctly formatted too, then you could use this expression.
^\s+\b(M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3}))\b(?:\r|\n|$).*?(?:^.*?)(^.*?)(?=^\s+\b([MLXVI]{1,12})\b(?:\r|\n|$)|\Z)
These strings may be long paragraphs, so I'm not sure it's best to split the entire string with a space delimiter. I'm trying to get, say, the first 10 words and wrap them in a span:
'<span class="easing">' + string + '</span>'
Then rejoin that with the second half of the original split. Suggestions on a super efficient way to do this? It would affect at most three paragraphs on the page at a time.
EDITED
Here's a kicker — The split should occur after the 9th word OR at the end of the first sentence (if that sentence is less than 9 words).
EXAMPLE
var origString = 'Coming into the world on Elvis’ birthday with a doctor named Presley seemed fortuitous until, wielding the silvery smooth scalpel in his aged unsteady hand, the doctor sliced through the walls of my mother’s uterus and into my unborn skin. Inside the warm soothing waters of my mother’s womb, inside the silent weightlessness, I was safe. Then the prick of cold steel marked the first in a series of rude awakenings. I was scarred for life even before birth.';
var newString = '<span="easing">Coming into the world on Elvis’ birthday with a doctor</span> named Presley seemed fortuitous until, wielding the silvery smooth scalpel in his aged unsteady hand, the doctor sliced through the walls of my mother’s uterus and into my unborn skin. Inside the warm soothing waters of my mother’s womb, inside the silent weightlessness, I was safe. Then the prick of cold steel marked the first in a series of rude awakenings. I was scarred for life even before birth.';
Or with a short sentence that starts the paragraph:
var origString = '“Is he okay? Tell me everything’s okay” she pleas, her desperate need to confirm my health competing with her own need for consolation.';
var newString = '<span class="easing">“Is he okay?</span> Tell me everything’s okay” she pleas, her desperate need to confirm my health competing with her own need for consolation.';
Considering you are only going to be scanning at most about 100 chars (unless you have URIs or very long words) then scanning character by character is quite optimal. You could optimise this by using .indexOf() in certain places, but you'd lose what you gained in having to check for each different character that could terminate a sentence.
/**
 * Wraps the opening of a string in <span>…</span>.
 *
 * The string is scanned character by character and is cut at whichever comes
 * first:
 *  - a sentence terminator (one of ?!.;:), which is kept inside the span, or
 *  - the first space met after `words` spaces have already been passed, so the
 *    span then holds the first `words + 1` space-separated words.
 * If neither is found the string is returned unchanged.
 *
 * @param {string} str - Text to process.
 * @param {number} words - Number of spaces to pass before cutting at the next one.
 * @returns {string} The text with its opening wrapped in a span.
 */
function spanomatic(str, words) {
  const TERMINATORS = '?!.;:';
  let spacesLeft = words;

  for (let i = 0; i < str.length; i++) {
    const c = str.charAt(i);
    let cut = -1;

    if (c === ' ') {
      // Cut in front of this space once enough spaces have gone by.
      if (spacesLeft-- <= 0) {
        cut = i;
      }
    } else if (TERMINATORS.includes(c)) {
      // Cut after the terminator so the punctuation stays with its sentence.
      cut = i + 1;
    }

    if (cut !== -1) {
      return '<span>' + str.substring(0, cut) + '</span>' + str.substring(cut);
    }
  }

  return str;
}
spanomatic ( 'Pass your string here', 9 );
(The above code assumes your text will always be correctly grammatically terminated (i.e. contain at least one of ?!.;:) - if not then it would be possible for a paragraph with less than 9 words to end up spanless. This could be fixed by a few changes however...)
note for future readers
If you're going for a 'super efficient' way of doing string searching avoid Regular Expressions (unless you really need their power). The accepted answer for this question is concise and nicely put together function - don't get me wrong - but it's about 70% slower than just scanning the string with a for loop (in my tests on FireFox & Chrome at least)... and that's even when comparing after moving the Regular Expression definitions outside of Bergi's function (i.e. using pre-compiled regexps rather than recreating them every time the function is called).
http://jsperf.com/compare-regexp-vs-char-scanning
// Replace only the first match (the pattern has no g flag): either the shortest
// run of characters ending in one of , . ? ! or, failing that, a run of
// characters reaching the end of the string.
return string.replace(/.+?[,.?!]|.+$/, function(match, index, string){
// Split the matched chunk into whitespace-separated words.
var words = match.split(/\s+/);
// Close the span after the 10th word, or after the last word when the chunk
// has fewer than 10 words.
words[ words.length<10 ? words.length-1 : 9 ] += '</span>';
// Open the span at the start and rejoin the words with single spaces.
return '<span class="easing">' + words.join(" ");
});
This matches the first sentence-like thing (or the whole string - unless linebreaks), and wraps the first 10 words of it in that span. Works for both your sample inputs, but also on smaller ones. Returns the empty string for an empty string, change the regex to …|.*$ if you want an empty span.
Here. It's a bit code-golfy though. Sorry.
// For every paragraph, wrap its opening in <span class="easing">: the cut falls
// after the tenth word or after the first word that ends in a full stop,
// whichever comes first.
$('p').html(function (i, text) {
  // Each match is one run of characters followed by a single whitespace character.
  const re = /(.+?)\s/g;
  let count = 0;
  // Default to the whole text. When the scan runs out of matches, exec()
  // resets lastIndex to 0, which would otherwise produce an empty span in
  // front of a short paragraph.
  let cut = text.length;

  for (let res = re.exec(text); res !== null; res = re.exec(text)) {
    count += 1;
    if (count === 10 || res[1].endsWith('.')) {
      // lastIndex sits just past the whitespace that ended this word.
      cut = re.lastIndex;
      break;
    }
  }

  return '<span class="easing">' + text.slice(0, cut) + '</span>' + text.slice(cut);
});
Live demo: http://jsfiddle.net/3DaEV/
How about this code:
var str = 'asda adsfadsf asdfadfadsf adsfsdafadf. adfadfadfad adfadfdaf adfadfadf adfadf \afsgasfggasfg SFGDFGDSFGH dfghdsghdgas hadghdagh';
var sentences = [], words = str.split(' ');
// Collect the leading words: stop after 9 words, after the first word that
// contains a full stop, or when the text runs out of words (so a short text no
// longer reads past the end of the array).
for (var i = 0; i < 9 && i < words.length; i++) {
  sentences.push(words[i]);
  if (words[i].includes('.')) {
    break;
  }
}
// Everything after the collected words, kept for a following pass.
var remainder = words.slice(sentences.length);
$('<span>' + sentences.join(' ') + '</span>').appendTo($('#log'));
I have it under fiddle so you can test. You would want to do this in a loop with the remainder of arr1.
Update:
If it's not just the full stop but also ?!:;etc. then create a RegExp and test instead of doing lastIndexOf('.')