Getting line number from index of character in file - javascript

I have a string input which consists of words. I am using regex.exec (g) to get all the words by function getWord(input)
So my input may look like this:
word word2
someword blah
What I get from from exec is object containing index of match. So it is array like:
[ 'word', index: 0, input: "..."]
...
[ 'someword', index: 11, input: "..."]
...
What I need is to easily calculate that word "someword" is on line 2 by using the index(11) (as I don't have any other value telling me what is the number of lines)
Here is what I came up with: Match '\n's until you match \n with higher index then is index of word. Not sure if this may not be problematic in 10k lines file.
Snippet for idea:
getLineFromIndex: (index, input) ->
regex = /\n/g
line = 1
loop
match = regex.exec(input)
break if not match? or match.index > index
line++
return line
Kinda big optimalization can be done here. I can save the regex and last match, so I won't iterate all the input every time I want to check for line number. Regex will then be executed only when the last match has lower index then current index.
This is the final idea with optimization:
###
#variable content [String] is input content
###
getLineFromIndex: (index) ->
#lineMatcher = #lineMatcher || /\n/g
#lastLine = #lastLine || 1
if #eof isnt true
#lastMatch = #lastMatch || #lineMatcher.exec(#content)
if #eof or index < #lastMatch.index
return #lastLine
else
match = #lineMatcher.exec(#content)
if not #eof and match is null
#eof = true
else
#lastMatch = match
#lastLine++
return #lastLine

Cut input (a.substr(0, 11)).
Split it (a.substr(0, 11).split('\n')).
Count it (a.substr(0, 11).split('\n').length).

Your pseudo-code seems to do the job.
But I do not see how you can infer the line number by the offset of the searched word.
I would split the input text by lines, then look over the array for the searched word, and if found return the line index.
var input= "word word2 \n"+
"someword blah";
function getLinesNumberOf( input, word){
var line_numbers=[];
input.split("\n").forEach(function(line, index){
if( line.indexOf(word)>=0 ) line_numbers.push(index);
});
return line_numbers;
}
console.log(getLinesNumberOf(input,"someword"));
I have add support for multiple occurences of the searched word.
edit
To avoid too memory consumption with large inputs, you can parse sequentially (for the same avantanges of SAX vs DOM):
function getLinesNumberOf( word, input ){
input+= "\n";//make sure to not miss the last line;
var line_numbers=[], current_line=0;
var startline_offset=0;
do{
//get the offset next of the next breakline
endline_offset= input.indexOf("\n",startline_offset);
//get the offset of the searched word in the line
word_offset= input.substring(startline_offset,endline_offset).indexOf(word, 0);
//check if the searched word has been found and if it has been found on current_line
if( word_offset >= 0 && word_offset < endline_offset ) {
//if true the current_line is stored
line_numbers.push(current_line);
}
//the offset of the next line is just after the breakline offset
startline_offset= endline_offset+1;
current_line++;
}while(endline_offset>=0);//we continue while a breakline is found
console.log(line_numbers);
}

Related

JS Regex returning -1 & 0

I was tasked with the following:
take a string
print each of the vowels on a new line (in order) then...
print each of the consonants on a new line (in order)
The problem I found was with the regex. I originally used...
/[aeiouAEIOU\s]/g
But this would return 0 with a vowel and -1 with a consonant (so everything happened in reverse).
I really struggled to understand why and couldn't for the life of me find the answer. In the end it was simple enough to just invert the string but I want to know why this is happening the way it is. Can anyone help?
let i;
let vowels = /[^aeiouAEIOU\s]/g;
let array = [];
function vowelsAndConsonants(s) {
for(i=0;i<s.length;i++){
//if char is vowel then push to array
if(s[i].search(vowels)){
array.push(s[i]);
}
}
for(i=0;i<s.length;i++){
//if char is cons then push to array
if(!s[i].search(vowels)){
array.push(s[i]);
}
}
for(i=0;i<s.length;i++){
console.log(array[i]);
}
}
vowelsAndConsonants("javascript");
if(vowels.test(s[i])){ which will return true or false if it matches, or
if(s[i].search(vowels) !== -1){ and if(s[i].search(vowels) === -1){
is what you want if you want to fix your code.
-1 is not falsey so your if statement will not function correctly. -1 is what search returns if it doesn't find a match. It has to do this because search() returns the index position of the match, and the index could be anywhere from 0 to Infinity, so only negative numbers are available to indicate non-existent index:
MDN search() reference
Below is a RegEx that matches vowel OR any letter OR other, effectively separating out vowel, consonant, everything else into 3 capture groups. This makes it so you don't need to test character by character and separate them out manually.
Then iterates and pushes them into their respective arrays with a for-of loop.
const consonants = [], vowels = [], other = [];
const str = ";bat cat set rat. let ut cut mut,";
for(const [,cons,vow,etc] of str.matchAll(/([aeiouAEIOU])|([a-zA-Z])|(.)/g))
cons&&consonants.push(cons) || vow&&vowels.push(vow) || typeof etc === 'string'&&other.push(etc)
console.log(
consonants.join('') + '\n' + vowels.join('') + '\n' + other.join('')
)
There are a couple of inbuilt functions available:
let some_string = 'Mary had a little lamb';
let vowels = [...some_string.match(/[aeiouAEIOU\s]/g)];
let consonents = [...some_string.match(/[^aeiouAEIOU\s]/g)];
console.log(vowels);
console.log(consonents);
I think that you don't understand correctly how your regular expression works. In the brackets you have only defined a set of characters you want to match /[^aeiouAEIOU\s]/g and further by using the caret [^]as first in your group, you say that you want it to match everything but the characters in the carets. Sadly you don't provide an example of input and expected output, so I am only guessing, but I thing you could do the following:
let s = "avndexleops";
let keep_vowels = s.replace(/[^aeiouAEIOU\s]/g, '');
console.log(keep_vowels);
let keep_consonants = s.replace(/[aeiouAEIOU\s]/g, '');
console.log(keep_consonants);
Please provide example of expected input and output.
You used:
/[^aeiouAEIOU\s]/g
Instead of:
/[aeiouAEIOU\s]/g
^ means "not", so your REGEX /[^aeiouAEIOU\s]/g counts all the consonants.

Splitting a string based on max character length, but keep words into account

So In my program I can receive strings of all kinds of lengths and send them on their way to get translated. If those strings are of a certain character length I receive an error, so I want to check & split those strings if necessary before that. BUT I can't just split the string in the middle of a word, the words themself also need to be intact & taken into account.
So for example:
let str = "this is an input example of one sentence that contains a bit of words and must be split"
let splitStringArr = [];
// If string is larger than X (for testing make it 20) characters
if(str.length > 20) {
// Split string sentence into smaller strings, keep words intact
//...
// example of result would be
// splitStringArr = ['this is an input', 'example of one sentence' 'that contains...', '...']
// instead of ['this is an input exa' 'mple of one senten' 'ce that contains...']
}
But I'm not sure how to split a sentence and still keep into account the sentence length.
Would a solution for this be to iterate over the string, add every word to it and check every time if it is over the maximum length, otherwise start a new array index, or are there better/existing methods for this?
You can use match and lookahead and word boundaries, |.+ to take care string at the end which are less then max length at the end
let str = "this is an input example of one sentence that contains a bit of words and must be split"
console.log(str.match(/\b[\w\s]{20,}?(?=\s)|.+$/g))
Here's an example using reduce.
const str = "this is an input example of one sentence that contains a bit of words and must be split";
// Split up the string and use `reduce`
// to iterate over it
const temp = str.split(' ').reduce((acc, c) => {
// Get the number of nested arrays
const currIndex = acc.length - 1;
// Join up the last array and get its length
const currLen = acc[currIndex].join(' ').length;
// If the length of that content and the new word
// in the iteration exceeds 20 chars push the new
// word to a new array
if (currLen + c.length > 20) {
acc.push([c]);
// otherwise add it to the existing array
} else {
acc[currIndex].push(c);
}
return acc;
}, [[]]);
// Join up all the nested arrays
const out = temp.map(arr => arr.join(' '));
console.log(out);
What you are looking for is lastIndexOf
In this example, maxOkayStringLength is the max length the string can be before causing an error.
myString.lastIndexOf(/\s/,maxOkayStringLength);
-- edit --
lastIndexOf doesn't take a regex argument, but there's another post on SO that has code to do this:
Is there a version of JavaScript's String.indexOf() that allows for regular expressions?
I would suggest:
1) split string by space symbol, so we get array of words
2) starting to create string again selecting words one by one...
3) if next word makes the string exceed the maximum length we start a new string with this word
Something like this:
const splitString = (str, lineLength) => {
const arr = ['']
str.split(' ').forEach(word => {
if (arr[arr.length - 1].length + word.length > lineLength) arr.push('')
arr[arr.length - 1] += (word + ' ')
})
return arr.map(v => v.trim())
}
const str = "this is an input example of one sentence that contains a bit of words and must be split"
console.log(splitString(str, 20))

Split a string into an array of 36 characters in each line without breaking words

I am trying to split a string into an array of 36 characters in each index. Also the words should not break during this split. I am using below code to split a string without breaking words but this checks the next space after 36 is reached. My requirement is if the first index reaches 36 character , then it should check the previous space in that line and move this word to next index in array .
For example if I have a string "This is the new content for developing" This should be split into two lines as
[0]- This is the new content for
[1]-developing
Currently the below code splits this in a single line like
[0]- This is the new content for developing
var count; var len=36;
var curr = len; var prev = 0;
while (data[curr]) {
if (data[curr++] == ' ') {
splitArr.push(data.substring(prev,curr));
prev = curr;
curr += len;
}
}
splitArr.push(data.substr(prev));
What I can use instead of data[curr++] in if condition to get the white space before 36 characters?
Thanks for your help in advance.
Personally I would break the string into words, and add them to batches until we'd pass the max size (36), at which point we start a new batch.
We split the string into words with .split(). I use regex instead of a regular .split(" ") because I want to include the spaces when I split the string.
As we iterate through the items, we look at the last item. Would adding this string to that last item be more than 36 characters? If so, it starts a new item. If not, it adds it to the previous one.
To iterate and combine the items, I elected to use Array.reduce().
const str = "This is a demonstration of how your code might work with a longer text string. ";
const charLimit = 36;
let result = str
.split(/(\s+)/)
.reduce((output, item) => {
let last = output.pop() || ""; //get the last item
return last.length + item.length > charLimit //would adding the current item to it exceed 36 chars?
? [...output, last, item] //Yes: start a new item
: [...output, last + item] //No: add to previous item
}, []);
console.log("Result:");
console.log(result);
console.log("With lengths:");
console.log(result.map(i => ({string: i, length: i.length})));
console.log("Trimmed:");
console.log(result.map(i => i.trim()));
Simplest way i can think of using word boundaries
(?:^|\b)[\w .]{1,36}(?:\b|$)
let str = `This is a demonstration of how your code might work with a longer text string`
let op = str.match(/(?:^|\b)[\w .]{1,36}(?:\b|$)/gi)
console.log(op)
Regex demo

Parse strings with a regex and store the result in an array or a string

I need some help to improve my code :
I am beginner with regex system.
I would like to fecth NUMBER below in script and store it in a string or an array to moment that output "NUMBER1,NUMBER1_NUMBER2_,NUMBER2" I don't understand why, i would like jsut NUMBER at the end ;
function fetchnumber(){
extract = "";
for(picture = 1 ; picture < 5; picture++){
// get background image as a string as this :
// url('http://www.mywebsite.com/directory/image_NUMBER_.png');
var NumberOfPicture = document.getElementById(picture).style.backgroundImage ;
reg = /\_(.*)\_/;
extract += reg.exec(NumberOfPicture);
}
}
I write this small example for you. Hope this help you.
var inputString = 'http://www.mywebsite.com/directory/image_123_.png';
var imageNumber = (/image_([^_]+)_\.\w{3}$/.exec(inputString) || [,false])[1];
// so into imageNumber variable you will have a 123 or false if there is no mach
if (imageNumber) {
// here you can do something with finded
// part of text.
}
I wish you luck with the implementation.
You asked why there is [1] instead [0]. The explanation is that we need to have
the same behavior when there is no match of regex. This is quote from MDN
The exec() method executes a search for a match in a specified string.
Returns a result array, or null.
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec
So. If there is match for regular expression the returned array will consist
from matched string located at zero index ([0]) and with first backreference at first index [1] back references are symbols between brackets (sddsd). But if there is no match we will pass [,false] as output so we will expect the result
into first array index [1]. Try the code with different input. For example:
var inputString = 'some text some text ';
var imageNumber = (/image_([^_]+)_\.\w{3}$/.exec(inputString) || [,false])[1];
// so into imageNumber variable you will have a 123 or false if there is no mach
if (imageNumber) {
// here you can do something with finded
// part of text.
}
So .. In this case the condition will not be executed at all. I mean:
if (imageNumber) {
// here you can do something with finded
// part of text.
}

Detect repeating letter in an string in Javascript

code for detecting repeating letter in a string.
var str="paraven4sr";
var hasDuplicates = (/([a-zA-Z])\1+$/).test(str)
alert("repeating string "+hasDuplicates);
I am getting "false" as output for the above string "paraven4sr". But this code works correctly for the strings like "paraaven4sr". i mean if the character repeated consecutively, code gives output as "TRUE". how to rewrite this code so that i ll get output as "TRUE" when the character repeats in a string
JSFIDDLE
var str="paraven4sr";
var hasDuplicates = (/([a-zA-Z]).*?\1/).test(str)
alert("repeating string "+hasDuplicates);
The regular expression /([a-zA-Z])\1+$/ is looking for:
([a-zA-Z]]) - A letter which it captures in the first group; then
\1+ - immediately following it one or more copies of that letter; then
$ - the end of the string.
Changing it to /([a-zA-Z]).*?\1/ instead searches for:
([a-zA-Z]) - A letter which it captures in the first group; then
.*? - zero or more characters (the ? denotes as few as possible); until
\1 - it finds a repeat of the first matched character.
If you have a requirement that the second match must be at the end-of-the-string then you can add $ to the end of the regular expression but from your text description of what you wanted then this did not seem to be necessary.
Try this:
var str = "paraven4sr";
function checkDuplicate(str){
for(var i = 0; i < str.length; i++){
var re = new RegExp("[^"+ str[i] +"]","g");
if(str.replace(re, "").length >= 2){
return true;
}
}
return false;
}
alert(checkDuplicate(str));
Here is jsfiddle
To just test duplicate alphanumeric character (including underscore _):
console.log(/(\w)\1+/.test('aab'));
Something like this?
String.prototype.count=function(s1) {
return (this.length - this.replace(new RegExp(s1,"g"), '').length) / s1.length;
}
"aab".count("a") > 1
EDIT: Sorry, just read that you are not searching for a function to find whether a letter is found more than once but to find whether a letter is a duplicate. Anyway, I leave this function here, maybe it can help. Sorry ;)

Categories

Resources