Regex to capture Ids from text

Regex to capture Ids from text - javascript

I have the following regex where I am trying to capture the Ids of each start comment. But for some reason I am only able to capture the first one. It won't grab the Id of the nested comment. It only prints 1000 to the console. I am trying to get it to capture both 1000 and 2000. Can anyone spot the error in my regex?
<script type="text/javascript">
function ExtractText() {
var regex = /\<!--Start([0-9]{4})-->([\s\S]*?)<!--End[0-9]{4}-->/gm;
var match;
while (match = regex.exec($("#myHtml").html())) {
console.log(match[1]);
}
}
</script>
<div id="myHtml">
<!--Start1000-->Text on<!--Start2000-->the left<!--End1000-->Text on the right<!--End2000-->
</div>
Based on Mike Samuel's answer I updated my JS to the following:
function GetAllIds() {
var regex = /<!--Start([0-9]{4})-->([\s\S]*?)<!--End\1-->/g;
var text = $("#myHtml").html();
var match;
while (regex.test(text)) {
text = text.replace(
regex,
function (_, id, content) {
console.log(id);
return content;
});
}
}

In
<!--Start1000-->Text on<!--Start2000-->the left<!--End1000-->Text on the right<!--End2000-->
the "1000" region overlaps the "2000" region, but the exec loop only finds non-overlapping matches since each call to exec with the same regex and string starts at the end of the last match. To solve this problem, try
var regex = /<!--Start([0-9]{4})-->([\s\S]*?)<!--End\1-->/g;
for (var s = $("#myHtml").html(), sWithoutComment;
// Keep going until we fail to replace a comment bracketed chunk
// with the chunk minus comments.
true;
s = sWithoutComment) {
// Replace one group of non-overlapping comment pairs.
sWithoutComment = s.replace(
regex,
function (_, id, content) {
console.log(id);
// Replace the whole thing with the body.
return content;
});
if (s === sWithoutComment) { break; }
}

You can use grouping and then another regexp:
var regex = /(<!--Start)([0-9]{4})/ig;
var str = document.getElementById('myHtml').innerHTML;
var matches = str.match(regex);
for(var i=0;i<matches.length;i++){
var m = matches[i];
var num = m.match(/(\d+)/)[1];
console.log(num);
}

Related

JavaScript - How to underline the first character of every word

Sentence: Mr Blue has a blue house and a blue car.
JavaScript Code:
`
function myFunction(){
var str = document.getElementById("sentence").innerHTML;
for(var i=0; i<str.length;i++){
if(i == 0 )
{
var res = str.replace(str.charAt(i+1), "<u>"+str.charAt(i+1)+"</u>");
document.getElementById("sentence").innerHTML = res;
}else if(str.charAt(i) == " ")
{
var res = str.replace(str.charAt(i+1), "<u>"+str.charAt(i+1)+"</u>");
document.getElementById("sentence").innerHTML = res;
}
}`
Problem: I am trying to underline the first character in every word of a paragraph when a button is pressed. With the code I have only the C from car is underlined. if anyone can see where I have gone wrong or if there is a better way of doing this problem.
Thanks

Your code is working partially correct. It works on every word of the string but is getting updated again with next word.
You can make it simple by using a regex.
str.replace(/([^\s]+)/g,function(str){
return "<u>"+str.charAt(0)+"</u>" + str.slice(1);
})
Explanation of the regex:
(: Capturing group start
[^: Negated set - Match anything which is not in the set.
\s: Matches any whitespace character (spaces)
]: set ends
): Capturing group ends
Looking at the syntax of the javascript replace function:
str.replace(regexp|substr, newSubstr|function)
function (replacement)
A function to be invoked to create the new substring to be used to
replace the matches to the given regexp or substr. The arguments
supplied to this function are match, submatch, offset and the
original string. (Source: MDN)
Edit: You are still not using the accepted solution in your answer properly. This is how your function will look now:
<script>
function myFunction() {
var str = document.getElementById("Sentence").innerHTML;
var result = str.replace(/([^\s]+)/g,function(str){
return "<u>"+str.charAt(0)+"</u>" + str.slice(1);
});
document.getElementById("Sentence").innerHTML = result;
}
</script>
and if you are comfortable with ES6:
<script>
function myFunction() {
const str = document.getElementById("Sentence").innerHTML;
const result = str.replace(/([^\s]+)/g, p1 => `<u>${p1.charAt(0)}</u>${p1.slice(1)}` );
document.getElementById("Sentence").innerHTML = result;
}
</script>

You store the mutation inside res, but you don't update str, so at the next iteration the previous changes get discarded. Instead just mutate str:
str = str.replace(str.charAt(i+1), "<u>"+str.charAt(i+1)+"</u>");
And then after the loop is done reflect it to the DOM once:
document.getElementById("sentence").innerHTML = str;
How i would solve that:
function underline(selector) {
const el = document.querySelectorAll(selector);
let result = "";
for(const word of el.innerHTML.split(" ")) {
result += `<u>${word[0]}</u>${word.substr(1)}`;
}
el.innerHtML = result;
}
underline("#sentence");

var str = document.getElementById("sentence").innerHTML;
var words = str.match(/\S*/g);
var words2 = [];
words.forEach(function(item, i, arr) {
if (item!="")
{
words2.push(item.replace(item.charAt(1), "<u>"+item.charAt(1)+"</a>"));
}
});
document.getElementById("sentence").innerHTML = words2.join(" ");

Thanks guys for the help, greatly appreciated.
Full Solutions:
<!DOCTYPE html>
<html>
<body>
<p id="Sentence">Mr Blue has a blue house and a blue car.</p>
<button onclick="myFunction()">Click</button>
<script>
function myFunction() {
var str = document.getElementById("Sentence").innerHTML;
var res = str.replace(/([^\s]+)/g,function(str){
return "<u>"+str.charAt(0)+"</u>" + str.slice(1);
});
document.getElementById("Sentence").innerHTML = res;
}
</script>
</body>
</html>

Split sentence by space mixed up my index

I'm facing some problem while trying to send text to some spelling API.
The API return the corrections based on the words index, for example:
sentence:
"hello hoow are youu"
So the API index the words by numbers like that and return the correction based on that index:
0 1 2 3
hello hoow are youu
API Response that tell me which words to correct:
1: how
3: you
On the code I using split command to break the sentence into words array so I will be able to replace the misspelled words by their index.
string.split(" ");
My problem is that the API trim multiple spaces between words into one space, and by doing that the API words index not match my index. (I would like to preserve the spaces on the final output)
Example of the problem, sentence with 4 spaces between words:
Hello howw are youu?
0 1 2 3 4 5 6 7
hello hoow are youu
I thought about looping the words array and determine if the element is word or space and then create something new array like that:
indexed_words[0] = hello
indexed_words[0_1] = space
indexed_words[0_2] = space
indexed_words[0_3] = space
indexed_words[0_4] = space
indexed_words[0_5] = space
indexed_words[0_6] = space
indexed_words[0_7] = space
indexed_words[1] = how
indexed_words[2] = are
indexed_words[3] = you?
That way I could replace the misspelled words easily and than rebuild the sentence back with join command but the problem but the problem that I cannot use non-numeric indexes (its mixed up the order of the array)
Any idea how I can keep the formatting (spaces) but still correct the words?
Thanks

in that case you have very simple solution:L
$(document).ready(function(){
var OriginalSentence="howw are you?"
var ModifiedSentence="";
var splitstring=OriginalSentence.split(' ')
$.each(splitstring,function(i,v){
if(v!="")
{
//pass this word to your api and appedn it to sentance
ModifiedSentence+=APIRETURNVALUE//api return corrected value;
}
else{
ModifiedSentence+=v;
}
});
alert(ModifiedSentence);
});

Please review this one:
For string manipulation like this, I would highly recommend you to use Regex
Use online regex editor for faster try and error like here https://regex101.com/.
here I use /\w+/g to match every words if you want to ignore 1 or two words we can use /\w{2,}/g or something like that.
var str = "Hello howw are youu?";
var re = /\w+/g
var words = str.match(re);
console.log("Returning valus")
words.forEach(function(word, index) {
console.log(index + " -> " + word);
})
Correction
Just realize that you need to keep spacing as it is, please try this one:
I used your approach to change all to space. create array for its modified version then send to your API (I dunno that part). Then get returned data from API, reconvert it back to its original formating string.
var ori = `asdkhaskd asdkjaskdjaksjd askdjaksdjalsd a ksjdhaksjdhasd asdjkhaskdas`;
function replaceMeArr(str, match, replace) {
var s = str,
reg = match || /\s/g,
rep = replace || ` space `;
return s.replace(reg, rep).split(/\s/g);
}
function replaceMeStr(arr, match, replace) {
var a = arr.join(" "),
reg = match || /\sspace\s/g,
rep = replace || " ";
return a.replace(reg, rep);
}
console.log(`ori1: ${ori}`);
//can use it like this
var modified = replaceMeArr(ori);
console.log(`modi: ${modified.join(' ')}`);
//put it back
var original = replaceMeStr(modified);
console.log(`ori2: ${original}`);

Updated
var str = "Hello howw are youu?";
var words = str.split(" ");
// Getting an array without spaces/empty values
// send it to your API call
var requestArray = words.filter(function(word){
if (word) {
return word;
}
});
console.log("\nAPI Response that tell me which words to correct:");
console.log("6: how\n8: you");
var response = {
"1": "how",
"3": "you"
}
//As you have corrected words index, Replace those words in your "requestArray"
for (var key in response) {
requestArray[key] = response[key];
}
//now we have array of non-empty & correct spelled words. we need to put back empty (space's) value back in between this array
var count = 0;
words.forEach(function(word, index){
if (word) {
words[index] = requestArray[count];
count++;
}
})
console.log(words);
Correct me, if i was wrong.
Hope this helps :)

Try this JSFiddle
, Happy coding :)
//
// ReplaceMisspelledWords
//
// Created by Hilal Baig on 21/11/16.
// Copyright © 2016 Baigapps. All rights reserved.
//
var preservedArray = new Array();
var splitArray = new Array();
/*Word Object to preserve my misspeled words indexes*/
function preservedObject(pIndex, nIndex, title) {
this.originalIndex = pIndex;
this.apiIndex = nIndex;
this.title = title;
}
/*Preserving misspeled words indexes in preservedArray*/
function savePreserveIndexes(str) {
splitArray = str.split(" ");
//console.log(splitArray);
var x = 0;
for (var i = 0; i < splitArray.length; i++) {
if (splitArray[i].length > 0) {
var word = new preservedObject(i, x, splitArray[i]);
preservedArray.push(word);
x++;
}
}
};
function replaceMisspelled(resp) {
for (var key in resp) {
for (var i = 0; i < preservedArray.length; i++) {
wObj = preservedArray[i];
if (wObj.apiIndex == key) {
wObj.title = resp[key];
splitArray[wObj.originalIndex] = resp[key];
}
}
}
//console.log(preservedArray);
return correctedSentence = splitArray.join(" ");
}
/*Your input string to be corrected*/
str = "Hello howw are youu";
console.log(str);
savePreserveIndexes(str);
/*API Response in json of corrected words*/
var apiResponse = '{"1":"how","3":"you" }';
resp = JSON.parse(apiResponse);
//console.log(resp);
/*Replace misspelled words by corrected*/
console.log(replaceMisspelled(resp)); //Your solution

Javascript get query string values using non-capturing group

Given this query string:
?cgan=1&product_cats=mens-jeans,shirts&product_tags=fall,classic-style&attr_color=charcoal,brown&attr_size=large,x-small&cnep=0
How can I extract the values from only these param types 'product_cat, product_tag, attr_color, attr_size' returning only 'mens-jeans,shirts,fall,classic-style,charcoal,brown,large,x-small?
I tried using a non-capturing group for the param types and capturing group for just the values, but its returning both.
(?:product_cats=|product_tags=|attr\w+=)(\w|,|-)+

You can collect tha values using
(?:product_cats|product_tags|attr\w+)=([\w,-]+)
Mind that a character class ([\w,-]+) is much more efficient than a list of alternatives ((\w|,|-)*), and we avoid the issue of capturing just the last single character.
Here is a code sample:
var re = /(?:product_cats|product_tags|attr\w+)=([\w,-]+)/g;
var str = '?cgan=1&product_cats=mens-jeans,shirts&product_tags=fall,classic-style&attr_color=charcoal,brown&attr_size=large,x-small&cnep=0';
var res = [];
while ((m = re.exec(str)) !== null) {
res.push(m[1]);
}
document.getElementById("res").innerHTML = res.join(",");
<div id="res"/>

You can always use a jQuery method param.

You can use following simple regex :
/&\w+=([\w,-]+)/g
Demo
You need to return the result of capture group and split them with ,.
var mystr="?cgan=1&product_cats=mens-jeans,shirts&product_tags=fall,classic-style&attr_color=charcoal,brown&attr_size=large,x-small&cnep=0
";
var myStringArray = mystr.match(/&\w+=([\w,-]+)/g);
var arrayLength = myStringArray.length-1; //-1 is because of that the last match is 0
var indices = [];
for (var i = 0; i < arrayLength; i++) {
indices.push(myStringArray[i].split(','));
}

Something like
/(?:product_cats|product_tag|attr_color|attr_size)=[^,]+/g
(?:product_cats|product_tag|attr_color|attr_size) will match product_cats or product_tag or attr_color or attr_size)
= Matches an equals
[^,] Negated character class matches anything other than a ,. Basically it matches till the next ,
Regex Demo
Test
string = "?cgan=1&product_cats=mens-jeans,shirts&product_tags=fall,classic-style&attr_color=charcoal,brown&attr_size=large,x-small&cnep=0";
matched = string.match(/(product_cats|product_tag|attr_color|attr_size)=[^,]+/g);
for (i in matched)
{
console.log(matched[i].split("=")[1]);
}
will produce output as
mens-jeans
charcoal
large

There is no need for regular expressions. just use splits and joins.
var s = '?cgan=1&product_cats=mens-jeans,shirts&product_tags=fall,classic-style&attr_color=charcoal,brown&attr_size=large,x-small&cnep=0';
var query = s.split('?')[1],
pairs = query.split('&'),
allowed = ['product_cats', 'product_tags', 'attr_color', 'attr_size'],
results = [];
$.each(pairs, function(i, pair) {
var key_value = pair.split('='),
key = key_value[0],
value = key_value[1];
if (allowed.indexOf(key) > -1) {
results.push(value);
}
});
console.log(results.join(','));
($.each is from jQuery, but can easily be replaced if jQuery is not around)

Regex remove repeated characters from a string by javascript

I have found a way to remove repeated characters from a string using regular expressions.
function RemoveDuplicates() {
var str = "aaabbbccc";
var filtered = str.replace(/[^\w\s]|(.)\1/gi, "");
alert(filtered);
}
Output: abc
this is working fine.
But if str = "aaabbbccccabbbbcccccc" then output is abcabc.
Is there any way to get only unique characters or remove all duplicates one?
Please let me know if there is any way.

A lookahead like "this, followed by something and this":
var str = "aaabbbccccabbbbcccccc";
console.log(str.replace(/(.)(?=.*\1)/g, "")); // "abc"
Note that this preserves the last occurrence of each character:
var str = "aabbccxccbbaa";
console.log(str.replace(/(.)(?=.*\1)/g, "")); // "xcba"
Without regexes, preserving order:
var str = "aabbccxccbbaa";
console.log(str.split("").filter(function(x, n, s) {
return s.indexOf(x) == n
}).join("")); // "abcx"

This is an old question, but in ES6 we can use Sets. The code looks like this:
var test = 'aaabbbcccaabbbcccaaaaaaaasa';
var result = Array.from(new Set(test)).join('');
console.log(result);

Live replacement for regular expressions with Javascript

I'm writing a code for live replacement of specific words in a text field as the user types.
I'm using regex and javascript:
The first array has the regular expressions to be found, and the second array has the words that should replace any them.
source = new Array(/\srsrs\s/,/\sñ\s/,/\snaum\s/,/\svc\s/,/\scd\s/,/\sOq\s/,/\soke\s/,/\so\sq\s/,
/\soque\s/,/\soqe\s/,/\spq\s/,/\sq\s/,/\sp\/\s/g,/\spra\s/,/\sp\s/,/\stbm\s/,
/\stb\s/,/\std\s/,/\sblz\s/,/\saki\s/,/\svlw\s/,/\smara\s/,/\sqlq\s/,/\sqq\s/,
/\srpz\s/,/\smsm\s/,/\smto\s/,/\smtu\s/,/\sqro\s/,/\sqdo\s/,/\sqd\s/,/\sqnd\s/,
/\sqto\s/,/\sqm\s/,/\sjah\s/, /\sc\/\s/,/\scmg\s/,/\s\+\sou\s\-\s/,/\sflw\s/,
/\sxau\s/,/\sto\s/,/\sta\s/);
after = new Array("risos","não","não","você","cadê","o que","o que","o que","o que","o que","porque",
"que","para","para","para","também","também","tudo","beleza","aqui","valeu","maravilhoso",
"qualquer","qualquer","rapaz","mesmo","muito","muito","quero","quando","quando","quando",
"quanto","quem","Já","com","comego","mais ou menos","falow","tchau","estou","está");
This is the function that does the replacement:
function replacement(){
for(i=0; i<source.length; i++){
newtext = " "+document.getElementById("translation").value+" ";
console.log(newtext);
if(myregex = newtext.match(source[i])){
newafter = after[i];
rafael = myregex+" ";
document.getElementById("translation").value = document.getElementById("translation").value.replace(rafael, newafter);
}
}
}
My problem is every time the function is called to replace an expression with only one letter, the replacement is being made on the first occurrence of that letter, even within a word. I thought looking for that letter with \s before and after would solve it, but it didn't.

If you're looking only to match a word, you should put \b before and after (word boundary). This will ensure that you don't match parts of words. Also note that you are corrupting your regex by concatenating a string. Try this instead:
var in = document.getElementById("translation").value;
if( in.charAt(in.length-1) == " ") { // user has just finished typing a word
// this avoids interrupting the word being typed
var l = source.length, i;
for( i=0; i<l; i++) in = in.replace(source[i],after[i]);
document.getElementById("translation").value = in;
}

You need to add a g (global) modified to regexes so that it will replace all occurrences and use \b instead of \s to mark word boundaries.
source = new Array(/\brsrs\b/g,/\bñ\b/g, etc
On a side note, since all your regexes follow the same pattern it might be easier to just do:
source = new Array( 'rsr', 'ñ', 'naum', etc );
if( myregex = newtext.match( new Regexp( "\b"+source[i]+"\b", 'g' ) ) ) {
...

If by "live replacement" you mean calling function replacement at each keystroke then \b at the end will not help you, you should indeed use \s. However in your replacement function your are adding a space to the text field value so your single character words are triggering the replacement.
Here is my refactoring of your code :
(function () { // wrap in immediate function to hide local variables
source = [ [/\brsrs\s$/, "risos"], // place reg exp and replacement next to each other
[/\b(ñ|naum)\s$/, "não"], // note combined regexps
[/\bvc\s$/, "você"]
// ...
]; // not also use of array literals in place of new Array
document.getElementById ("translation").addEventListener ('keyup', function (ev) {
var t = this.value // fetch text area value
, m
, i = source.length;
while (i--) // for each possible match
if ((m = t.match(source[i][0]))) { // does this one match ?
// replace match : first remove the match string (m[0]) from the end of
// the text string, then add the replacement word followed by a space
this.value = t.slice (0, -m[0].length) + source[i][1] + ' ';
return; // done
}
}, false);
}) ();
And the fiddle is : http://jsfiddle.net/jFYuV

In a somewhat different style, you could create a function that encapsulated the list of substitutions:
var substitutions = {
"rsrs": "risos",
"ñ": "não",
"naum": "não",
"vc": "você",
// ...
};
var createSubstitutionFunction = function(subs) {
var keys = [];
for (var key in subs) {
if (subs.hasOwnProperty(key)) {
keys[keys.length] = key;
}
}
var regex = new RegExp("\\b" + keys.join("\\b|\\b") + "\\b", "g");
return function(text) {
return text.replace(regex, function(match) {
return subs[match];
});
};
};
var replacer = createSubstitutionFunction(substitutions);
You would use it like this:
replacer("Some text with rsrs and naum and more rsrs and vc")
// ==> "Some text with risos and não and more risos and você"

Develop Reference

JavaScript is the programming language of the Web.

Regex to capture Ids from text - javascript

You can use grouping and then another regexp: var regex = /(<!--Start)([0-9]{4})/ig; var str = document.getElementById('myHtml').innerHTML; var matches = str.match(regex); for(var i=0;i<matches.length;i++){ var m = matches[i]; var num = m.match(/(\d+)/)[1]; console.log(num); }

Related

JavaScript - How to underline the first character of every word

Split sentence by space mixed up my index

Javascript get query string values using non-capturing group

Regex remove repeated characters from a string by javascript

Live replacement for regular expressions with Javascript

Categories

Resources