Javascript Regex - match int after string in for loop - javascript

I am trying to capture the counts associated with the keywords in the string txt. All the keywords are loaded into an array ahead of time.
This code is in jquery/javascript. I cannot hard code string keywords so that is why they are stored in an array. Please assist me in finding what goes in place of "Reg Expression" before and/or after the keyword variable within the loop.
The html br can be used to end that regexmatch in that iteration of the loop.
Trying to end up with keywordCount = "2, 5, 11"
//String I need to search through
var txt = "Edit Req'd2<br>Errors5<br>Pndg App11<br>";
//array of keywords I can use to find associated counts to keywords
var keyword = ["Edit Req'd", "Errors", "Pndg App"];
//empty string declared before loop
var keywordCount = '';
for (i = 0; i < keyword.length; i++) {
// takes the comma off end of first entry in array
// might not be needed or another method might be better?
keyword[i] = $.trim(keyword[i]);
//regex expression generated using keyword and unknown expression
var regexmatch = RegExp("Reg Expression" + keyword + "Reg Expression")
//use regex expression to generate string containing counts
keywordCount += (txt.match(regexmatch)) + ",";
}

Here is the example which may helps you in achieving your required output.
//String I need to search through
var txt = "Edit Req'd2<br>Errors5<br>Pndg App11<br>";
//array of keywords I can use to find associated counts to keywords
var keyword = ["Edit Req'd", "Errors", "Pndg App"];
//empty string declared before loop
var keywordCount = '';
var i = 0;
var keywordCount = "";
var splittedValue = "";
while (i < keyword.length) {
if (txt.indexOf(keyword[i]/i)) {
splittedValue = txt.split(keyword[i]);
if (keywordCount === "") {
keywordCount = splittedValue[1].split("<br>")[0];
} else {
keywordCount += ", " + splittedValue[1].split("<br>")[0];
}
}
i += 1;
}
console.log(keywordCount);

I would use a digit/numeric range to match the number ([0-9]; this is pretty basic regex stuff), and use a group to match any of the keywords:
Any number of digits: [0-9]+
Any of your keywords: (something|somethingelse|other)
You can use capture-groups as well to have match() return them separately:
var keyword = ["Edit Req'd", "Errors", "Pndg App"];
var regexmatch = RegExp('(\b' + keyword.join('|') + ')([0-9]+)', 'g')
(note that we use the word-boundary \b to make sure keyword is not part of a longer word, and the 'g' flag for global to signal we want multiple results)
Now, match() will only match one result, and we want matches for every keyword, so we’ll use exec() instead:
var txt = "Edit Req'd2<br>Errors5<br>Pndg App11<br>";
var keyword = ["Edit Req'd", "Errors", "Pndg App"];
var regex = RegExp('(\b' + keyword.join('|') + ')([0-9]+)', 'g')
var match = regex.exec( txt ); // ...find the first match
var numbers = [];
while ( match !== null ) {
numbers.push( match[ 2 ]); // #0 of exec() its return-value is the result as a whole: "Edit Req'd2"
// #1 is the value of the first capture-group: "Edit Req'd"
// #2 is the value of the second capture-group: "2"
match = regex.exec( txt ); // ...find the next match
}
console.log("the numbers are:", numbers);
Lastly, do note that regular expressions may look cool, they are not always the fastest (performance-wise). If performance matters a lot, you could use (for example) indexOf() instead.
From your question it seems like you could brush up on your knowledge of regular expressions a little bit. There’s a ton of articles around (just search for “regular expressions basics” or “regex 101”) – like this one:
https://medium.com/factory-mind/regex-tutorial-a-simple-cheatsheet-by-examples-649dc1c3f285*

Related

How to define a line break in extendscript for Adobe Indesign

I am using extendscript to build some invoices from downloaded plaintext emails (.txt)
At points in the file there are lines of text that look like "Order Number: 123456" and then the line ends. I have a script made from parts I found on this site that finds the end of "Order Number:" in order to get a starting position of a substring. I want to use where the return key was hit to go to the next line as the second index number to finish the substring. To do this, I have another piece of script from the helpful people of this site that makes an array out of the indexes of every instance of a character. I will then use whichever array object is a higher number than the first number for the substring.
It's a bit convoluted, but I'm not great with Javascript yet, and if there is an easier way, I don't know it.
What is the character I need to use to emulate a return key in a txt file in javascript for extendscript for indesign?
Thank you.
I have tried things like \n and \r\n and ^p both with and without quotes around them but none of those seem to show up in the array when I try them.
//Load Email as String
var b = new File("~/Desktop/Test/email.txt");
b.open('r');
var str = "";
while (!b.eof)
str += b.readln();
b.close();
var orderNumberLocation = str.search("Order Number: ") + 14;
var orderNumber = str.substring(orderNumberLocation, ARRAY NUMBER GOES HERE)
var loc = orderNumberLocation.lineNumber
function indexes(source, find) {
var result = [];
for (i = 0; i < source.length; ++i) {
// If you want to search case insensitive use
// if (source.substring(i, i + find.length).toLowerCase() == find) {
if (source.substring(i, i + find.length) == find) {
result.push(i);
}
}
alert(result)
}
indexes(str, NEW PARAGRAPH CHARACTER GOES HERE)
I want all my line breaks to show up as an array of indexes in the variable "result".
Edit: My method of importing stripped all line breaks from the document. Using the code below instead works better. Now \n works.
var file = File("~/Desktop/Test/email.txt", "utf-8");
file.open("r");
var str = file.read();
file.close();
You need to use Regular Expressions. Depending on the fields do you need to search, you'l need to tweek the regular expressions, but I can give you a point. If the fields on the email are separated by new lines, something like that will work:
var str; //your string
var fields = {}
var lookFor = /(Order Number:|Adress:).*?\n/g;
str.replace(lookFor, function(match){
var order = match.split(':');
var field = order[0].replace(/\s/g, '');//remove all spaces
var value = order[1];
fields[field]= value;
})
With (Order Number:|Adress:) you are looking for the fields, you can add more fields separated the by the or character | ,inside the parenthessis. The .*?\n operators matches any character till the first break line appears. The g flag indicates that you want to look for all matches. Then you call str.replace, beacause it allows you to perfom a single task on each match. So, if the separator of the field and the value is a colon ':', then you split the match into an array of two values: ['Order number', 12345], and then, store that matches into an object. That code wil produce:
fields = {
OrderNumber: 12345,
Adresss: "my fake adress 000"
}
Please try \n and \r
Example: indexes(str, "\r");
If i've understood well, wat you need is to str.split():
function indexes(source, find) {
var order;
var result = [];
var orders = source.split('\n'); //returns an array of strings: ["order: 12345", "order:54321", ...]
for (var i = 0, l = orders.length; i < l; i++)
{
order = orders[i];
if (order.match(/find/) != null){
result.push(i)
}
}
return result;
}

Javascript regex replace with different values

I'd like to know if it is possible to replace every matching pattern in the string with not one but different values each time.
Let's say I found 5 matches in a text and I want to replace first match with a string, second match with another string, third match with another and so on... is it achievable?
var synonyms = ["extremely", "exceedingly", "exceptionally", "especially", "tremendously"];
"I'm very upset, very distress, very agitated, very annoyed and very pissed".replace(/very/g, function() {
//replace 5 matches of the keyword every with 5 synonyms in the array
});
You may try to replace the matches inside a replace callback function:
var synonyms = ["extremely", "exceedingly", "exceptionally", "especially", "tremendously"];
var cnt = 0;
console.log("I'm very upset, very distress, very agitated, very annoyed and very pissed (and very anxious)".replace(/very/g, function($0) {
if (cnt === synonyms.length) cnt = 0;
return synonyms[cnt++]; //replace 5 matches of the keyword every with 5 synonyms in the array
}));
If you have more matches than there are items in the array, the cnt will make sure the array items will be used from the first one again.
A simple recursive approach. Be sure your synonyms array has enough elements to cover all matches in your string.
let synonyms = ["extremely", "exceedingly", "exceptionally"]
let yourString = "I'm very happy, very joyful, and very handsome."
let rex = /very/
function r (s, i) {
let newStr = s.replace(rex, synonyms[i])
if (newStr === s)
return s
return r(newStr, i+1)
}
r(yourString, 0)
I would caution that if your replacement would also match your regex, you need to add an additional check.
function replaceExpressionWithSynonymsInText(text, regX, synonymList) {
var
list = [];
function getSynonym() {
if (list.length <= 0) {
list = Array.from(synonymList);
}
return list.shift();
}
return text.replace(regX, getSynonym);
}
var
synonymList = ["extremely", "exceedingly", "exceptionally", "especially", "tremendously"],
textSource = "I'm very upset, very distress, very agitated, very annoyed and very pissed",
finalText = replaceExpressionWithSynonymsInText(textSource, (/very/g), synonymList);
console.log("synonymList : ", synonymList);
console.log("textSource : ", textSource);
console.log("finalText : ", finalText);
The advantages of the above approach are, firstly one does not alter the list of synonyms,
secondly working internally with an ever new copy of the provided list and shifting it,
makes additional counters obsolete and also provides the opportunity of being able to
shuffle the new copy (once it has been emptied), thus achieving a more random replacement.
Using the example you've provided, here's what I would do.
First I would set up some variables
var text = "I'm very upset, very distress, very agitated, very annoyed and very pissed";
var regex = /very/;
var synonyms = ["extremely", "exceedingly", "exceptionally", "especially", "tremendously"];
Then count the number of matches
var count = text.match(/very/g).length;
Then I would run a loop to replace the matches with the values from the array
for(var x = 0; x < count; x++) {
text = text.replace(regex, synonyms[x]);
}
You can do it with the use of Replace() function, where you use 'g' option for global matching (finds all occurrences of searched expression). For the second argument you can use a function which returns values from your predefined array.
Here is a little fiddle where you can try it out.
var str = "test test test";
var rep = ["one", "two", "three"];
var ix = 0;
var res = str.replace(/test/g, function() {
if (ix == rep.length)
ix = 0;
return rep[ix++];
});
$("#result").text(res);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<p id="result">
Result...
</p>
Yes it is achievable. There may be a more efficient answer than this, but the brute force way is to double the length of your regex. i.e. Instead of searching just A, search (/A){optionalText}(/A) and then replace /1 /2 as needed. If you need help with the regex itself, provide some code for what you're searching for and someone with more rep than me can probably comment the actual regexp.

I want to remove comma between two double quote and allowed space between two double quote

I have one search string like txtKeyword and it will accept all the below case for highlighting skills
1.C# JAVA
2."C#" AND "JAVA"
5.C# "JAVA"
6.C#,'JAVA'
7.C#,"JAVA"
8.C#,"JAVA",'PHP'
9.C#,"JAVA" AND PHP
In all above case C# and JAVA are highlighted..but for
"MY SQL","SQL SERVER"
it will not highlight MY SQL and SQL SERVER because of space between two double quote
so final array is like ["MY,SQL,SQL,SERVER"]
But i need final array like ["MY SQL,SQL SERVER,C#,Java"]
Question in logical terms-
I want to remove comma between two double quote and allowed space between two double quote,and remove all spaces outside two double quote and split array with comma
Here is my code snippet
//Grab basic search string on resume result list
var grabBasicSearchKeyword = $("#txtKeyword").val();
if (grabBasicSearchKeyword!=null)
{
//Remove Unnecessary Characters From Basic Search String
var finalBasicSearchString = grabBasicSearchKeyword.replace(/[`~!#$%^&*()_|\-=?;:'"<>\{\}\[\]\\\/]/gi, '');
//Check And Remove: "OR","AND","NOT","or","and" & "not" From Basic Search String
if (finalBasicSearchString.indexOf('OR') || finalBasicSearchString.indexOf('AND') || finalBasicSearchString.indexOf('NOT') || finalBasicSearchString.indexOf('or') || finalBasicSearchString.indexOf('and') || finalBasicSearchString.indexOf('not'))
{
var findOR = finalBasicSearchString.replace(/OR/g, ' ');
var findor = findOR.replace(/or/g, ' ');
var findAND = findor.replace(/AND/g, ' ');
var findand = findAND.replace(/and/g, ' ');
var findNOT = findand.replace(/NOT/g, ' ');
var findnot = findNOT.replace(/not/g, ' ');
var removeDoubleQuote = findnot.replace(/"/g, '');
var removeBackSlash = removeDoubleQuote.replace(/\\/g, '');
var removeComma = removeBackSlash.replace(/,/g, ' ');
var finalArray = removeComma.split(" ");
for (var j = 0; j < finalArray.length; j++) {
if (masterSkillArray.contains(finalArray[j])) {
basicSearchHighlightArray.push(finalArray[j]);
}
}
$(".skil-list").highlight(basicSearchHighlightArray);
$(".post").highlight(basicSearchHighlightArray);
}
}
You'll want to use the 'ignore case' flag i for your logical operator patterns to cut down on the duplicate regexing.
You can do all of the logical replacements in a single regex - /\b(?:OR|AND|NOT)\b/gi (the i flag making the matches case insensitive).
You'll need to treat quoted strings separately. In this fiddle the quoted strings are matched and used to create a new string with only those terms. A second new string is then created by removing the quoted strings from the original input (as well as commas). The strings are then appended and split on ,.
As shown by the fiddle, rather than creating several variables, you can simply chain your replacements.

Live replacement for regular expressions with Javascript

I'm writing a code for live replacement of specific words in a text field as the user types.
I'm using regex and javascript:
The first array has the regular expressions to be found, and the second array has the words that should replace any them.
source = new Array(/\srsrs\s/,/\sñ\s/,/\snaum\s/,/\svc\s/,/\scd\s/,/\sOq\s/,/\soke\s/,/\so\sq\s/,
/\soque\s/,/\soqe\s/,/\spq\s/,/\sq\s/,/\sp\/\s/g,/\spra\s/,/\sp\s/,/\stbm\s/,
/\stb\s/,/\std\s/,/\sblz\s/,/\saki\s/,/\svlw\s/,/\smara\s/,/\sqlq\s/,/\sqq\s/,
/\srpz\s/,/\smsm\s/,/\smto\s/,/\smtu\s/,/\sqro\s/,/\sqdo\s/,/\sqd\s/,/\sqnd\s/,
/\sqto\s/,/\sqm\s/,/\sjah\s/, /\sc\/\s/,/\scmg\s/,/\s\+\sou\s\-\s/,/\sflw\s/,
/\sxau\s/,/\sto\s/,/\sta\s/);
after = new Array("risos","não","não","você","cadê","o que","o que","o que","o que","o que","porque",
"que","para","para","para","também","também","tudo","beleza","aqui","valeu","maravilhoso",
"qualquer","qualquer","rapaz","mesmo","muito","muito","quero","quando","quando","quando",
"quanto","quem","Já","com","comego","mais ou menos","falow","tchau","estou","está");
This is the function that does the replacement:
function replacement(){
for(i=0; i<source.length; i++){
newtext = " "+document.getElementById("translation").value+" ";
console.log(newtext);
if(myregex = newtext.match(source[i])){
newafter = after[i];
rafael = myregex+" ";
document.getElementById("translation").value = document.getElementById("translation").value.replace(rafael, newafter);
}
}
}
My problem is every time the function is called to replace an expression with only one letter, the replacement is being made on the first occurrence of that letter, even within a word. I thought looking for that letter with \s before and after would solve it, but it didn't.
If you're looking only to match a word, you should put \b before and after (word boundary). This will ensure that you don't match parts of words. Also note that you are corrupting your regex by concatenating a string. Try this instead:
var in = document.getElementById("translation").value;
if( in.charAt(in.length-1) == " ") { // user has just finished typing a word
// this avoids interrupting the word being typed
var l = source.length, i;
for( i=0; i<l; i++) in = in.replace(source[i],after[i]);
document.getElementById("translation").value = in;
}
You need to add a g (global) modified to regexes so that it will replace all occurrences and use \b instead of \s to mark word boundaries.
source = new Array(/\brsrs\b/g,/\bñ\b/g, etc
On a side note, since all your regexes follow the same pattern it might be easier to just do:
source = new Array( 'rsr', 'ñ', 'naum', etc );
if( myregex = newtext.match( new Regexp( "\b"+source[i]+"\b", 'g' ) ) ) {
...
If by "live replacement" you mean calling function replacement at each keystroke then \b at the end will not help you, you should indeed use \s. However in your replacement function your are adding a space to the text field value so your single character words are triggering the replacement.
Here is my refactoring of your code :
(function () { // wrap in immediate function to hide local variables
source = [ [/\brsrs\s$/, "risos"], // place reg exp and replacement next to each other
[/\b(ñ|naum)\s$/, "não"], // note combined regexps
[/\bvc\s$/, "você"]
// ...
]; // not also use of array literals in place of new Array
document.getElementById ("translation"​​​​​​​).addEventListener ('keyup', function (ev) {
var t = this.value // fetch text area value
, m
, i = source.length;
while (i--) // for each possible match
if ((m = t.match(source[i][0]))) { // does this one match ?
// replace match : first remove the match string (m[0]) from the end of
// the text string, then add the replacement word followed by a space
this.value = t.slice (0, -m[0].length) + source[i][1] + ' ';
return; // done
}
}, false);
}) ();​
And the fiddle is : http://jsfiddle.net/jFYuV
In a somewhat different style, you could create a function that encapsulated the list of substitutions:
var substitutions = {
"rsrs": "risos",
"ñ": "não",
"naum": "não",
"vc": "você",
// ...
};
var createSubstitutionFunction = function(subs) {
var keys = [];
for (var key in subs) {
if (subs.hasOwnProperty(key)) {
keys[keys.length] = key;
}
}
var regex = new RegExp("\\b" + keys.join("\\b|\\b") + "\\b", "g");
return function(text) {
return text.replace(regex, function(match) {
return subs[match];
});
};
};
var replacer = createSubstitutionFunction(substitutions);
You would use it like this:
replacer("Some text with rsrs and naum and more rsrs and vc")
// ==> "Some text with risos and não and more risos and você"

Javascript regex pattern array with loop

I have attempted to create a function that will replace multiple regular expression values from an array. This works if the array does not contain quotation marks of any kind, this is problematic when I want to use a comma in my pattern. So I've been trying to find an alternative way of serving the pattern with no luck. Any ideas?
function removeCharacters(str){
//ucpa = unwanted character pattern array
//var ucpa = [/{/g,/}/g,/--/g,/---/g,/-/g,/^.\s/];
var ucpa = ["/{/","/}/","/--/","/---/","/-/","/^.\s/","/^,\s/"];
for (var i = 0; i < ucpa.length; i++){
//does not work
var pattern = new RegExp(ucpa[i],"g");
var str = str.replace(pattern, " ");
}
return str;
}
WORKING:
function removeCharacters(str){
//ucpa = unwanted character pattern array
var ucpa = [/{/g,/}/g,/--/g,/---/g,/-/g,/^.\s/,/^,\s/];
for (var i = 0; i < ucpa.length; i++){
var str = str.replace(ucpa[i], " ");
}
return str;
}
REFINED:
function removeCharacters(str){
var pattern = /[{}]|-{1,3}|^[.,]\s/g;
str = str.replace(pattern, " ");
return str;
}
The RegExp constructor takes raw expressions, not wrapped in / characters.
Therefore, all of your regexes contain two /s each, which isn't what you want.
Instead, you should make an array of actual regex literals:
var ucpa = [ /.../g, /",\/.../g, ... ];
You can also wrap all that into a single regex:
var str = str.replace(/[{}]|-{1,3}|^[.,]\s/g, " ")
although I'm not sure if that's exactly what you want since some of your regexes are nonsensical, for example ,^\s could never match.

Categories

Resources