I'm writing a code for live replacement of specific words in a text field as the user types.
I'm using regex and javascript:
The first array has the regular expressions to be found, and the second array has the words that should replace any of them.
// Abbreviation patterns to search for. Every pattern requires a whitespace
// character on both sides of the abbreviation. The entry at index i is
// replaced by the entry at the same index of `after`.
// NOTE(review): only the `p/` pattern carries the `g` flag — looks unintentional; confirm.
source = new Array(/\srsrs\s/,/\sñ\s/,/\snaum\s/,/\svc\s/,/\scd\s/,/\sOq\s/,/\soke\s/,/\so\sq\s/,
/\soque\s/,/\soqe\s/,/\spq\s/,/\sq\s/,/\sp\/\s/g,/\spra\s/,/\sp\s/,/\stbm\s/,
/\stb\s/,/\std\s/,/\sblz\s/,/\saki\s/,/\svlw\s/,/\smara\s/,/\sqlq\s/,/\sqq\s/,
/\srpz\s/,/\smsm\s/,/\smto\s/,/\smtu\s/,/\sqro\s/,/\sqdo\s/,/\sqd\s/,/\sqnd\s/,
/\sqto\s/,/\sqm\s/,/\sjah\s/, /\sc\/\s/,/\scmg\s/,/\s\+\sou\s\-\s/,/\sflw\s/,
/\sxau\s/,/\sto\s/,/\sta\s/);
// Replacement text for the pattern at the same index in `source`.
after = new Array("risos","não","não","você","cadê","o que","o que","o que","o que","o que","porque",
"que","para","para","para","também","também","tudo","beleza","aqui","valeu","maravilhoso",
"qualquer","qualquer","rapaz","mesmo","muito","muito","quero","quando","quando","quando",
"quanto","quem","Já","com","comego","mais ou menos","falow","tchau","estou","está");
This is the function that does the replacement:
// Walks every pattern in `source` and, when it matches the space-padded
// field text, swaps the matched text for the entry of `after` at the same index.
// None of the variables assigned here (i, newtext, myregex, newafter, rafael)
// is declared, so they are all created as implicit globals.
function replacement(){
for(i=0; i<source.length; i++){
// Pad with spaces so a word at the very start or end can satisfy \s...\s.
newtext = " "+document.getElementById("translation").value+" ";
console.log(newtext);
// Assignment in the condition is deliberate: keep the match result, skip on null.
if(myregex = newtext.match(source[i])){
newafter = after[i];
// Concatenation converts the match array to a string and appends a space.
rafael = myregex+" ";
// replace() with a string pattern swaps only the first occurrence of that
// exact text, searched in the unpadded field value.
document.getElementById("translation").value = document.getElementById("translation").value.replace(rafael, newafter);
}
}
}
My problem is every time the function is called to replace an expression with only one letter, the replacement is being made on the first occurrence of that letter, even within a word. I thought looking for that letter with \s before and after would solve it, but it didn't.
If you're looking only to match a word, you should put \b before and after (word boundary). This will ensure that you don't match parts of words. Also note that you are corrupting your regex by concatenating a string. Try this instead:
// Apply every substitution once the user has finished typing a word.
// (`in` is a reserved word in JavaScript and cannot be used as a variable
// name, hence `text`.)
var text = document.getElementById("translation").value;
if( text.charAt(text.length-1) === " ") { // user has just finished typing a word
    // this avoids interrupting the word being typed
    var l = source.length, i;
    // Pass the RegExp objects straight to replace(); concatenating them with
    // a string would turn them into plain text patterns.
    for( i=0; i<l; i++) text = text.replace(source[i],after[i]);
    document.getElementById("translation").value = text;
}
You need to add a g (global) modifier to the regexes so that all occurrences are replaced, and use \b instead of \s to mark word boundaries.
source = new Array(/\brsrs\b/g,/\bñ\b/g, etc
On a side note, since all your regexes follow the same pattern it might be easier to just do:
source = new Array( 'rsr', 'ñ', 'naum', etc );
if( myregex = newtext.match( new Regexp( "\b"+source[i]+"\b", 'g' ) ) ) {
...
If by "live replacement" you mean calling function replacement at each keystroke then \b at the end will not help you, you should indeed use \s. However, in your replacement function you are adding a space to the text field value, so your single-character words are triggering the replacement.
Here is my refactoring of your code :
(function () { // wrap in immediate function to hide local variables
    // Each entry places a pattern next to its replacement. Every pattern:
    //  - captures the delimiter before the word (start of text or whitespace)
    //    so it can be put back; \b is not used because it only understands
    //    ASCII word characters and never matches before a standalone "ñ",
    //  - ends with \s$, i.e. it fires only when the user has just typed
    //    whitespace after the abbreviation at the very end of the text.
    // Declared with var so the table really stays local to this function.
    var source = [
        [/(^|\s)rsrs\s$/, "risos"],
        [/(^|\s)(?:ñ|naum)\s$/, "não"], // note combined regexps
        [/(^|\s)vc\s$/, "você"]
        // ...
    ]; // note also the use of array literals in place of new Array

    document.getElementById("translation").addEventListener('keyup', function (ev) {
        var t = this.value; // fetch text area value
        var m;
        var i = source.length;
        while (i--) { // for each possible match
            if ((m = t.match(source[i][0]))) { // does this one match ?
                // keep everything before the match, restore the captured
                // delimiter, then add the replacement word followed by a space
                this.value = t.slice(0, m.index) + m[1] + source[i][1] + ' ';
                return; // done
            }
        }
    }, false);
})();
And the fiddle is : http://jsfiddle.net/jFYuV
In a somewhat different style, you could create a function that encapsulated the list of substitutions:
var substitutions = {
    "rsrs": "risos",
    "ñ": "não",
    "naum": "não",
    "vc": "você",
    // ...
};

/**
 * Builds a function that replaces every whole-word occurrence of a key of
 * `subs` with its value.
 *
 * Word boundaries are checked by hand instead of with \b, because \b only
 * knows ASCII word characters: it treats "ñ" as punctuation, so a standalone
 * "ñ" is never matched while the "ñ" inside "añb" is.
 *
 * @param {Object<string, string>} subs - abbreviation -> replacement text
 * @returns {function(string): string} replacer for arbitrary text
 */
var createSubstitutionFunction = function(subs) {
    // Characters that count as part of a word: ASCII word characters plus the
    // Latin-1 Supplement / Latin Extended letters (covers ñ, ã, ê, ç, ...).
    var WORD_CHARS = "A-Za-z0-9_\\u00C0-\\u024F";

    var keys = [];
    for (var key in subs) {
        // An empty key would match at every position; ignore it.
        if (Object.prototype.hasOwnProperty.call(subs, key) && key !== "") {
            keys.push(key);
        }
    }

    // Nothing to substitute: an empty alternation would match the empty
    // string everywhere and insert "undefined".
    if (keys.length === 0) {
        return function(text) {
            return text;
        };
    }

    // Longest keys first so that a phrase such as "o q" wins over "o".
    keys.sort(function(a, b) {
        return b.length - a.length;
    });

    // Keys are literal text, so neutralise regex metacharacters ("p/", "+ ou -").
    var alternatives = keys.map(function(k) {
        return k.replace(/[\\^$*+?.()|[\]{}]/g, "\\$&");
    });

    // Group 1 captures the delimiter before the word so it can be restored;
    // the trailing check is a lookahead so the delimiter after the word is
    // still available as the leading delimiter of the next match.
    var regex = new RegExp(
        "(^|[^" + WORD_CHARS + "])(" + alternatives.join("|") + ")(?![" + WORD_CHARS + "])",
        "g"
    );

    return function(text) {
        return text.replace(regex, function(match, before, word) {
            return before + subs[word];
        });
    };
};

var replacer = createSubstitutionFunction(substitutions);
You would use it like this:
replacer("Some text with rsrs and naum and more rsrs and vc")
// ==> "Some text with risos and não and more risos and você"
Related
I want the replaceFunction to run only one time. For now works correctly only on first time, E-1 return Ε-1 (APPLE) but when user try to edit text field again system detect
Ε-1 and return Ε-1 (APPLE) (APPLE)..
td.onchange = function(e) {
this.value = this.value.replace(/(\E-(\d+))/g, replaceFunction);
function replaceFunction(match) {
// add additional rules here for more cases
if (match === "E-1") return "Ε-1 (APPLE)";
if (match === "E-2") return "Ε-2 (SUMSUNG)";
.
.
.
if(match === "E-99") return "Ε-99 (LG)";
return match;
}
}
How I stop this?
You can use something like this one more condition:
if (match === "E-1" && match !== "Ε-1 (APPLE)") return "Ε-1 (APPLE)";
this can be optimized, if you put the mapping into object:
var map = {
"E-1": "Ε-1 (APPLE)",
...
}
if (map[match] && !map[match] !== match) { return map[match]; }
and for this to work you will need regex that also match the word after in bracket:
var names = ['APPLE', 'SAMSUNG'];
var re = new RegExp('(E-(\\d+))(?! \\((?:' + names.join('|') + ')\\))', 'g');
Yet another solution is to use only an array (this will only work if your E-NUM numbers match the indexes in the array):
var names = ['APPLE', 'SAMSUNG'];
var re = new RegExp('(E-(\\d+))(?! \\((?:' + names.join('|') + ')\\))', 'g');
// regex explanation, same as yours but \\d is because it's a string
// we create negative look ahead so we check if next text
// after E-1 is not " (" and any of the names.
// we use (?: to group what's inside it's the same as with ()
// but the value will not be captured so there will be
// no param in function for this group
// so this regex will be the same as yours but will not match "E-1 (APPLE)"
// only "E-1"
this.value = this.value.replace(re, replaceFunction);
/**
 * Replacer callback for `re`: appends the name that belongs to an E-number.
 *
 * @param {string} match - the full matched text, e.g. "E-1"
 * @param {string} eg - first capture group (the same text as `match`)
 * @param {string} num - second capture group, the digits after "E-"
 * @returns {string} "E-n (NAME)" for a known number, otherwise `match` unchanged
 */
function replaceFunction(match, eg, num) {
    // E-numbers start at 1, array indexes at 0
    var i = parseInt(num, 10) - 1;
    if (i >= 0 && i < names.length) {
        return match + ' (' + names[i] + ')';
    }
    // Unknown number: always return a string, otherwise replace() would
    // insert the text "undefined" in place of the match.
    return match;
}
The regex and the function can be created outside of the change handler, so that a new function is not created on each change.
When replacing, also optionally lookahead for a space and parentheses that come after. This way, in the replacer function, you can check to see if what follows is already the value you want (eg, (APPLE)). If it is, then do nothing - otherwise, replace with the new string:
const replacementsE = [
  , // nothing for E-0
  'APPLE',
  'SUMSUNG',
];
// On change, put " (NAME)" after every known E-number unless that exact
// annotation already follows it.
td.onchange = function(e) {
  // Decides what one "E-<digits>" match should become.
  const annotate = (match, digits, followingString) => {
    const name = replacementsE[digits];
    const alreadyAnnotated = name === followingString;
    return name && !alreadyAnnotated ? `E-${digits} (${name})` : match;
  };
  td.value = td.value.replace(/E-(\d+)(?= \(([^)]+)\)|)/g, annotate);
}
<input id="td">
What /E-(\d+)(?= \(([^)]+)\)|)/ does is:
E- - Match E-
(\d+) - Capture digits in a group
(?= \(([^)]+)\)|) Lookahead for either:
\(([^)]+)\) A literal (, followed by non-) characters, followed by ). If this is matched, the non-) characters will be the second capture group
| - OR match the empty string (so that the lookahead works)
The digits will be the first capture group; the digits variable in the callback. The non-) characters will be the second capture group; the followingString variable in the callback.
If you also want to permit the final ) to be deleted, then make the final ) optional, and also make sure the character set does not match spaces (so that the space following APPLE, with no end ), doesn't get matched):
const replacementsE = [
  , // nothing for E-0
  'APPLE',
  'SUMSUNG',
];
// On change, put " (NAME)" after every known E-number. The lookahead also
// accepts an annotation whose closing ")" has been deleted, and the captured
// name stops at the first space or ")".
td.onchange = function(e) {
  td.value = td.value.replace(/E-(\d+)(?= \(([^) ]+)\)?|)/g, replaceFunction);
  // match: "E-<digits>"; digits: first capture group;
  // followingString: the name found in the parentheses after it, if any.
  function replaceFunction(match, digits, followingString) {
    const replacement = replacementsE[digits];
    // Unknown number, or the right name is already there: leave untouched.
    if (!replacement || replacement === followingString) {
      return match;
    }
    return `E-${digits} (${replacement})`;
  }
}
<input id="td">
If you want to permit any number of characters before the final ) to be deleted, then check if the replacement startsWith the following string:
const replacementsE = [
  , // nothing for E-0
  'APPLE',
  'SUMSUNG',
];
// On change, put " (NAME)" after every known E-number, but leave the text
// alone when what follows is the name or any leading part of it (so the user
// may delete characters from the end of the annotation).
td.onchange = function(e) {
  td.value = td.value.replace(/E-(\d+)(?= \(([^) ]+)\)?|)/g, replaceFunction);
  // match: "E-<digits>"; digits: first capture group;
  // followingString: the text found in the parentheses after it, or
  // undefined when nothing follows.
  function replaceFunction(match, digits, followingString) {
    const replacement = replacementsE[digits];
    // The explicit undefined check matters: startsWith(undefined) would
    // search for the literal text "undefined". A full match is covered by
    // startsWith as well.
    const isPrefixOfName =
      followingString !== undefined && replacement !== undefined && replacement.startsWith(followingString);
    if (!replacement || isPrefixOfName) {
      return match;
    }
    return `E-${digits} (${replacement})`;
  }
}
<input id="td">
I have made this very simplified version of a translation tool similar to Google Translate. The idea is to build this simple tool for a minority language in sweden called "jamska". The app is built up with a function that takes the string from a textarea with the ID #svenska and replaces words in the string using RegExp.
I've made an array called arr that's used in a for loop of the function as a dictionary. Each array item looks like this: var arr = [["eldröd", "eillrau"], ["oväder", "over"] ...]. The first word in each array item is in swedish, and the second word is in jamska. If the RegExp finds a matching word in the loop it replaces that word using this code:
// Translates the text of #svenska with the `arr` dictionary and writes the
// result into #jamska.
function translate() {
var str = $("#svenska").val();
var newStr = ""; // never read in this function
for (var i = 0; i < arr.length; i++) {
var replace = arr[i][0];
var replaceWith = arr[i][1];
// Match the word only when it is delimited by the start/end of the text or by
// a character outside a-z, 0-9, å, ä, ö (case-insensitive, all occurrences).
var re = new RegExp('(^|[^a-z0-9åäö])' + replace + '([^a-z0-9åäö]|$)', 'ig');
// $1 and $2 put the captured delimiters back around the translated word.
// Every pass works on the output of the previous pass, so a word that was
// already translated can be matched again by a later dictionary entry.
str = str.replace(re, "$1" + replaceWith + '$2');
}
$("#jamska").val(str);
}
The translate() is then called in an event handler for when the #svenska textarea gets a keyup, like this: $("#svenska").keyup(function() { translate(); });
The translated string is then assigned as the value of another textarea with the ID #jamska. So far, so good.
I have a problem though: if the translated word in jamska also is a word in swedish, the function translates that word too. This problem is occurring because I'm assigning the variable str to the translated version of the same variable, using: str = str.replace(re, "$1" + replaceWith + '$2');. The function is using the same variable over and over again to perform the translation.
Example:
The swedish word "brydd" is "fel" in jamska. "Fel" is also a word in swedish, so the word that I get after the translation is "felht", since the swedish word "fel" is "felht" in jamska.
Does anyone have any idea for how to work around this problem?
Instead of looking for each Jamska word in the input and replacing them with the respective translation, I would recommend to find any word ([a-z0-9åäö]+) in your text and replace this word either with its translation if one is found in the dictionary or with itself otherwise:
//var arr = [["eldröd", "eillrau"], ["oväder", "over"] ...]
// I'd better use dictionary instead of array to define your dictionary.
// Keys are the lower-case source words, values their translations
// (the same pairs as in `arr` above).
var dict = {
    eldröd: "eillrau",
    oväder: "over"
    // ...
};

/**
 * Translates `text` in a single pass: every word is looked up once, so the
 * output of one translation is never fed into another.
 *
 * @param {string} text - text to translate
 * @param {Object<string, string>} dictionary - lower-case word -> translation
 * @returns {string} text with known words translated; unknown words are kept
 *   exactly as typed
 */
function translateWords(text, dictionary) {
    return text.replace(/[a-z0-9åäö]+/ig, function(m) {
        var word = m.toLowerCase();
        // Own-property check: a plain lookup would also find inherited
        // members such as "constructor".
        return Object.prototype.hasOwnProperty.call(dictionary, word) ? dictionary[word] : m;
    });
}

var str = "eldröd test eillrau eillrau oväder over";
var translated = translateWords(str, dict);
console.log(translated);
Update:
If dictionary keys may be represented by phrases (i.e. technically appear as strings with spaces), the regex should be extended to include all these phrases explicitly. So the final regex would look like
(?:phrase 1|phrase 2|etc...)(?![a-z0-9åäö])|[a-z0-9åäö]+
It will try to match one of the phrases explicitly first and only then single words. The (?![a-z0-9åäö]) negative lookahead filters out phrases that are immediately followed by letters (e.g. varken bättre eller sämreåäö).
Phrases immediately preceded by letters are implicitly filtered out by the fact that a match is either the first one (and therefore is not preceded by any letter) or it is not the first one, in which case the previous match is separated from the current one by at least one non-word character.
//var arr = [["eldröd", "eillrau"], ["oväder", "over"] ...]
// I'd better use dictionary instead of array to define your dictionary.
// Keys are lower-case source words or phrases, values their translations.
var dict = {
    eldröd: "eillrau",
    oväder: "over",
    bättre: "better",
    "varken bättre eller sämre": "vär å int viller",
    "test test": "double test"
    // ...
};
var str = "eldröd test eillrau eillrau oväder over test test ";
str += "varken bättre eller sämre ";
str += "don't trans: varken bättre eller sämreåäö ";
str += "don't trans again: åäövarken bättre eller sämre";
// Multi-word keys must be listed explicitly in the regex. Longest first, so a
// longer phrase is preferred over a shorter one that starts the same way, and
// escaped, so punctuation inside a phrase is matched literally.
var phrases = Object.keys(dict)
    .filter(function(k) { return /\s/.test(k); })
    .sort(function(a, b) { return b.length - a.length; })
    .map(function(k) { return k.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&'); })
    .join('|');
// Without any phrase the group would be empty and match the empty string, so
// fall back to matching single words only.
var re = new RegExp(
    (phrases ? '(?:' + phrases + ')(?![a-z0-9åäö])|' : '') + '[a-z0-9åäö]+',
    'ig'
);
var translated = str.replace(re, function(m) {
    var word = m.toLowerCase();
    // Own-property check avoids inherited members such as "constructor";
    // unknown text is returned exactly as typed.
    return Object.prototype.hasOwnProperty.call(dict, word) ? dict[word] : m;
});
console.log(translated);
I'm trying to clean strings which has been transformed from word text but I'm stuck on removing special character '…'
By click on button "clean", script removes all dots and only one special character, however I need to remove all of them
Where is my mistake?
Here is my code and plunker with struggles
$scope.string = "My transformed string ………….........…...."
// Cleans dots and "…" characters from the text of every <em> element.
$scope.removeDots = function () {
var em = document.getElementsByTagName('em');
var reg = /\./g;
var hellip = /…/g
angular.forEach(em, function (item) {
// Only hand the text to process() when it contains at least one period.
if(item.innerText.match(reg)){
item.innerText = process(item.innerText)
}
if (item.innerText.match(hellip)){
// replace() with a string pattern removes only the first "…" it finds.
item.innerText = item.innerText.replace("…", "")
}
});
};
// Keeps the text up to and including the first period and strips every
// period that comes after it. Text without a period is returned unchanged.
function process( str ) {
  var firstPeriodThenRest = /^([^.]*\.)(.*)$/;
  return str.replace( firstPeriodThenRest, function ( whole, head, tail ) {
    var tailWithoutPeriods = tail.replace( /\./g, '' );
    return head + tailWithoutPeriods;
  });
}
There's a few problems here, but they can all be resolved by simply reducing the code to a single regex replace within process that will handle both periods and … entities:
// Runs process() over the text of every <em> element in the document.
$scope.removeDots = function () {
  var emphasized = document.getElementsByTagName('em');
  angular.forEach(emphasized, function (node) {
    var cleaned = process(node.innerText);
    node.innerText = cleaned;
  });
};
// Removes every period and every "…" character from the text.
function process( str ) {
  var periodOrEllipsis = /\.|…/g;
  var cleaned = str.replace( periodOrEllipsis, '' );
  return cleaned;
}
});
Plunker demo
You replace every occurrence of . in process, but only replace … once.
I don't see why don't you just do something like .replace(/(\.|…)/g, ''); the g modifier makes sure every match is replaced.
You can do both replacements by first replacing the occurrences of … with one point (because it might be the only thing you find), and then replacing any sequence of points by one:
// Turns every "…" into a period, then collapses each run of two or more
// periods into a single one.
function process( str ) {
  var ellipsesAsPeriods = str.replace(/…/g, '.');
  var collapsed = ellipsesAsPeriods.replace(/\.\.+/g, '.');
  return collapsed;
}
var test="My transformed string ………….........…....";
console.log(process(test));
One of the reasons your code did not replace everything, is that you used a string as find argument, which will result in one replacement only. By using the regular expression as find argument you can get the effect of the g modifier.
On my website I have a commentary field, where people can write whatever they want. To prevent spam and unserious comments, I'm using an angular filter in this way:
<span>{{comment | mouthWash}}</span>
The angular filter fetches an array containing banned words and scans through the input string and replaces all the occurences of the fetched words. The code for the filter is as below:
// Angular filter that replaces banned words in the input text with "mooh".
app.filter('mouthWash', function($http) {
// Filled in by the request below; stays undefined until it completes.
var badWords;
$http.get('js/objects/bad-words.json').success(function (data) {
badWords = data;
});
return function(input) {
angular.forEach(badWords, function(word){
// The word is used as the regex source without flags, so only its first,
// case-sensitive occurrence is replaced.
var regEx = new RegExp(word);
input = input.replace(regEx, "mooh");
});
return input;
};
});
bad-words.json is something like this:
["fuck", "ass", "shit", etc...]
So as an example <span>{{ "fuck this" | mouthWash}}</span> is outputted as <span>mooh this</span>
This is working perfectly, except that I want it to ignore whitespaces, to make it more bullet proof. I do not have much experience with regex, so if anyone had a simple soloution to this, I would be really grateful.
just change new RegExp(word, "ig"); to new RegExp("ig");
working example:
var words = ['pig', 'dog', '', ' ', 'cow'];
// Log each sample with its first "ig" replaced by "mooh"; the array itself
// is left untouched.
words.forEach(function(sample) {
  var igPattern = new RegExp("ig");
  console.log(sample.replace(igPattern, "mooh"));
});
Output:
"pmooh"
"dog"
""
" "
"cow"
This is the code I ended up with:
// Angular filter that replaces banned words with "mooh", also catching words
// whose letters are separated by whitespace ("b a d").
app.filter('mouthWash', function($http) {
    // Filled in by the request below; stays undefined until it completes.
    var badWords;
    $http.get('js/objects/bad-words.json').success(function (data) {
        badWords = data;
    });

    return function(input) {
        // A binding may not hold a string yet; calling .replace on it would throw.
        if (typeof input !== 'string') return input;

        angular.forEach(badWords, function(word){
            // An empty entry would yield a pattern that matches the empty
            // string and puts "mooh" between every character.
            if (typeof word !== 'string' || word.length === 0) return;

            // Interleave the letters with \s*. A one-letter word has no
            // "first ... last" pair, so it is used as is (it used to come out
            // as "a\s*a", i.e. the letter twice).
            var str = "";
            if (word.length > 1) {
                str = word.substring(0,1)+"\\s*";
                for (var i = 1; i < word.length - 1; i++) str = str + word.substring(i,i+1)+"\\s*";
            }
            str = str + word.substring(word.length - 1,word.length);

            // NOTE(review): the letters are used as regex source unescaped; a
            // banned word containing characters such as "." or "+" would need escaping.
            var regEx = new RegExp(str, "gi");
            input = input.replace(regEx, "mooh");
        });
        return input;
    };
});
I created a for loop that loops through every character of the banned word, adding each character together with \s* (so that spaces are ignored) to a string.
for (var i = 1; i < word.length - 1; i++) str = str + word.substring(i,i+1)+"\\s*";
Then created a regExp from the string, by using the regExp constructor with the string as first parameter and "gi" as second, to make the regExp global and case insensitive.
var regEx = new RegExp(str, "gi");
Then that regex was used to search through input string and replace all matches with "mooh".
I'm trying to write a "suggestion search box" and I cannot find a solution that allows to highlight a substring with javascript keeping the original case.
For example if I search for "ca" I search server side in a case insensitive mode and I have the following results:
Calculator
calendar
ESCAPE
I would like to view the search string in all the previous words, so the result should be:
Calculator
calendar
ESCAPE
I tried with the following code:
var reg = new RegExp(querystr, 'gi');
var final_str = 'foo ' + result.replace(reg, '<b>'+querystr+'</b>');
$('#'+id).html(final_str);
But obviously this way I lose the original case!
Is there a way to solve this problem?
Use a function for the second argument for .replace() that returns the actual matched string with the concatenated tags.
Try it out: http://jsfiddle.net/4sGLL/
// Escape regex metacharacters so the query is matched as literal text
// (otherwise "c++" throws and "." matches every character).
reg = new RegExp(querystr.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&'), 'gi');
// The callback receives the matched text exactly as it appears in `result`,
// so the original case is kept. An empty query is skipped: it would match at
// every position and wrap nothing in <b> tags everywhere.
final_str = 'foo ' + (querystr ? result.replace(reg, function(str) {return '<b>'+str+'</b>'}) : result);
$('#' + id).html(final_str);
JSFiddle Example with Input: https://jsfiddle.net/pawmbude/
ES6 version
/**
 * Wraps every case-insensitive occurrence of `needle` in `haystack` in
 * <strong> tags, keeping the case found in `haystack`.
 *
 * `needle` is treated as literal text. `haystack` is not HTML-escaped here;
 * escape it first if it comes from an untrusted source.
 *
 * @param {string} needle - text to highlight; empty means "nothing to highlight"
 * @param {string} haystack - text to search
 * @returns {string} haystack with the matches wrapped
 */
const highlight = (needle, haystack) => {
  // An empty pattern matches at every position and would wrap nothing everywhere.
  if (!needle) return haystack;
  const literal = needle.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&');
  return haystack.replace(
    new RegExp(literal, 'gi'),
    (str) => `<strong>${str}</strong>`
  );
};
nice results with
/**
 * Wraps every case-insensitive occurrence of `str_to_highlight` in `string`
 * in a highlighted <span><b>…</b></span>, keeping the case found in `string`.
 *
 * The search text is treated as literal text. `string` is not HTML-escaped
 * here; escape it first if it comes from an untrusted source.
 *
 * @param {string} string - text to search
 * @param {string} str_to_highlight - text to highlight; empty leaves `string` unchanged
 * @returns {string} the text with the matches wrapped
 */
function str_highlight_text(string, str_to_highlight){
    // An empty pattern matches at every position and would wrap nothing everywhere.
    if (!str_to_highlight) return string;
    var reg = new RegExp(str_to_highlight.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&'), 'gi');
    return string.replace(reg, function(str) {return '<span style="background-color:#ffbf00;color:#fff;"><b>'+str+'</b></span>'});
}
and easier to remember...
thx to user113716: https://stackoverflow.com/a/3294644/2065594
While the other answers so far seem simple, they can't be really used in many real world cases as they don't handle proper text HTML escaping and RegExp escaping. If you want to highlight every possible snippet, while escaping the text properly, a function like that would return all elements you should add to your suggestions box:
/**
 * Splits `label` into DOM nodes with every case-insensitive occurrence of
 * `term` wrapped in a <b> element. Only text nodes and text content are
 * produced, so neither argument needs HTML escaping.
 *
 * @param {string} label - text to display
 * @param {string} term - search term, matched as literal text
 * @returns {Node[]} nodes to append, in order (text nodes may be empty)
 */
function highlightLabel(label, term) {
  if (!term) return [ document.createTextNode(label) ]
  // No `g` flag: with it, String.prototype.match returns a bare list of
  // matches without `.index`, and the loop below would never advance.
  const regex = new RegExp(term.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&'), 'i')
  const result = []
  let left, match, right = label
  // Repeatedly find the first match in what is left of the label.
  while ((match = right.match(regex))) {
    const m = match[0], hl = document.createElement('b'), i = match.index
    hl.textContent = m
    left = right.slice(0, i)
    right = right.slice(i + m.length)
    result.push(document.createTextNode(left), hl)
    if (!right.length) return result
  }
  result.push(document.createTextNode(right))
  return result
}
string.replace fails in the general case. If you use .innerHTML, replace can replace matches in tags (like a tags). If you use .innerText or .textContent, it will remove any tags there were previously in the html. More than that, in both cases it damages your html if you want to remove the highlighting.
The true answer is mark.js (https://markjs.io/). I just found this - it is what I have been searching for for such a long time. It does just what you want it to.
I do the exact same thing.
You need to make a copy.
I store in the db a copy of the real string, in all lower case.
Then I search using a lower case version of the query string or do a case insensitive regexp.
Then use the resulting found start index in the main string, plus the length of the query string, to highlight the query string within the result.
You can not use the query string in the result since its case is not determinate. You need to highlight a portion of the original string.
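.match() is case-insensitive only when it is given a regular expression with the i flag (a plain string argument is matched case-sensitively); with the g flag added it returns an array of all the matches with their original case intact.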
// Search in lower-cased copies, but cut the pieces out of the original string
// so the highlighted text keeps its original case.
// NOTE: assumes lower-casing does not change the length of the text, which
// holds for all but a few special characters.
var lowerStr = str.toLowerCase(),
    lowerQuery = queryString.toLowerCase(),
    startHere = 0,
    nextMatch,
    resultStr = '',
    qLength = queryString.length;
if (qLength > 0) { // an empty query would be "found" at every position
    while ((nextMatch = lowerStr.indexOf(lowerQuery, startHere)) !== -1) {
        // text between the previous match and this one, then the match itself
        resultStr = resultStr + str.substring(startHere, nextMatch) + '<b>' + str.substr(nextMatch, qLength) + '</b>';
        startHere = nextMatch + qLength;
    }
}
// whatever follows the last match (the whole string when nothing matched)
resultStr = resultStr + str.substring(startHere);
I have found an easier way to achieve it. A JavaScript regular expression remembers the string it matched. This feature can be used here.
I have modified the code a bit.
// The parentheses capture the matched text; "$1" (not "&1") inserts it back
// with its original case. Metacharacters in the query are escaped so it is
// matched as literal text.
reg = new RegExp("("+querystr.trim().replace(/[\\^$*+?.()|[\]{}]/g, '\\$&')+")", 'gi');
final_str = 'foo ' + result.replace(reg, "<b>$1</b>");
$('#'+id).html(final_str);
Highlight search term and anchoring to first occurence - Start
/**
 * Wraps every occurrence of `searchText` in the page in <mark> tags and
 * scrolls the first one into view.
 *
 * NOTE(review): the replacement runs on the document's HTML source, so text
 * inside tags, attributes and scripts is rewritten too, and rebuilding the
 * markup discards existing event listeners — confirm this is acceptable.
 *
 * @param {string} searchText - text to highlight; empty does nothing
 */
function highlightSearchText(searchText) {
    if (!searchText) return;
    var root = document.documentElement;
    var replaceString = '<mark>'+searchText+'</mark>';
    // Call the helper directly: `this` is undefined in strict mode and in modules.
    root.innerHTML = replaceAll(root.innerHTML, searchText, replaceString);
    var elmnt = root.getElementsByTagName('mark')[0];
    // Nothing matched: there is no <mark> element to scroll to.
    if (elmnt) elmnt.scrollIntoView();
}
/**
 * Wraps every case-insensitive occurrence of `querystr` in `str` in <mark>
 * tags, keeping the case found in `str`.
 *
 * @param {string} str - text to search
 * @param {string} querystr - text to mark, matched literally; empty leaves `str` unchanged
 * @param {string} replace - unused; kept so existing callers keep working
 *   (the matched text itself is wrapped so its case is preserved)
 * @returns {string} the text with the matches wrapped
 */
function replaceAll(str, querystr, replace) {
    // An empty pattern matches at every position and would insert empty marks everywhere.
    if (!querystr) return str;
    var reg = new RegExp(querystr.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&'), 'gi');
    var final_str = str.replace(reg, function(found) {return '<mark>'+found+'</mark>'});
    return final_str
}
Highlight search term and anchoring to first occurence - End