replace all commas within a quoted string - javascript

Is there any way to capture and replace all the commas within a string contained within quotation marks, and not any commas outside of it? I'd like to change them to pipes; however, this:
/("(.*?)?,(.*?)")/gm
is only getting the first instance:
JSBIN

If callbacks are okay, you can go for something like this:
/**
 * Replaces every comma that sits inside a double-quoted section with a pipe.
 * Commas outside of quotes are left untouched.
 *
 * @param {string} input - Text that may contain double-quoted sections.
 * @returns {string} The text with the quoted commas turned into `|`.
 */
function pipeQuotedCommas(input) {
  // `*` (not `+`) so that an empty pair `""` is consumed as a quoted section of
  // its own. With `+` its closing quote would be taken for an opening quote and
  // the unquoted commas that follow would be rewritten.
  return input.replace(/"[^"]*"/g, function (quoted) {
    return quoted.replace(/,/g, '|');
  });
}

var str = '"test, test2, & test3",1324,,,,http://www.asdf.com';
var result = pipeQuotedCommas(str);
console.log(result);
//"test| test2| & test3",1324,,,,http://www.asdf.com

This is very convoluted compared to the regular expression version; however, I wanted to try it, if only for the sake of experiment:
// Experimental alternative to the regex: build a parser from a PEG grammar.
// Requires the third-party "pegjs" package.
var PEG = require("pegjs");
// The grammar is written one rule per array element and joined with newlines.
var parser = PEG.buildParser(
["start = seq",
// a run of characters that are neither a comma nor a double quote
"delimited = d:[^,\"]* { return d; }",
// a run of characters that are not a double quote
"quoted = q:[^\"]* { return q; }",
// a single double-quote character, returned as matched
"quote = q:[\"] { return q; }",
// a comma is matched but yields '', i.e. it is dropped from the output
"comma = c:[,] { return ''; }",
// one or more `delimited` pieces separated by commas
"dseq = delimited comma dseq / delimited",
// a `dseq` wrapped in a pair of quotes
"string = quote dseq quote",
// `quoted` runs alternating with `string`s; the second alternative allows
// the input to end with an optional lone quote and trailing text
"seq = quoted string seq / quoted quote? quoted?"].join("\n")
);
/**
 * Recursively flattens arbitrarily nested arrays into one flat array.
 * Anything that is not an array is handed back unchanged.
 */
function flatten(array) {
  if (!(array instanceof Array)) {
    return array;
  }
  const flattenedChildren = array.map(function (child) {
    return flatten(child);
  });
  return [].concat(...flattenedChildren);
}
flatten(parser.parse('foo "bar,bur,ber" baz "bbbr" "blerh')).join("");
// 'foo "barburber" baz "bbbr" "blerh'
I don't advise you to do this in this particular case, but maybe it will create some interest :)
PS. pegjs can be found here: (I'm not an author and have no affiliation, I simply like PEG) http://pegjs.majda.cz/documentation

Related

How to get value in $1 in regex to a variable for further manipulation [duplicate]

You can backreference like this in JavaScript:
var str = "123 $test 123";
str = str.replace(/(\$)([a-z]+)/gi, "$2");
This would (quite silly) replace "$test" with "test". But imagine I'd like to pass the resulting string of $2 into a function, which returns another value. I tried doing this, but instead of getting the string "test", I get "$2". Is there a way to achieve this?
// Instead of getting "$2" passed into somefunc, I want "test"
// (i.e. the result of the regex)
str = str.replace(/(\$)([a-z]+)/gi, somefunc("$2"));
Like this:
str.replace(regex, function(match, $1, $2, offset, original) { return someFunc($2); })
Pass a function as the second argument to replace:
str = str.replace(/(\$)([a-z]+)/gi, myReplace);
/**
 * Replacement callback for String.prototype.replace.
 * The full match and the first captured group are ignored; the second
 * captured group is returned wrapped in plus signs.
 */
function myReplace(str, group1, group2) {
  return `+${group2}+`;
}
This capability has been around since Javascript 1.3, according to mozilla.org.
Using ESNext, quite a dummy links replacer but just to show-case how it works :
// Demo input containing two URLs that the replacer below rewrites.
let text = 'Visit http://lovecats.com/new-posts/ and https://lovedogs.com/best-dogs NOW !';
// Replace every http/https URL (everything up to the next space) with the
// value returned by the callback; `link` is the first captured group.
text = text.replace(/(https?:\/\/[^ ]+)/g, (match, link) => {
// remove ending slash if there is one
link = link.replace(/\/?$/, '');
// keep only what follows the last "/" of the URL
// NOTE(review): the template literal wraps a single expression; any markup
// that originally surrounded it may have been lost — confirm.
return `${link.substr(link.lastIndexOf('/') +1)}`;
});
// Show the rewritten text in the page (needs a browser `document`).
document.body.innerHTML = text;
Note: Previous answer was missing some code. It's now fixed + example.
I needed something a bit more flexible for a regex replace to decode the unicode in my incoming JSON data:
// Decode numeric character references (e.g. "&#115;") found in a string.
var text = "some string with an encoded '&#115;' in it";
var decoded = text.replace(/&#(\d+);/g, function (entity, digits) {
  var codePoint = Number(digits);
  // fromCodePoint (unlike fromCharCode) also handles values above 0xFFFF;
  // a number beyond the Unicode range is left as the original entity text.
  return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
});
// decoded === "some string with an encoded 's' in it"
If you have a variable number of backreferences, then the argument count (and positions) are also variable. The MDN Web Docs describe the following syntax for specifying a function as the replacement argument:
function replacer(match[, p1[, p2[, p...]]], offset, string)
For instance, take these regular expressions:
var searches = [
  'test([1-3]){1,3}', // 1 backreference
  '([Ss]ome) ([A-z]+) chars', // 2 backreferences
  '([Mm][a#]ny) ([Mm][0o]r[3e]) ([Ww][0o]rd[5s])' // 3 backreferences
];
// for...of walks the patterns themselves (for...in would walk the index keys).
for (var pattern of searches) {
  "Some string chars and many m0re w0rds in this test123".replace(
    new RegExp(pattern),
    function (...args) {
      // Replacer arguments are: match, p1..pN, offset, whole string.
      var match = args[0];
      var backrefs = args.slice(1, args.length - 2);
      // will be: ['3'], ['Some', 'string'], ['many', 'm0re', 'w0rds']
      // (a repeated group only keeps its last repetition, hence '3')
      var offset = args[args.length - 2];
      var string = args[args.length - 1];
      // Hand the match back so the text itself is left unchanged.
      return match;
    }
  );
}
You can't use the 'arguments' variable here because it is of type Arguments and not of type Array, so it doesn't have a slice() method.

How can I replace multiple characters in a string?

I want to create a regex with following logic:
1., If string contains T replace it with space
2., If string contains Z remove Z
I wrote two regex already, but I can't combine them:
string.replace(/\T/g,' ') && string.replace(/\Z/g,'');
EDIT: I want the regex code to be shorter
Doesn't seem this even needs regex. Just 2 chained replacements would do.
// With string patterns, replace() swaps only the first match of each letter.
var str = '[T] and [Z] but not [T] and [Z]';
var withoutFirstT = str.replace('T', ' ');
var result = withoutFirstT.replace('Z', '');
console.log(result);
However, a simple replace only replaces the first occurrence.
To replace all, regex still comes in handy. By making use of the global g flag.
Note that the characters aren't escaped with \. There's no need.
var str = '[T] and [Z] and another [T] and [Z]';
// The g flag makes each replace() cover every match, not only the first one.
var everyT = /T/g;
var everyZ = /Z/g;
var result = str.replace(everyT, ' ').replace(everyZ, '');
console.log(result);
// The i flag additionally ignores letter case, and a character class []
// lists several characters that share one replacement.
var str2 = '(t) or (Ⓣ) and (z) or (Ⓩ). But also uppercase (T) or (Z)';
var result2 = str2.replace(/[tⓉ]/gi, ' ');
result2 = result2.replace(/[zⓏ]/gi, '');
console.log(result2);
But if the intention is to process really big strings, and performance matters?
Then I found out in another challenge that using an unnamed callback function inside 1 regex replace can prove to be faster. When compared to using 2 regex replaces.
Probably because if it's only 1 regex then it only has to process the huge string once.
Example snippet:
// Micro-benchmark: two chained regex replaces versus one regex replace with a
// callback, timed on a large and on a small input via console.time/timeEnd.
console.time('creating big string');
// 'TZ-' repeated 2,000,000 times
var bigstring = 'TZ-'.repeat(2000000);
console.timeEnd('creating big string');
console.log('bigstring length: '+bigstring.length);
// Variant 1 (big input): two passes, one per letter, case-insensitive.
console.time('double replace big string');
var result1 = bigstring.replace(/[t]/gi,'X').replace(/[z]/gi,'Y');
console.timeEnd('double replace big string');
// Variant 2 (big input): one pass; the callback picks the replacement
// depending on which capture group matched.
console.time('single replace big string');
var result2 = bigstring.replace(/([t])|([z])/gi, function(m, c1, c2){
if(c1) return 'X'; // if capture group 1 has something
return 'Y';
});
console.timeEnd('single replace big string');
// Same comparison on a small input ('TZ-' repeated 5,000 times),
// this time with case-sensitive patterns.
var smallstring = 'TZ-'.repeat(5000);
console.log('smallstring length: '+smallstring.length);
console.time('double replace small string');
var result3 = smallstring.replace(/T/g,'X').replace(/Z/g,'Y');
console.timeEnd('double replace small string');
console.time('single replace small string');
var result4 = smallstring.replace(/(T)|(Z)/g, function(m, c1, c2){
// group 1 matched a T; otherwise the match was a Z
if(c1) return 'X';
return 'Y';
});
console.timeEnd('single replace small string');
Do you look for something like this?
ES6
// Lookup table: each character to replace, mapped to its replacement.
var key = { T: ' ', Z: '' };
"ATAZATA".replace(/[TZ]/g, (char) => {
  const replacement = key[char];
  return replacement || '';
});
Vanilla
"ATAZATA".replace(/[TZ]/g,function (char) {return key[char] || ''});
or
"ATAZATA".replace(/[TZ]/g,function (char) {return char==='T'?' ':''});
you can capture both and then decide what to do in the callback:
// One regex matches both letters; the callback decides per match.
// As the question asks: T becomes a space and Z is removed.
var string = 'AZorro Tab';
var res = string.replace(/[TZ]/g, (m) => (m === 'T' ? ' ' : ''));
console.log(res);
-- edit --
Using a dict substitution you can also do:
var string = 'AZorro Tab';
// As the question asks: T turns into a space, Z is dropped.
var dict = { T: ' ', Z: '' };
// Escape the characters that are special inside [...] so that every key is
// matched literally when the character class is built.
var classBody = Object.keys(dict)
  .map((k) => k.replace(/[\\\]^-]/g, '\\$&'))
  .join('');
var re = new RegExp(`[${classBody}]`, 'g');
// Each matched character is looked up in the dictionary.
var res = string.replace(re, (m) => dict[m]);
console.log(res);
Second Update
I have developed the following function to use in production; perhaps it can help someone else. It's basically a loop over JavaScript's native replaceAll function, and it does not make use of regex:
/**
 * Applies a list of literal replacements to a string, without regular
 * expressions.
 *
 * @param {string} text - The text to transform.
 * @param {Array<Object<string, string>>} characters - Mapping objects applied
 *   in order; every key of a mapping is replaced everywhere by its value.
 * @returns {string} The text after all replacements.
 */
function replaceMultiple(text, characters) {
  let output = text;
  for (const mapping of characters) {
    // Walk every key/value pair so a mapping may hold more than one entry.
    // (Handing the key and value arrays straight to replaceAll only worked
    // for single-entry objects, through array-to-string coercion.)
    for (const [from, to] of Object.entries(mapping)) {
      output = output.replaceAll(from, String(to ?? ''));
    }
  }
  return output;
}
Usage is very simple:
const text = '#Please send_an_information_pack_to_the_following_address:';
// One single-entry object per replacement; they are applied in list order.
const characters = [{ '#': '' }, { '_': ' ' }];
const result = replaceMultiple(text, characters);
console.log(result); //'Please send an information pack to the following address:'
Update
You can now use replaceAll natively.
Outdated Answer
Here is another version using String Prototype. Enjoy!
// NOTE: this patches a built-in prototype, which is generally discouraged.
// The native method is kept and delegated to, so ordinary
// replaceAll(search, replacement) calls keep working.
(function () {
  const nativeReplaceAll = String.prototype.replaceAll;

  /**
   * Character-by-character replacement driven by a lookup object.
   *
   * @param {Object<string, string>} obj - Maps a character to its replacement.
   *   Any other call shape (search value plus replacement, or a RegExp) is
   *   forwarded to the native replaceAll.
   * @returns {string} The string with every mapped character replaced.
   * @throws {TypeError} If called without a mapping object on an engine that
   *   has no native replaceAll.
   */
  String.prototype.replaceAll = function (obj, ...rest) {
    const isMapping =
      rest.length === 0 &&
      obj !== null &&
      typeof obj === 'object' &&
      !(obj instanceof RegExp);
    if (!isMapping) {
      if (typeof nativeReplaceAll !== 'function') {
        throw new TypeError('replaceAll expects a character-mapping object');
      }
      return nativeReplaceAll.call(this, obj, ...rest);
    }
    let finalString = '';
    for (const each of String(this)) {
      // Look each character up exactly once, so a replacement value is never
      // replaced again by a later key.
      const isMapped = Object.prototype.hasOwnProperty.call(obj, each);
      finalString += isMapped ? obj[each] : each;
    }
    return finalString;
  };
})();
'abc'.replaceAll({'a':'x', 'b':'y'}); //"xyc"

How to clean string from word characters, Javascript

I'm trying to clean strings which have been converted from Word text, but I'm stuck on removing the special character '…'.
When the "clean" button is clicked, the script removes all the dots but only one special character; however, I need to remove all of them.
Where is my mistake?
Here is my code and plunker with struggles
$scope.string = "My transformed string ………….........…...."
// Click handler from the question: cleans the text of every <em> element.
$scope.removeDots = function () {
var em = document.getElementsByTagName('em');
var reg = /\./g;
var hellip = /…/g
angular.forEach(em, function (item) {
// if the text contains a period, run it through process()
if(item.innerText.match(reg)){
item.innerText = process(item.innerText)
}
// the test uses the global regex, but the replace below is given a string
// pattern, so only the first "…" is removed
if (item.innerText.match(hellip)){
item.innerText = item.innerText.replace("…", "")
}
});
};
/**
 * Keeps everything up to and including the first period and strips every
 * later period. A string the pattern does not match is returned unchanged.
 */
function process( str ) {
  var firstDotThenRest = /^([^.]*\.)(.*)$/;
  return str.replace( firstDotThenRest, function ( whole, head, tail ) {
    var tailWithoutDots = tail.replace( /\./g, '' );
    return head + tailWithoutDots;
  });
}
There's a few problems here, but they can all be resolved by simply reducing the code to a single regex replace within process that will handle both periods and … entities:
// Runs the text of every <em> element on the page through process().
$scope.removeDots = function () {
  var emphasisElements = document.getElementsByTagName('em');
  angular.forEach(emphasisElements, function (element) {
    var cleaned = process(element.innerText);
    element.innerText = cleaned;
  });
};
/** Removes every period and every "…" character from the given string. */
function process( str ) {
  var dotOrEllipsis = /\.|…/g;
  return str.replace( dotOrEllipsis, '' );
}
});
Plunker demo
You replace every occurrence of . in process, but only replace … once.
I don't see why you don't just do something like .replace(/(\.|…)/g, ''); the g modifier makes sure every match is replaced.
You can do both replacements by first replacing the occurrences of … with one point (because it might be the only thing you find), and then replacing any sequence of points by one:
/**
 * Turns each "…" into a period, then collapses every run of two or more
 * periods into a single one.
 */
function process( str ) {
  var withPlainDots = str.replace(/…/g, '.');
  return withPlainDots.replace(/\.\.+/g, '.');
}
var test="My transformed string ………….........…....";
console.log(process(test));
One of the reasons your code did not replace everything, is that you used a string as find argument, which will result in one replacement only. By using the regular expression as find argument you can get the effect of the g modifier.

Regex remove repeated characters from a string by javascript

I have found a way to remove repeated characters from a string using regular expressions.
function RemoveDuplicates() {
var str = "aaabbbccc";
var filtered = str.replace(/[^\w\s]|(.)\1/gi, "");
alert(filtered);
}
Output: abc
this is working fine.
But if str = "aaabbbccccabbbbcccccc" then output is abcabc.
Is there any way to get only the unique characters, i.e. to remove all the duplicates?
Please let me know if there is any way.
A lookahead like "this, followed by something and this":
var str = "aaabbbccccabbbbcccccc";
console.log(str.replace(/(.)(?=.*\1)/g, "")); // "abc"
Note that this preserves the last occurrence of each character:
var str = "aabbccxccbbaa";
console.log(str.replace(/(.)(?=.*\1)/g, "")); // "xcba"
Without regexes, preserving order:
var str = "aabbccxccbbaa";
// Keep a character only at the position where it occurs for the first time.
var uniqueChars = str.split("").filter(function (ch, position, all) {
  return all.indexOf(ch) === position;
});
console.log(uniqueChars.join("")); // "abcx"
This is an old question, but in ES6 we can use Sets. The code looks like this:
var test = 'aaabbbcccaabbbcccaaaaaaaasa';
// A Set stores each character once, in order of first appearance.
var result = [...new Set(test)].join('');
console.log(result);

JavaScript - string regex backreferences

You can backreference like this in JavaScript:
var str = "123 $test 123";
str = str.replace(/(\$)([a-z]+)/gi, "$2");
This would (quite silly) replace "$test" with "test". But imagine I'd like to pass the resulting string of $2 into a function, which returns another value. I tried doing this, but instead of getting the string "test", I get "$2". Is there a way to achieve this?
// Instead of getting "$2" passed into somefunc, I want "test"
// (i.e. the result of the regex)
str = str.replace(/(\$)([a-z]+)/gi, somefunc("$2"));
Like this:
str.replace(regex, function(match, $1, $2, offset, original) { return someFunc($2); })
Pass a function as the second argument to replace:
str = str.replace(/(\$)([a-z]+)/gi, myReplace);
/**
 * Replacement callback for String.prototype.replace.
 * The full match and the first captured group are ignored; the second
 * captured group is returned wrapped in plus signs.
 */
function myReplace(str, group1, group2) {
  return `+${group2}+`;
}
This capability has been around since Javascript 1.3, according to mozilla.org.
Using ESNext, quite a dummy links replacer but just to show-case how it works :
// Demo input containing two URLs that the replacer below rewrites.
let text = 'Visit http://lovecats.com/new-posts/ and https://lovedogs.com/best-dogs NOW !';
// Replace every http/https URL (everything up to the next space) with the
// value returned by the callback; `link` is the first captured group.
text = text.replace(/(https?:\/\/[^ ]+)/g, (match, link) => {
// remove ending slash if there is one
link = link.replace(/\/?$/, '');
// keep only what follows the last "/" of the URL
// NOTE(review): the template literal wraps a single expression; any markup
// that originally surrounded it may have been lost — confirm.
return `${link.substr(link.lastIndexOf('/') +1)}`;
});
// Show the rewritten text in the page (needs a browser `document`).
document.body.innerHTML = text;
Note: Previous answer was missing some code. It's now fixed + example.
I needed something a bit more flexible for a regex replace to decode the unicode in my incoming JSON data:
// Decode numeric character references (e.g. "&#115;") found in a string.
var text = "some string with an encoded '&#115;' in it";
var decoded = text.replace(/&#(\d+);/g, function (entity, digits) {
  var codePoint = Number(digits);
  // fromCodePoint (unlike fromCharCode) also handles values above 0xFFFF;
  // a number beyond the Unicode range is left as the original entity text.
  return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
});
// decoded === "some string with an encoded 's' in it"
If you have a variable number of backreferences, then the argument count (and positions) are also variable. The MDN Web Docs describe the following syntax for specifying a function as the replacement argument:
function replacer(match[, p1[, p2[, p...]]], offset, string)
For instance, take these regular expressions:
var searches = [
  'test([1-3]){1,3}', // 1 backreference
  '([Ss]ome) ([A-z]+) chars', // 2 backreferences
  '([Mm][a#]ny) ([Mm][0o]r[3e]) ([Ww][0o]rd[5s])' // 3 backreferences
];
// for...of walks the patterns themselves (for...in would walk the index keys).
for (var pattern of searches) {
  "Some string chars and many m0re w0rds in this test123".replace(
    new RegExp(pattern),
    function (...args) {
      // Replacer arguments are: match, p1..pN, offset, whole string.
      var match = args[0];
      var backrefs = args.slice(1, args.length - 2);
      // will be: ['3'], ['Some', 'string'], ['many', 'm0re', 'w0rds']
      // (a repeated group only keeps its last repetition, hence '3')
      var offset = args[args.length - 2];
      var string = args[args.length - 1];
      // Hand the match back so the text itself is left unchanged.
      return match;
    }
  );
}
You can't use the 'arguments' variable here because it is of type Arguments and not of type Array, so it doesn't have a slice() method.

Categories

Resources