Replace all in JavaScript Regex - javascript

I have the string below, from which I have to extract the username and ID.
This is a string which has a #[User Full Name](contact:1) data inside.
To get username and contact id from above string I am using this regex pattern.
var re = /\#\[(.*)\]\(contact\:(\d+)\)/;
text = text.replace(re,"username:$1 with ID: $2");
// result == username: User Full Name with ID: 1
It works perfectly. The issue is that I now have multiple usernames in the string; I tried using /g (global), but it's not replacing properly:
Example string:
This is a string which has a #[User Full Name](contact:1) data inside. and it can also contain many other users data like #[Second Username](contact:2) and #[Third username](contact:3) and so many others....
when used global I get this result:
var re = /\#\[(.*)\]\(contact\:(\d+)\)/g;
text = text.replace(re,"username:$1 with ID: $2");
//RESULT from above
This is a string which has a user username; User Full Name](contact:1) data inside. and it can also contain many other users data like #[[Second Username](contact:2) and #[Third username and ID: 52 and so many others....

You just need a non greedy ? match in your first capture group. By having .* you are matching the most amount possible while if you use .*?, it matches the least amount possible.
/#\[(.*?)\]\(contact:(\d+)\)/
And if the word contact is not always there, you could do..
/#\[(.*?)\]\([^:]+:(\d+)\)/
See working demo

Can't say I can see how your resulting string is going to be usable. How about something like this...
// Collect every "#[Name](contact:ID)" mention found in `text` as { username, id }.
// Because of the /g flag, each exec() call resumes after the previous match
// and returns null once the string is exhausted.
var re = /#\[(.*?)\]\(contact:(\d+)\)/g;
var users = [];
var match;
for (match = re.exec(text); match !== null; match = re.exec(text)) {
  const [, username, id] = match;
  users.push({ username, id });
}

Related

Regular expression for extracting domain from an input

I'm trying to extract the domain.com from an input that can be in the following formats and structure:
1. x.x.domain.com
2. x.domain.com
Once I am getting user's email, for example:
user#x.x.domain.com
I am able to remove the first part of the email address:
user#
by the following regex:
/^.+#/
I want to be able by using the regex over the 2 formats to get the domain.com right away and not manipulate the input several times until getting the domain.
I thought maybe to count the number of dots from the input and then to do some logic, but it looks so complex for this small solution.
Thanks!
Without RegExp: split the e-mail address twice, slice from the last split and join the result. Plus two RegExp ideas. Take your pick.
/**
 * Returns the last two dot-separated labels of whatever follows the first "#".
 * Throws a TypeError when the address contains no "#".
 */
const getDomain = (address) => {
  const [, host] = address.split("#");
  const labels = host.split(".");
  return labels.slice(-2).join(".");
};
/**
 * Collects every ".label" chunk, keeps the last two, glues them together
 * and drops the leading dot. Throws a TypeError when the address has no ".label".
 */
const getDomainRE = (address) => {
  const dottedLabels = address.match(/\.\w+/g);
  const lastTwo = dottedLabels.slice(-2);
  return lastTwo.join("").slice(1);
};
/**
 * Matches the final two "<separator>label" chunks (separator is "#" or ".")
 * anchored at the end of the address, then strips the leading separator.
 * Throws a TypeError when the address does not end in two such chunks.
 */
const getDomainRE2 = (address) => {
  const [tail] = address.match(/(?:(#|\.)\w+){2}$/);
  return tail.slice(1);
};
// Demo: run each variant over the same three sample addresses, in the order
// getDomain, getDomainRE, getDomainRE2.
[getDomain, getDomainRE, getDomainRE2].forEach((extract) => {
  [
    "user#x.x.domain.com",
    "user#x.domain.com",
    "user#abc.x.y.z.domain.com",
  ].forEach((address) => {
    console.log(extract(address));
  });
});
Regex way:
// Pattern for the final two dot-separated labels of a host name.
let reg = /[^.]+\.[^.]+$/gi;
let str1 = "x.domain.com";
let str2 = "x.sdfsdf.google.com";
let str3 = "www.subdomain.yahoo.com";
// With the g flag, match() returns an array of matches, so each line logs
// a one-element array: ["domain.com"], ["google.com"], ["yahoo.com"].
for (const host of [str1, str2, str3]) {
  console.log(host.match(reg));
}
Simple javascript:
/**
 * Returns the last two dot-separated labels of `str` joined by ".".
 * Logs "Invalid domain name" and returns null when `str` has fewer than two labels.
 */
function getDomain(str) {
  const labels = str.split(".");
  if (labels.length >= 2) {
    return labels.slice(-2).join(".");
  }
  console.log("Invalid domain name");
  return null;
}
// Demo inputs for getDomain(); the loop logs one result per host, in order.
let str1 = "x.domain.com";
let str2 = "x.sdfsdf.google.com";
let str3 = "www.subdomain.yahoo.com";
for (const host of [str1, str2, str3]) {
  console.log(getDomain(host));
}
This is regular expression for domain name
"[a-zA-Z0-9][a-zA-Z0-9-]{1,61}[a-zA-Z0-9]\.[a-zA-Z]{2,}$"
should also manage x.x.domain.co.uk
original pattern uses
"(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]"
Good luck
I know you didn't specifically ask for it, but you may want to consider country codes as top-level domains as well (e.g. .au, .uk).
If required, you could achieve it with the following:
/**
 * Extracts the trailing "name.tld" (optionally followed by a two-letter
 * country code such as ".au" or ".uk") from the end of the given string.
 *
 * @param {string} domain - Text to inspect; only its tail is matched.
 * @returns {string|null} The matched domain, or null when nothing matches.
 */
function getDomainFromEmail(domain) {
  // Group 1 wraps the whole match: optional label, ".label", optional ".<country code>".
  const domainExpression = /((\w+)?(\.\w+)(\.(ad|ae|af|ag|ai|al|am|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw))?)$/i;
  const found = domainExpression.exec(domain);
  if (!found) {
    return null;
  }
  return found[1];
}
// Demo: log the extracted domain for a range of sample addresses.
[
  'email#example.com',
  'email#sub.example.com',
  'email#sub.sub.example.com',
  'email#example.com.au',
  'email#example.co.uk',
  'email#sub.example.co.uk',
  'email#sub.sub.example.co.uk',
  'email#bit.ly',
  'email#domain.other',
  'email#nomatch',
].forEach((address) => {
  console.log(getDomainFromEmail(address));
});
The long expression (ad|ae|...|zm|zw) is a list of country codes combined into a regular expression.
How about
(#).*
The first group of the output is # and the second group is domain.com

Finding multiple groups in one string

Consider the following string: it is a list of HTML <a> elements separated by commas. How can I get a list of {href, title} for the elements that are between 'start' and 'end'?
not thisstartfoo, barendnot this
The following regex give only the last iteration of a.
/start((?:<a href="(?<href>.*?)" title="(?<title>.*?)">.*?<\/a>(?:, )?)+)end/g
How to have all the list?
This should give you what you need.
https://regex101.com/r/isYIeR/1
/(?:start)*(?:<a href=(?<href>.*?)\s+title=(?<title>.*?)>.*?<\/a>)+(?:,|end)
UPDATE
This does not meet the requirement.
The Returned Value for a Given Group is the Last One Captured
I do not think this can be done in one regex match. Here is a javascript solution with 2 regex matches to get a list of {href, title}
// Two-step extraction: `reg` finds each start...end run of anchors, then
// `regex2` walks that run and pulls out every href/title pair.
// NOTE(review): the sample text as shown contains no "<a href=" markup, so
// `reg` never matches it and hrefList stays empty.
var sample='startfoo, bar,barendstart<img> something end\n' +
  'beginfoo, bar,barend\n'+
  'startfoo again, bar again,bar2 againend';
var reg = /start((?:\s*<a href=.*?\s+title=.*?>.*?<\/a>,?)+)end/gi;
var regex2 = /href=(?<href>.*?)\s+title=(?<title>.*?)>/gi;
var step1;
var step2;
var hrefList = [];
for (step1 = reg.exec(sample); step1 !== null; step1 = reg.exec(sample)) {
  const anchorRun = step1[1];
  // regex2 is global too: exec() returns null (and resets lastIndex) once the run is consumed.
  for (step2 = regex2.exec(anchorRun); step2 !== null; step2 = regex2.exec(anchorRun)) {
    const { href, title } = step2.groups;
    hrefList.push({ href, title });
  }
}
console.log(hrefList);
If the format is constant - ie only href and title for each tag, you can use this regex to find a string which is not "", and has " and a space or < after it using lookahead (regex101):
// Runs of non-quote characters that are followed by a closing double quote
// and then whitespace or ">".
const attributeValue = /[^"]+(?="[\s>])/gi;
const str = 'startfoo, barend';
const result = str.match(attributeValue);
console.log(result);
This regex:
<.*?>
removes all html tags
so for example
<h1>1. This is a title </h1><ul><a href='www.google.com'>2. Click here </a></ul>
After using regex you will get:
1. This is a title 2. Click here
Not sure if this answers your question though.

Javascript find and replace string with variable values

I have an angular app, but I have one page that needs to be pre-rendered with no javascript (for printing and PDF), some of the content is loaded with the Angular variable inputs {{field(10)}}
I pre-load the content but need a way to find and replace the string so that:
{{field(10)}}
is changed into the value of this
submission.inputs[10]
So for example:
var string = 'This is some content: {{field[10]}}';
var submission.inputs[10] = 'replace value';
I want the new string to be this
var newString = 'This is some content: replace value';
This is running in Node so it has the latest version of Javascript.
I tried this:
var newString = string.replace(/\{{(.+?)}}/g, submission.inputs[$1]);
But I don't think my syntax is correct.
Your current regex extracts all the text contained within {{}} with its capture group. But you only want the index of the replacement, which is contained within the [], and not the entire string itself. So you have two options:
Modify regex to capture only the index, so that would look like /{{field\[(.+?)\]}}/, where the capture group now only takes the number within the brackets.
Leave the original regex alone, but change the replace function to extract the number from the returned match. In this case you'll have a second regex (or some other method) to extract the number from the matched string (in this case, get "10" out of "field[10]").
Here's an example demonstrating both:
var string = 'This is some content: {{field[10]}}';
var submission = { inputs: [] };
submission.inputs[10] = 'replace value';

// Goal: 'This is some content: replace value'

// Variant 1: the pattern captures just the index between the square brackets.
var newString = string.replace(/{{field\[(.+?)\]}}/g, function (_placeholder, index) {
  return submission.inputs[index];
});
console.log(newString);

// Variant 2: capture everything between the braces, then dig the index out
// of that text with a second pattern.
var otherNewString = string.replace(/\{{(.+?)}}/g, function (_placeholder, expression) {
  const [, index] = expression.match(/\[(.+?)\]/);
  return submission.inputs[index];
});
console.log(otherNewString);
You can use the following regex to extract the contents between {{field[ and ]}} as the snippet below shows. The snippet uses a callback in the replace function and passes the captured group's value to it so that an appropriate value may be returned (submission.inputs[b] where b is the number you want: 10 in this case).
{{[^[]+\[([^\]]+)]}}
{{ Match this literally
[^[]+ Match any character except [ one or more times
\[ Match [ literally
([^\]]+) Capture any character except ] one or more times into capture group 1. This is the value you want
]}} Match this literally
var string = 'This is some content: {{field[10]}}';
var submission = { inputs: [] };
submission.inputs[10] = 'replace value';
// The callback receives the whole placeholder and the captured index; only the
// index is needed to look up the replacement.
var newString = string.replace(
  /{{[^[]+\[([^\]]+)]}}/g,
  (_placeholder, index) => submission.inputs[index]
);
console.log(newString);

Extract email address from string

I have a string like this:
Francesco Renga <francesco_renga-001#gmail.com>
I need to extract only the email, i.e. francesco_renga-001#gmail.com.
How can I do this in nodejs/javascript in "elegant" way?
Using regex, if your string pattern is always Some text<email> or Some text<email>, Some text<email> <email> you can use this simple one <(.*?)>
Demo
Other solution
Use positive lookahead: [^< ]+(?=>), here is a snippet and a demo
var text = "Francesco Renga <francesco_renga-001#gmail.com>, Marty McFly <mmcfly#gmail.com> Marty McFly <mmcfly#gmail.com> <mmcfly2#gmail.com>";
// Runs of characters that are neither "<" nor a space and are directly followed by ">".
var re = /[^< ]+(?=>)/g;
for (const email of text.match(re)) {
  console.log(email);
}
Explanation
[^< ]+ match anything but a < or a space, between one and unlimited times
(?=>) followed by a >
Simple and does not require any group.
Here's a simple example showing how to use regex in JavaScript :
// Input text with the address wrapped in angle brackets.
var string = "Francesco Renga <francesco_renga-001#gmail.com>";
// Capture group 1 is everything between the first "<" and the last ">".
var regex = /<(.*)>/g;
var matches = regex.exec(string);
const [, capturedAddress] = matches;
console.log(capturedAddress);
Here's the decomposition of the regex /<(.*)>/ :
/ and / are mandatory to define a regex
< and > simply matches the two < and > in your string
() parenthesis "capture" what you're looking for. Here, they get the mail address inside.
.* : . means "any character", and * means "any number of times". Combined, it means "any character any number of times", and that is inside < and >, which correspond to the place where the mail is.
Here's a simple code showing how extract the unique list of emails address using JavaScript :
// Build the unique list of e-mail addresses found in `string` (defined by the
// surrounding code), lower-cased and joined one per line.
// "#" is the address separator used throughout these examples.
let emaillst = string.match(/([a-zA-Z0-9._+-]+#[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi);
if (emaillst === null) {
  // no Email Address Found
} else {
  // Normalise BEFORE de-duplicating so that "A#b.com", "a#b.com" and "a#b.com."
  // collapse into a single entry. The last character class of the pattern
  // accepts ".", so a sentence-ending full stop gets captured and is trimmed here.
  const normalisedEmails = emaillst.map((email) =>
    (email.endsWith('.') ? email.slice(0, -1) : email).toLowerCase()
  );
  emaillst = Array.from(new Set(normalisedEmails)).join('\n');
  // Print the list that was just built.
  console.log(emaillst);
}
See the Live Demo of email address extractor online
Features
Get Unique Emails
Auto remove duplicate emails
convert upper case email address to lowercase

regex for string to get customerID

I am looking for the word customerID number in a string. Customer id would be in this format customerID{id}
so look at some different strings I would have
myVar = "id: 1928763783.Customer Email: test#test.com.Customer Name:John Smith.CustomerID #123456.";
myVar = "id: 192783.Customer Email: test1#test.com.Customer Name:Rose Vil.CustomerID #193474.";
myVar = "id: 84374398.Customer Email: test2#test.com.Customer Name:James Yuem.";
Ideally I want to be able to check whether a CustomerID exists or not. If it does exist, then I want to see what it is. I know we can use a regex, but I'm not sure how that would look.
thanks
// Look for "CustomerID #<digits>" in myVar. `id` is declared here so the
// assignment does not create an implicit global (which throws in strict mode
// and in modules); it is only assigned when the pattern is found.
var match = myVar.match(/CustomerID #(\d+)/);
var id;
if (match) {
  id = match[1];
}
I'm not 100% familiar with the syntax but I'd say: "(CustomerID #([0-9]+).)"
I think this is a valid regular expression for what you're looking for; it would check whether a string has 'CustomerID' followed by a space, a number sign and then a sequence of digits. By surrounding the digits with brackets, they can be captured by referencing bracket 2 if something was found.
I'm not sure whether the brackets or the period need a \ before them in this syntax. Sorry I can't be of more help, but I hope this helps in some way.
Play around to get this to work for your needs:
// Pattern breakdown:
//   i flag  - match "CustomerID" regardless of letter case
//   \s+     - one or more whitespace characters (use \s* to allow none)
//   #?      - an optional "#" (drop the ? to make it mandatory)
//   (\d+)   - one or more digits, captured as group 1; swap + for {length}
//             or {min,max} to constrain how long the id may be
var regex = /CustomerID\s+#?(\d+)/i;
var myVar1 = "id: 1928763783.Customer Email: test#test.com.Customer Name:John Smith.CustomerID #123456.";
var match = myVar1.match(regex);
// match() yields null when the pattern is absent.
if (match !== null) {
  // Index 0 holds the whole matched text; index 1 is the first capture group.
  const [, customerId] = match;
  console.log(customerId);
}

Categories

Resources