Extract email address from string - javascript

I have a string like this:
Francesco Renga <francesco_renga-001#gmail.com>
I need to extract only the email, i.e. francesco_renga-001#gmail.com.
How can I do this in nodejs/javascript in "elegant" way?

Using regex, if your string pattern is always Some text<email> or Some text<email>, Some text<email> <email> you can use this simple one <(.*?)>
Demo
Other solution
Use a positive lookahead: [^< ]+(?=>). Here is a snippet and a demo:
// Sample input: several "Name <address>" entries plus one bare <address>.
var text = "Francesco Renga <francesco_renga-001#gmail.com>, Marty McFly <mmcfly#gmail.com> Marty McFly <mmcfly#gmail.com> <mmcfly2#gmail.com>";
// A run of characters that are neither "<" nor a space, immediately followed by ">".
var re = /[^< ]+(?=>)/g;
// Print each extracted address on its own line.
for (const email of text.match(re)) {
  console.log(email);
}
Explanation
[^< ]+ matches any character except < or a space, between one and unlimited times
(?=>) followed by a >
Simple and does not require any group.

Here's a simple example showing how to use regex in JavaScript :
var string = "Francesco Renga <francesco_renga-001#gmail.com>"; // The input string containing the address
// No `g` flag: a global regex makes exec() stateful (it resumes from
// regex.lastIndex), which is a trap when only the first match is wanted.
var regex = /<(.*)>/; // The actual regex
var matches = regex.exec(string); // null when the string has no <...> part
console.log(matches[1]); // group 1 is the text between < and >
Here's the decomposition of the regex /<(.*)>/ :
/ and / are mandatory to define a regex
< and > simply matches the two < and > in your string
() parenthesis "capture" what you're looking for. Here, they get the mail address inside.
.* : . means "any character", and * means "any number of times". Combined, it means "any character, any number of times", and that sits inside < and >, which is where the mail address is.

Here's a simple snippet showing how to extract the unique list of email addresses using JavaScript:
// Build a de-duplicated, lower-cased, newline-separated list of the addresses
// found in `string` (which must be defined by the surrounding code).
let emaillst = string.match(/([a-zA-Z0-9._+-]+#[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi);
if (emaillst === null) {
  // no Email Address Found
} else {
  // Normalize BEFORE de-duplicating so that "A#x.com", "a#x.com" and
  // "a#x.com." (sentence-ending dot captured by the pattern) collapse
  // into a single entry.
  const normalized = emaillst.map((email) => {
    const withoutTrailingDot = email.endsWith('.') ? email.slice(0, -1) : email;
    return withoutTrailingDot.toLowerCase();
  });
  emaillst = Array.from(new Set(normalized)).join('\n');
  console.log(emaillst);
}
See the Live Demo of email address extractor online
Features
Get Unique Emails
Auto remove duplicate emails
convert upper case email address to lowercase

Related

Regular expression for extracting domain from an input

I'm trying to extract the domain.com from an input that can be in the following formats and structure:
1. x.x.domain.com
2. x.domain.com
Once I am getting user's email, for example:
user#x.x.domain.com
I am able to remove the first part of the email address:
user#
by the following regex:
/^.+#/
I want to be able by using the regex over the 2 formats to get the domain.com right away and not manipulate the input several times until getting the domain.
I thought maybe to count the number of dots from the input and then to do some logic, but it looks so complex for this small solution.
Thanks!
Without RegExp: split the e-mail address twice, slice from the last split and join the result. Plus two RegExp ideas. Take your pick.
// Returns the last two dot-separated labels of the host part (the text after "#").
const getDomain = (address) => {
  const [, host] = address.split("#");
  const labels = host.split(".");
  return labels.slice(-2).join(".");
};
// Collects every ".label" piece, keeps the last two, and drops the leading dot.
const getDomainRE = (address) => {
  const dottedLabels = address.match(/\.\w+/g);
  const lastTwo = dottedLabels.slice(-2);
  return lastTwo.join("").slice(1);
};
// Matches the final two "#label" / ".label" pieces and strips the leading separator.
const getDomainRE2 = (address) => {
  const [tail] = address.match(/(?:(#|\.)\w+){2}$/);
  return tail.slice(1);
};
console.log(getDomain("user#x.x.domain.com"));
console.log(getDomain("user#x.domain.com"));
console.log(getDomain("user#abc.x.y.z.domain.com"));
console.log(getDomainRE("user#x.x.domain.com"));
console.log(getDomainRE("user#x.domain.com"));
console.log(getDomainRE("user#abc.x.y.z.domain.com"));
console.log(getDomainRE2("user#x.x.domain.com"));
console.log(getDomainRE2("user#x.domain.com"));
console.log(getDomainRE2("user#abc.x.y.z.domain.com"));
Regex way:
let str1 = "x.domain.com";
let str2 = "x.sdfsdf.google.com";
let str3 = "www.subdomain.yahoo.com";
// Last two dot-free runs at the end of the string, joined by a dot.
let reg = /[^.]+\.[^.]+$/gi;
// Logs, in order: domain.com, google.com, yahoo.com (each as a one-element array).
for (const host of [str1, str2, str3]) {
  console.log(host.match(reg));
}
Simple javascript:
/**
 * Returns the last two dot-separated labels of a host name.
 *
 * @param {string} str - Host name such as "x.domain.com".
 * @returns {string|null} "label.label", or null (after logging a message)
 *   when the input contains no dot.
 */
function getDomain(str) {
  const labels = str.split(".");
  if (labels.length >= 2) {
    const [secondLevel, topLevel] = labels.slice(-2);
    return `${secondLevel}.${topLevel}`;
  }
  console.log("Invalid domain name");
  return null;
}
let str1 = "x.domain.com";
let str2 = "x.sdfsdf.google.com";
let str3 = "www.subdomain.yahoo.com";
console.log(getDomain(str1)); // domain.com
console.log(getDomain(str2)); // google.com
console.log(getDomain(str3)); //yahoo.com
This is regular expression for domain name
"[a-zA-Z0-9][a-zA-Z0-9-]{1,61}[a-zA-Z0-9]\.[a-zA-Z]{2,}$"
should also manage x.x.domain.co.uk
original pattern uses
"(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]"
Good luck
I know you didn't specifically ask for it, but you may want to consider country codes as top-level domains as well (e.g. .au, .uk).
If required, you could achieve it with the following:
/**
 * Extracts the trailing "name.tld" (or "name.tld.cc" when the last label is a
 * listed two-letter country code) from the end of the given string.
 *
 * @param {string} domain - Text to inspect; the demo calls pass whole addresses.
 * @returns {string|null} The captured domain, or null when nothing matches.
 */
function getDomainFromEmail(domain) {
  // Anchored at the end of the input; the optional last group is the country-code list.
  const domainExpression = /((\w+)?(\.\w+)(\.(ad|ae|af|ag|ai|al|am|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw))?)$/i;
  const result = domainExpression.exec(domain);
  if (result === null) {
    return null;
  }
  const [, capturedDomain] = result;
  return capturedDomain;
}
console.log(getDomainFromEmail('email#example.com'));
console.log(getDomainFromEmail('email#sub.example.com'));
console.log(getDomainFromEmail('email#sub.sub.example.com'));
console.log(getDomainFromEmail('email#example.com.au'));
console.log(getDomainFromEmail('email#example.co.uk'));
console.log(getDomainFromEmail('email#sub.example.co.uk'));
console.log(getDomainFromEmail('email#sub.sub.example.co.uk'));
console.log(getDomainFromEmail('email#bit.ly'));
console.log(getDomainFromEmail('email#domain.other'));
console.log(getDomainFromEmail('email#nomatch'));
The long expression (ad|ae|...|zm|zw) is a list of country codes combined into a regular expression.
How about
(#).*
The first group of the output is # and the second group is domain.com

Regex match cookie value and remove hyphens

I'm trying to extract out a group of words from a larger string/cookie that are separated by hyphens. I would like to replace the hyphens with a space and set to a variable. Javascript or jQuery.
As an example, the larger string has a name and value like this within it:
facility=34222%7CConner-Department-Store;
(notice the leading "C")
So first, I need to match()/find facility=34222%7CConner-Department-Store; with regex. Then break it down to "Conner Department Store"
var cookie = document.cookie;
var facilityValue = cookie.match( REGEX ); ??
// Sample cookie string with the wanted "facility" entry between two others.
var test = "store=874635%7Csomethingelse;facility=34222%7CConner-Department-Store;store=874635%7Csomethingelse;";
// Replace the whole string with the URL-decoded value of the facility entry.
var test2 = test.replace(
  /^(.*)facility=([^;]+)(.*)$/,
  (wholeMatch, before, encodedValue) => decodeURIComponent(encodedValue)
);
console.log( test2 );
// Keep the part after "|" and turn every hyphen into a space.
console.log( test2.split('|')[1].replace(/[-]/g, ' ') );
If I understood it correctly, you want to make a phrase by getting all the words between hyphens and disallowing two successive Uppercase letters in a word, so I'd prefer using Regex in that case.
This is a Regex solution, that works dynamically with any cookies in the same format and extract the wanted sentence from it:
var matches = str.match(/([A-Z][a-z]+)-?/g);
console.log(matches.map(function(m) {
return m.replace('-', '');
}).join(" "));
Demo:
var str = "facility=34222%7CConner-Department-Store;";
// Each match is a capitalized word plus its optional trailing hyphen.
var matches = str.match(/([A-Z][a-z]+)-?/g);
// Strip the hyphen from every word, then join the words with spaces.
console.log(matches.map((word) => word.replace('-', '')).join(" "));
Explanation:
Use this regex, /([A-Z][a-z]+)-?/g, to match the words between the - characters.
Replace any - occurrence in the matched words.
Then just join these matches array with white space.
Ok,
first, you should decode this string as follows:
var str = "facility=34222%7CConner-Department-Store;"
var decoded = decodeURIComponent(str);
// decoded = "facility=34222|Conner-Department-Store;"
Then you have multiple possibilities to split up this string.
The easiest way is to use substring()
var solution1 = decoded.substring(decoded.indexOf('|') + 1, decoded.length)
// solution1 = "Conner-Department-Store;"
// A string pattern only replaces the FIRST hyphen; a global regex replaces them all.
solution1 = solution1.replace(/-/g, ' ');
// solution1 = "Conner Department Store;"
As you can see, substring(arg1, arg2) returns the string, starting at index arg1 and ending at index arg2. See Full Documentation here
If you want to cut the last ; just set decoded.length - 1 as arg2 in the snippet above.
decoded.substring(decoded.indexOf('|') + 1, decoded.length - 1)
//returns "Conner-Department-Store"
or all above in just one line:
// Global regex so that every hyphen (not just the first) becomes a space.
decoded.substring(decoded.indexOf('|') + 1, decoded.length - 1).replace(/-/g, ' ')
If you want still to use a regular Expression to retrieve (perhaps more) data out of the string, you could use something similar to this snippet:
// Parse "<name>=<digits>|<value>" out of `decoded` (defined by the preceding
// snippet) and turn the value's hyphens into spaces. solution2 stays ""
// when the pattern does not match.
var solution2 = "";
var regEx= /([A-Za-z]*)=([0-9]*)\|(\S[^:\/?#\[\]\#\;\,']*)/;
// A single match() call replaces the original test()-then-match() pair.
var parts = decoded.match(regEx);
if (parts !== null) {
/* parts is
[0:"facility=34222|Conner-Department-Store",
1:"facility",
2:"34222",
3:"Conner-Department-Store",
index:0,
input:"facility=34222|Conner-Department-Store;"
length:4] */
// Global regex: replace('-', ' ') would only replace the first hyphen.
solution2 = parts[3].replace(/-/g, ' ');
// "Conner Department Store"
}
I have applied some rules for the regex to work; feel free to modify them according to your needs.
facility can be any Word built with alphabetical characters lower and uppercase (no other chars) at any length
= needs to be the char =
34222 can be any number but no other characters
| needs to be the char |
Conner-Department-Store can be any characters except one of the following (reserved delimiters): :/?#[]#;,'
Hope this helps :)
edit: to find only the part
facility=34222%7CConner-Department-Store; just modify the regex to
match facility= instead of ([A-Za-z]*)=:
/(facility)=([0-9]*)\|(\S[^:\/?#\[\]\#\;\,']*)/
You can use cookies.js, a mini framework from MDN (Mozilla Developer Network).
Simply include the cookies.js file in your application, and write:
// Look the cookie up by its name ("facility"), not by the text being extracted.
docCookies.getItem("facility");

Extract word between '=' and '('

I have the following string
234234=AWORDHERE('sdf.'aa')
where I need to extract AWORDHERE.
Sometimes there can be space in between.
234234= AWORDHERE('sdf.'aa')
Can I do this with a regular expression?
Or should I do it manually by finding indexes?
The datasets are huge, so it's important to do it as fast as possible.
Try this regex:
\d+=\s?(\w+)\(
Check Demo
In JavaScript it would look like this:
var myString = "234234=AWORDHERE('sdf.'aa')";// or 234234= AWORDHERE('sdf.'aa')
// digits, "=", an optional whitespace character, then the captured word before "(".
// No `g` flag: with it, exec() would resume from myRegexp.lastIndex on the
// next call instead of starting at the beginning of the string.
var myRegexp = /\d+=\s?(\w+)\(/;
var match = myRegexp.exec(myString);
console.log(match[1]); // AWORDHERE
You could do this at least three ways. You need to benchmark to see what's fastest.
Substring w/ indexes
/**
 * Returns the word between the first "=" and the first "(" that follows it.
 *
 * @param {string} from - Input such as "234234= AWORDHERE('sdf.'aa')".
 * @returns {string} The word with surrounding whitespace removed, or ""
 *   when either delimiter is missing.
 */
function extract(from) {
  var ixEq = from.indexOf("=");
  // Search for "(" only after the "=" so an earlier parenthesis cannot
  // produce a reversed range.
  var ixParen = ixEq === -1 ? -1 : from.indexOf("(", ixEq + 1);
  if (ixEq === -1 || ixParen === -1) {
    return "";
  }
  // trim(): the input may contain a space after "=".
  return from.substring(ixEq + 1, ixParen).trim();
}
.
Splits
/**
 * Returns the word between the first "=" and the "(" that follows it,
 * using string splits.
 *
 * @param {string} from - Input such as "234234= AWORDHERE('sdf.'aa')".
 * @returns {string} The word with surrounding whitespace removed, or ""
 *   when either delimiter is missing.
 */
function extract(from) {
  var spEq = from.split("=");
  if (spEq.length < 2) {
    return ""; // no "=" in the input
  }
  var spParen = spEq[1].split("(");
  if (spParen.length < 2) {
    return ""; // no "(" after the "="
  }
  // trim(): the input may contain a space after "=".
  return spParen[0].trim();
}
Regex (demo)
Here is some sample regex you could use
/[^=]+=([^(]+).*/g
This says
[^=]+ - One or more character which is not an =
= - The = itself
( - creates a matching group so you can access your match in code
[^(]+ - One or more character which is not a (
) - closes the matching group
.* - Matches the rest of the line
The /g flag on the end makes the regex global, i.e. it finds every match in the input instead of stopping after the first one.
Using look around you can search for string preceded by = and followed by ( as following.
Regex: (?<==)[A-Z ]+(?=\()
Explanation:
(?<==) checks if [A-Z ] is preceded by an =.
[A-Z ]+ matches your pattern.
(?=\() checks if matched pattern is followed by a (.
Regex101 Demo
var str = "234234= AWORDHERE('sdf.'aa')";
// \s* (not \s+) so the variant without a space after "=" matches too.
// No `g` flag: only one match is wanted and exec() on a global regex is stateful.
var regexp = /.*=\s*(\w+)\(.*\)/;
var match = regexp.exec(str);
alert( match[1] ); // AWORDHERE
I made my solution for this just a little more general than you asked for, but I don't think it takes much more time to execute. I didn't measure. If you need greater efficiency than this provides, comment and I or someone else can help you with that.
Here's what I did, using the command prompt of node:
> var s = "234234= AWORDHERE('sdf.'aa')"
undefined
> var a = s.match(/(\w+)=\s*(\w+)\s*\(.*/)
undefined
> a
[ '234234= AWORDHERE(\'sdf.\'aa\')',
'234234',
'AWORDHERE',
index: 0,
input: '234234= AWORDHERE(\'sdf.\'aa\')' ]
>
As you can see, this matches the number before the = in a[1], and it matches the AWORDHERE name as you requested in a[2]. This will work with any number (including zero) spaces before and/or after the =.

Javascript Regex to get text between certain characters

I need a regex in Javascript that would allow me to match an order number in two different formats of order URL:
The URLs:
http://store.apple.com/vieworder/1003123464/test#test.com
http://store.apple.com/vieworder/W411234368/test#test.com/AOS-A=M-104121
The first one will always be all numbers, and the second one will always start with a W, followed by just numbers.
I need to be able to use a single regex to return these matches:
1003123464
W411234368
This is what I've tried so far:
/(vieworder\/)(.*?)(?=\/)/g
RegExr link
That allows me to match:
vieworder/1003123464
vieworder/W411234368
but I'd like it to not include the first capture group.
I know I could then run the result through a string.replace('vieworder/'), but it'd be cool to be able to do this in just one command.
Use your expression without grouping vieworder
vieworder\/(.*?)(?=\/)
DEMO
var string = 'http://store.apple.com/vieworder/1003123464/test#test.com http://store.apple.com/vieworder/W411234368/test#test.com/AOS-A=M-104121';
// Lazily capture what follows "vieworder/" up to (not including) the next "/".
var myRegEx = /vieworder\/(.*?)(?=\/)/g;
var index = 1; // capture group holding the order number
var matches = [];
var match;
// exec() on a global regex walks through the string one match per call
// and returns null once there are no more matches.
for (match = myRegEx.exec(string); match !== null; match = myRegEx.exec(string)) {
  matches.push(match[index]);
}
console.log(matches);
Use replace instead of match, since older JavaScript engines don't support lookbehinds (they were only added in ES2018). You could also use capturing groups and the exec method to print the characters inside a particular group.
> var s1 = 'http://store.apple.com/vieworder/1003123464/test#test.com'
undefined
> var s2 = 'http://store.apple.com/vieworder/W411234368/test#test.com/AOS-A='
undefined
> s1.replace(/^.*?vieworder\/|\/.*/g, '')
'1003123464'
> s2.replace(/^.*?vieworder\/|\/.*/g, '')
'W411234368'
OR
> s1.replace(/^.*?\bvieworder\/([^\/]*)\/.*/g, '$1')
'1003123464'
I'd suggest
W?\d+
That ought to translate to "one or zero W and one or more digits".

Extract all email addresses from bulk text using jquery

I have the text below:
sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmail.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com>
Here emails are separated by , or ;.
I want to extract all emails present above and store them in array. Is there any easy way using regex to get all emails directly?
Here's how you can approach this:
HTML
<p id="emails"></p>
JavaScript
var text = 'sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmal.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com> datum eternus hello+11#gmail.com';
/**
 * Finds every address-shaped token (local part, "#", dotted host) in the text.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]|null} All matches in order of appearance, or null when
 *   there are none.
 */
function extractEmails(text) {
  const emailPattern = /([a-zA-Z0-9._+-]+#[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi;
  return text.match(emailPattern);
}
$("#emails").text(extractEmails(text).join('\n'));
Result
sdabhikagathara#rediffmail.com,dsfassdfhsdfarkal#gmail.com,rfernsdfson#gmal.com,gyfanamosl#gmail.com,pidfpinfg#truformdftechnoproducts.com,nthfsskare#ysahoo.in,akashkatsdfsa#yahsdfsfoo.in,bimsdaalprakash#live.com,dfdmilifsd.ensfdfcogndfdfatia#gmail.com,hello+11#gmail.com
Source: Extract email from bulk text (with Regular Expressions, JavaScript & jQuery)
Demo 1 Here
Demo 2 Here using jQuery's each iterator function
You can use this regex:
var re = /(([^<>()[\]\\.,;:\s#\"]+(\.[^<>()[\]\\.,;:\s#\"]+)*)|(\".+\"))#((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))/g;
You can extract the e-mails like this:
('sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmail.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com>').match(re);
//["sdabhikagathara#rediffmail.com", "dsfassdfhsdfarkal#gmail.com", "rfernsdfson#gmail.com", "gyfanamosl#gmail.com", "pidfpinfg#truformdftechnoproducts.com", "nthfsskare#ysahoo.in", "akashkatsdfsa#yahsdfsfoo.in", "bimsdaalprakash#live.com", "dfdmilifsd.ensfdfcogndfdfatia#gmail.com"]
Just an update to the accepted answer. This does not work for "plus" signs in the email address. GMAIL supports emailaddress+randomtext#gmail.com.
I've updated to:
return text.match(/([a-zA-Z0-9._+-]+#[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi);
The function below is RFC 2822 compliant according to Regexr.com
ES5 :
/**
 * Returns all address-like matches in `value`.
 * Falsy input (e.g. "", null, undefined) is handed back unchanged; a string
 * without any match yields null.
 */
var extract = function (value) {
  if (!value) {
    return value;
  }
  var reg = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*#(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/g;
  return value.match(reg);
};
ES6 :
// Address pattern shared by every call to extract().
const reg = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*#(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/g;
// Falsy input is returned as-is; otherwise the list of matches (or null).
const extract = (value) => {
  if (!value) {
    return value;
  }
  return value.match(reg);
};
Regexr community source
/**
 * Parses entries of the form "Display Name" <address> out of a string.
 *
 * @param {string} input - Text containing zero or more such entries.
 * @returns {Array<{name: string, email: string}>} One object per entry,
 *   in order of appearance; empty when nothing matches.
 */
function GetEmailsFromString(input) {
  // Group 1: text inside the double quotes; group 2: text inside <...>.
  var pattern = /\"([^\"]+)\"\s+\<([^\>]+)\>/g;
  var found = [];
  for (var hit = pattern.exec(input); hit !== null; hit = pattern.exec(input)) {
    found.push({ name: hit[1], email: hit[2] });
  }
  return found;
}
var str = '"Name one" <foo#domain.com>, ..., "And so on" <andsoon#gmx.net>'
var emails = GetEmailsFromString(str)
Source
You don't need jQuery for that; JavaScript itself supports regexes built-in.
Have a look at Regular Expression for more info on using regex with JavaScript.
Other than that, I think you'll find the exact answer to your question somewhere else on Stack Overflow - How to find out emails and names out of a string in javascript
// Non-space run ending in a letter/digit, then "#", then a host made of
// letters, digits and dots. (The original "const = regex" was a syntax error.)
const regex = /\S+[a-z0-9]#[a-z0-9\.]+/img;
"hello sean#example.com how are you? do you know bob#example.com?".match(regex);
A bunch of the answers here include both lower/capital letter ranges [a-zA-Z] AND the case-insensitive regex flag i, which is redundant.
i modifier: insensitive. Case insensitive match (ignores case of [a-zA-Z]).
\d matches a digit (equivalent to [0-9]). (Domain extensions don't end with numeric characters.)
As a result, combining the i flag with the \d token gives a much more condensed and elegant expression.
/[a-z\d._+-]+#[a-z\d._-]+/gi
Demo
let input = 'sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmail.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com>'
/**
 * Returns every "local#host" token found in the given text.
 *
 * @param {string} string - Text to scan.
 * @returns {string[]|null} Matches in order of appearance, or null if none.
 */
function get_email(string) {
  const addressPattern = /[a-z\d._+-]+#[a-z\d._-]+/gi;
  return string.match(addressPattern);
}
$('#output').html(get_email(input).join('; '));
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<div id="output"></div>
See it live # https://regex101.com/r/OveC5B/1/

Categories

Resources