Extract all email addresses from bulk text using jquery - javascript

I'm having the this text below:
sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmail.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com>
Here emails are seprated by , or ;.
I want to extract all emails present above and store them in array. Is there any easy way using regex to get all emails directly?

Here's how you can approach this:
HTML
<p id="emails"></p>
JavaScript
var text = 'sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmal.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com> datum eternus hello+11#gmail.com';
function extractEmails (text)
{
return text.match(/([a-zA-Z0-9._+-]+#[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi);
}
$("#emails").text(extractEmails(text).join('\n'));
Result
sdabhikagathara#rediffmail.com,dsfassdfhsdfarkal#gmail.com,rfernsdfson#gmal.com,gyfanamosl#gmail.com,pidfpinfg#truformdftechnoproducts.com,nthfsskare#ysahoo.in,akashkatsdfsa#yahsdfsfoo.in,bimsdaalprakash#live.com,dfdmilifsd.ensfdfcogndfdfatia#gmail.com,hello+11#gmail.com
Source: Extract email from bulk text (with Regular Expressions, JavaScript & jQuery)
Demo 1 Here
Demo 2 Here using jQuery's each iterator function

You can use this regex:
var re = /(([^<>()[\]\\.,;:\s#\"]+(\.[^<>()[\]\\.,;:\s#\"]+)*)|(\".+\"))#((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))/g;
You can extract the e-mails like this:
('sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmail.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com>').match(re);
//["sdabhikagathara#rediffmail.com", "dsfassdfhsdfarkal#gmail.com", "rfernsdfson#gmail.com", "gyfanamosl#gmail.com", "pidfpinfg#truformdftechnoproducts.com", "nthfsskare#ysahoo.in", "akashkatsdfsa#yahsdfsfoo.in", "bimsdaalprakash#live.com", "dfdmilifsd.ensfdfcogndfdfatia#gmail.com"]

Just an update to the accepted answer. This does not work for "plus" signs in the email address. GMAIL supports emailaddress+randomtext#gmail.com.
I've updated to:
return text.match(/([a-zA-Z0-9._+-]+#[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi);

The bellow function is RFC2822 compliant according to Regexr.com
ES5 :
var extract = function(value) {
var reg = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*#(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/g;
return value && value.match(reg);
}
ES6 :
const reg = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*#(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/g
const extract = value => value && value.match(reg)
Regexr community source

function GetEmailsFromString(input) {
var ret = [];
var email = /\"([^\"]+)\"\s+\<([^\>]+)\>/g
var match;
while (match = email.exec(input))
ret.push({'name':match[1], 'email':match[2]})
return ret;
}
var str = '"Name one" <foo#domain.com>, ..., "And so on" <andsoon#gmx.net>'
var emails = GetEmailsFromString(str)
Source

You don't need jQuery for that; JavaScript itself supports regexes built-in.
Have a look at Regular Expression for more info on using regex with JavaScript.
Other than that, I think you'll find the exact answer to your question somewhere else on Stack Overflow - How to find out emails and names out of a string in javascript

const = regex = /\S+[a-z0-9]#[a-z0-9\.]+/img
"hello sean#example.com how are you? do you know bob#example.com?".match(regex)

A bunch of the answer in here are including lower/capital letters [a-zA-Z] AND the insensitive regex flag i, which is nonsense.
i modifier: insensitive. Case insensitive match (ignores case of [a-zA-Z]).
\d matches a digit (equivalent to [0-9])As domain extensions don't end with numeric characters).
As a result, combined with the \d token. we get a much more condenses and elegant sentence.
/[a-z\d._+-]+#[a-z\d._-]+/gi
Demo
let input = 'sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmail.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com>'
function get_email(string) {
return string.match(/[a-z\d._+-]+#[a-z\d._-]+/gi)
};
$('#output').html(get_email(input).join('; '));
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<div id="output"></div>
See it live # https://regex101.com/r/OveC5B/1/

Related

Regex match cookie value and remove hyphens

I'm trying to extract out a group of words from a larger string/cookie that are separated by hyphens. I would like to replace the hyphens with a space and set to a variable. Javascript or jQuery.
As an example, the larger string has a name and value like this within it:
facility=34222%7CConner-Department-Store;
(notice the leading "C")
So first, I need to match()/find facility=34222%7CConner-Department-Store; with regex. Then break it down to "Conner Department Store"
var cookie = document.cookie;
var facilityValue = cookie.match( REGEX ); ??
var test = "store=874635%7Csomethingelse;facility=34222%7CConner-Department-Store;store=874635%7Csomethingelse;";
var test2 = test.replace(/^(.*)facility=([^;]+)(.*)$/, function(matchedString, match1, match2, match3){
return decodeURIComponent(match2);
});
console.log( test2 );
console.log( test2.split('|')[1].replace(/[-]/g, ' ') );
If I understood it correctly, you want to make a phrase by getting all the words between hyphens and disallowing two successive Uppercase letters in a word, so I'd prefer using Regex in that case.
This is a Regex solution, that works dynamically with any cookies in the same format and extract the wanted sentence from it:
var matches = str.match(/([A-Z][a-z]+)-?/g);
console.log(matches.map(function(m) {
return m.replace('-', '');
}).join(" "));
Demo:
var str = "facility=34222%7CConner-Department-Store;";
var matches = str.match(/([A-Z][a-z]+)-?/g);
console.log(matches.map(function(m) {
return m.replace('-', '');
}).join(" "));
Explanation:
Use this Regex (/([A-Z][a-z]+)-?/g to match the words between -.
Replace any - occurence in the matched words.
Then just join these matches array with white space.
Ok,
first, you should decode this string as follows:
var str = "facility=34222%7CConner-Department-Store;"
var decoded = decodeURIComponent(str);
// decoded = "facility=34222|Conner-Department-Store;"
Then you have multiple possibilities to split up this string.
The easiest way is to use substring()
var solution1 = decoded.substring(decoded.indexOf('|') + 1, decoded.length)
// solution1 = "Conner-Department-Store;"
solution1 = solution1.replace('-', ' ');
// solution1 = "Conner Department Store;"
As you can see, substring(arg1, arg2) returns the string, starting at index arg1 and ending at index arg2. See Full Documentation here
If you want to cut the last ; just set decoded.length - 1 as arg2 in the snippet above.
decoded.substring(decoded.indexOf('|') + 1, decoded.length - 1)
//returns "Conner-Department-Store"
or all above in just one line:
decoded.substring(decoded.indexOf('|') + 1, decoded.length - 1).replace('-', ' ')
If you want still to use a regular Expression to retrieve (perhaps more) data out of the string, you could use something similar to this snippet:
var solution2 = "";
var regEx= /([A-Za-z]*)=([0-9]*)\|(\S[^:\/?#\[\]\#\;\,']*)/;
if (regEx.test(decoded)) {
solution2 = decoded.match(regEx);
/* returns
[0:"facility=34222|Conner-Department-Store",
1:"facility",
2:"34222",
3:"Conner-Department-Store",
index:0,
input:"facility=34222|Conner-Department-Store;"
length:4] */
solution2 = solution2[3].replace('-', ' ');
// "Conner Department Store"
}
I have applied some rules for the regex to work, feel free to modify them according your needs.
facility can be any Word built with alphabetical characters lower and uppercase (no other chars) at any length
= needs to be the char =
34222 can be any number but no other characters
| needs to be the char |
Conner-Department-Store can be any characters except one of the following (reserved delimiters): :/?#[]#;,'
Hope this helps :)
edit: to find only the part
facility=34222%7CConner-Department-Store; just modify the regex to
match facility= instead of ([A-z]*)=:
/(facility)=([0-9]*)\|(\S[^:\/?#\[\]\#\;\,']*)/
You can use cookies.js, a mini framework from MDN (Mozilla Developer Network).
Simply include the cookies.js file in your application, and write:
docCookies.getItem("Connor Department Store");

Extract email address from string

I have a string like this:
Francesco Renga <francesco_renga-001#gmail.com>
I need to extract only the email, i.e. francesco_renga-001#gmail.com.
How can I do this in nodejs/javascript in "elegant" way?
Using regex, if your string pattern is always Some text<email> or Some text<email>, Some text<email> <email> you can use this simple one <(.*?)>
Demo
Other solution
Use positive lookahead : [^<]+(?=>), here is a snippet and a demo
var text = "Francesco Renga <francesco_renga-001#gmail.com>, Marty McFly <mmcfly#gmail.com> Marty McFly <mmcfly#gmail.com> <mmcfly2#gmail.com>";
var re = /[^< ]+(?=>)/g;
text.match(re).forEach(function(email) {
console.log(email);
});
Explanation
[^<]+ match anything but a <between one and unlimited times
(?=>) followed by a >
Simple and does not require any group.
Here's a simple example showing how to use regex in JavaScript :
var string = "Francesco Renga <francesco_renga-001#gmail.com>"; // Your string containing
var regex = /<(.*)>/g; // The actual regex
var matches = regex.exec(string);
console.log(matches[1]);
Here's the decomposition of the regex /<(.*)>/ :
/ and / are mandatory to define a regex
< and > simply matches the two < and > in your string
() parenthesis "capture" what you're looking for. Here, they get the mail address inside.
.* : . means "any character", and * means "any number of times. Combined, it means "any character any number of times", and that is inside < and >, which correspond to the place where the mail is.
Here's a simple code showing how extract the unique list of emails address using JavaScript :
let emaillst = string .match(/([a-zA-Z0-9._+-]+#[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi);
if (emaillst === null) {
// no Email Address Found
} else {
const uniqueEmail = Array.from(new Set(emaillst));
const finaluniqueEmail = [];
for(let i=0; i<=uniqueEmail.length; i++){
let characterIs = String(uniqueEmail[i]).charAt(String(uniqueEmail[i]).length - 1)
if(characterIs==='.'){
finaluniqueEmail.push(String(uniqueEmail[i].slice(0, -1)))
}else{
finaluniqueEmail.push(uniqueEmail[i]);
}
}
emaillst = finaluniqueEmail.join('\n').toLowerCase();
console.log(matches[1]);
See the Live Demo of email address extractor online
Features
Get Unique Emails
Auto remove duplicate emails
convert upper case email address to lowercase

Filtering a regular expression in string with Javascript

I have a javascript string with some contact information I would like to filter. In example, if an email or phone is written, these should be replaced with a mask.
I´m trying to do something like:
function(message) {
var filteredMessage = message.replace("/^([\da-z_\.-]+)#([\da-z\.-]+)\.([a-z\.]{2,6})$/", "<Email>");
return filteredMessage;
}
but this is not working.
You may try this:
function filterMessage(message) {
var m = message.replace(/[\da-z_\.-]+#[\da-z\.-]+\.[a-z\.]{2,6}/, "<Email>");
return m;
}
alert(filterMessage("Your Email: bla#fasel.de lorem Ipsum dolor"));
Working fiddle here: http://jsfiddle.net/ochxdkam/
Issues with your script:
No function name
Regular Expression in ", must be without
I prefer to use a very loose regex to search for email addresses, because there are so many variations in how email addresses are formatted. You might try, for example:
/\S+#\S+\.\S+/
This will match a#b.c where one or more non-whitespace characters are allowed in place of a, b, and c.
You could use this in place of the regex in strah's answer, just posted.
Try this:
function filterMessage(message) {
//var reg = /^([\da-z_\.-]+)#([\da-z\.-]+)\.([a-z\.]{2,6})$/;
var reg = /\b(\S+#\S+\.\S+)\b/g;
var filteredMessage = message.replace(reg, "<Email>");
return filteredMessage;
}
filterMessage("email.example.com will be replaced, so will be next#example.com");
or you could use this regExp: /\b(\S+#\S+\.\S+)\b/g (from Kieran Pot's answer) and then you could use your function to do a little better filtering.

Javascript Regex to get text between certain characters

I need a regex in Javascript that would allow me to match an order number in two different formats of order URL:
The URLs:
http://store.apple.com/vieworder/1003123464/test#test.com
http://store.apple.com/vieworder/W411234368/test#test.com/AOS-A=
M-104121
The first one will always be all numbers, and the second one will always start with a W, followed by just numbers.
I need to be able to use a single regex to return these matches:
1003123464
W411234368
This is what I've tried so far:
/(vieworder\/)(.*?)(?=\/)/g
RegExr link
That allows me to match:
vieworder/1003123464
vieworder/W411234368
but I'd like it to not include the first capture group.
I know I could then run the result through a string.replace('vieworder/'), but it'd be cool to be able to do this in just one command.
Use your expression without grouping vieworder
vieworder\/(.*?)(?=\/)
DEMO
var string = 'http://store.apple.com/vieworder/1003123464/test#test.com http://store.apple.com/vieworder/W411234368/test#test.com/AOS-A=M-104121';
var myRegEx = /vieworder\/(.*?)(?=\/)/g;
var index = 1;
var matches = [];
var match;
while (match = myRegEx.exec(string)) {
matches.push(match[index]);
}
console.log(matches);
Use replace instead of match since js won't support lookbehinds. You could use capturing groups and exec method to print the chars present inside a particular group.
> var s1 = 'http://store.apple.com/vieworder/1003123464/test#test.com'
undefined
> var s2 = 'http://store.apple.com/vieworder/W411234368/test#test.com/AOS-A='
undefined
> s1.replace(/^.*?vieworder\/|\/.*/g, '')
'1003123464'
> s2.replace(/^.*?vieworder\/|\/.*/g, '')
'W411234368'
OR
> s1.replace(/^.*?\bvieworder\/([^\/]*)\/.*/g, '$1')
'1003123464'
I'd suggest
W?\d+
That ought to translate to "one or zero W and one or more digits".

Javascript regex to bring back all symbol matches?

I need a javascript regex object that brings back any matches of symbols in a string,
take for example the following string:
input = !"£$[]{}%^&*:#\~#';/.,<>\|¬`
then the following code:
input.match(regExObj,"g");
would return an array of matches:
[[,!,",£,$,%,^,&,*,:,#,~,#,',;,/,.,,,<,>,\,|,¬,`,]]
I have tried the following with no luck.
match(/[U+0021-U+0027]/g);
and I cannot use the following because I need to allow none ascii chars, for example Chinese characters.
[^0-9a-zA-Z\s]
var re = /[!"\[\]{}%^&*:#~#';/.<>\\|`]/g;
var matches = [];
var someString = "aejih!\"£$[]{}%^&*:#\~#';/.,<>\\|¬`oejtoj%";
while(match = re.exec(someString)) {
matches.push(match[1]);
}
Getting
['!','"','[',']','{','}','%','^','&','*',':','#','~','#',''',';','/','.','<','>','\','|','`','%]
What about
/[!"£$\[\]{}%^&*:#\\~#';\/.,<>|¬`]/g
?

Categories

Resources