Regular expression for extracting domain from an input - javascript

I'm trying to extract the domain.com from an input that can be in the following formats and structure:
1. x.x.domain.com
2. x.domain.com
Once I am getting user's email, for example:
user#x.x.domain.com
I am able to remove the first part of the email address:
user#
by the following regex:
/^.+#/
I want to be able by using the regex over the 2 formats to get the domain.com right away and not manipulate the input several times until getting the domain.
I thought maybe to count the number of dots from the input and then to do some logic, but it looks so complex for this small solution.
Thanks!

Without RegExp: split the e-mail address twice, slice from the last split and join the result. Plus two RegExp ideas. Take your pick.
/**
 * Returns the last two dot-separated labels of the host part of an address.
 * The host part is the segment that follows the first "#" separator.
 * Throws a TypeError when the address contains no "#".
 */
const getDomain = (address) => {
  const host = address.split("#")[1];
  const labels = host.split(".");
  return labels.slice(-2).join(".");
};
/**
 * RegExp variant: returns the last two dot-separated labels of an address,
 * e.g. "user#x.x.domain.com" -> "domain.com".
 *
 * The pattern is anchored at the end of the string and excludes both "#" and
 * "." from a label, so it also works when the host has exactly two labels
 * ("user#domain.com"), when the user part contains dots
 * ("first.last#example.com") and when a label contains a hyphen.
 *
 * @param {string} address - address of the form user#host
 * @returns {string|null} the trailing "label.label", or null when the address
 *   does not end in two dot-separated labels
 */
const getDomainRE = (address) => {
  const match = /[^#.]+\.[^#.]+$/.exec(address);
  return match ? match[0] : null;
};
/**
 * RegExp variant: matches the final two "#label" / ".label" groups at the end
 * of the address and drops the leading separator character.
 * Throws a TypeError when the address does not end in two such groups.
 */
const getDomainRE2 = (address) => {
  const [tail] = address.match(/(?:(#|\.)\w+){2}$/);
  return tail.slice(1);
};
// Run each of the three extractors over the same sample addresses,
// one extractor at a time, logging one result per line.
const sampleAddresses = [
  "user#x.x.domain.com",
  "user#x.domain.com",
  "user#abc.x.y.z.domain.com",
];
for (const extractDomain of [getDomain, getDomainRE, getDomainRE2]) {
  for (const address of sampleAddresses) {
    console.log(extractDomain(address));
  }
}

Regex way:
// Pattern: the last two dot-separated labels at the very end of the string.
let reg = /[^.]+\.[^.]+$/gi;
let str1 = "x.domain.com";
let str2 = "x.sdfsdf.google.com";
let str3 = "www.subdomain.yahoo.com";
// Because of the g flag, match() returns an array of matches, so each call
// logs a one-element array: ["domain.com"], ["google.com"], ["yahoo.com"].
for (const host of [str1, str2, str3]) {
  console.log(host.match(reg));
}
Simple javascript:
/**
 * Returns the last two dot-separated labels of a host name joined by ".".
 * When the input has fewer than two labels it logs "Invalid domain name"
 * and returns null.
 */
function getDomain(str){
  const labels = str.split(".");
  if (labels.length >= 2) {
    const [secondLevel, topLevel] = labels.slice(-2);
    return `${secondLevel}.${topLevel}`;
  }
  console.log("Invalid domain name");
  return null;
}
let str1 = "x.domain.com";
let str2 = "x.sdfsdf.google.com";
let str3 = "www.subdomain.yahoo.com";
// Logs, in order: domain.com, google.com, yahoo.com
[str1, str2, str3].forEach((host) => {
  console.log(getDomain(host));
});

This is regular expression for domain name
"[a-zA-Z0-9][a-zA-Z0-9-]{1,61}[a-zA-Z0-9]\.[a-zA-Z]{2,}$"
should also manage x.x.domain.co.uk
original pattern uses
"(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]"
Good luck

I know you didn't specifically ask for it, but you may want to consider country codes a top level domains as well (e.g. .au, .uk).
If required, you could achieve it with the following:
/**
 * Extracts the registrable-looking domain from the end of an e-mail address
 * or host name, keeping a trailing two-letter country code when present
 * (e.g. "example.co.uk", "example.com.au").
 *
 * Labels may contain hyphens, so "email#my-domain.com" yields "my-domain.com"
 * rather than being cut at the hyphen.
 *
 * NOTE: the pattern cannot tell a second-level suffix such as "co" or "com"
 * from an ordinary label, so any "label.label.<country code>" ending is kept
 * whole: "email#sub.example.de" yields "sub.example.de".
 *
 * @param {string} domain - e-mail address or host name to inspect
 * @returns {string|null} the matched domain, or null when the input does not
 *   end in at least ".label"
 */
function getDomainFromEmail(domain) {
  // Group 1 wraps the whole match: optional label, ".label", optional ".<country code>".
  const domainExpression = /(([\w-]+)?(\.[\w-]+)(\.(ad|ae|af|ag|ai|al|am|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw))?)$/i;
  const match = domainExpression.exec(domain);
  return match ? match[1] : null;
}
// Log the extracted domain for a range of sample addresses, one per line.
const demoAddresses = [
  'email#example.com',
  'email#sub.example.com',
  'email#sub.sub.example.com',
  'email#example.com.au',
  'email#example.co.uk',
  'email#sub.example.co.uk',
  'email#sub.sub.example.co.uk',
  'email#bit.ly',
  'email#domain.other',
  'email#nomatch',
];
for (const address of demoAddresses) {
  console.log(getDomainFromEmail(address));
}
The long expression (ad|ae|...|zm|zw) is a list of country codes combined into a regular expression.

How about
(#).*
The first group of the output is # and the second group is domain.com

Related

Partially hide email address with RegEx and Javascript

I'm trying to use RegEx to hide email addresses, except for the first two characters and the email domain.
The function, in that case, is replacing the characters that I want to keep.
email.replace(/^[A-Za-z]{2}/, "**" ).replace(/#[^:]*/, "**" )
What I get: email#domain.com > **ail**
Expected: em***#domain.com
Anyone here who knows how can I change my RegEx to get the expected result?
Thanks!
I could only achieve that with a function in the replace. Not sure if that can be achieved with only regex tho. Check it out:
/**
 * Masks the user part of an address: the first two characters stay visible
 * and every following character up to the last "#" becomes "*".
 * Input that does not match the pattern is returned unchanged.
 */
let hideEmail = function(email) {
  const userPartPattern = /(.{2})(.*)(?=#)/;
  // Callback arguments: whole match, the two visible characters, the characters to mask.
  return email.replace(
    userPartPattern,
    (whole, visible, hidden) => visible + "*".repeat(hidden.length)
  );
};
// On every click of the button, log the masked form of the input's current value.
document.querySelector("button").addEventListener("click", () => {
  const { value: typedEmail } = document.querySelector("input");
  console.log(hideEmail(typedEmail));
});
<input type="email" value="abcdef#gmail.com">
<button>Hide e-mail</button>
This worked perfectly for me:
const email = "username#domain.tld";
// Group 1: the three leading word characters that stay visible.
// Group 2: the domain after "#". Whatever lies between becomes "***".
const maskPattern = /(\w{3})[\w.-]+#([\w.]+\w)/;
const partialEmail = email.replace(maskPattern, "$1***#$2");
console.log(partialEmail);
It captures first 3 characters (just replace the number as per your needs) in 1st group and the domain in 2nd.
You could try this, show first two characters and the rest always *** in the user part:
var email = "exampleexample#example.com";
// Everything in the user part beyond its first two characters gets hidden.
let [userPart] = email.split("#");
let hide = userPart.length - 2; // number of characters to hide
// Matches exactly `hide` characters directly in front of the "#".
var r = new RegExp(`.{${hide}}#`, "g");
email = email.replace(r, "***#");
console.log(email);
I don't know if the three * is a requirement, but I think is a good idea, because you hide the real length.
You can also have this,(?<=^[A-Za-z0-9]{2}).*?(?=#)
Demo
I found a solution in flutter of dart
"example#gmail.com".replaceRange(
0,
"example#gmail.com"
.indexOf("#") -
3,
"****")
It will show result like that-
****ple#gmail.com

Extract email address from string

I have a string like this:
Francesco Renga <francesco_renga-001#gmail.com>
I need to extract only the email, i.e. francesco_renga-001#gmail.com.
How can I do this in nodejs/javascript in "elegant" way?
Using regex, if your string pattern is always Some text<email> or Some text<email>, Some text<email> <email> you can use this simple one <(.*?)>
Demo
Other solution
Use positive lookahead : [^<]+(?=>), here is a snippet and a demo
var text = "Francesco Renga <francesco_renga-001#gmail.com>, Marty McFly <mmcfly#gmail.com> Marty McFly <mmcfly#gmail.com> <mmcfly2#gmail.com>";
// A run of characters that are neither "<" nor a space, immediately followed by ">".
var re = /[^< ]+(?=>)/g;
// Log every match on its own line.
for (const email of text.match(re)) {
  console.log(email);
}
Explanation
[^< ]+ matches any character except < or a space, between one and unlimited times
(?=>) followed by a >
Simple and does not require any group.
Here's a simple example showing how to use regex in JavaScript :
// Extract the text between angle brackets with RegExp.prototype.exec().
var string = "Francesco Renga <francesco_renga-001#gmail.com>"; // The string that contains the address
// ".*" is greedy: with several <...> pairs in the input, the capture runs
// from the first "<" to the last ">".
var regex = /<(.*)>/g; // The actual regex
// exec() returns null when nothing matches; matches[1] below would then throw.
var matches = regex.exec(string);
// Index 0 is the whole match including the brackets, index 1 is the captured group.
console.log(matches[1]);
Here's the decomposition of the regex /<(.*)>/ :
/ and / are mandatory to define a regex
< and > simply matches the two < and > in your string
() parenthesis "capture" what you're looking for. Here, they get the mail address inside.
.* : . means "any character", and * means "any number of times". Combined, it means "any character any number of times", and that is inside < and >, which correspond to the place where the mail is.
Here's a simple snippet showing how to extract the unique list of email addresses using JavaScript:
// Collect every e-mail-like token from `string`, normalise it and keep each
// address once. On success `emaillst` ends up as a newline-separated string
// of unique, lower-cased addresses; it stays null when nothing matched.
let emaillst = string.match(/([a-zA-Z0-9._+-]+#[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi);
if (emaillst === null) {
  // no Email Address Found
} else {
  // The pattern allows "." in the last label, so an address at the end of a
  // sentence picks up the full stop; strip it. Lower-case BEFORE deduplicating
  // so that addresses differing only in case collapse into one entry.
  const normalizedEmails = emaillst.map((address) =>
    address.replace(/\.+$/, '').toLowerCase()
  );
  emaillst = [...new Set(normalizedEmails)].join('\n');
  console.log(emaillst);
}
See the Live Demo of email address extractor online
Features
Get Unique Emails
Auto remove duplicate emails
convert upper case email address to lowercase

Extract links in a string and return an array of objects

I receive a string from a server and this string contains text and links (mainly starting with http://, https:// and www., very rarely different but if they are different they don't matter).
Example:
"simple text simple text simple text domain.ext/subdir again text text text youbank.com/transfertomealltheirmoney/witharegex text text text and again text"
I need a JS function that does the following:
- finds all the links (no matter if there are duplicates);
- returns an array of objects, each representing a link, together with keys that return where the link starts in the text and where it ends, something like:
[{link:"http://www.dom.ext/dir",startsAt:25,endsAt:47},
{link:"https://www.dom2.ext/dir/subdir",startsAt:57,endsAt:88},
{link:"www.dom.ext/dir",startsAt:176,endsAt:192}]
Is this possible? How?
EDIT: #Touffy: I tried this but I could not get how long is any string, only the starting index. Moreover, this does not detect www: var str = string with many links (SO does not let me post them)"
// Attempt quoted from the question; kept as posted. Two things to note:
//  - "www" is listed in the scheme alternation, so it only matches when it is
//    followed by "://"; a bare "www.example.ext" link is not matched.
//  - Each pushed object holds only startsAt, so the .link reads on the last
//    line log undefined (and throw when fewer than two matches were found).
var regex =/(\b(https?|ftp|file|www):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig; var result, indices = [];
while ( (result = regex.exec(str)) ) {
// result.index is where this match starts; the matched text itself is result[0].
indices.push({startsAt:result.index});
}; console.log(indices[0].link);console.log(indices[1].link);
One way to approach this would be with the use of regular expressions. Assuming whatever input, you can do something like
// Two alternatives: an http:// or https:// URL (with or without a leading
// "www."), or a bare link that starts with "www.". Both require a "." followed
// by at least two non-whitespace characters.
var expression = /(https?:\/\/(?:www\.|(?!www))[^\s\.]+\.[^\s]{2,}|www\.[^\s]+\.[^\s]{2,})/gi;
// With the g flag, match() returns an array of every matched substring, or null when there is none.
var matches = input.match(expression);
Then, you can iterate through the matches to discover their starting and ending points with the use of indexOf
// Build one { link, startsAt, endsAt } record per match and keep them all in
// `results` (endsAt is the index just past the last character of the link).
var results = [];
// Start each indexOf() search where the previous link ended; the matches come
// back in document order, so a link that occurs several times gets the
// position of its own occurrence instead of always the first one.
var searchFrom = 0;
for (const link of matches || [])
{
    const startsAt = input.indexOf(link, searchFrom);
    const endsAt = startsAt + link.length;
    results.push({ link: link, startsAt: startsAt, endsAt: endsAt });
    searchFrom = endsAt;
}
Of course, you may have to tinker with the regular expression itself to suit your specific needs.
You can see the results logged by console in this fiddle
/**
 * Puts a space in front of every "https:" occurrence, splits the text on
 * single spaces and returns the non-empty pieces.
 */
const getLinksPool = (links) =>
  links
    //you can replace the https with any links like http or www
    .replace(/https:/g, " https:")
    .split(" ")
    .filter((piece) => piece !== "");

Extract all email addresses from bulk text using jquery

I have the text below:
sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmail.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com>
Here the emails are separated by , or ;.
I want to extract all emails present above and store them in array. Is there any easy way using regex to get all emails directly?
Here's how you can approach this:
HTML
<p id="emails"></p>
JavaScript
var text = 'sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmal.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com> datum eternus hello+11#gmail.com';
/**
 * Returns every e-mail-like substring found in the text,
 * or null when the text contains none.
 */
function extractEmails (text)
{
    const emailPattern = /([a-zA-Z0-9._+-]+#[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi;
    const found = text.match(emailPattern);
    return found;
}
// Show the extracted addresses, one per line, as the text of the #emails element.
// extractEmails() returns null when nothing matches, in which case .join() throws.
$("#emails").text(extractEmails(text).join('\n'));
Result
sdabhikagathara#rediffmail.com,dsfassdfhsdfarkal#gmail.com,rfernsdfson#gmal.com,gyfanamosl#gmail.com,pidfpinfg#truformdftechnoproducts.com,nthfsskare#ysahoo.in,akashkatsdfsa#yahsdfsfoo.in,bimsdaalprakash#live.com,dfdmilifsd.ensfdfcogndfdfatia#gmail.com,hello+11#gmail.com
Source: Extract email from bulk text (with Regular Expressions, JavaScript & jQuery)
Demo 1 Here
Demo 2 Here using jQuery's each iterator function
You can use this regex:
// Local part: dot-separated runs of characters that are not <>()[]\.,;:#", or whitespace,
// or alternatively a double-quoted string. After the "#": either a bracketed dotted quad of
// 1-3 digit numbers, or dot-separated labels ending in at least two letters.
// The g flag makes String.prototype.match() return every occurrence.
var re = /(([^<>()[\]\\.,;:\s#\"]+(\.[^<>()[\]\\.,;:\s#\"]+)*)|(\".+\"))#((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))/g;
You can extract the e-mails like this:
('sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmail.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com>').match(re);
//["sdabhikagathara#rediffmail.com", "dsfassdfhsdfarkal#gmail.com", "rfernsdfson#gmail.com", "gyfanamosl#gmail.com", "pidfpinfg#truformdftechnoproducts.com", "nthfsskare#ysahoo.in", "akashkatsdfsa#yahsdfsfoo.in", "bimsdaalprakash#live.com", "dfdmilifsd.ensfdfcogndfdfatia#gmail.com"]
Just an update to the accepted answer. This does not work for "plus" signs in the email address. GMAIL supports emailaddress+randomtext#gmail.com.
I've updated to:
return text.match(/([a-zA-Z0-9._+-]+#[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi);
The function below is RFC 2822 compliant according to Regexr.com
ES5 :
/**
 * Returns every address found in `value` (lower-case input only, since the
 * pattern has no i flag), or null when there is none.
 * A falsy `value` is handed back unchanged.
 */
var extract = function(value) {
  if (!value) {
    return value;
  }
  var addressPattern = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*#(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/g;
  return value.match(addressPattern);
}
ES6 :
// Address pattern shared by every call (lower-case only, since there is no i flag).
const reg = /[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*#(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/g;
// Returns all matches in `value`, null when there is none; a falsy `value` is handed back as is.
const extract = (value) => {
  if (value) {
    return value.match(reg);
  }
  return value;
};
Regexr community source
/**
 * Finds every `"Display name" <address>` pair in the input and returns
 * them as a list of { name, email } objects (empty when nothing matches).
 */
function GetEmailsFromString(input) {
  var email = /\"([^\"]+)\"\s+\<([^\>]+)\>/g
  var ret = [];
  // exec() on a /g regex resumes after the previous match and yields null once exhausted.
  for (var match = email.exec(input); match !== null; match = email.exec(input)) {
    var entry = {'name': match[1], 'email': match[2]};
    ret.push(entry);
  }
  return ret;
}
// Sample input with two quoted names, each followed by a bracketed address.
var str = '"Name one" <foo#domain.com>, ..., "And so on" <andsoon#gmx.net>'
var emails = GetEmailsFromString(str)
Source
You don't need jQuery for that; JavaScript itself supports regexes built-in.
Have a look at Regular Expression for more info on using regex with JavaScript.
Other than that, I think you'll find the exact answer to your question somewhere else on Stack Overflow - How to find out emails and names out of a string in javascript
// Non-whitespace run ending in a letter or digit, then "#", then letters, digits and dots.
// (The original declaration read `const = regex = ...`, which is a syntax error.)
const regex = /\S+[a-z0-9]#[a-z0-9\.]+/img;
// Evaluates to ["sean#example.com", "bob#example.com"].
"hello sean#example.com how are you? do you know bob#example.com?".match(regex);
A bunch of the answers here include both lower/capital letters [a-zA-Z] AND the case-insensitive regex flag i, which is redundant.
i modifier: insensitive. Case insensitive match (ignores case of [a-zA-Z]).
\d matches a digit (equivalent to [0-9]). (Domain extensions don't end with numeric characters.)
As a result, combined with the \d token, we get a much more condensed and elegant expression.
/[a-z\d._+-]+#[a-z\d._-]+/gi
Demo
let input = 'sdabhikagathara#rediffmail.com, "assdsdf" <dsfassdfhsdfarkal#gmail.com>, "rodnsdfald ferdfnson" <rfernsdfson#gmail.com>, "Affdmdol Gondfgale" <gyfanamosl#gmail.com>, "truform techno" <pidfpinfg#truformdftechnoproducts.com>, "NiTsdfeSh ThIdfsKaRe" <nthfsskare#ysahoo.in>, "akasdfsh kasdfstla" <akashkatsdfsa#yahsdfsfoo.in>, "Bisdsdfamal Prakaasdsh" <bimsdaalprakash#live.com>,; "milisdfsfnd ansdfasdfnsftwar" <dfdmilifsd.ensfdfcogndfdfatia#gmail.com>'
/**
 * Returns every e-mail-like substring of `string` (case-insensitive),
 * or null when there is none.
 */
function get_email(string) {
    const emailPattern = /[a-z\d._+-]+#[a-z\d._-]+/gi;
    const found = string.match(emailPattern);
    return found;
};
// Render the addresses separated by "; " inside the #output element.
// get_email() returns null when nothing matches, in which case .join() throws.
$('#output').html(get_email(input).join('; '));
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<div id="output"></div>
See it live # https://regex101.com/r/OveC5B/1/

Node.js: Regular Expression to read email from string

How can I take an e-mail address from "XXX <email#email.com>" ? I don't want to get the "< >".
Thanks!
Here's one based on Tejs' answer. Simple to understand and I think a bit more elegant
// Split on < or >
var parts = "XXX <email#email.com>".split(/[<>]/);
// First piece is the display name (with its trailing space), second is the address.
var [name, email] = parts;
Really simply (no need for regex!)
// Whatever follows the first "<" ends up in components[1]; drop its first ">".
var components = emailAddress.split('<');
if (components.length >= 2) {
    var emailAddress = components[1].replace('>', '');
}
/**
 * Collects every `"Display name" <address>` pair found in the input
 * as { name, email } objects; returns an empty array when none is found.
 */
function getEmailsFromString(input) {
  var email = /\"([^\"]+)\"\s+\<([^\>]+)\>/g;
  var ret = [];
  while (true) {
    // Each exec() call continues after the previous match; null means no more pairs.
    var match = email.exec(input);
    if (match === null) {
      break;
    }
    ret.push({'name': match[1], 'email': match[2]});
  }
  return ret;
}
// Sample input with two quoted names, each followed by a bracketed address.
var str = '"Name one" <foo#domain.com>, ..., "And so on" <andsoon#gmx.net>';
var emails = getEmailsFromString(str);
credit: How to find out emails and names out of a string in javascript
^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*#[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$
Matches e-mail addresses, including some of the newer top-level-domain extensions, such as info, museum, name, etc. Also allows for emails tied directly to IP addresses.
This regex will work for your example.
/<([^>]+)/
It searches for anything after the '<' that is not a '>' and that is returned in your matches.
To just grab what's inside the angle brackets, you can use the following:
// Captures whatever sits between the first "<" and the last ">" (".*" is greedy).
var pattern = /<(.*)>/;
// exec() returns null when the input contains no "<...>" pair.
pattern.exec("XXX <foo#bar.com>"); // this returns ["<foo#bar.com>", "foo#bar.com"]
Not positive if I'm understanding you correctly. If you want to get the email domain ie gmail.com or hotmail.com then you could just use
// Position of the "#" that separates the user part from the domain (-1 when absent).
var x = string.indexOf("#");
// The method is spelled substring() (lower-case "s"); start one past the
// separator so y holds only the domain. y is "" when there is no separator.
var y = x === -1 ? "" : string.substring(x + 1);
this will give you the string y as the email domain.

Categories

Resources