Get domain name without subdomains using JavaScript? - javascript

How to get the domain name without subdomains?
e.g. if the url is "http://one.two.roothost.co.uk/page.html" how to get "roothost.co.uk"?

Following is a solution to extract a domain name without any subdomains. This solution doesn't make any assumptions about the URL format, so it should work for any URL. Since some domain names have one suffix (.com), and some have two or more (.co.uk), to get an accurate result in all cases, we need to parse the hostname using the Public Suffix List, which contains a list of all public domain name suffixes.
Solution
First, include the public suffix list js api in a script tag in your HTML, then in JavaScript to get the hostname you can call:
// psl.parse() looks the hostname up in the Public Suffix List; `.domain` is
// the part without subdomains ("roothost.co.uk" for this input).
var parsed = psl.parse('one.two.roothost.co.uk');
console.log(parsed.domain);
...which will return "roothost.co.uk". To get the name from the current page, you can use location.hostname instead of a static string:
// Same lookup, but for the hostname of the page that is currently loaded.
var parsed = psl.parse(location.hostname);
console.log(parsed.domain);
Finally, if you need to parse a domain name directly out of a full URL string, you can use the following:
var url = "http://one.two.roothost.co.uk/page.html";
// URL#hostname excludes the port and any "user:pass@" credentials, which
// url.split("/")[2] would have passed on to psl.parse().
url = new URL(url).hostname; // Get the hostname
var parsed = psl.parse(url); // Parse the domain
document.getElementById("output").textContent = parsed.domain;
JSFiddle Example (it includes the entire minified library in the jsFiddle, so scroll down!): https://jsfiddle.net/6aqdbL71/2/

What about this?
/**
 * Heuristically strips subdomains from a hostname without a TLD list.
 *
 * Labels of at most MAX_TLD_LENGTH characters are assumed to belong to the
 * suffix ("com", "co", "uk", "ar", ...). The result starts at the right-most
 * longer label that precedes the final label. The final label is never chosen
 * as the start, so a long TLD such as "museum" is not returned on its own.
 *
 * Limitation: a registrable name of three characters or fewer (e.g. "bbc" in
 * "www.bbc.co.uk") looks like a suffix label, so such hostnames come back
 * unchanged.
 *
 * @param {string} hostname - Dot-separated hostname, e.g. "mail.google.com".
 * @returns {string} The hostname with leading subdomain labels removed.
 */
function getCanonicalHost(hostname) {
  const MAX_TLD_LENGTH = 3;
  const labels = hostname.split('.');
  // Scan right to left, skipping the final label; fall back to the whole
  // hostname (start = 0) when no label is long enough.
  let start = 0;
  for (let i = labels.length - 2; i >= 0; i--) {
    if (labels[i].length > MAX_TLD_LENGTH) {
      start = i;
      break;
    }
  }
  return labels.slice(start).join('.');
}
console.log(getCanonicalHost('mail.google.com'));
console.log(getCanonicalHost('some.google.com.ar'));
console.log(getCanonicalHost('some.another.google.com.ar'));
console.log(getCanonicalHost('foo.bar.google.com'));
console.log(getCanonicalHost('foo.bar.google.com.ar'));
console.log(getCanonicalHost('bar.google.ar'));
It works because https://developer.mozilla.org/en-US/docs/Learn/Common_questions/What_is_a_domain_name says:
TLDs can contain special as well as latin characters. A TLD's maximum length is 63 characters, although most are around 2–3.
https://data.iana.org/TLD/tlds-alpha-by-domain.txt lists 1481 TLDs; 466 of them are 2–3 characters long, and the most commonly used TLDs are no longer than 3 characters.
If you need a solution that works with all TLDs, here is a more complex approach:
/**
 * Strips subdomains from a hostname using the TLD list in
 * `getCanonicalHost.tlds`, which is expected to be a Promise resolving to an
 * array of lower-case TLD strings.
 *
 * The result starts at the right-most label, other than the final one, that
 * is not in the list. The final label is never chosen as the start, so a
 * hostname with an unlisted last label is not reduced to that label alone.
 *
 * @param {string} hostname - Dot-separated hostname; case is ignored.
 * @returns {Promise<string>} The lower-cased hostname without subdomains.
 */
function getCanonicalHost(hostname) {
  return getCanonicalHost.tlds.then(function(tlds) {
    const known = new Set(tlds);
    const labels = hostname.toLowerCase().split('.');
    // Fall back to the whole hostname (start = 0) when every label is listed.
    let start = 0;
    for (let i = labels.length - 2; i >= 0; i--) {
      if (!known.has(labels[i])) {
        start = i;
        break;
      }
    }
    return labels.slice(start).join('.');
  });
}
// Downloads the IANA TLD list once and exposes it as a Promise that resolves
// to an array of lower-case TLD strings.
getCanonicalHost.tlds = new Promise(function(res, rej) {
  const TLD_LIST_URL = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt';
  const MAX_TLD_LENGTH = 63;
  const xhr = new XMLHttpRequest();
  xhr.addEventListener('error', rej);
  xhr.addEventListener('load', function() {
    // "load" also fires for HTTP error responses, so check the status.
    if (xhr.status < 200 || xhr.status >= 300) {
      rej(new Error('Failed to load TLD list: HTTP ' + xhr.status));
      return;
    }
    const tlds = xhr.responseText
      .split('\n')
      .map(function(line) { return line.trim().toLowerCase(); })
      // Drop blank lines and "#" comment lines (the file starts with one).
      .filter(function(line) { return line !== '' && line.charAt(0) !== '#'; })
      // 63 is itself a legal length, so the bound is inclusive.
      .filter(function(line) { return line.length <= MAX_TLD_LENGTH; });
    res(tlds);
  });
  xhr.open('GET', TLD_LIST_URL);
  xhr.send();
});
[
  'mail.google.com',
  'some.google.com.ar',
  'some.another.google.com.ar',
  'foo.bar.google.com',
  'foo.bar.google.com.ar',
  'bar.google.ar',
].forEach(function(host) {
  // Log failures too, so a failed download is not an unhandled rejection.
  getCanonicalHost(host).then(console.log, console.error);
});

You can use parse-domain to do the heavy lifting for you. This package considers the public suffix list and returns an easy to work with object breaking up the domain.
Here is an example from their readme:
npm install parse-domain
// Example from the parse-domain readme: split a hostname into its subdomain
// labels, its domain label and its public-suffix labels.
import { parseDomain, ParseResultType } from 'parse-domain';
const parseResult = parseDomain(
// should be a string with basic latin characters only. more details in the readme
'www.some.example.co.uk',
);
// check if the domain is listed in the public suffix list
if (parseResult.type === ParseResultType.Listed) {
const { subDomains, domain, topLevelDomains } = parseResult;
console.log(subDomains); // ["www", "some"]
console.log(domain); // "example"
console.log(topLevelDomains); // ["co", "uk"]
} else {
// more about other parseResult types in the readme
}

This works for me:
const firstTLDs = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|be|bf|bg|bh|bi|bj|bm|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|cl|cm|cn|co|cr|cu|cv|cw|cx|cz|de|dj|dk|dm|do|dz|ec|ee|eg|es|et|eu|fi|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jo|jp|kg|ki|km|kn|kp|kr|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|na|nc|ne|nf|ng|nl|no|nr|nu|nz|om|pa|pe|pf|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|yt".split('|');
const secondTLDs = "com|edu|gov|net|mil|org|nom|sch|caa|res|off|gob|int|tur|ip6|uri|urn|asn|act|nsw|qld|tas|vic|pro|biz|adm|adv|agr|arq|art|ato|bio|bmd|cim|cng|cnt|ecn|eco|emp|eng|esp|etc|eti|far|fnd|fot|fst|g12|ggf|imb|ind|inf|jor|jus|leg|lel|mat|med|mus|not|ntr|odo|ppg|psc|psi|qsl|rec|slg|srv|teo|tmp|trd|vet|zlg|web|ltd|sld|pol|fin|k12|lib|pri|aip|fie|eun|sci|prd|cci|pvt|mod|idv|rel|sex|gen|nic|abr|bas|cal|cam|emr|fvg|laz|lig|lom|mar|mol|pmn|pug|sar|sic|taa|tos|umb|vao|vda|ven|mie|北海道|和歌山|神奈川|鹿児島|ass|rep|tra|per|ngo|soc|grp|plc|its|air|and|bus|can|ddr|jfk|mad|nrw|nyc|ski|spy|tcm|ulm|usa|war|fhs|vgs|dep|eid|fet|fla|flå|gol|hof|hol|sel|vik|cri|iwi|ing|abo|fam|gok|gon|gop|gos|aid|atm|gsm|sos|elk|waw|est|aca|bar|cpa|jur|law|sec|plo|www|bir|cbg|jar|khv|msk|nov|nsk|ptz|rnd|spb|stv|tom|tsk|udm|vrn|cmw|kms|nkz|snz|pub|fhv|red|ens|nat|rns|rnu|bbs|tel|bel|kep|nhs|dni|fed|isa|nsn|gub|e12|tec|орг|обр|упр|alt|nis|jpn|mex|ath|iki|nid|gda|inc".split('|');
const knownSubdomains = "www|studio|mail|remote|blog|webmail|server|ns1|ns2|smtp|secure|vpn|m|shop|ftp|mail2|test|portal|ns|ww1|host|support|dev|web|bbs|ww42|squatter|mx|email|1|mail1|2|forum|owa|www2|gw|admin|store|mx1|cdn|api|exchange|app|gov|2tty|vps|govyty|hgfgdf|news|1rer|lkjkui";
/**
 * Removes subdomains from a hostname using the `knownSubdomains`,
 * `secondTLDs` and `firstTLDs` lists defined alongside this function.
 *
 * Steps: strip one well-known leading subdomain ("www.", "mail.", ...),
 * keep at most the last three labels, then drop the first of three labels
 * unless the last two look like a two-level suffix such as "com.ar".
 *
 * @param {string} s - Hostname, e.g. "www.ex-emple.com.ar".
 * @returns {string} The hostname without subdomains, e.g. "ex-emple.com.ar".
 */
function removeSubdomain(s) {
  // The backslash must be doubled: inside a template literal "\." is just
  // ".", which as a regex matches ANY character ("mxtoolbox" lost its "mx").
  const knownSubdomainsRegExp = new RegExp(`^(${knownSubdomains})\\.`, 'i');
  const parts = s.replace(knownSubdomainsRegExp, '').split('.');
  while (parts.length > 3) {
    parts.shift();
  }
  if (parts.length === 3) {
    const second = parts[1];
    const top = parts[2];
    // Two long trailing labels ("civilwar.museum") are name + TLD.
    const bothLong = second.length > 2 && top.length > 2;
    // Neither label is a known suffix part, so treat the first as a subdomain.
    const neitherListed = secondTLDs.indexOf(second) === -1 && firstTLDs.indexOf(top) === -1;
    if (bothLong || neitherListed) {
      parts.shift();
    }
  }
  return parts.join('.');
}
var tests = {
'www.sidanmor.com': 'sidanmor.com',
'exemple.com': 'exemple.com',
'argos.co.uk': 'argos.co.uk',
'www.civilwar.museum': 'civilwar.museum',
'www.sub.civilwar.museum': 'civilwar.museum',
'www.xxx.sub.civilwar.museum': 'civilwar.museum',
'www.exemple.com': 'exemple.com',
'main.testsite.com': 'testsite.com',
'www.ex-emple.com.ar': 'ex-emple.com.ar',
'main.test-site.co.uk': 'test-site.co.uk',
'en.tour.mysite.nl': 'tour.mysite.nl',
'www.one.lv': 'one.lv',
'www.onfdsadfsafde.lv': 'onfdsadfsafde.lv',
'aaa.onfdsadfsafde.aa': 'onfdsadfsafde.aa',
};
const firstTLDs = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|be|bf|bg|bh|bi|bj|bm|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|cl|cm|cn|co|cr|cu|cv|cw|cx|cz|de|dj|dk|dm|do|dz|ec|ee|eg|es|et|eu|fi|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jo|jp|kg|ki|km|kn|kp|kr|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|na|nc|ne|nf|ng|nl|no|nr|nu|nz|om|pa|pe|pf|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|yt".split('|');
const secondTLDs = "com|edu|gov|net|mil|org|nom|sch|caa|res|off|gob|int|tur|ip6|uri|urn|asn|act|nsw|qld|tas|vic|pro|biz|adm|adv|agr|arq|art|ato|bio|bmd|cim|cng|cnt|ecn|eco|emp|eng|esp|etc|eti|far|fnd|fot|fst|g12|ggf|imb|ind|inf|jor|jus|leg|lel|mat|med|mus|not|ntr|odo|ppg|psc|psi|qsl|rec|slg|srv|teo|tmp|trd|vet|zlg|web|ltd|sld|pol|fin|k12|lib|pri|aip|fie|eun|sci|prd|cci|pvt|mod|idv|rel|sex|gen|nic|abr|bas|cal|cam|emr|fvg|laz|lig|lom|mar|mol|pmn|pug|sar|sic|taa|tos|umb|vao|vda|ven|mie|北海道|和歌山|神奈川|鹿児島|ass|rep|tra|per|ngo|soc|grp|plc|its|air|and|bus|can|ddr|jfk|mad|nrw|nyc|ski|spy|tcm|ulm|usa|war|fhs|vgs|dep|eid|fet|fla|flå|gol|hof|hol|sel|vik|cri|iwi|ing|abo|fam|gok|gon|gop|gos|aid|atm|gsm|sos|elk|waw|est|aca|bar|cpa|jur|law|sec|plo|www|bir|cbg|jar|khv|msk|nov|nsk|ptz|rnd|spb|stv|tom|tsk|udm|vrn|cmw|kms|nkz|snz|pub|fhv|red|ens|nat|rns|rnu|bbs|tel|bel|kep|nhs|dni|fed|isa|nsn|gub|e12|tec|орг|обр|упр|alt|nis|jpn|mex|ath|iki|nid|gda|inc".split('|');
const knownSubdomains = "www|studio|mail|remote|blog|webmail|server|ns1|ns2|smtp|secure|vpn|m|shop|ftp|mail2|test|portal|ns|ww1|host|support|dev|web|bbs|ww42|squatter|mx|email|1|mail1|2|forum|owa|www2|gw|admin|store|mx1|cdn|api|exchange|app|gov|2tty|vps|govyty|hgfgdf|news|1rer|lkjkui";
/**
 * Removes subdomains from a hostname using the `knownSubdomains`,
 * `secondTLDs` and `firstTLDs` lists defined alongside this function.
 *
 * Steps: strip one well-known leading subdomain ("www.", "mail.", ...),
 * keep at most the last three labels, then drop the first of three labels
 * unless the last two look like a two-level suffix such as "com.ar".
 *
 * @param {string} s - Hostname, e.g. "www.ex-emple.com.ar".
 * @returns {string} The hostname without subdomains, e.g. "ex-emple.com.ar".
 */
function removeSubdomain(s) {
  // The backslash must be doubled: inside a template literal "\." is just
  // ".", which as a regex matches ANY character ("mxtoolbox" lost its "mx").
  const knownSubdomainsRegExp = new RegExp(`^(${knownSubdomains})\\.`, 'i');
  const parts = s.replace(knownSubdomainsRegExp, '').split('.');
  while (parts.length > 3) {
    parts.shift();
  }
  if (parts.length === 3) {
    const second = parts[1];
    const top = parts[2];
    // Two long trailing labels ("civilwar.museum") are name + TLD.
    const bothLong = second.length > 2 && top.length > 2;
    // Neither label is a known suffix part, so treat the first as a subdomain.
    const neitherListed = secondTLDs.indexOf(second) === -1 && firstTLDs.indexOf(top) === -1;
    if (bothLong || neitherListed) {
      parts.shift();
    }
  }
  return parts.join('.');
}
// Runs every hostname key of `tests` through removeSubdomain() and logs
// whether the result equals the expected value stored under that key.
for (var test in tests) {
// skip keys inherited through the prototype chain
if (tests.hasOwnProperty(test)) {
var t = test;
var e = tests[test];
var r = removeSubdomain(test);
// true when the actual result matches the expected one
var s = e === r;
if (s) {
console.log('OK: "' + t + '" should be "' + e + '" and it is really "' + r + '"');
} else {
console.log('Fail: "' + t + '" should be "' + e + '" but it is NOT "' + r + '"');
}
}
}
References:
psl.min.js file
Maximillian Laumeister Answer to this question
The most popular subdomains on the internet

Simplest solution:
// Keeps the last two labels of the current hostname. This assumes a
// single-label suffix (".com"); for "www.bbc.co.uk" it yields "co.uk".
// slice(-2) splits the hostname once and also copes with single-label hosts
// such as "localhost", where index arithmetic produced "undefined.localhost".
var domain='https://'+window.location.hostname.split('.').slice(-2).join('.');
alert(domain);

I created this function which uses URL to parse. It cheats by assuming all hostnames will have either 4 or less parts.
/**
 * Returns the trailing labels of a URL's hostname: the last three when the
 * hostname has exactly four labels, otherwise the last two.
 *
 * @param {string} url - Absolute URL accepted by the URL constructor.
 * @returns {string} The shortened hostname.
 */
const getDomainWithoutSubdomain = url => {
  const labels = new URL(url).hostname.split('.');
  const keep = labels.length === 4 ? 3 : 2;
  return labels.slice(-keep).join('.');
};

const sampleUrls = [
  'https://www.google.com',
  'https://www.google.co.uk',
  'https://mail.google.com',
  'https://www.bbc.co.uk/news',
  'https://github.com',
];
for (const sampleUrl of sampleUrls) {
  console.log(getDomainWithoutSubdomain(sampleUrl));
}

Here is a working JSFiddle
My solution works with the assumption that the root hostname you are looking for is of the type "abc.xyz.pp".
extractDomain() returns the hostname with all the subdomains.
getRootHostName() splits the hostname by . and then based on the assumption mentioned above, it uses the shift() to remove each subdomain name.
Finally, whatever remains in parts[], it joins them by . to form the root hostname.
Javascript
var urlInput = "http://one.two.roothost.co.uk/page.html";
/**
 * Extracts the host name (with all subdomains) from a URL-like string.
 *
 * Accepts full URLs ("http://host/path"), protocol-relative URLs
 * ("//host/path") and bare "host:port/path" strings. Credentials, port,
 * path, query and fragment are removed.
 *
 * @param {string} url - URL or host string.
 * @returns {string} The host name, e.g. "one.two.roothost.co.uk".
 */
function extractDomain(url) {
  // Strip a scheme only at the very start, so "://" inside a query string
  // (e.g. "?to=http://other") is not mistaken for the protocol separator.
  const rest = url.replace(/^(?:[a-z][a-z0-9+.-]*:)?\/\//i, '');
  // The authority ends at the first "/", "?" or "#".
  const authority = rest.split(/[\/?#]/)[0];
  // Drop "user:pass@" credentials before looking for the port colon.
  const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);
  return hostAndPort.split(':')[0];
}
/**
 * Returns the last three labels of a URL's host, on the assumption that the
 * root hostname has the form "abc.xyz.pp" (e.g. "roothost.co.uk"). Hosts
 * with three labels or fewer are returned unchanged.
 *
 * @param {string} url - URL or host string understood by extractDomain().
 * @returns {string} The root hostname.
 */
function getRootHostName(url) {
  var parts = extractDomain(url).split('.');
  // slice(-3) drops the leading subdomains in one step. The previous loop
  // used an undeclared counter `i`, which leaked a global and throws in
  // strict mode.
  return parts.slice(-3).join('.');
}
// The value is plain text, so write it with textContent rather than parsing
// it as HTML.
document.getElementById("result").textContent = getRootHostName(urlInput);
HTML
<div id="result"></div>
EDIT 1: Updated the JSFiddle link. It was reflecting the incorrect code.

What about...
/**
 * Returns the last two labels of document.domain after removing a leading
 * "www.", with any leading or trailing dots trimmed from the result.
 * Returns '' when document.domain is empty.
 */
function getDomain(){
  var host = document.domain;
  if (!host.length) {
    return '';
  }
  var labels = host.replace(/^(www\.)/,"").split('.');
  // keep only the final two labels; everything before them is a subdomain
  var lastTwo = labels.slice(-2).join('.');
  return lastTwo.replace(/(^\.*)|(\.*$)/g, "");
}

My solution worked for me: Get "gocustom.com" from "shop.gocustom.com"
// Shows the hostname, then its last two labels joined by a dot.
var site_domain_name = 'shop.gocustom.com';
alert(site_domain_name);
var strsArray = site_domain_name.split('.');
var strsArrayLen = strsArray.length;
// Plain index arithmetic: the eval() calls around these numbers did nothing.
alert(strsArray[strsArrayLen - 2]+'.'+strsArray[strsArrayLen - 1]);

You can try this in JavaScript:
alert(window.location.hostname);
It will return the hostname.

Related

Getting domain without subdomain from an url with javascript [duplicate]

How to get the domain name without subdomains?
e.g. if the url is "http://one.two.roothost.co.uk/page.html" how to get "roothost.co.uk"?
Following is a solution to extract a domain name without any subdomains. This solution doesn't make any assumptions about the URL format, so it should work for any URL. Since some domain names have one suffix (.com), and some have two or more (.co.uk), to get an accurate result in all cases, we need to parse the hostname using the Public Suffix List, which contains a list of all public domain name suffixes.
Solution
First, include the public suffix list js api in a script tag in your HTML, then in JavaScript to get the hostname you can call:
// psl.parse() looks the hostname up in the Public Suffix List; `.domain` is
// the part without subdomains ("roothost.co.uk" for this input).
var parsed = psl.parse('one.two.roothost.co.uk');
console.log(parsed.domain);
...which will return "roothost.co.uk". To get the name from the current page, you can use location.hostname instead of a static string:
// Same lookup, but for the hostname of the page that is currently loaded.
var parsed = psl.parse(location.hostname);
console.log(parsed.domain);
Finally, if you need to parse a domain name directly out of a full URL string, you can use the following:
var url = "http://one.two.roothost.co.uk/page.html";
// URL#hostname excludes the port and any "user:pass@" credentials, which
// url.split("/")[2] would have passed on to psl.parse().
url = new URL(url).hostname; // Get the hostname
var parsed = psl.parse(url); // Parse the domain
document.getElementById("output").textContent = parsed.domain;
JSFiddle Example (it includes the entire minified library in the jsFiddle, so scroll down!): https://jsfiddle.net/6aqdbL71/2/
What about this?
/**
 * Heuristically strips subdomains from a hostname without a TLD list.
 *
 * Labels of at most MAX_TLD_LENGTH characters are assumed to belong to the
 * suffix ("com", "co", "uk", "ar", ...). The result starts at the right-most
 * longer label that precedes the final label. The final label is never chosen
 * as the start, so a long TLD such as "museum" is not returned on its own.
 *
 * Limitation: a registrable name of three characters or fewer (e.g. "bbc" in
 * "www.bbc.co.uk") looks like a suffix label, so such hostnames come back
 * unchanged.
 *
 * @param {string} hostname - Dot-separated hostname, e.g. "mail.google.com".
 * @returns {string} The hostname with leading subdomain labels removed.
 */
function getCanonicalHost(hostname) {
  const MAX_TLD_LENGTH = 3;
  const labels = hostname.split('.');
  // Scan right to left, skipping the final label; fall back to the whole
  // hostname (start = 0) when no label is long enough.
  let start = 0;
  for (let i = labels.length - 2; i >= 0; i--) {
    if (labels[i].length > MAX_TLD_LENGTH) {
      start = i;
      break;
    }
  }
  return labels.slice(start).join('.');
}
console.log(getCanonicalHost('mail.google.com'));
console.log(getCanonicalHost('some.google.com.ar'));
console.log(getCanonicalHost('some.another.google.com.ar'));
console.log(getCanonicalHost('foo.bar.google.com'));
console.log(getCanonicalHost('foo.bar.google.com.ar'));
console.log(getCanonicalHost('bar.google.ar'));
It works because https://developer.mozilla.org/en-US/docs/Learn/Common_questions/What_is_a_domain_name says:
TLDs can contain special as well as latin characters. A TLD's maximum length is 63 characters, although most are around 2–3.
https://data.iana.org/TLD/tlds-alpha-by-domain.txt lists 1481 TLDs; 466 of them are 2–3 characters long, and the most commonly used TLDs are no longer than 3 characters.
If you need a solution that works with all TLDs, here is a more complex approach:
/**
 * Strips subdomains from a hostname using the TLD list in
 * `getCanonicalHost.tlds`, which is expected to be a Promise resolving to an
 * array of lower-case TLD strings.
 *
 * The result starts at the right-most label, other than the final one, that
 * is not in the list. The final label is never chosen as the start, so a
 * hostname with an unlisted last label is not reduced to that label alone.
 *
 * @param {string} hostname - Dot-separated hostname; case is ignored.
 * @returns {Promise<string>} The lower-cased hostname without subdomains.
 */
function getCanonicalHost(hostname) {
  return getCanonicalHost.tlds.then(function(tlds) {
    const known = new Set(tlds);
    const labels = hostname.toLowerCase().split('.');
    // Fall back to the whole hostname (start = 0) when every label is listed.
    let start = 0;
    for (let i = labels.length - 2; i >= 0; i--) {
      if (!known.has(labels[i])) {
        start = i;
        break;
      }
    }
    return labels.slice(start).join('.');
  });
}
// Downloads the IANA TLD list once and exposes it as a Promise that resolves
// to an array of lower-case TLD strings.
getCanonicalHost.tlds = new Promise(function(res, rej) {
  const TLD_LIST_URL = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt';
  const MAX_TLD_LENGTH = 63;
  const xhr = new XMLHttpRequest();
  xhr.addEventListener('error', rej);
  xhr.addEventListener('load', function() {
    // "load" also fires for HTTP error responses, so check the status.
    if (xhr.status < 200 || xhr.status >= 300) {
      rej(new Error('Failed to load TLD list: HTTP ' + xhr.status));
      return;
    }
    const tlds = xhr.responseText
      .split('\n')
      .map(function(line) { return line.trim().toLowerCase(); })
      // Drop blank lines and "#" comment lines (the file starts with one).
      .filter(function(line) { return line !== '' && line.charAt(0) !== '#'; })
      // 63 is itself a legal length, so the bound is inclusive.
      .filter(function(line) { return line.length <= MAX_TLD_LENGTH; });
    res(tlds);
  });
  xhr.open('GET', TLD_LIST_URL);
  xhr.send();
});
[
  'mail.google.com',
  'some.google.com.ar',
  'some.another.google.com.ar',
  'foo.bar.google.com',
  'foo.bar.google.com.ar',
  'bar.google.ar',
].forEach(function(host) {
  // Log failures too, so a failed download is not an unhandled rejection.
  getCanonicalHost(host).then(console.log, console.error);
});
You can use parse-domain to do the heavy lifting for you. This package considers the public suffix list and returns an easy to work with object breaking up the domain.
Here is an example from their readme:
npm install parse-domain
// Example from the parse-domain readme: split a hostname into its subdomain
// labels, its domain label and its public-suffix labels.
import { parseDomain, ParseResultType } from 'parse-domain';
const parseResult = parseDomain(
// should be a string with basic latin characters only. more details in the readme
'www.some.example.co.uk',
);
// check if the domain is listed in the public suffix list
if (parseResult.type === ParseResultType.Listed) {
const { subDomains, domain, topLevelDomains } = parseResult;
console.log(subDomains); // ["www", "some"]
console.log(domain); // "example"
console.log(topLevelDomains); // ["co", "uk"]
} else {
// more about other parseResult types in the readme
}
This works for me:
const firstTLDs = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|be|bf|bg|bh|bi|bj|bm|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|cl|cm|cn|co|cr|cu|cv|cw|cx|cz|de|dj|dk|dm|do|dz|ec|ee|eg|es|et|eu|fi|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jo|jp|kg|ki|km|kn|kp|kr|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|na|nc|ne|nf|ng|nl|no|nr|nu|nz|om|pa|pe|pf|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|yt".split('|');
const secondTLDs = "com|edu|gov|net|mil|org|nom|sch|caa|res|off|gob|int|tur|ip6|uri|urn|asn|act|nsw|qld|tas|vic|pro|biz|adm|adv|agr|arq|art|ato|bio|bmd|cim|cng|cnt|ecn|eco|emp|eng|esp|etc|eti|far|fnd|fot|fst|g12|ggf|imb|ind|inf|jor|jus|leg|lel|mat|med|mus|not|ntr|odo|ppg|psc|psi|qsl|rec|slg|srv|teo|tmp|trd|vet|zlg|web|ltd|sld|pol|fin|k12|lib|pri|aip|fie|eun|sci|prd|cci|pvt|mod|idv|rel|sex|gen|nic|abr|bas|cal|cam|emr|fvg|laz|lig|lom|mar|mol|pmn|pug|sar|sic|taa|tos|umb|vao|vda|ven|mie|北海道|和歌山|神奈川|鹿児島|ass|rep|tra|per|ngo|soc|grp|plc|its|air|and|bus|can|ddr|jfk|mad|nrw|nyc|ski|spy|tcm|ulm|usa|war|fhs|vgs|dep|eid|fet|fla|flå|gol|hof|hol|sel|vik|cri|iwi|ing|abo|fam|gok|gon|gop|gos|aid|atm|gsm|sos|elk|waw|est|aca|bar|cpa|jur|law|sec|plo|www|bir|cbg|jar|khv|msk|nov|nsk|ptz|rnd|spb|stv|tom|tsk|udm|vrn|cmw|kms|nkz|snz|pub|fhv|red|ens|nat|rns|rnu|bbs|tel|bel|kep|nhs|dni|fed|isa|nsn|gub|e12|tec|орг|обр|упр|alt|nis|jpn|mex|ath|iki|nid|gda|inc".split('|');
const knownSubdomains = "www|studio|mail|remote|blog|webmail|server|ns1|ns2|smtp|secure|vpn|m|shop|ftp|mail2|test|portal|ns|ww1|host|support|dev|web|bbs|ww42|squatter|mx|email|1|mail1|2|forum|owa|www2|gw|admin|store|mx1|cdn|api|exchange|app|gov|2tty|vps|govyty|hgfgdf|news|1rer|lkjkui";
/**
 * Removes subdomains from a hostname using the `knownSubdomains`,
 * `secondTLDs` and `firstTLDs` lists defined alongside this function.
 *
 * Steps: strip one well-known leading subdomain ("www.", "mail.", ...),
 * keep at most the last three labels, then drop the first of three labels
 * unless the last two look like a two-level suffix such as "com.ar".
 *
 * @param {string} s - Hostname, e.g. "www.ex-emple.com.ar".
 * @returns {string} The hostname without subdomains, e.g. "ex-emple.com.ar".
 */
function removeSubdomain(s) {
  // The backslash must be doubled: inside a template literal "\." is just
  // ".", which as a regex matches ANY character ("mxtoolbox" lost its "mx").
  const knownSubdomainsRegExp = new RegExp(`^(${knownSubdomains})\\.`, 'i');
  const parts = s.replace(knownSubdomainsRegExp, '').split('.');
  while (parts.length > 3) {
    parts.shift();
  }
  if (parts.length === 3) {
    const second = parts[1];
    const top = parts[2];
    // Two long trailing labels ("civilwar.museum") are name + TLD.
    const bothLong = second.length > 2 && top.length > 2;
    // Neither label is a known suffix part, so treat the first as a subdomain.
    const neitherListed = secondTLDs.indexOf(second) === -1 && firstTLDs.indexOf(top) === -1;
    if (bothLong || neitherListed) {
      parts.shift();
    }
  }
  return parts.join('.');
}
var tests = {
'www.sidanmor.com': 'sidanmor.com',
'exemple.com': 'exemple.com',
'argos.co.uk': 'argos.co.uk',
'www.civilwar.museum': 'civilwar.museum',
'www.sub.civilwar.museum': 'civilwar.museum',
'www.xxx.sub.civilwar.museum': 'civilwar.museum',
'www.exemple.com': 'exemple.com',
'main.testsite.com': 'testsite.com',
'www.ex-emple.com.ar': 'ex-emple.com.ar',
'main.test-site.co.uk': 'test-site.co.uk',
'en.tour.mysite.nl': 'tour.mysite.nl',
'www.one.lv': 'one.lv',
'www.onfdsadfsafde.lv': 'onfdsadfsafde.lv',
'aaa.onfdsadfsafde.aa': 'onfdsadfsafde.aa',
};
const firstTLDs = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|be|bf|bg|bh|bi|bj|bm|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|cl|cm|cn|co|cr|cu|cv|cw|cx|cz|de|dj|dk|dm|do|dz|ec|ee|eg|es|et|eu|fi|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jo|jp|kg|ki|km|kn|kp|kr|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|na|nc|ne|nf|ng|nl|no|nr|nu|nz|om|pa|pe|pf|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|yt".split('|');
const secondTLDs = "com|edu|gov|net|mil|org|nom|sch|caa|res|off|gob|int|tur|ip6|uri|urn|asn|act|nsw|qld|tas|vic|pro|biz|adm|adv|agr|arq|art|ato|bio|bmd|cim|cng|cnt|ecn|eco|emp|eng|esp|etc|eti|far|fnd|fot|fst|g12|ggf|imb|ind|inf|jor|jus|leg|lel|mat|med|mus|not|ntr|odo|ppg|psc|psi|qsl|rec|slg|srv|teo|tmp|trd|vet|zlg|web|ltd|sld|pol|fin|k12|lib|pri|aip|fie|eun|sci|prd|cci|pvt|mod|idv|rel|sex|gen|nic|abr|bas|cal|cam|emr|fvg|laz|lig|lom|mar|mol|pmn|pug|sar|sic|taa|tos|umb|vao|vda|ven|mie|北海道|和歌山|神奈川|鹿児島|ass|rep|tra|per|ngo|soc|grp|plc|its|air|and|bus|can|ddr|jfk|mad|nrw|nyc|ski|spy|tcm|ulm|usa|war|fhs|vgs|dep|eid|fet|fla|flå|gol|hof|hol|sel|vik|cri|iwi|ing|abo|fam|gok|gon|gop|gos|aid|atm|gsm|sos|elk|waw|est|aca|bar|cpa|jur|law|sec|plo|www|bir|cbg|jar|khv|msk|nov|nsk|ptz|rnd|spb|stv|tom|tsk|udm|vrn|cmw|kms|nkz|snz|pub|fhv|red|ens|nat|rns|rnu|bbs|tel|bel|kep|nhs|dni|fed|isa|nsn|gub|e12|tec|орг|обр|упр|alt|nis|jpn|mex|ath|iki|nid|gda|inc".split('|');
const knownSubdomains = "www|studio|mail|remote|blog|webmail|server|ns1|ns2|smtp|secure|vpn|m|shop|ftp|mail2|test|portal|ns|ww1|host|support|dev|web|bbs|ww42|squatter|mx|email|1|mail1|2|forum|owa|www2|gw|admin|store|mx1|cdn|api|exchange|app|gov|2tty|vps|govyty|hgfgdf|news|1rer|lkjkui";
/**
 * Removes subdomains from a hostname using the `knownSubdomains`,
 * `secondTLDs` and `firstTLDs` lists defined alongside this function.
 *
 * Steps: strip one well-known leading subdomain ("www.", "mail.", ...),
 * keep at most the last three labels, then drop the first of three labels
 * unless the last two look like a two-level suffix such as "com.ar".
 *
 * @param {string} s - Hostname, e.g. "www.ex-emple.com.ar".
 * @returns {string} The hostname without subdomains, e.g. "ex-emple.com.ar".
 */
function removeSubdomain(s) {
  // The backslash must be doubled: inside a template literal "\." is just
  // ".", which as a regex matches ANY character ("mxtoolbox" lost its "mx").
  const knownSubdomainsRegExp = new RegExp(`^(${knownSubdomains})\\.`, 'i');
  const parts = s.replace(knownSubdomainsRegExp, '').split('.');
  while (parts.length > 3) {
    parts.shift();
  }
  if (parts.length === 3) {
    const second = parts[1];
    const top = parts[2];
    // Two long trailing labels ("civilwar.museum") are name + TLD.
    const bothLong = second.length > 2 && top.length > 2;
    // Neither label is a known suffix part, so treat the first as a subdomain.
    const neitherListed = secondTLDs.indexOf(second) === -1 && firstTLDs.indexOf(top) === -1;
    if (bothLong || neitherListed) {
      parts.shift();
    }
  }
  return parts.join('.');
}
// Runs every hostname key of `tests` through removeSubdomain() and logs
// whether the result equals the expected value stored under that key.
for (var test in tests) {
// skip keys inherited through the prototype chain
if (tests.hasOwnProperty(test)) {
var t = test;
var e = tests[test];
var r = removeSubdomain(test);
// true when the actual result matches the expected one
var s = e === r;
if (s) {
console.log('OK: "' + t + '" should be "' + e + '" and it is really "' + r + '"');
} else {
console.log('Fail: "' + t + '" should be "' + e + '" but it is NOT "' + r + '"');
}
}
}
References:
psl.min.js file
Maximillian Laumeister Answer to this question
The most popular subdomains on the internet
Simplest solution:
// Keeps the last two labels of the current hostname. This assumes a
// single-label suffix (".com"); for "www.bbc.co.uk" it yields "co.uk".
// slice(-2) splits the hostname once and also copes with single-label hosts
// such as "localhost", where index arithmetic produced "undefined.localhost".
var domain='https://'+window.location.hostname.split('.').slice(-2).join('.');
alert(domain);
I created this function which uses URL to parse. It cheats by assuming all hostnames will have either 4 or less parts.
/**
 * Returns the trailing labels of a URL's hostname: the last three when the
 * hostname has exactly four labels, otherwise the last two.
 *
 * @param {string} url - Absolute URL accepted by the URL constructor.
 * @returns {string} The shortened hostname.
 */
const getDomainWithoutSubdomain = url => {
  const labels = new URL(url).hostname.split('.');
  const keep = labels.length === 4 ? 3 : 2;
  return labels.slice(-keep).join('.');
};

const sampleUrls = [
  'https://www.google.com',
  'https://www.google.co.uk',
  'https://mail.google.com',
  'https://www.bbc.co.uk/news',
  'https://github.com',
];
for (const sampleUrl of sampleUrls) {
  console.log(getDomainWithoutSubdomain(sampleUrl));
}
Here is a working JSFiddle
My solution works with the assumption that the root hostname you are looking for is of the type "abc.xyz.pp".
extractDomain() returns the hostname with all the subdomains.
getRootHostName() splits the hostname by . and then based on the assumption mentioned above, it uses the shift() to remove each subdomain name.
Finally, whatever remains in parts[], it joins them by . to form the root hostname.
Javascript
var urlInput = "http://one.two.roothost.co.uk/page.html";
/**
 * Extracts the host name (with all subdomains) from a URL-like string.
 *
 * Accepts full URLs ("http://host/path"), protocol-relative URLs
 * ("//host/path") and bare "host:port/path" strings. Credentials, port,
 * path, query and fragment are removed.
 *
 * @param {string} url - URL or host string.
 * @returns {string} The host name, e.g. "one.two.roothost.co.uk".
 */
function extractDomain(url) {
  // Strip a scheme only at the very start, so "://" inside a query string
  // (e.g. "?to=http://other") is not mistaken for the protocol separator.
  const rest = url.replace(/^(?:[a-z][a-z0-9+.-]*:)?\/\//i, '');
  // The authority ends at the first "/", "?" or "#".
  const authority = rest.split(/[\/?#]/)[0];
  // Drop "user:pass@" credentials before looking for the port colon.
  const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);
  return hostAndPort.split(':')[0];
}
/**
 * Returns the last three labels of a URL's host, on the assumption that the
 * root hostname has the form "abc.xyz.pp" (e.g. "roothost.co.uk"). Hosts
 * with three labels or fewer are returned unchanged.
 *
 * @param {string} url - URL or host string understood by extractDomain().
 * @returns {string} The root hostname.
 */
function getRootHostName(url) {
  var parts = extractDomain(url).split('.');
  // slice(-3) drops the leading subdomains in one step. The previous loop
  // used an undeclared counter `i`, which leaked a global and throws in
  // strict mode.
  return parts.slice(-3).join('.');
}
// The value is plain text, so write it with textContent rather than parsing
// it as HTML.
document.getElementById("result").textContent = getRootHostName(urlInput);
HTML
<div id="result"></div>
EDIT 1: Updated the JSFiddle link. It was reflecting the incorrect code.
What about...
/**
 * Returns the last two labels of document.domain after removing a leading
 * "www.", with any leading or trailing dots trimmed from the result.
 * Returns '' when document.domain is empty.
 */
function getDomain(){
  var host = document.domain;
  if (!host.length) {
    return '';
  }
  var labels = host.replace(/^(www\.)/,"").split('.');
  // keep only the final two labels; everything before them is a subdomain
  var lastTwo = labels.slice(-2).join('.');
  return lastTwo.replace(/(^\.*)|(\.*$)/g, "");
}
My solution worked for me: Get "gocustom.com" from "shop.gocustom.com"
// Shows the hostname, then its last two labels joined by a dot.
var site_domain_name = 'shop.gocustom.com';
alert(site_domain_name);
var strsArray = site_domain_name.split('.');
var strsArrayLen = strsArray.length;
// Plain index arithmetic: the eval() calls around these numbers did nothing.
alert(strsArray[strsArrayLen - 2]+'.'+strsArray[strsArrayLen - 1]);
You can try this in JavaScript:
alert(window.location.hostname);
It will return the hostname.

Adding a Parameter to Url with javascript

I am trying to add the parameter "referer=" to my url corresponding to the traffic referer of a new session.
I used some of the code from this topic... but it keeps reloading the page in a loop... then the url is like :
https://example.com?refere=facebookreferer=facebookreferer=facebook
Note:
I have been using this solution 1 :
// Question code, kept exactly as posted (it is the code being debugged).
// Intended to append `name=value` to the current URL by navigating to the
// extended address. See the NOTE(review) comments for why it fails.
function addOrUpdateUrlParam(name, value)
{
var ref = document.referrer;
// second dot-separated piece of the referrer URL,
// e.g. "facebook" for "https://www.facebook.com/"
var refsplit = ref.split(".")[1];
var href = window.location.href;
// matches "?name=" or "&name=" anywhere in the current URL
var regex = new RegExp("[&\\?]" + name + "=");
if(regex.test(href))
{
// NOTE(review): this second pattern is built but never used, and \d+ would
// only match numeric values.
regex = new RegExp("([&\\?])" + name + "=\\d+");
// NOTE(review): the next brace should be "}" — as written the if-block is
// never closed, so the following `else` is a syntax error.
{
else
{
if(href.indexOf("?") > -1)
window.location.href = href + "&" + name + "=" + value;
else
window.location.href = href + "?" + name + "=" + value;
}
// NOTE(review): whenever the referrer piece is not "example" the function
// calls itself again, and nothing inside the function changes that condition.
if (refsplit != "example") {
return addOrUpdateUrlParam("referer", refsplit);
}
}
And this solution 2:
// Question code, kept as posted. Appends "referer=<referrer piece>" to the
// query string unless the referrer's second dot-separated piece is "example".
// NOTE(review): there is no check for an existing "referer" parameter and no
// "&" separator, so each run appends the text again.
function () {
var ref = document.referrer;
var refsplit = ref.split(".")[1];
if (refsplit != "example") {
return location.search += "referer=" + refsplit;
}
}
Edit 1:
Thanks to Prasanth I improved the code to :
// Question code, kept as posted. Intended to navigate to the current URL with
// "referer=<referrer piece>" added, using "?" or "&" as appropriate.
function () {
var ref = document.referrer;
var refsplit = ref.split(".")[1];
var currentUrl = location.href;
// NOTE(review): `+=` modifies currentUrl itself, so after these two lines
// currentUrl (and url2) contain BOTH suffixes and url1 is no longer distinct
// from the value the check below inspects.
var url1 = currentUrl += "?referer="+refsplit;
var url2 = currentUrl += "&referer="+refsplit;
// NOTE(review): currentUrl now always contains "?", so this branch is never
// taken and url2 is always used.
if(currentUrl.indexOf("?") < 0) {
return window.location = url1;
} else {
return window.location = url2;
}
}
However, it is returning both conditions :
https://example.com/?referer=facebook&referer=facebook
Edit 2:
So after many attempts, I achieved it by working with the parameters of the url (location.search) instead of the full url (location.href) :
/**
 * Builds the current query string with `utm_source=<referrer host>` added.
 *
 * The referrer host is taken from document.referrer with a leading "www."
 * removed. The existing query string is returned unchanged when there is no
 * usable referrer or when the referrer is the site's own domain ("mydomain").
 *
 * @returns {string} A query string suitable for assigning to location.search.
 */
function addRefererParam () {
  var params = location.search; //Get Url parameters
  var refDomain;
  try {
    // Parse the referrer properly. The previous character-class "regex"
    // dropped leading letters of hosts such as "stackoverflow.com".
    refDomain = new URL(document.referrer).hostname.replace(/^www\./i, '');
  } catch (e) {
    // Empty or unparsable referrer (direct visit): nothing to add.
    return params;
  }
  if (/mydomain/i.test(refDomain)) { //own domain: leave the query untouched
    return params;
  }
  // URLSearchParams inserts the "?"/"&" separators and replaces an existing
  // utm_source instead of appending a duplicate.
  var query = new URLSearchParams(params);
  query.set('utm_source', refDomain);
  return '?' + query.toString();
}
However, it is not producing a query string that persists while browsing... how can I make the new parameters persistent?
Obtain the url of the current window and after the domain name just concat your url with &referer=value.
// Adds "referer=<value>" to the current URL once, then navigates to it.
// NOTE(review): `value` is not defined in this snippet — it must be supplied
// by the surrounding code.
var currentUrl = location.href;
var parsedUrl = new URL(currentUrl);
// true when the URL does not carry a "referer" parameter yet
var flag = !parsedUrl.searchParams.has('referer');
if (flag) {
  // searchParams picks "?" or "&" as needed and encodes the value.
  parsedUrl.searchParams.set('referer', value);
  currentUrl = parsedUrl.toString();
  window.location = currentUrl;
}
For what it's worth (because the more generic question of just how to do this generally is what led me to this post), I've made a 178 byte helper function that takes in an object of the query parameters you want to add to a url for a GET request (in similar format for how you might add headers to a request) and made an npm package for it here: https://www.npmjs.com/package/add-query-params-to-url
Hopefully this is helpful to some.

Javascript, create Page title with URL Query string

I am going to create the title of the page according to its URL query String
The URL sample is:
domain.com/pricelist/phones/?min_price=0&max_price=50000
If max_price = 50000, My page title will be: Phones Under Rs.50000
If URL contains only brand like:
domain.com/pricelist/phones/?brand=apple
Page title will be: Apple phones Price list 2018
And if URL contains both price and brand like:
domain.com/pricelist/phones/?min_price=0&max_price=50000&brand=apple
Page title: Apple phones under Rs.50000
here is my code:-
<script>
/**
 * Collects the `key=value` pairs found after "?" or "&" in
 * window.location.href into a plain object. Values are kept exactly as they
 * appear in the URL (no decoding); a repeated key keeps its last value.
 *
 * @returns {Object} Map of parameter name to raw value.
 */
function getUrlVars() {
  var result = {};
  var pairPattern = /[?&]+([^=&]+)=([^&]*)/gi;
  var href = window.location.href;
  var hit;
  while ((hit = pairPattern.exec(href)) !== null) {
    result[hit[1]] = hit[2];
  }
  return result;
}
// Question code, kept as posted. Derives a page title from the URL path and
// the "max_price" / "brand" query parameters.
var path = window.location.pathname;
// everything up to and including the last "/"
var pathName = path.substring(0, path.lastIndexOf('/') + 1);
console.log(path);
console.log(pathName);
// "/pricelist/phones/" -> "pricelistphones" -> "phones" -> "Phones"
pathName = pathName.replace(/\//g, "")
pathName = pathName.replace(/pricelist/g, "")
pathName = pathName.replace(/\b\w/g, l => l.toUpperCase())
var number = getUrlVars()["max_price"];
var brand = getUrlVars()["brand"];
// NOTE(review): when the URL has no "brand" parameter, `brand` is undefined
// and this line throws before any title is set.
brand = brand.replace(/\b\w/g, l => l.toUpperCase())
// NOTE(review): indexOf() returns a number, never null, so this condition is
// always true.
if (window.location.href.indexOf("min_price") != null) {document.title = pathName + ' Under Rs. ' + number;}
// NOTE(review): this looks for "pa_brand", while the parameter read above is
// "brand".
if (window.location.href.indexOf("pa_brand") > -1) {document.title = brand + ' ' + pathName + ' Price List India';}
if (window.location.href.indexOf("min_price") > -1 && window.location.href.indexOf("brand") > -1) {document.title = brand + ' ' + pathName + ' Under Rs.' + number;}
</script>
First off (in my opinion) I would try to stay away from regular expressions if I could. If you have not heard of URL Search Params, you should read up on it. It makes dealing with the query string very simple.
I also changed the capitalization to not use regular expressions too (source is this answer)
Now for the if statement, which seems like you had trouble with, try to break it down step by step.
First I see if maxPrice is not null, if its not null then great we have our first title: Phones Under Rs.${maxPrice}
Next I check if brand is not null (inside the maxPrice if) this way if brand is not null we can safely assume maxPrice is also not null, so we can change our message to ${brand} Phones Under Rs.${maxPrice}
Now since the only case where we have 2 variables in the message is done we can bounce back out of the first if and continue down to an else if. Now I check if brand is not null. If brand is not null then at this point we can assume maxPrice is also null (otherwise the code would've gone into the first if) so this gives us our final message ${brand} Phones
Now finally an else case just in case we missed something down the road, we can log it and fix the code easily.
Quick note if you are unfamiliar with the backticks in the strings they are Template Literals
// Demo input: a hard-coded URL stands in for the current page address.
var theURL = "https://www.example.com?min_price=0&max_price=5000&brand=apple";
var parsedURL = new URL(theURL);
// you should use window.location.search instead
var searchParams = new URLSearchParams(parsedURL.search);

// Each lookup yields null when the parameter is absent from the query string.
var brand = searchParams.get("brand");
var maxPrice = searchParams.get("max_price");
var minPrice = searchParams.get("min_price");

// Title-case the brand (first letter of every space-separated word upper-cased,
// the rest lower-cased) when it was supplied.
if (brand !== null) {
  const capitalizedWords = [];
  for (const word of brand.toLowerCase().split(' ')) {
    capitalizedWords.push(word.charAt(0).toUpperCase() + word.substring(1));
  }
  brand = capitalizedWords.join(' ');
}

// Pick the title from the combination of parameters that came through.
// When neither brand nor max_price is present, title stays undefined and the
// situation is logged instead.
var title;
if (maxPrice === null && brand === null) {
  console.log(`other case came through: brand: ${brand} max_price: ${maxPrice} min_price: ${minPrice}`);
} else if (maxPrice === null) {
  title = `${brand} Phones Price list 2018`;
} else if (brand === null) {
  title = `Phones Under Rs.${maxPrice}`;
} else {
  title = `${brand} Phones Under Rs.${maxPrice}`;
}
console.log(title);
In my opinion, you're having a hard time handling if/else statements, because of overall complexity you've brought to your script. Try to make it simpler and constructing conditions will become a breeze.
I have not tested it, but check out this solution:
/**
 * Sets document.title from the `brand` and `max_price` query parameters of
 * the current page.
 *
 * Resulting titles:
 *   brand + max_price -> "<brand> phones under Rs.<max_price>"
 *   brand only        -> "<brand> phones Price list 2018"
 *   max_price only    -> "Phones under Rs.<max_price>"
 *   neither           -> "Phones Price list 2018"
 */
function setTitle () {
  // URLSearchParams strips the leading "?" and percent-decodes keys and values
  // (it also reads "+" as a space), so an encoded brand such as
  // "samsung%20galaxy" shows up in the title as "samsung galaxy".
  // Object.fromEntries keeps the last value when a key is repeated.
  const search = Object.fromEntries(new URLSearchParams(window.location.search));

  const brandPart = search.brand ? `${search.brand} phones` : 'Phones';
  const maxPricePart = search.max_price ? `under Rs.${search.max_price}` : '';
  // Fall back to the generic price-list suffix when no price cap was given.
  const pricePart = maxPricePart || 'Price list 2018';

  document.title = `${brandPart} ${pricePart}`;
}
Maybe it has some problems, but it is much easier to understand and maintain.
Your code looks good, but I think it can be improved.
As an alternative solution, you can first build a JSON-style object containing all of the query parameters.
Based on that object, you can then easily create the title.
https://gomakethings.com/getting-all-query-string-values-from-a-url-with-vanilla-js/
/**
 * Collects the query-string parameters of a URL into a plain object.
 *
 * @param {string} url - URL whose query string should be read.
 * @returns {Object<string, string>} Decoded parameter names mapped to decoded
 *   values. A parameter without "=" maps to the empty string, a repeated
 *   parameter keeps its last value, and a URL without a query string yields {}.
 */
var getParams = function (url) {
  var params = {};
  // Assigning the URL to an anchor element and reading back its `search`
  // property lets the browser do the URL parsing.
  var parser = document.createElement('a');
  parser.href = url;
  // URLSearchParams splits each pair on the first "=" only, decodes both key
  // and value (reading "+" as a space), and skips empty segments, so valueless
  // flags no longer turn into the string "undefined" and values containing
  // "=" are no longer truncated.
  new URLSearchParams(parser.search).forEach(function (value, key) {
    params[key] = value;
  });
  return params;
};
// Get the query parameters of the current page.
var query_parameters = getParams(window.location.href);
// Build the title from whichever of max_price / brand is present; it stays
// empty when neither parameter was supplied.
var brand_name = '';
if ( query_parameters.max_price && query_parameters.brand ) {
  // The leading space before "phones" was missing in this case, which glued
  // the brand to the next word.
  brand_name = query_parameters.brand.toUpperCase() + " phones Under Rs." + query_parameters.max_price;
} else if ( query_parameters.brand ) {
  brand_name = query_parameters.brand.toUpperCase() + " phones Price list 2018";
} else if ( query_parameters.max_price ) {
  brand_name = "Phones Under Rs." + query_parameters.max_price;
}

Removing final part of URL in Javascript/Jquery

I have some URLs that all follow the same structure.
https://www.website.com/services/county/town/servicename/brand/
When the search has zero results we display a button that when clicked runs a function to remove the final section of the URL and thus expand the search.
For example if the above URL returned 0 results then clicking our button would load https://www.website.com/services/county/town/servicename/ having removed brand from the search criteria and expanding the chance of results.
The code I currently have for this works but seems like a bit of a hack.
/**
 * Broadens the search by navigating to the current URL with its final path
 * segment removed.
 *
 * Only URLs that split on "/" into 6 to 9 pieces are handled; any other depth
 * is left alone. For a URL with a trailing slash such as
 * https://www.website.com/services/county/town/servicename/brand/ this removes
 * "brand/".
 */
function expandSearch() {
  var currentURL = window.location.href;
  var parts = currentURL.split("/");
  if ( parts.length < 6 || parts.length > 9 ) {
    return;
  }
  // The piece before the last "/" is the segment to drop. Its text sits
  // immediately before that slash, so the cut position is never negative.
  var lastSegment = parts[parts.length - 2];
  var cutAt = currentURL.lastIndexOf('/') - lastSegment.length;
  // Plain slicing instead of building a RegExp from the URL text: characters
  // such as "+", "(" or "?" in a segment used to be read as regex syntax and
  // could throw or silently fail to match.
  window.location.href = currentURL.substring(0, cutAt);
}
The search can return 0 results at any point down to https://www.website.com/services/ at which point the whole database is returned.
The URL can also have missing elements for example it might have a county but no town.
Is there a better/cleaner way of removing the final URL element and redirecting the browser to this new broader search?
The final working version I ended up with thanks to #ebilgin for anyone looking:
/**
 * Navigates to the parent of the current path, i.e. the path with its last
 * non-empty segment removed, always ending in a trailing slash.
 */
function expandSearch() {
  // filter(Boolean) drops the empty strings produced by the leading and
  // trailing slashes.
  var parts = window.location.pathname.split("/").filter(Boolean);
  parts.pop(); // Remove the last segment
  // With no segments left the join would produce "//", which is not a valid
  // path to navigate to; go to the site root instead.
  window.location.href = parts.length ? "/" + parts.join("/") + "/" : "/";
}
You can use .pop() and .join() functions for your problem.
/**
 * Navigates to the parent of the current path. A trailing slash on the
 * current path is carried over to the new one.
 */
function expandSearch() {
  var path = window.location.pathname.slice(1);
  var lastCharIsSlash = path.endsWith("/");
  if ( lastCharIsSlash ) {
    path = path.slice(0, -1);
  }
  var parts = path.split("/");
  parts.pop();
  var parent = "/" + parts.join("/");
  // Re-append the trailing slash, except when the parent is already the root:
  // "/" + "/" would give "//", which is not a valid path to navigate to.
  if ( lastCharIsSlash && parent !== "/" ) {
    parent += "/";
  }
  window.location.href = parent;
}
If every one of your URIs has a trailing slash, this is a much clearer version of it.
/**
 * Navigates to the parent of the current path.
 * Assumes the current pathname both starts and ends with "/".
 */
function expandSearch() {
  // slice(1, -1) strips the leading and trailing slash before splitting.
  var parts = window.location.pathname.slice(1, -1).split("/");
  parts.pop();
  // With no segments left the join would produce "//", which is not a valid
  // path to navigate to; go to the site root instead.
  window.location.href = parts.length ? "/" + parts.join("/") + "/" : "/";
}

javascript parser for a string which contains .ini data

If a string contains a .ini file data , How can I parse it in JavaScript ?
Is there any JavaScript parser which will help in this regard?
here , typically string contains the content after reading a configuration file. (reading cannot be done through javascript , but somehow I gather .ini info in a string.)
I wrote a JavaScript function inspired by node-iniparser.js
/**
 * Parses the text of an .ini file into an object.
 *
 * Lines of the form "key = value" become string properties; a "[name]" line
 * opens a section, and the keys that follow are stored on an object under
 * that name. Lines starting with ";" are comments and are skipped.
 *
 * @param {string} data - Contents of the .ini file.
 * @returns {Object} Top-level keys plus one nested object per section.
 */
function parseINIString(data){
  const patterns = {
    section: /^\s*\[\s*([^\]]*)\s*\]\s*$/,
    param: /^\s*([^=]+?)\s*=\s*(.*?)\s*$/,
    comment: /^\s*;.*$/
  };
  const result = {};
  let currentSection = null;

  for (const line of data.split(/[\r\n]+/)) {
    if (patterns.comment.test(line)) {
      continue;
    }

    const keyValue = patterns.param.exec(line);
    if (keyValue) {
      // Keys seen before any section header (or under an empty section name)
      // are stored on the top-level object.
      const target = currentSection ? result[currentSection] : result;
      target[keyValue[1]] = keyValue[2];
      continue;
    }

    const header = patterns.section.exec(line);
    if (header) {
      result[header[1]] = {};
      currentSection = header[1];
      continue;
    }

    // An empty line closes the current section.
    if (line.length == 0 && currentSection) {
      currentSection = null;
    }
  }

  return result;
}
Updated 2017-05-10: fixed a bug with keys that contain spaces.
EDIT:
Sample of ini file read and parse
You could try the config-ini-parser, it's similar to python ConfigParser without I/O operations
It could be installed by npm or bower. Here is an example:
var ConfigIniParser = require("config-ini-parser").ConfigIniParser;
var delimiter = "\r\n"; // or "\n" for *nix
// Declared with var: the original assigned to an undeclared `parser`, which
// creates an implicit global and is a ReferenceError in strict mode.
var parser = new ConfigIniParser(delimiter); // If the delimiter parameter is not passed, the default value \n is used
// NOTE(review): iniContent is presumably a string holding the .ini text, defined elsewhere — confirm.
parser.parse(iniContent);
var value = parser.get("section", "option");
parser.stringify('\n'); // get all the ini file content as a string
For more detail you could check the project main page or from the npm package page
Here's a function that is able to parse INI data from a string into an object (on the client side)!
/**
 * Parses the text of an .ini file into an object (client side).
 *
 * Lines of the form "key = value" become string properties, where a key may
 * contain word characters, ".", "-" and "_". A "[name]" line opens a section,
 * and the keys that follow are stored on an object under that name. Lines
 * starting with ";" are comments; comments, blank lines and anything else
 * that is not recognised are skipped.
 *
 * @param {string} data - Contents of the .ini file.
 * @returns {Object} Top-level keys plus one nested object per section.
 */
function parseINIString(data){
  var regex = {
    section: /^\s*\[\s*([^\]]*)\s*\]\s*$/,
    param: /^\s*([\w\.\-\_]+)\s*=\s*(.*?)\s*$/,
    comment: /^\s*;.*$/
  };
  var value = {};
  var lines = data.split(/\r\n|\r|\n/);
  var section = null;
  // The loop index is declared locally; it used to leak as an implicit global.
  for (var x = 0; x < lines.length; x++) {
    var line = lines[x];
    if (regex.comment.test(line)) {
      // Skip the comment and keep going. A `return` here aborted the whole
      // parse and made the function return undefined at the first comment.
      continue;
    }
    var match = line.match(regex.param);
    if (match) {
      if (section) {
        value[section][match[1]] = match[2];
      } else {
        value[match[1]] = match[2];
      }
      continue;
    }
    match = line.match(regex.section);
    if (match) {
      value[match[1]] = {};
      section = match[1];
    }
    // The former `lines.length == 0 && section` branch was removed: it could
    // never be true inside this loop, so blank lines were already ignored.
  }
  return value;
}
Based on the other responses, I've modified it so you can have nested sections :)
/**
 * Parses .ini text into a nested object.
 *
 * "key = value" lines are stored on the object of the most recent section
 * header (or on the root before any header). A header such as "[a.b.c]" is
 * split on "." and addresses nested objects from the root, creating any that
 * are missing. Lines starting with ";" are comments and are skipped.
 */
function parseINI(data: string) {
    const patterns = {
        section: /^\s*\[\s*([^\]]*)\s*\]\s*$/,
        param: /^\s*([^=]+?)\s*=\s*(.*?)\s*$/,
        comment: /^\s*;.*$/
    };
    const root = {};
    // Object that currently receives key/value pairs.
    let cursor = root;

    for (const line of data.split(/[\r\n]+/)) {
        if (patterns.comment.test(line)) {
            continue;
        }

        const keyValue = patterns.param.exec(line);
        if (keyValue) {
            cursor[keyValue[1]] = keyValue[2];
            continue;
        }

        const header = patterns.section.exec(line);
        if (!header) {
            continue;
        }

        // Every header is resolved from the root, one dotted name at a time.
        cursor = root;
        for (const name of header[1].split(".")) {
            if (!cursor[name]) {
                cursor[name] = {};
            }
            cursor = cursor[name];
        }
    }

    return root;
}

Categories

Resources