Extract domain name suffix from any url - javascript

I have a url in string format like this :
str="http://code.google.com"
and some other like str="http://sub.google.co.in"
i want to extract google.com from first one, and google.co.in from second string .
what i did is :
var a, d, i, ind, j, till, total;
a = document.createElement('a');
a.href = "http://www.wv.sdf.sdf.sd.ds..google.co.in";
d = "";
if (a.host.substr(0, 4) === "www.") {
d = a.host.replace("www.", "");
} else {
d = a.host;
}
till = d.indexOf(".com");
total = 0;
for (i in d) {
if (i === till) {
break;
}
if (d[i] === ".") {
total++;
}
}
j = 1;
while (j < total) {
ind = d.indexOf(".");
d = d.substr(ind + 1, d.length);
j++;
}
alert(d);
My code works but it works only for ".com" , it doesnt work for others like ".co.in","co.uk" till i specify them manually , Can anyone tell me the solution for this ? I dont mind even i need to change the full code, but it should work . Thanks

The only current practical solution (and even that doesn't work 100%) is to refer to the Public Suffix List in your code, and synchronise with that list as required.
There is no algorithm that can look at a domain name and figure out which part is the "registered domain name" and which parts are subdomains. It can't even be done by interrogating the DNS itself.

Regular expressions are quite powerful for such problems.
https://regex101.com/r/rW4rD8/1
The code below should fit this purpose.
var getSuffixOnly = function (url) {
var normalized = url.toLowerCase();
var noProtocol = normalized.replace(/.*?:\/\//g, "");
var splittedURL = noProtocol.split(/\/|\?+/g);
if (splittedURL.length > 1){
noProtocol = splittedURL[0].toString().replace(/[&\/\\#,+()$~%'":*?<>{}£€^ ]/g, '');
}
var regex = /([^.]{2,}|[^.]{2,3}\.[^.]{2})$/g;
var host = noProtocol.match(regex);
return host.toString();
};
getSuffixOnly(window.location.host);

Related

Get the next key from array from string with symbols

I'm working on a simple but difficult problem for me right now, I'm use to work in jQuery but need this to be done in Javascript.
So simple as it is, the user inputs a string lets say:
"hey, wanna hang today?". It should output the next character in my array, so it would be like this: "ifz, xboob iboh upebz?".
And I have tried everything I can come up with. Hopefully some of you guys see the problem right away.
I have set up a short jsFiddle that shows similar to what I got.
function gen() {
var str = document.getElementById('str').value,
output = document.getElementById('output');
var alph = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','æ','ø','å','a'];
for (var i=0;i<str.length;i++) {
var index = str[i].charAt(0),
e = alph.indexOf(index);
console.log(alph[e + 1]);
output.innerHTML += alph[e + 1];
}
}
If you only want to skip to next letter with those chars and leave the others like space and ? as they are:
var index = str[i].charAt(0),
e = alph.indexOf(index);
if(e == -1){
output.innerHTML += index;
}else{
output.innerHTML += alph[e + 1];
}
Update: using #David Thomas method, you could do the following: (wouldnt work for 'å' though)
var index= str[i].toLowerCase().charCodeAt(0);
if((index > 96 && index < 123)){ // a to z
output.innerHTML += String.fromCharCode(str[i].charCodeAt(0)+1);
}else{
output.innerHTML += str[i];
}
}
I'd personally recommend the following approach, which should work with any alphabet for which there's a Unicode representation and, somewhat importantly, doesn't require a hard-coded array of letters/punctuation for each language:
function gen() {
var str = document.getElementById('str').value,
strTo = '',
output = document.getElementById('output');
for (var i = 0; i < str.length; i++) {
strTo += String.fromCharCode(str[i].charCodeAt(0) + 1);
}
output.textContent = strTo;
}
// hey, wanna hang today? -> ifz-!xboob!iboh!upebz#
JS Fiddle demo.
References:
String.prototype.charCodeAt().
String.prototype.fromCharCode().
Why does gen(',') === 'a'?
var alph = 'abcdefghijklmnopqrstuvwxyz';
var e = alph.indexOf(',');
console.log(e);
// -1
console.log(alph[e + 1]);
// 'a'
You need to take this case into account; otherwise, any characters that aren't in alph will map to 'a'.
(I see that you've also duplicated 'a' at the start and end of alph. This works, though it's more common either to use the modulus operator % or to check explicitly if e === alph.length - 1.)
You just have to add an array with the non respected characters:
var ex = ['?','!',' ','%','$','&','/']
In whole
for (var i=0;i<str.length;i++) {
var index = str[i].charAt(0)
if (alph.indexOf(index) >-1) {
var e = alph.indexOf(index);
output.innerHTML += alph[e + 1];
} else {
var e = index;
output.innerHTML += e;
}
}
JSFIDDLE: http://jsfiddle.net/TRNCFRMCN/hs15f0kd/8/.

Regex: ccTLD wildcard detection/removal in Javascript?

I have an url like this one:
http://xenodesystems.blogspot.com/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html
But the ".com" over there can change depending on the country, so that url also works with:
http://xenodesystems.blogspot.mx/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html
http://xenodesystems.blogspot.it/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html
http://xenodesystems.blogspot.fr/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html
etc.
What I need to do is replacing "xenodesystems.blogspot.*" with "blog.xenodesystems.com" and leave the rest of the URL intact, like this:
http://blog.xenodesystems.com/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html
Is this possible to do with javascript? I know blogger can redirect a domain, but I need to do it in pure Javascript explicitly. I know it's possible, it's just matter of finding the right regex, right?
Try this (JSFIDDLE). No regular expressions so it's much more efficient:
var str="http://xenodesystems.blogspot.mx/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html";
function xIndexOf(Val, Str, x)
{
if (x <= (Str.split(Val).length - 1)) {
Ot = Str.indexOf(Val);
if (x > 1) { for (var i = 1; i < x; i++) { var Ot = Str.indexOf(Val, Ot + 1) } }
return Ot;
}
}
var slash = (xIndexOf('/',str,3));
var dot = (xIndexOf('.',str,2));
str = str.substring(0,dot)+".com"+str.substring(slash)
alert(str)
xIndexOf function taken from here.
I think this is what you mean:
var urls = [
'http://xenodesystems.blogspot.mx/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html',
'http://xenodesystems.blogspot.it/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html',
'http://xenodesystems.blogspot.fr/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html',
'http://xenodesystems.blogspot.com.au/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html',
];
var ret = new Array;
var len = urls.length;
for (var i = 0; i < len; ++i) {
ret.push(urls[i].replace(/xenodesystems.blogspot(?:\.[a-zA-Z]+)+/,'xenodesystems.blogspot.com'));
}
console.log(ret);
OUTPUT
["http://xenodesystems.blogspot.com/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html",
"http://xenodesystems.blogspot.com/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html",
"http://xenodesystems.blogspot.com/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html",
"http://xenodesystems.blogspot.com/2013/07/actualizarmigrar-ruby-20-y-rails-4-sin.html"]
Here's a fiddle

Javascript: String search for regex, starting at the end of the string

Is there a javascript string function that search a regex and it will start the search at the end?
If not, what is the fastest and/or cleanest way to search the index of a regex starting from the end?
example of regex:
/<\/?([a-z][a-z0-9]*)\b[^>]*>?/gi
Maybe this can be useful and easier:
str.lastIndexOf(str.match(<your_regex_here>).pop());
Perhaps something like this is suitable for you?
Javascript
function lastIndexOfRx(string, regex) {
var match = string.match(regex);
return match ? string.lastIndexOf(match.slice(-1)) : -1;
}
var rx = /<\/?([a-z][a-z0-9]*)\b[^>]*>?/gi;
console.log(lastIndexOfRx("", rx));
console.log(lastIndexOfRx("<i>it</i><b>bo</b>", rx));
jsFiddle
And just for interest, this function vs the function that you choose to go with. jsperf
This requires that you format your regex correctly for matching exactly the pattern you want and globally (like given in your question), for example /.*(<\/?([a-z][a-z0-9]*)\b[^>]*>?)/i will not work with this function. But what you do get is a function that is clean and fast.
You may create a reverse function like:
function reverse (s) {
var o = '';
for (var i = s.length - 1; i >= 0; i--)
o += s[i];
return o;
}
and then use
var yourString = reverse("Your string goes here");
var regex = new Regex(your_expression);
var result = yourString.match(regex);
Another idea: if you want to search by word in reverse order then
function reverseWord(s) {
var o = '';
var split = s.split(' ');
for (var i = split.length - 1; i >= 0; i--)
o += split[i] + ' ';
return o;
}
var yourString = reverseWord("Your string goes here");
var regex = new Regex(your_expression);
var result = yourString.match(regex);
Andreas gave this from the comment:
https://stackoverflow.com/a/274094/402037
String.prototype.regexLastIndexOf = function(regex, startpos) {
regex = (regex.global) ? regex : new RegExp(regex.source, "g" + (regex.ignoreCase ? "i" : "") + (regex.multiLine ? "m" : ""));
if(typeof (startpos) == "undefined") {
startpos = this.length;
} else if(startpos < 0) {
startpos = 0;
}
var stringToWorkWith = this.substring(0, startpos + 1);
var lastIndexOf = -1;
var nextStop = 0;
while((result = regex.exec(stringToWorkWith)) != null) {
lastIndexOf = result.index;
regex.lastIndex = ++nextStop;
}
return lastIndexOf;
}
Which gives the functionality that I need, I tested my regex, and it is successful. So I'll use this
It depends what you exactly want to search for. You can use string.lastIndexOf or inside the regexp to use $ (end of the string).
Update:
try the regexp
/<\/?([a-z][a-z0-9]*)\b[^>]*>?[\w\W]*$/gi
var m = text.match(/.*(<\/?([a-z][a-z0-9]*)\b[^>]*>?)/i);
if (m) {
textFound = m[1];
position = text.lastIndexOf(textFound);
}
Use .* to skip as much text as posible, capture the text found and search it with lastIndexOf
EDIT:
Well, if text is found, no need to search with lastIndexOf. m[0] contains the full coincidence (including all the initial padding), and m[1] the searched text. So position of found text is m[0].length - m[1].length
var m = text.match(/.*(<\/?([a-z][a-z0-9]*)\b[^>]*>?)/i);
if (m) {
textFound = m[1];
position = m[0].length - m[1].length;
}
Assuming you're looking for a string 'token', then you need the position of 'token' that has no other 'token' following until the end of the string.
So you should compose your regex something like that:
$token = 'token';
$re = "/(?:$token)[^(?:$token)]*$/";
This will find your 'token' where no further 'token' can be found until string end. The "(?:" grouping simply makes the group non-storing, slightly speeding up performance and saving memory.

I need to get the last two value after dot using javascript split

I am getting the results like demo.in,demo.co.in,demo.tv,demo.org.in
I need to split the extension separately using JavaScript split function
var vsp = i.split(".");
this is my code I will get the result as
demo,in demo,co,in
but I need to get the extension separately
Working fiddle(demo version)
var values = [
"demo.in",
"demo.co.in",
"demo.tv","demo.org"
];
var results = [];
// iterate through the values
for (var i = 0, len = values.length; i < len; i++) {
// Split the parts on every dot.
var parts = values[i].split(".");
// Remove the first part (before the first dot).
parts = parts.slice(1, parts.length);
// Join the results together
results.push(parts.join("."));
};
console.dir(results); // all done
// Nicely display the values for the OP:
for (var i = 0, len = results.length; i < len; i++) {
document.body.innerHTML += (i + 1) + ": " + results[i] + "<br />";
};
I have no idea what you want, so here's some functions to cover the likely cases:
var s = 'demo.co.in'
// Return everything before the first '.'
function getExtension(s) {
var m = s.match(/^[^.]+/);
return m? m[0] : '';
}
alert(getExtension(s)); // demo
// Return everything after the last '.'
function getExtension2(s) {
var m = s.match(/[^.]+$/);
return m? m[0] : '';
}
alert(getExtension2(s)); // in
// Return everything after the first '.'
function getExtension3(s) {
return s.replace(/^[^.]+\./, '');
}
alert(getExtension3(s)); // co.in
I could not understand exactly .. "the extension" . You can try like below code
var urls = "demo,demo.in,my.demo.co.in,demo.tv,demo.org.in"
.split(',');
var splited = urls.reduce( function( o, n ){
var parts = n.split( '.' );
o[ n ] = (function(){
var ext = [];
while( !(parts.length == 1 || ext.length == 2) ) {
ext.unshift( parts.pop() );
};
return ext.join('.');
}());
return o;
}, {} );
console.log( JSON.stringify( splited ) );
which prints
{
"demo":"",
"demo.in":"in",
"my.demo.co.in":"co.in",
"demo.tv":"tv",
"demo.org.in":"org.in"
}
process result using
for( var i in splited ) {
console.log( i, splited[i]);
}
Try this:
var vsp = i.split(".");
for(var i=0; i< vsp.length; i++){
if(i !== 0){ //leaving the first match
// do something with vsp[i]
}
}
I hope you are not considering www also. :). If so, then keep i>1 instead of i !== 0.
There are several ways to do it (probably none as easy as it should be). You could define a function like this:
function mySplit(str, delim) {
var idx = (str.indexOf(delim) == -1) ? str.indexOf(delim) : str.length;
return [str.substr(0,idx), str.substr(idx)];
}
And then call it like: var sp = mySplit(i, ".");
You can also use lastIndexOf, which returns the location of the last . and from there you can get the rest of the string using substring.
function getExtension(hostName) {
var extension = null;
if(hostName && hostName.length > 0 && hostName.indexOf(".") !== -1) {
extension = hostName.substring(hostName.lastIndexOf(".") + 1);
}
return extension;
}
From there, you can use this function in a loop to get the extensions of many host names.
Edit Just noticed the "last two values" part in the title :) Thanks #rab

Retrieve Cookie Content

I'm trying tof ind a way to retrieve the content of a cookie in javascript.
Let's assume that the cookie is named "Google"
and lets also assume content of this cookie is just "blah"
I've been looking online and all I find are complex functions, and what I was wondering if there is a simple one line such code that retreives the value of the content in a cookie'
such as -
var myCookie = cookie.content('Google');
I don't want long parsers to check for various cookies or if the cookies have multiple value or whatever..Thanks!
QuirksMode has a very simple, but effective cookie script.
var Google = readCookie("Google"); // Google is now "blah"
Not exactly a simple one-line solution but close!
var results = document.cookie.match ( '(^|;) ?' + cookiename + '=([^;]*)(;|$)' );
if ( results ) myCookie = decodeURIComponent(results[2] ) ;
You'll have to parse the cookie jar yourself but it isn't that hard:
var name = 'the_cookie_you_want';
var value = null;
var cookies = document.cookie.split(/\s*;\s*/);
for(var i = 0; i < cookies.length; i++) {
if(cookies[i].substring(0, name.length + 1) == (name + '=')) {
value = decodeURIComponent(cookies[i].substring(name.length + 1));
break;
}
}
You can use document.cookie, or document.cookie.split(';') to get a full list of key/values.
In javascript all cookies are stored in a single string. THe cookies are separated by a ;
A possible function to read cookies is:
function readCookie(myCookieName)
{
if (document.cookie.length > 0)
{
var start = document.cookie.indexOf(myCookieName + "=");
if (start != -1)
{
start = start + myCookieName.length + 1;
var end = document.cookie.indexOf(";",start);
if (end == -1) end = document.cookie.length;
return unescape(document.cookie.substring(start ,end ));
}else{
return "";
}
}
return "";
}

Categories

Resources