Extract domain from a string using Javascript? - javascript

I have an string which include many website url but i want to extract the only url that is out side these bracket [ ].
Can someone correct this ?
Note : Output Must be www.google.com and it not necessary that domain name outside [ ] will come at the end of string.
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
// String can include https and instead of .com there can be .in
var arr = str.split("|");
function domainName(str) {
var match = str.match(/^(?:https?:\/\/)?(?:w{3}\.)?([a-z\d\.-]+)\.(?:[a-z\.]{2,10})(?:[\w\.-]*)*/);
if (match != null && match.length > 0) {
return match;
} else {
return null;
}
}
var domainname = domainName(str);
var domain = domainname;
console.log(domain);

Replace all occurrences of [[, followed by non-brackets, followed by ]] with a space::
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
const result = str.replace(/\[\[[^[\]]*\]\]/g, ' ');
console.log(result);
Then you can search for URLs in the replaced string.

As CertainPerformance Suggest you can exclude the url that is in [ ] using replace then by using regex you can extract the domain name. Below is the code :
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
var str = str.replace(/\[\[[^[\]]*\]\]/g, '');
var ptrn = /^(?:https?:\/\/)?(?:w{3}\.)?([a-z\d\.-]+)\.(?:[a-z\.]{2,10})(?:[\w\.-]*)*/g;
var i, value, domain, len, array;
array = str.split("|");
len = array.length;
for(i=0; len > i; i++) {
value = array[i].match(ptrn);
if (value !== null) {
domain = value;
}
else {
domain = "Not Found";
}
}
document.write("Domain is = ", domain);

Two main steps:
Create a regular expression that matches your desired pattern.
Use String.match()
Example:
// match all URLs
// let regex = /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+#)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+#)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%#\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/g;
// match only the URL outside of the brackets
let regex = /(((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+#)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+#)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%#\.\w_]*)#?(?:[\.\!\/\\\w]*))?))(([^[\]]+)(?:$|\[))/g;
function getUrlsFromText(input) {
return input.match(regex);
}
console.log(getUrlsFromText('[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3'));
Note that I borrowed the URL matching part of the regular expression from here. If you don't want the query string to be included (as it is on the google.com match), you can modify the regex as desired.

this can be achieve by split
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
let ans=str.split("]")[6]
let finalAns=ans.split("&")[0]
console.log(finalAns)

var dirtySource = "https://example.com/subdirectory";
var dirtyPos = dirtySource.indexOf("://"); // works for http and https
var cleanSource = dirtySource.substr(dirtyPos+3);
cleanSource = cleanSource.split("/")[0]; // trim off subdirectories

Related

How to find special characters in a string and store in an array in javascript

Example of a string
"/city=<A>/state=<B>/sub_div=<C>/type=pos/div=<D>/cli_name=Cstate<E>/<F>/<G>"
characters occurs like A, B, C and .... are variables and count is not fixed
How to identifies how many variables are there and stored in an array
Use regex to find all your matches.
Using a while loop you can iterate through multiple matches and push them in an array. Try this.
var String = "/city=<A>/state=<B>/sub_div=<C>/type=pos/div=<D>/cli_name=Cstate<E>/<F>/<G>";
var myRegexp = /\<.\>/gm;
var matches = [];
var match = myRegexp.exec(String);
while (match != null) {
matches.push(match[0])
match = myRegexp.exec(String);
}
console.log(matches)
Please review below code that will help to resolve your issue. It may find any non word characters and create a non-word array.
let str = "/city=<A>/state=<B>/sub_div=<C>/type=pos/div=<D>/cli_name=Cstate<E>/<F>/<G>";
let arrStr = str.split("");
var strRegExp = /\W/g;
let arrNonWord = [];
arrStr.forEach(function(str){
var result = str.match(strRegExp);
if(result)
arrNonWord.push(result[0]);
});
console.log(arrNonWord);

Parsing BBCode in Javascript

I am using this (http://coursesweb.net/javascript/convert-bbcode-html-javascript_cs) as my script for parsing BBCode. I have extended the BBCodes that it can process, however I am encountering a problem when a newline immediately follows an opening tag, e.g.
[code]
code....
[/code]
The problem does not occur if the code is 'inline'
[code]code....[/code]`
The regex being used to match what's inside these tags is (.*?) which I know does not match newlines. I have tried ([^\r\n]) to match newlines but this hasn't worked either.
I imagine it's a simple issue but I have little experience with regex so any help would be appreciated
EDIT: this is the full list of regex's I am using
var tokens = {
'URL' : '((?:(?:[a-z][a-z\\d+\\-.]*:\\/{2}(?:(?:[a-z0-9\\-._~\\!$&\'*+,;=:#|]+|%[\\dA-F]{2})+|[0-9.]+|\\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\\])(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:#|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:#\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:#\\/?|]+|%[\\dA-F]{2})*)?)|(?:www\\.(?:[a-z0-9\\-._~\\!$&\'*+,;=:#|]+|%[\\dA-F]{2})+(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:#|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:#\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:#\\/?|]+|%[\\dA-F]{2})*)?)))',
'LINK' : '([a-z0-9\-\./]+[^"\' ]*)',
'EMAIL' : '((?:[\\w\!\#$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+\.)*(?:[\\w\!\#$\%\'\*\+\-\/\=\?\^\`{\|\}\~]|&)+#(?:(?:(?:(?:(?:[a-z0-9]{1}[a-z0-9\-]{0,62}[a-z0-9]{1})|[a-z])\.)+[a-z]{2,6})|(?:\\d{1,3}\.){3}\\d{1,3}(?:\:\\d{1,5})?))',
'TEXT' : '(.*?)',
'SIMPLETEXT' : '([a-zA-Z0-9-+.,_ ]+)',
'INTTEXT' : '([a-zA-Z0-9-+,_. ]+)',
'IDENTIFIER' : '([a-zA-Z0-9-_]+)',
'COLOR' : '([a-z]+|#[0-9abcdef]+)',
'NUMBER' : '([0-9]+)',
'ALL' : '([^\r\n])',
};
EDIT 2: Full JS for matching
var token_match = /{[A-Z_]+[0-9]*}/ig;
var _getRegEx = function(str) {
var matches = str.match(token_match);
var nrmatches = matches.length;
var i = 0;
var replacement = '';
if (nrmatches <= 0) {
return new RegExp(preg_quote(str), 'g'); // no tokens so return the escaped string
}
for(; i < nrmatches; i += 1) {
// Remove {, } and numbers from the token so it can match the
// keys in tokens
var token = matches[i].replace(/[{}0-9]/g, '');
if (tokens[token]) {
// Escape everything before the token
replacement += preg_quote(str.substr(0, str.indexOf(matches[i]))) + tokens[token];
// Remove everything before the end of the token so it can be used
// with the next token. Doing this so that parts can be escaped
str = str.substr(str.indexOf(matches[i]) + matches[i].length);
}
}
replacement += preg_quote(str);
return new RegExp(replacement, 'gi');
};
var _getTpls = function(str) {
var matches = str.match(token_match);
var nrmatches = matches.length;
var i = 0;
var replacement = '';
var positions = {};
var next_position = 0;
if (nrmatches <= 0) {
return str; // no tokens so return the string
}
for(; i < nrmatches; i += 1) {
// Remove {, } and numbers from the token so it can match the
// keys in tokens
var token = matches[i].replace(/[{}0-9]/g, '');
var position;
// figure out what $# to use ($1, $2)
if (positions[matches[i]]) {
position = positions[matches[i]];
} else {
// token doesn't have a position so increment the next position
// and record this token's position
next_position += 1;
position = next_position;
positions[matches[i]] = position;
}
if (tokens[token]) {
replacement += str.substr(0, str.indexOf(matches[i])) + '$' + position;
str = str.substr(str.indexOf(matches[i]) + matches[i].length);
}
}
replacement += str;
return replacement;
};
This does the trick for me: (updated this one too to avoid confusion)
\[code\]([\s\S]*?)\[\/code\]
See regexpal and enter the following:
[code]
code....
[/code]
[code]code.... [/code]
Update:
Fixed the regex to the following and this works in the Chrome Console for me:
/\[code\]([\s\S]*?)\[\/code\]/g.exec("[code]hello world \n[/code]")
JavaScript does not handle multi-line RegExp matches. Instead you have to use the [\s\S] trick described in this SO answer. Perhaps?
/\[code\][\s\S]*\[code\]/
Also RegExps probably isn't the best choice for parsing syntax. It's is extremely over complicated. I would suggest parsing the string and building an Abstract Syntax Tree then rendering the HTML from that.

How to split a string by <span> tag in Javascript

I have a data.text string that returns a value like:
<span>Name</span>Message
Is it possible in Javascript to take this value and split in to two so that I can get 'Name' and 'Message' in two different variables?
I tried,
var str = data.text;
var arr[] = str.split("</span>", 2);
var str1 = arr[0];
var theRest = arr[1];
But didn't work
You should use a DOM parser to parse HTML.
var str = "<span>Name</span>Message",
el = document.createElement('div');
el.innerHTML = str;
[].map.call(el.childNodes, function(node) {
return node.textContent;
}); // [ "Name", "Message" ]
There might be many methods but adding on using split.
var str = '<span>Name</span>Message';
var result = str.split(/<\/?span>/); // Split by span tags(opening and closing)
result.shift(); // Remove first empty element
console.log(result);
document.write(result);
The regex here <\/?span> will match the <span> and </span> both as in the regex \/? has made / optional.
You can also use following regex with non-capturing group.
(?:</?span>)(\w+)
var str = '<span>Name</span>Message';
var regex = /(?:<\/?span>)(\w+)/g,
result = [];
while (res = regex.exec(str)) {
result.push(res[1]);
}
console.log(result);
document.write(result);
You can replace the open label with a empty string and then split the string by the close label, something like this:
var values = data.text.replace('<span>', '').split('</span>');
console.log(values[0]); // Name
console.log(values[1]); // Message

Javascript get query string values using non-capturing group

Given this query string:
?cgan=1&product_cats=mens-jeans,shirts&product_tags=fall,classic-style&attr_color=charcoal,brown&attr_size=large,x-small&cnep=0
How can I extract the values from only these param types 'product_cat, product_tag, attr_color, attr_size' returning only 'mens-jeans,shirts,fall,classic-style,charcoal,brown,large,x-small?
I tried using a non-capturing group for the param types and capturing group for just the values, but its returning both.
(?:product_cats=|product_tags=|attr\w+=)(\w|,|-)+
You can collect tha values using
(?:product_cats|product_tags|attr\w+)=([\w,-]+)
Mind that a character class ([\w,-]+) is much more efficient than a list of alternatives ((\w|,|-)*), and we avoid the issue of capturing just the last single character.
Here is a code sample:
var re = /(?:product_cats|product_tags|attr\w+)=([\w,-]+)/g;
var str = '?cgan=1&product_cats=mens-jeans,shirts&product_tags=fall,classic-style&attr_color=charcoal,brown&attr_size=large,x-small&cnep=0';
var res = [];
while ((m = re.exec(str)) !== null) {
res.push(m[1]);
}
document.getElementById("res").innerHTML = res.join(",");
<div id="res"/>
You can always use a jQuery method param.
You can use following simple regex :
/&\w+=([\w,-]+)/g
Demo
You need to return the result of capture group and split them with ,.
var mystr="?cgan=1&product_cats=mens-jeans,shirts&product_tags=fall,classic-style&attr_color=charcoal,brown&attr_size=large,x-small&cnep=0
";
var myStringArray = mystr.match(/&\w+=([\w,-]+)/g);
var arrayLength = myStringArray.length-1; //-1 is because of that the last match is 0
var indices = [];
for (var i = 0; i < arrayLength; i++) {
indices.push(myStringArray[i].split(','));
}
Something like
/(?:product_cats|product_tag|attr_color|attr_size)=[^,]+/g
(?:product_cats|product_tag|attr_color|attr_size) will match product_cats or product_tag or attr_color or attr_size)
= Matches an equals
[^,] Negated character class matches anything other than a ,. Basically it matches till the next ,
Regex Demo
Test
string = "?cgan=1&product_cats=mens-jeans,shirts&product_tags=fall,classic-style&attr_color=charcoal,brown&attr_size=large,x-small&cnep=0";
matched = string.match(/(product_cats|product_tag|attr_color|attr_size)=[^,]+/g);
for (i in matched)
{
console.log(matched[i].split("=")[1]);
}
will produce output as
mens-jeans
charcoal
large
There is no need for regular expressions. just use splits and joins.
var s = '?cgan=1&product_cats=mens-jeans,shirts&product_tags=fall,classic-style&attr_color=charcoal,brown&attr_size=large,x-small&cnep=0';
var query = s.split('?')[1],
pairs = query.split('&'),
allowed = ['product_cats', 'product_tags', 'attr_color', 'attr_size'],
results = [];
$.each(pairs, function(i, pair) {
var key_value = pair.split('='),
key = key_value[0],
value = key_value[1];
if (allowed.indexOf(key) > -1) {
results.push(value);
}
});
console.log(results.join(','));
($.each is from jQuery, but can easily be replaced if jQuery is not around)

How can we split a string using starts with regular expression (/^myString/g)

I am having a case where i need to split given string using starts with regex (/^'searchString'/) which is not working such as
"token=123412acascasdaASDFADS".split('token=')
Here i want to extract the token value but as there might be some other possible parameters such as
"reset_token=SDFDFdsf12313ADADF".split('token=')
Here it also split the string with 'token=', Thats why i need to split the string using some regex which states that split the string where it starts with given string.
Thanks..
EDITED
Guys thanks for your valuable response this issue can be resolve using /\btoken=/ BUT BUT its does not work if 'token=' stored as a string into a variable such as
sParam = 'token=';
"token=123412acascasdaASDFADS".split(/\bsParam/);
This does not works.
You can use regex in split with word boundary:
"token=123412acascasdaASDFADS".split(/\btoken=/)
If token is stored in a variable then use RegExp constructor:
var sParam = "token";
var re = new RegExp("\\b" + sParam + "=");
Then use it:
var tokens = "token=123412acascasdaASDFADS".split( re );
This is the use case for the \b anchor:
\btoken=
It ensures there's no other word character before token (a word character being [a-zA-Z0-9_])
You need to split the string using the & parameter delimiter, then loop through those parameters:
var token;
$.each(params.split('&'), function() {
var parval = this.split('=');
if (parval[0] == "token") {
token = parval[1];
return false; // end the $.each loop
}
});
if you just use token= as the split delimiter, you'll include all the other parameters after it in the value.
It's not clear what you need, but this may be an idea to work with?
var reqstr = "token=12345&reset_token=SDFDFdsf12313ADADF&someval=foo"
.split(/[&=]/)
,req = [];
reqstr.map( function (v, i) {
if (i%2==0) {
var o = {};
o[/token/i.test(v) ? 'token' : v] = reqstr[i+1];
this.push(o);
} return v
}, req);
/* => req now contains:
[ { token: '12345' },
{ token: 'SDFDFdsf12313ADADF' },
{ someval: 'foo' } ]
*/
You can try with String#match() function and get the matched group from index 1
sample code
var re = /^token=(.*)$/;
var str = 'token=123412acascasdaASDFADS';
console.log('token=123412acascasdaASDFADS'.match('/^token=(.*)$/')[1]);
output:
123412acascasdaASDFADS
If token is dynamic then use RegExp
var token='token=';
var re = new RegExp("^"+token+"(.*)$");
var str = 'token=123412acascasdaASDFADS';
console.log(str.match(re)[1]);
Learn more...

Categories

Resources