regex strip domain name - javascript

A quick simple regex question
I have a domain name in a string that I need to strip - There is always http://www. and the domain always ends in "/"
g_adv_fullpath_old = g_adv_fullpath_old.replace(/http\:\/\/www\.(.*?)\//ig, '');
how do I create the regex to strip the domain name?
Any help would be appreciated

I would simply split on "/". For example:
>>> "http://www.asdf.com/a/b/c".split("/").slice(3).join("/")
'a/b/c'

Why complications? Simple indexOf will do.
First remove http://www (10 characters), then everything before the first slash.
var s = "http://www.google.com/test";
s = s.substr(10);
s = s.substr(s.indexOf('/'));
alert(s);
Or split, as David suggests.
An example

If you are looking to remove the http://www. and the following slash (plus anything after it) Try:
g_adv_fullpath_old.replace(/http:\/\/www\.(.*?)\/.*/ig, '$1')

You can also extend the stringobject so it supports urlParts
Example
http://jsfiddle.net/stofke/Uwdha/
Javascript
String.prototype.urlParts = function() {
var loc = this;
loc = loc.split(/([a-z0-9_\-]{1,5}:\/\/)?(([a-z0-9_\-]{1,}):([a-z0-9_\-]{1,})\#)?((www\.)|([a-z0-9_\-]{1,}\.)+)?([a-z0-9_\-]{3,})((\.[a-z]{2,4})(:(\d{1,5}))?)(\/([a-z0-9_\-]{1,}\/)+)?([a-z0-9_\-]{1,})?(\.[a-z]{2,})?(\?)?(((\&)?[a-z0-9_\-]{1,}(\=[a-z0-9_\-]{1,})?)+)?/g);
loc.href = this;
loc.protocol = loc[1];
loc.user = loc[3];
loc.password = loc[4];
loc.subdomain = loc[5];
loc.domain = loc[8];
loc.domainextension = loc[10];
loc.port = loc[12];
loc.path = loc[13];
loc.file = loc[15];
loc.filetype = loc[16];
loc.query = loc[18];
loc.anchor = loc[22];
//return the final object
return loc;
};
Usage:
var link = "http://myusername:mypassword#test.asdf.com/a/b/c/index.php?test1=5&test2=789#tite";
var path = link.urlParts().path;
var path = link.urlParts().user;

Related

How to find text between 2 characters with multiple occurrences?

Given the below string, what would be the most efficient way to get the file ID? The portion wanted: XXXXXXXXXXXxxxxxxxxXXX, which is between / and /view
The attempt below works, but is it really needed to reverse the string twice?
let = url = 'https://drive.google.com/file/d/1pnEX1OXXXXXXu6z9dPV5ZZ5VHqPU--6/view?usp=share_link'
url = reverseString(url)
let id = url.split('weiv/').pop().split('/')[0]
id = reverseString(id)
console.log('URL:' + id)
function reverseString(str) {
var splitString = str.split("");
var reverseArray = splitString.reverse();
var joinArray = reverseArray.join("");
return joinArray;
}
This solution searches for the "/d/" portion and advances three characters to begin a string.slice, continuing until the next occurence of /. Provided /d/ is always before the id portion, this should be reliable.
const url = 'https://drive.google.com/file/d/1pnEX1OXXXXXXu6z9dPV5ZZ5VHqPU--6/view?usp=share_link';
const id = url.slice(url.indexOf("/d/")+3, url.indexOf("/",url.indexOf("/d/")+3 ));
console.log(id);
I'd solve this with a simple regex.
const url = 'https://drive.google.com/file/d/1pnEX1OXXXXXXu6z9dPV5ZZ5VHqPU--6/view?usp=share_link';
const m = url.match(/^.*?\/.\/(.*?)\/view.*$/);
console.log(m[1])
you can use substring to get the value between /d/ and /view
let = url = 'https://drive.google.com/file/d/1pnEX1OXXXXXXu6z9dPV5ZZ5VHqPU--6/view?usp=share_link'
const fileId = url.substring(url.lastIndexOf("/d/") + 3, url.lastIndexOf("/view"));
console.log(fileId)

replace a string partially with something else

lets say I have this image address like
https://firebasestorage.googleapis.com/v0/b/myproj-d.appspot.com/o/FILE_NAME.jpg?alt=media&token=124bb2bf-c6ef-432b-92c7-7032563ba31b
how is it possible to replace FILE_NAME.jpg with THUMB_FILE_NAME.jpg
Note: FILE_NAME and THUMB_FILE_NAME are not static and fix.
the FILE_NAME is not fixed and I can't use string.replace method.
eventually I don't know the File_Name
Use replace
.replace(/(?<=\/)[^\/]*(?=(.jpg))/g, "THUMB_FILE_NAME")
or if you want to support multiple formats
.replace(/(?<=\/)[^\/]*(?=(.(jpg|png|jpeg)))/g, "THUMB_FILE_NAME")
Demo
var output = "https://firebasestorage.googleapis.com/v0/b/myproj-d.appspot.com/o/FILE_NAME.jpg?alt=media&token=124bb2bf-c6ef-432b-92c7-7032563ba31b".replace(/(?<=\/)[^\/]*(?=(.jpg))/g, "THUMB_FILE_NAME");
console.log( output );
Explanation
(?<=\/) matches / but doesn't remember the match
[^\/]* matches till you find next /
(?=(.jpg) ensures that match ends with .jpg
To match the FILE_NAME, use
.match(/(?<=\/)[^\/]*(?=(.(jpg|png|jpeg)))/g)
var pattern = /[\w-]+\.(jpg|png|txt)/
var c = 'https://firebasestorage.googleapis.com/v0/b/myproj-d.appspot.com/o/FILE_NAME.jpg?alt=media&token=124bb2bf-c6ef-432b-92c7-7032563ba31b
'
c.replace(pattern, 'YOUR_FILE_NAME.jpg')
you can add any format in the pipe operator
You can use the String's replace method.
var a = "https://firebasestorage.googleapis.com/v0/b/myproj-d.appspot.com/o/FILE_NAME.jpg?alt=media&token=124bb2bf-c6ef-432b-92c7-7032563ba31b";
a = a.replace('FILE_NAME', 'THUMB_FILE_NAME');
If you know the format, you can use the split and join to replace the FILE_NAME.
let str = "https://firebasestorage.googleapis.com/v0/b/myproj-d.appspot.com/o/FILE_NAME.jpg?alt=media&token=124bb2bf-c6ef-432b-92c7-7032563ba31b";
let str_pieces = str.split('/');
let str_last = str_pieces[str_pieces.length - 1];
let str_last_pieces = str_last.split('?');
str_last_pieces[0] = 'THUMB_' + str_last_pieces[0];
str_last = str_last_pieces.join('?');
str_pieces[str_pieces.length - 1] = str_last;
str = str_pieces.join('/');

Single regexp to get page URL but exclude port number from a full URL

I'm trying to come up with a regexp to get the page URL from the full URL but exclude a possible port number from it. So far I came up with the following JS:
var res = url.match(/^.*\:\/\/(?:www2?.)?([^?#]+)/i);
if(res)
{
var pageURL = res[1];
console.log(pageURL);
}
If I call it for this:
var url = "http://www.example.com/php/page.php?what=sw#print";
I get the correct answer: example.com/php/page.php
But if I do:
var url = "http://www.example.com:80/php/page.php?what=sw#print";
I need it to return example.com/php/page.php instead of example.com:80/php/page.php.
I can remove it with the second regexp, but I was curious if I could do it with just one (for speed)?
You can modify your regex to this:
/^.*\:\/\/(?:www2?.)?([^/:]+)(?:[^:]*:\d+)?([^?#]+)/i
RegEx Demo
It will return 2 matches:
1: example.com
2: /php/page.php
as match[1] and match[2] respectively for both inputs that you can concatenate.
http://www.example.com/php/page.php?what=sw#print
OR
http://www.example.com:80/php/page.php?what=sw#print
Update: Here are performance results on jsperf.com that shows regex method is fastest is of all.
Keep it simple:
~ node
> "http://www.example.com:3000/php/page.php?what=sw#print".replace(/:\d+/, '');
'http://www.example.com/php/page.php?what=sw#print'
> "http://www.example.com/php/page.php?what=sw#print".replace(/:\d+/, '');
'http://www.example.com/php/page.php?what=sw#print'
Why would you use a regex at all?
EDIT:
As pointed out by #c00000fd: Because document might not be available and document.createElement is very slow compared to RegExp - see:
http://jsperf.com/url-parsing/5
http://jsperf.com/hostname-from-url
Nevertheless I will leave my original answer for reference.
ORIGINAL ANSWER:
Instead you could just use the Anchor element:
Fiddle:
http://jsfiddle.net/12qjqx7n/
JS:
var url = 'http://foo:bar#www.example.com:8080/php/page.php?what=sw#print'
var a = document.createElement('a');
a.href = url;
console.log(a.hash);
console.log(a.host);
console.log(a.hostname);
console.log(a.origin);
console.log(a.password);
console.log(a.pathname);
console.log(a.port);
console.log(a.protocol);
console.log(a.search);
console.log(a.username);
Additional information:
http://www.w3schools.com/jsref/dom_obj_anchor.asp
How about a group for matching the port, if present?
var url = "http://www.example.com:80/php/page.php?what=sw#print";
var res = url.match(/^.*\:\/\/(?:www2?.)?([^?#\/:]+)(\:\d+)?(\/[^?#]+)/i);
if(res)
{
var pageURL = res[1]+res[3];
console.log(res, pageURL);
}
Try
var url = "http://www.example.com:80/php/page.php?what=sw#print";
var res = url.split(/\w+:\/\/+\w+\.|:+\d+|\?.*/).join("");
var url = "http://www.example.com:80/php/page.php?what=sw#print";
var res = url.split(/\w+:\/\/+\w+\.|:+\d+|\?.*/).join("");
document.body.innerText = res;
You could use replace method to modify your original string or Url,
> var url = "http://www.example.com/php/page.php?what=sw#print";
undefined
> var url1 = "http://www.example.com:80/php/page.php?what=sw#print";
undefined
> url.replace(/^.*?:\/\/(?:www2?.)?([^/:]+)(?::\d+)?([^?#]+).*$/g, "$1$2")
'example.com/php/page.php'
> url1.replace(/^.*?:\/\/(?:www2?.)?([^/:]+)(?::\d+)?([^?#]+).*$/g, "$1$2")
'example.com/php/page.php'
DEMO

get particular string part in javascript

I have a javascript string like "firstHalf_0_0_0" or secondHalf_0_0_0". Now I want to get the string before the string "Half" from above both strings using javascript.Please help me.
Thanks.
var myString = "firstHalf_0_0_0";
var parts = myString.split("Half");
var thePart = parts[0];
var str = 'firstHalf_0_0_0',
part = str.match(/(\w+)Half/)[1];
alert(part); // Alerts "first"
var str = "firstHalf.....";
var index = str.indexOf("Half");
var substring = str.substr(0, index);
jsFiddle demo.
Using this you can get any particular part of string.
var str= 'your string';
var result = str.split('_')[0];
Working example here for your particular case.
http://jsfiddle.net/7kypu/3/
cheers!

Javascript remove characters utill 3 slash /

Whats the best to way, based on the input below, to get everything in the url after the domain:
var url = "http://www.domain.com.uk/sadsad/asdsadsad/asdasdasda/?asda=ggy";
var url = "http://www.domain.com.uk/asdsadsad/asdasdasda/#45435";
var url = "http://www.domain.com.uk/asdasdasda/?324324";
var url = "http://www.domain.com.uk/asdasdasda/";
The output:
url = "/sadsad/asdsadsad/asdasdasda/?asda=ggy";
url = "/asdsadsad/asdasdasda/#45435";
url = "/asdasdasda/?324324";
UPDATE: the domain its not always the same. (sorry)
Thx
You should really parse the URI.
http://stevenlevithan.com/demo/parseuri/js/
Every absolute URL consists of a protocol, separated by two slashes, followed by a host, followed by a pathname. An implementation can look like:
// Search for the index of the first //, then search the next slash after it
var slashOffset = url.indexOf("/", url.indexOf("//") + 2);
url = url.substr(slashOffset);
If the domain is always the same, a simple replace will work fine:
var url = "http://www.domain.com.uk/sadsad/asdsadsad/asdasdasda/?asda=ggy";
var afterDomain = url.replace("^http://www.domain.com.uk/", "");
You could also use RegEx:
var url = "http://www.domain.com.uk/sadsad/asdsadsad/asdasdasda/?asda=ggy";
var afterDomain = url.replace(/^[^\/]*(?:\/[^\/]*){2}/, "");
Assuming this is in the browser, creating an anchor element will do a lot of magic on your behalf:
var a=document.createElement('a');
a.href="http://somedomain/iouhowe/ewouho/wiouhfe?jjj";
alert(a.pathname + a.search + a.hash); // /iouhowe/ewouho/wiouhfe?jjj

Categories

Resources