js function to get filename from url - javascript

I have a url like http://www.example.com/blah/th.html
I need a javascript function to give me the 'th' value from that.
All my urls have the same format (2 letter filenames, with .html extension).
I want it to be a safe function, so if someone passes in an empty url it doesn't break.
I know how to check for length, but I should be checking for null to right?

var filename = url.split('/').pop()

Why so difficult?
var filename = url.split('/').pop().split('#')[0].split('?')[0];

Use the match function.
function GetFilename(url)
{
if (url)
{
var m = url.toString().match(/.*\/(.+?)\./);
if (m && m.length > 1)
{
return m[1];
}
}
return "";
}

Similar to the others, but...I've used Tom's simple script - a single line,
then you can use the filename var anywhere:
http://www.tomhoppe.com/index.php/2008/02/grab-filename-from-window-location/
var filename = location.pathname.substr(location.pathname.lastIndexOf("/")+1);

This should work for all cases
function getFilenameFromUrl(url) {
const pathname = new URL(url).pathname;
const index = pathname.lastIndexOf('/');
return pathname.substring(index + 1) // if index === -1 then index+1 will be 0
}

In addition to the existing answers, I would recommend using URL() constructor (works both in browsers and Node.js) because you can be sure your URL is valid:
const url = 'https://test.com/path/photo123.png?param1=1&param2=2#hash';
let filename = '';
try {
filename = new URL(url).pathname.split('/').pop();
} catch (e) {
console.error(e);
}
console.log(`filename: ${filename}`);

A regex solution which accounts for URL query and hash identifier:
function fileNameFromUrl(url) {
var matches = url.match(/\/([^\/?#]+)[^\/]*$/);
if (matches.length > 1) {
return matches[1];
}
return null;
}
JSFiddle here.

Because cases tend to fail with custom code, I looked up to the JavaScript URL class. Alas, it chokes on relative URLs! Also, it doesn't have a property to get the file name. Not epic.
There has to be a good library out there which solves this common problem. Behold URI.js. All you need is a simple statement like the following:
let file = new URI(url).filename()
Then we can create a simple function that does null checks and removes the file extension:
function fileName(url) {
if (url === null || typeof url === 'undefined')
return ''
let file = new URI(url).filename() // File name with file extension
return file.substring(0, file.lastIndexOf('.')) // Remove the extension
}
Here's a snippet with test cases to play around with. All cases pass except drive paths.
test('Dots in file name without URL', 'dotted.file.name.png', 'dotted.file.name')
test('Dots in file name with URL', 'http://example.com/file.name.txt', 'file.name')
test('Lengthy URL with parameters', '/my/folder/filename.html#dssddsdsd?toto=33&dududu=podpodpo', 'filename')
test('URL with hash', '/my/folder/filename.html#dssddsdsd', 'filename')
test('URL with query strings', '/my/folder/filename.html?toto=33&dududu=podpodp', 'filename')
test('Hash after query string', 'http://www.myblog.com/filename.php?year=2019#06', 'filename')
test('Query parameter with file path character', 'http://www.example.com/filename.zip?passkey=1/2', 'filename')
test('Query parameter with file path character and hash', 'http://www.example.com/filename.html?lang=en&user=Aan9u/o8ai#top', 'filename')
test('Asian characters', 'http://example.com/文件名.html', '文件名')
test('URL without file name', 'http://www.example.com', '')
test('Null', null, '')
test('Undefined', undefined, '')
test('Empty string', '', '')
test('Drive path name', 'C:/fakepath/filename.csv', 'filename')
function fileName(url) {
if (url === null || typeof url === 'undefined')
return ''
let file = new URI(url).filename() // File name with file extension
return file.substring(0, file.lastIndexOf('.')) // Remove the extension
}
function test(description, input, expected) {
let result = fileName(input)
let pass = 'FAIL'
if (result === expected)
pass = 'PASS'
console.log(pass + ': ' + description + ': ' + input)
console.log(' => "' + fileName(input) + '"')
}
<script src="https://cdn.jsdelivr.net/gh/medialize/URI.js#master/src/URI.js"></script>
Results
PASS: Dots in file name without URL: dotted.file.name.png
=> "dotted.file.name"
PASS: Dots in file name with URL: http://example.com/file.name.txt
=> "file.name"
PASS: Lengthy URL with parameters: /my/folder/filename.html#dssddsdsd?toto=33&dududu=podpodpo
=> "filename"
PASS: URL with hash: /my/folder/filename.html#dssddsdsd
=> "filename"
PASS: URL with query strings: /my/folder/filename.html?toto=33&dududu=podpodp
=> "filename"
PASS: Hash after query string: http://www.myblog.com/filename.php?year=2019#06
=> "filename"
PASS: Query parameter with file path character: http://www.example.com/filename.zip?passkey=1/2
=> "filename"
PASS: Query parameter with file path character and hash: http://www.example.com/filename.html?lang=en&user=Aan9u/o8ai#top
=> "filename"
PASS: Asian characters: http://example.com/文件名.html
=> "文件名"
PASS: URL without file name: http://www.example.com
=> ""
PASS: Null: null
=> ""
PASS: Undefined: undefined
=> ""
PASS: Empty string:
=> ""
FAIL: Drive path name: C:/fakepath/filename.csv
=> ""
This solution is for you if you're too lazy to write custom code and don't mind using a library to do work for you. It isn't for you if you want to code golf the solution.

those will not work for lenghty url like
"/my/folder/questions.html#dssddsdsd?toto=33&dududu=podpodpo"
here I expect to get "questions.html".
So a possible (slow) solution is as below
fname=function(url)
{ return url?url.split('/').pop().split('#').shift().split('?').shift():null }
then you can test that in any case you get only the filename.
fname("/my/folder/questions.html#dssddsdsd?toto=33&dududu=podpodpo")
-->"questions.html"
fname("/my/folder/questions.html#dssddsdsd")
-->"questions.html"
fname("/my/folder/questions.html?toto=33&dududu=podpodpo")
"-->questions.html"
(and it works for null)
(I would love to see a faster or smarter solution)

This answer only works in browser environment. Not suitable for node.
function getFilename(url) {
const filename = decodeURIComponent(new URL(url).pathname.split('/').pop());
if (!filename) return 'index.html'; // some default filename
return filename;
}
function filenameWithoutExtension(filename) {
return filename.replace(/^(.+?)(?:\.[^.]*)?$/, '$1');
}
Here are two functions:
first one get filename from url
second one get filename without extension from a full filename
For parsing URL, new an URL object should be the best choice. Also notice that URL do not always contain a filename.
Notice: This function try to resolve filename from an URL. But it do NOT guarantee that the filename is valid and suitable for use:
Some OS disallow certain character in filename (e.g. : in windows, \0 in most OS, ...);
Some filename may reserved by OS (e.g. CON in windows);
Some filename may make user unhappy to handle it (e.g. a file named "--help" in Linux)
Test it out:
function getFilename(url) {
const filename = decodeURIComponent(new URL(url).pathname.split('/').pop());
if (!filename) return 'index.html'; // some default filename
return filename;
}
function test(url) {
console.log('Filename: %o\nUrl: %o', getFilename(url), url);
}
test('http://www.example.com');
test('http://www.example.com/');
test('http://www.example.com/name.txt');
test('http://www.example.com/path/name.txt');
test('http://www.example.com/path/name.txt/realname.txt');
test('http://www.example.com/page.html#!/home');
test('http://www.example.com/page.html?lang=en&user=Aan9u/o8ai#top');
test('http://www.example.com/%E6%96%87%E4%BB%B6%E5%90%8D.txt')

I'd use the substring function combined with lastIndexOf. This will allow for filenames with periods in them e.g. given http://example.com/file.name.txt this gives file.name unlike the accepted answer that would give file.
function GetFilename(url)
{
if (url)
{
return url.substring(url.lastIndexOf("/") + 1, url.lastIndexOf("."));
}
return "";
}

Using jQuery with the URL plugin:
var file = jQuery.url.attr("file");
var fileNoExt = file.replace(/\.(html|htm)$/, "");
// file == "th.html", fileNoExt = "th"

For node and browsers, based on #pauls answer but solving issues with hash and more defensive:
export function getFileNameFromUrl(url) {
const hashIndex = url.indexOf('#')
url = hashIndex !== -1 ? url.substring(0, hashIndex) : url
return (url.split('/').pop() || '').replace(/[\?].*$/g, '')
}
Few cases:
describe('getFileNameFromUrl', () => {
it('absolute, hash and no extension', () => {
expect(getFileNameFromUrl(
'https://foo.bar/qs/bar/js-function-to-get-filename-from-url#comment95124061_53560218'))
.toBe('js-function-to-get-filename-from-url')
})
it('relative, extension and parameters', () => {
expect(getFileNameFromUrl('../foo.png?ar=8')).toBe('foo.png')
})
it('file name with multiple dots, hash with slash', () => {
expect(getFileNameFromUrl('questions/511761/js-function.min.js?bar=9.9&y=1#/src/jjj?=9.9')).toBe('js-function.min.js')
})
})

This should handle anything you throw at it (absolute URLs, relative URLs, complex AWS URLs, etc). It includes an optional default or uses a psuedorandom string if neither a filename nor a default were present.
function parseUrlFilename(url, defaultFilename = null) {
let filename = new URL(url, "https://example.com").href.split("#").shift().split("?").shift().split("/").pop(); //No need to change "https://example.com"; it's only present to allow for processing relative URLs.
if(!filename) {
if(defaultFilename) {
filename = defaultFilename;
//No default filename provided; use a pseudorandom string.
} else {
filename = Math.random().toString(36).substr(2, 10);
}
}
return filename;
}
Props to #hayatbiralem for nailing the order of the split()s.

Similarly to what #user2492653 suggested, if all you want is the name of the file like Firefox gives you, then you the split() method, which breaks the string into an array of components, then all you need to do it grab the last index.
var temp = url.split("//");
if(temp.length > 1)
return temp[temp.length-1] //length-1 since array indexes start at 0
This would basically break C:/fakepath/test.csv into {"C:", "fakepath", "test.csv"}

my 2 cents
the LastIndexOf("/") method in itself falls down if the querystrings contain "/"
We all know they "should" be encoded as %2F but it would only take one un-escaped value to cause problems.
This version correctly handles /'s in the querystrings and has no reliance on .'s in the url
function getPageName() {
//#### Grab the url
var FullUrl = window.location.href;
//#### Remove QueryStrings
var UrlSegments = FullUrl.split("?")
FullUrl = UrlSegments[0];
//#### Extract the filename
return FullUrl.substr(FullUrl.lastIndexOf("/") + 1);
}

Try this
url.substring(url.lastIndexOf('/')+1, url.length)

url? url.substring(url.lastIndexOf('/')+1, url.lastIndexOf('.')):''
Safety is as asked for. when url is null or undefined the result is ''.
Removes all of the path with ':', dots and any symbol including the last '/'.
This gives the true answer 'th' as asked and not 'th.index'. That is very important of course to have it work at all.
It allows filename to have several periods!
Not asked, but you can also have a query string without '/' and '.'
It is a corrected answer from Abhishek Sharma so I gave him an upvote. So genious and minimal one-liner - I saw it there :)

function getFileNameWithoutExtension(url) {
if (typeof url !== 'string') throw new Error('url must be a string');
// Remove the QueryString
return url.replace(/\?.*$/, '')
// Extract the filename
.split('/').pop()
// Remove the extension
.replace(/\.[^.]+$/, '');
}
This will return news from this URL http://www.myblog.com/news.php?year=2019#06.

ES6 syntax based on TypeScript
Actually, the marked answer is true but if the second if doesn't satisfy the function returns undefined, I prefer to write it like below:
const getFileNameFromUrl = (url: string): string => {
if (url) {
const tmp = url.split('/');
const tmpLength = tmp.length;
return tmpLength ? tmp[tmpLength - 1] : '';
}
return '';
};
For my problem, I need to have the file extension.

Simple Function (Using RegEx Pattern)
function pathInfo(s) {
s=s.match(/(.*?\/)?(([^/]*?)(\.[^/.]+?)?)(?:[?#].*)?$/);
return {path:s[1],file:s[2],name:s[3],ext:s[4]};
}
var sample='/folder/another/file.min.js?query=1';
var result=pathInfo(sample);
console.log(result);
/*
{
"path": "/folder/another/",
"file": "file.min.js",
"name": "file.min",
"ext": ".js"
}
*/
console.log(result.name);

Using RegEx
Let´s say you have this url:
http://127.0.0.1:3000/pages/blog/posts/1660345251.html
Using the following line of code:
var filename = location.pathname.replace(/[\D]/g, "");
Will return:
1660345251
Notice that this number is Unix Time, which returns your local time no matter where you are in the World (this should give you really unique post names for this blog example).
.replace(/[\D]/g, ""), replaces any non-digit character with an empty string. /[\D]/g says non-digit, and "" says empty string. More about it: here for numbers and here for letters.
More about RegEx, here. There are lots of RegEx tools out there that can help you out to get better results in the returning value for filename.
Extra code: Unix Time to Local Time
var humanDate = new Date(0);
var timestamp = entries[index].timestamp;
humanDate.setUTCSeconds(timestamp);
humanDate is for my local time:
Fri Aug 12 2022 20:00:51 GMT-0300 (Argentina Standard Time)
Credits for this code, here.

function getFilenameFromUrlString(url) {
const response = {
data: {
filename: ""
},
error: ""
};
try {
response.data.filename = new URL(url).pathname.split("/").pop();
} catch (e) {
response.error = e.toString();
}
return response;
}
For tests check this:
https://codesandbox.io/s/get-filename-from-url-string-wqthx1

from How to get the file name from a full path using JavaScript?
var filename = fullPath.replace(/^.*[\\\/]/, '')

Related

Complete url validation angular [duplicate]

I want to validate a URL and display message. Below is my code:
$("#pageUrl").keydown(function(){
$(".status").show();
var url = $("#pageUrl").val();
if(isValidURL(url)){
$.ajax({
type: "POST",
url: "demo.php",
data: "pageUrl="+ url,
success: function(msg){
if(msg == 1 ){
$(".status").html('<img src="images/success.gif"/><span><strong>SiteID:</strong>12345678901234456</span>');
}else{
$(".status").html('<img src="images/failure.gif"/>');
}
}
});
}else{
$(".status").html('<img src="images/failure.gif"/>');
}
});
function isValidURL(url){
var RegExp = /(ftp|http|https):\/\/(\w+:{0,1}\w*#)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%#!\-\/]))?/;
if(RegExp.test(url)){
return true;
}else{
return false;
}
}
My problem is now it will show an error message even when entering a proper URL until it matches regular expression, and it return true even if the URL is something like "http://wwww".
I appreciate your suggestions.
Someone mentioned the Jquery Validation plugin, seems overkill if you just want to validate the url, here is the line of regex from the plugin:
return this.optional(element) || /^(https?|ftp):\/\/(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*#)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|\/|\?)*)?$/i.test(value);
Here is where they got it from: http://projects.scottsplayground.com/iri/
Pointed out by #nhahtdh This has been updated to:
// Copyright (c) 2010-2013 Diego Perini, MIT licensed
// https://gist.github.com/dperini/729294
// see also https://mathiasbynens.be/demo/url-regex
// modified to allow protocol-relative URLs
return this.optional( element ) || /^(?:(?:(?:https?|ftp):)?\/\/)(?:\S+(?::\S*)?#)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})).?)(?::\d{2,5})?(?:[/?#]\S*)?$/i.test( value );
source: https://github.com/jzaefferer/jquery-validation/blob/c1db10a34c0847c28a5bd30e3ee1117e137ca834/src/core.js#L1349
It's not practical to parse URLs using regex. A full implementation of the RFC1738 rules would result in an enormously long regex (assuming it's even possible). Certainly your current expression fails many valid URLs, and passes invalid ones.
Instead:
a. use a proper URL parser that actually follows the real rules. (I don't know of one for JavaScript; it would probably be overkill. You could do it on the server side though). Or,
b. just trim away any leading or trailing spaces, then check it has one of your preferred schemes on the front (typically ‘http://’ or ‘https://’), and leave it at that. Or,
c. attempt to use the URL and see what lies at the end, for example by sending it am HTTP HEAD request from the server-side. If you get a 404 or connection error, it's probably wrong.
it return true even if url is something like "http://wwww".
Well, that is indeed a perfectly valid URL.
If you want to check whether a hostname such as ‘wwww’ actually exists, you have no choice but to look it up in the DNS. Again, this would be server-side code.
function validateURL(textval) {
var urlregex = /^(https?|ftp):\/\/([a-zA-Z0-9.-]+(:[a-zA-Z0-9.&%$-]+)*#)*((25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])){3}|([a-zA-Z0-9-]+\.)*[a-zA-Z0-9-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(:[0-9]+)*(\/($|[a-zA-Z0-9.,?'\\+&%$#=~_-]+))*$/;
return urlregex.test(textval);
}
This can return true for URLs like:
http://stackoverflow.com/questions/1303872/url-validation-using-javascript
or:
http://regexlib.com/DisplayPatterns.aspx?cattabindex=1&categoryId=2
I written also a URL validation function base on rfc1738 and rfc3986 to check http and https urls. I try to hold this modular, so it can be better maintained and adapted to own requirements.
The RegExp in one line is show at end of this post.
The RegExp accept HTTP and HTTPS URLs with some international domain or IPv4 number. IPv6 is not supported yet.
window.isValidURL = (function() {// wrapped in self calling function to prevent global pollution
//URL pattern based on rfc1738 and rfc3986
var rg_pctEncoded = "%[0-9a-fA-F]{2}";
var rg_protocol = "(http|https):\\/\\/";
var rg_userinfo = "([a-zA-Z0-9$\\-_.+!*'(),;:&=]|" + rg_pctEncoded + ")+" + "#";
var rg_decOctet = "(25[0-5]|2[0-4][0-9]|[0-1][0-9][0-9]|[1-9][0-9]|[0-9])"; // 0-255
var rg_ipv4address = "(" + rg_decOctet + "(\\." + rg_decOctet + "){3}" + ")";
var rg_hostname = "([a-zA-Z0-9\\-\\u00C0-\\u017F]+\\.)+([a-zA-Z]{2,})";
var rg_port = "[0-9]+";
var rg_hostport = "(" + rg_ipv4address + "|localhost|" + rg_hostname + ")(:" + rg_port + ")?";
// chars sets
// safe = "$" | "-" | "_" | "." | "+"
// extra = "!" | "*" | "'" | "(" | ")" | ","
// hsegment = *[ alpha | digit | safe | extra | ";" | ":" | "#" | "&" | "=" | escape ]
var rg_pchar = "a-zA-Z0-9$\\-_.+!*'(),;:#&=";
var rg_segment = "([" + rg_pchar + "]|" + rg_pctEncoded + ")*";
var rg_path = rg_segment + "(\\/" + rg_segment + ")*";
var rg_query = "\\?" + "([" + rg_pchar + "/?]|" + rg_pctEncoded + ")*";
var rg_fragment = "\\#" + "([" + rg_pchar + "/?]|" + rg_pctEncoded + ")*";
var rgHttpUrl = new RegExp(
"^"
+ rg_protocol
+ "(" + rg_userinfo + ")?"
+ rg_hostport
+ "(\\/"
+ "(" + rg_path + ")?"
+ "(" + rg_query + ")?"
+ "(" + rg_fragment + ")?"
+ ")?"
+ "$"
);
// export public function
return function (url) {
if (rgHttpUrl.test(url)) {
return true;
} else {
return false;
}
};
})();
RegExp in one line:
var rg = /^(http|https):\/\/(([a-zA-Z0-9$\-_.+!*'(),;:&=]|%[0-9a-fA-F]{2})+#)?(((25[0-5]|2[0-4][0-9]|[0-1][0-9][0-9]|[1-9][0-9]|[0-9])(\.(25[0-5]|2[0-4][0-9]|[0-1][0-9][0-9]|[1-9][0-9]|[0-9])){3})|localhost|([a-zA-Z0-9\-\u00C0-\u017F]+\.)+([a-zA-Z]{2,}))(:[0-9]+)?(\/(([a-zA-Z0-9$\-_.+!*'(),;:#&=]|%[0-9a-fA-F]{2})*(\/([a-zA-Z0-9$\-_.+!*'(),;:#&=]|%[0-9a-fA-F]{2})*)*)?(\?([a-zA-Z0-9$\-_.+!*'(),;:#&=\/?]|%[0-9a-fA-F]{2})*)?(\#([a-zA-Z0-9$\-_.+!*'(),;:#&=\/?]|%[0-9a-fA-F]{2})*)?)?$/;
In a similar situation I got away with this:
someUtils.validateURL = function(url) {
var parser = document.createElement('a');
try {
parser.href = url;
return !!parser.hostname;
} catch (e) {
return false;
}
};
i.e. why invent the wheel if browsers can do it for you? But, of course, this will only work in the browser.
there are various parts of parsed URL exactly how browser would interpret it:
parser.protocol; // => "http:"
parser.hostname; // => "example.com"
parser.port; // => "8080"
parser.pathname; // => "/path/"
parser.search; // => "?search=test"
parser.hash; // => "#hash"
parser.host; // => "example.com:3000"
Using these you can improve your validating function depending on the requirements. The only drawback is that it will accept relative URLs and use current page server's host and port. But you can use it for your advantage, by re-assembling the URL from parts and always passing it in full to your AJAX service.
What validateURL won't accept is invalid URL, e.g. http:\:8883 will return false, but :1234 is valid and is interpreted as http://pagehost.example.com/:1234 i.e. as a relative path.
UPDATE
This approach is no longer working with Chrome and other WebKit browsers. Even when URL is invalid, hostname is filled with some value, e.g. taken from base. It still helps to parse parts of URL, but will not allow to validate one.
Possible better no-own-parser approach is to use var parsedURL = new URL(url) and catch exceptions. See e.g. URL API. Supported by all major browsers and NodeJS, although still marked experimental.
best regex I found from http://angularjs.org/
var urlregex = /^(ftp|http|https):\/\/(\w+:{0,1}\w*#)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%#!\-\/]))?$/;
This is what worked for me:
function validateURL(value) {
return /^(https?|ftp):\/\/(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*#)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|\/|\?)*)?$/i.test(value);
}
from there is is just a matter of calling the function to get a true or false back:
validateURL(urltovalidate);
I know it's quite an old question but since it does not have any accepted answer, I suggest you to use the URI.js framework: https://github.com/medialize/URI.js
You can use it to check for malformed URI using a try/catch block:
function isValidURL(url)
{
try {
(new URI(url));
return true;
}
catch (e) {
// Malformed URI
return false;
}
}
Of course it will consider something like "%#" as a well formed relative URI... So I suggest you read the URI.js API to perform more checks, for example if you want to make sure that the user entered a well formed absolute URL you may do like this:
function isValidURL(url)
{
try {
var uri = new URI(url);
// URI has a scheme and a host
return (!!uri.scheme() && !!uri.host());
}
catch (e) {
// Malformed URI
return false;
}
}
Import in an npm package like
https://www.npmjs.com/package/valid-url
and use it to validate your url.
You can use the URL API that is recently standard. Browser support is sketchy at best, see the link. new URL(str) is guaranteed to throw TypeError for invalid URLs.
As stated above, http://wwww is a valid URL.
The URL API can be used to validate the structure of a URL string.
An error is thrown when trying to serialise an invalid URL string into a URL object. This could be abstracted into a helper function (Typescript snippet below):
function isValidURL(URL: string) : boolean {
try {
new URL(string);
return true;
} catch (err) { return false; }
}
isValidURL('https://www.google.com'); // returns true
isValidURL('localhost:3000'); // returns true
isValidURL('not-a-valid-url'); // returns false
isValidURL('google.com'); // returns false (see footnote)
If you strictly want HTTP / web links to be valid, we can simply add a condition to the return statement:
...
const url = new URL(string);
return url.protocol === 'https:' || url.protocol === 'http:';
...
Granted, this approach comes with a few caveats:
No support for the URL API in Internet Explorer (could be fixed with a polyfill)
Without additional checks, URLs without either a protocol or port are seen as invalid (e.g. google.com is invalid but google.com:3000 is OK). This may be an unintended behaviour for some usecases.
If you're looking for a more reliable regex, check out RegexLib. Here's the page you'd probably be interested in:
http://regexlib.com/Search.aspx?k=url
As for the error messages showing while the person is still typing, change the event from keydown to blur and then it will only check once the person moves to the next element.
var RegExp = (/^HTTP|HTTP|http(s)?:\/\/(www\.)?[A-Za-z0-9]+([\-\.]{1}[A-Za-z0-9]+)*\.[A-Za-z]{2,40}(:[0-9]{1,40})?(\/.*)?$/);
My solution:
function isValidUrl(t)
{
return t.match(/^(http|https|ftp):\/\/(([A-Z0-9][A-Z0-9_-]*)(\.[A-Z0-9][A-Z0-9_-]*)+)(:(\d+))?\/?/i)
}
Demo : http://jsbin.com/uzimeb/1/edit
function checkURL(value) {
var urlregex = new RegExp("^(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*#)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*$");
if (urlregex.test(value)) {
return (true);
}
return (false);
}
I have found a great resource for comparing different solutions:
https://mathiasbynens.be/demo/url-regex
According to that page, only solution from diegoperini passes all tests. Here is that regex:
_^(?:(?:https?|ftp)://)(?:\S+(?::\S*)?#)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?$_iuS
I checked a lot of url validators in google and no one works for me. For example I'd like to see valid on links like 'aa.com'. I like silly check for dot sign in string.
function isValidUri(str) {
var dotIndex = str.indexOf('.');
return (dotIndex > 0 && dotIndex < str.length - 2);
}
It should not stay on beginning and end of string (for now we don't have top level domain names with one character).
Here's a regular expression which might fit the bill (it's very long):
/^(?:\u0066\u0069\u006C\u0065\u003A\u002F{2}(?:\u002F{2}(?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*\u0040)?(?:\u005B(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){6}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){5}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){4}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A)?[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){3}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,3}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,4}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,5}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,6}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2})\u005D|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?\u002E)+[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?))(?:\u003A(?:\u0030-\u0035\u0030-\u0039{0,4}|\u0036\u0030-\u0034\u0030-\u0039{3}|\u0036\u0035\u0030-\u0034\u0030-\u0039{2}|\u0036\u0035\u0035\u0030-\u0032\u0030-\u0039|\u0036\u0035\u0035\u0033\u0030-\u0035))?(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*|\u002F(?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])+(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*)?|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])+(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*)|[\u0041-\u005A\u0061-\u007A][\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002B\u002D\u002E]*\u003A(?:\u002F{2}(?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*\u0040)?(?:\u005B(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){6}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){5}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){4}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A)?[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){3}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,3}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,4}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,5}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,6}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2})\u005D|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?\u002E)+[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?))(?:\u003A(?:\u0030-\u0035\u0030-\u0039{0,4}|\u0036\u0030-\u0034\u0030-\u0039{3}|\u0036\u0035\u0030-\u0034\u0030-\u0039{2}|\u0036\u0035\u0035\u0030-\u0032\u0030-\u0039|\u0036\u0035\u0035\u0033\u0030-\u0035))?(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*|\u002F(?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])+(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*)?|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])+(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*)(?:\u003F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040\u002F\u003F]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)?(?:\u0023(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040\u002F\u003F]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)?)$/
There are some caveats to its usage, namely it does not validate URIs which contain additional information after the user name (e.g. "username:password"). Also, only IPv6 addresses can be contained within the IP literal syntax and the "IPvFuture" syntax is currently ignored and will not validate against this regular expression. Port numbers are also constrained to be between 0 and 65,535. Also, only the file scheme can use triple slashes (e.g. "file:///etc/sysconfig") and can ignore both the query and fragment parts of a URI. Finally, it is geared towards regular URIs and not IRIs, hence the extensive focus on the ASCII character set.
This regular expression could be expanded upon, but it's already complex and long enough as it is. I also cannot guarantee it's going to be "100% accurate" or "bug free", but it should correctly validate URIs for all schemes.
You will need to do additional verification for any scheme-specific requirements or do URI normalization as this regular expression will validate a very broad range of URIs.
Try edit your isValidURL function as follows:
function isValidURL(url) {
var encodedURL = encodeURIComponent(url);
var isValid = false;
$.ajax({
url: "http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20html%20where%20url%3D%22" + encodedURL + "%22&format=json",
type: "get",
async: false,
dataType: "json",
success: function(data) {
isValid = data.query.results != null;
},
error: function(){
isValid = false;
}
});
return isValid;
}
This should do the trick.

Check if a string starts with http using Javascript

I've been searching all over for an answer to this and all of the answers I've found haven't been in JavaScript.
I need a way, in javascript, to check if a string starts with http, https, or ftp. If it doesn't start with one of those I need to prepend the string with http://. indexOf won't work for me I don't think as I need either http, https or ftp. Also I don't want something like google.com/?q=http://google.com to trigger that as being valid as it doesn't start with an http whereas indexOf would trigger that as being true (if I'm not entirely mistaken).
The closest PHP regex I've found is this:
function addhttp($url) {
if (!preg_match("~^(?:f|ht)tps?://~i", $url)) {
$url = "http://" . $url;
}
return $url;
}
Source: How to add http if its not exists in the url
I just don't know how to convert that to javascript. Any help would be greatly appreciated.
export const getValidUrl = (url = "") => {
let newUrl = window.decodeURIComponent(url);
newUrl = newUrl.trim().replace(/\s/g, "");
if(/^(:\/\/)/.test(newUrl)){
return `http${newUrl}`;
}
if(!/^(f|ht)tps?:\/\//i.test(newUrl)){
return `http://${newUrl}`;
}
return newUrl;
};
Tests:
expect(getValidUrl('https://www.test.com')).toBe('https://www.test.com');
expect(getValidUrl('http://www.test.com')).toBe('http://www.test.com');
expect(getValidUrl(' http : / / www.test.com')).toBe('http://www.test.com');
expect(getValidUrl('ftp://www.test.com')).toBe('ftp://www.test.com');
expect(getValidUrl('www.test.com')).toBe('http://www.test.com');
expect(getValidUrl('://www.test.com')).toBe('http://www.test.com');
expect(getValidUrl('http%3A%2F%2Fwww.test.com')).toBe('http://www.test.com');
expect(getValidUrl('www . test.com')).toBe('http://www.test.com');
This should work:
var pattern = /^((http|https|ftp):\/\/)/;
if(!pattern.test(url)) {
url = "http://" + url;
}
jsFiddle
var url = "http://something.com"
if( url.indexOf("http") == 0 ) {
alert("yeah!");
} else {
alert("No no no!");
}
Non-Regex declarative way:
const hasValidUrlProtocol = (url = '') =>
['http://', 'https://', 'ftp://'].some(protocol => url.startsWith(protocol))
This should work:
var re = /^(http|https|ftp)/
Refining previous answers a bit more, I used new RegExp(...) to avoid messy escapes, and also added an optional s.
var pattern = new RegExp('^(https?|ftp)://');
if(!pattern.test(url)) {
url = "http://" + url;
}
var pattern = new RegExp('^(https?|ftp)://');
console.log(pattern.test('http://foo'));
console.log(pattern.test('https://foo'));
console.log(pattern.test('ftp://foo'));
console.log(pattern.test('bar'));
Best readability I'd say is to use .startsWith('theString').
The code below checks for both http://, https:// and ftp:// and sets okUrl to a boolean true if any of them comply, by using the || which means or.
When you know if the url contains none of those strings, then just just add http:// to the start of the string either with literals (${})
let url = 'google.com'
const urlOK = url.startsWith('http://') || url.startsWith('https://') || url.startsWith('ftp://')
if (!urlOk) url = `http://${url}`
// => 'http://google.com'
or with simple string concat, which can be done in various ways, for example:
url = 'http://' + url
Read more about it, and test it, here:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith

Extract hostname name from string

I would like to match just the root of a URL and not the whole URL from a text string. Given:
http://www.youtube.com/watch?v=ClkQA2Lb_iE
http://youtu.be/ClkQA2Lb_iE
http://www.example.com/12xy45
http://example.com/random
I want to get the 2 last instances resolving to the www.example.com or example.com domain.
I heard regex is slow and this would be my second regex expression on the page so If there is anyway to do it without regex let me know.
I'm seeking a JS/jQuery version of this solution.
A neat trick without using regular expressions:
var tmp = document.createElement ('a');
; tmp.href = "http://www.example.com/12xy45";
// tmp.hostname will now contain 'www.example.com'
// tmp.host will now contain hostname and port 'www.example.com:80'
Wrap the above in a function such as the below and you have yourself a superb way of snatching the domain part out of an URI.
function url_domain(data) {
var a = document.createElement('a');
a.href = data;
return a.hostname;
}
I give you 3 possible solutions:
Using an npm package psl that extract anything you throw at it.
Using my custom implementation extractRootDomain which works with most cases.
URL(url).hostname works, but not for every edge case. Click "Run Snippet" to see how it run against them.
1. Using npm package psl (Public Suffix List)
The "Public Suffix List" is a list of all valid domain suffixes and rules, not just Country Code Top-Level domains, but unicode characters as well that would be considered the root domain (i.e. www.食狮.公司.cn, b.c.kobe.jp, etc.). Read more about it here.
Try:
npm install --save psl
Then with my "extractHostname" implementation run:
let psl = require('psl');
let url = 'http://www.youtube.com/watch?v=ClkQA2Lb_iE';
psl.get(extractHostname(url)); // returns youtube.com
2. My Custom Implementation of extractRootDomain
Below is my implementation and it also runs against a variety of possible URL inputs.
function extractHostname(url) {
var hostname;
//find & remove protocol (http, ftp, etc.) and get hostname
if (url.indexOf("//") > -1) {
hostname = url.split('/')[2];
} else {
hostname = url.split('/')[0];
}
//find & remove port number
hostname = hostname.split(':')[0];
//find & remove "?"
hostname = hostname.split('?')[0];
return hostname;
}
// Warning: you can use this function to extract the "root" domain, but it will not be as accurate as using the psl package.
function extractRootDomain(url) {
var domain = extractHostname(url),
splitArr = domain.split('.'),
arrLen = splitArr.length;
//extracting the root domain here
//if there is a subdomain
if (arrLen > 2) {
domain = splitArr[arrLen - 2] + '.' + splitArr[arrLen - 1];
//check to see if it's using a Country Code Top Level Domain (ccTLD) (i.e. ".me.uk")
if (splitArr[arrLen - 2].length == 2 && splitArr[arrLen - 1].length == 2) {
//this is using a ccTLD
domain = splitArr[arrLen - 3] + '.' + domain;
}
}
return domain;
}
const urlHostname = url => {
try {
return new URL(url).hostname;
}
catch(e) { return e; }
};
const urls = [
"http://www.blog.classroom.me.uk/index.php",
"http://www.youtube.com/watch?v=ClkQA2Lb_iE",
"https://www.youtube.com/watch?v=ClkQA2Lb_iE",
"www.youtube.com/watch?v=ClkQA2Lb_iE",
"ftps://ftp.websitename.com/dir/file.txt",
"websitename.com:1234/dir/file.txt",
"ftps://websitename.com:1234/dir/file.txt",
"example.com?param=value",
"https://facebook.github.io/jest/",
"//youtube.com/watch?v=ClkQA2Lb_iE",
"www.食狮.公司.cn",
"b.c.kobe.jp",
"a.d.kyoto.or.jp",
"http://localhost:4200/watch?v=ClkQA2Lb_iE"
];
const test = (method, arr) => console.log(
`=== Testing "${method.name}" ===\n${arr.map(url => method(url)).join("\n")}\n`);
test(extractHostname, urls);
test(extractRootDomain, urls);
test(urlHostname, urls);
Regardless having the protocol or even port number, you can extract the domain. This is a very simplified, non-regex solution, so I think this will do given the data set we were provided in the question.
3. URL(url).hostname
URL(url).hostname is a valid solution but it doesn't work well with some edge cases that I have addressed. As you can see in my last test, it doesn't like some of the URLs. You can definitely use a combination of my solutions to make it all work though.
*Thank you #Timmerz, #renoirb, #rineez, #BigDong, #ra00l, #ILikeBeansTacos, #CharlesRobertson for your suggestions! #ross-allen, thank you for reporting the bug!
There is no need to parse the string, just pass your URL as an argument to URL constructor:
const url = 'http://www.youtube.com/watch?v=ClkQA2Lb_iE';
const { hostname } = new URL(url);
console.assert(hostname === 'www.youtube.com');
Try this:
var matches = url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
var domain = matches && matches[1]; // domain will be null if no match is found
If you want to exclude the port from your result, use this expression instead:
/^https?\:\/\/([^\/:?#]+)(?:[\/:?#]|$)/i
Edit: To prevent specific domains from matching, use a negative lookahead. (?!youtube.com)
/^https?\:\/\/(?!(?:www\.)?(?:youtube\.com|youtu\.be))([^\/:?#]+)(?:[\/:?#]|$)/i
There are two good solutions for this, depending on whether you need to optimize for performance or not (and without external dependencies!):
1. Use URL.hostname for readability
The cleanest and easiest solution is to use URL.hostname.
const getHostname = (url) => {
// use URL constructor and return hostname
return new URL(url).hostname;
}
// tests
console.log(getHostname("https://stackoverflow.com/questions/8498592/extract-hostname-name-from-string/"));
console.log(getHostname("https://developer.mozilla.org/en-US/docs/Web/API/URL/hostname"));
URL.hostname is part of the URL API, supported by all major browsers except IE (caniuse). Use a URL polyfill if you need to support legacy browsers.
Bonus: using the URL constructor will also give you access to other URL properties and methods!
2. Use RegEx for performance
URL.hostname should be your choice for most use cases. However, it's still much slower than this regex (test it yourself on jsPerf):
const getHostnameFromRegex = (url) => {
// run against regex
const matches = url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
// extract hostname (will be null if no match is found)
return matches && matches[1];
}
// tests
console.log(getHostnameFromRegex("https://stackoverflow.com/questions/8498592/extract-hostname-name-from-string/"));
console.log(getHostnameFromRegex("https://developer.mozilla.org/en-US/docs/Web/API/URL/hostname"));
TL;DR
You should probably use URL.hostname. If you need to process an incredibly large number of URLs (where performance would be a factor), consider RegEx.
Parsing a URL can be tricky because you can have port numbers and special chars. As such, I recommend using something like parseUri to do this for you. I doubt performance is going to be a issue unless you are parsing hundreds of URLs.
I tried to use the Given solutions, the Chosen one was an overkill for my purpose and "Creating a element" one messes up for me.
It's not ready for Port in URL yet. I hope someone finds it useful
function parseURL(url){
parsed_url = {}
if ( url == null || url.length == 0 )
return parsed_url;
protocol_i = url.indexOf('://');
parsed_url.protocol = url.substr(0,protocol_i);
remaining_url = url.substr(protocol_i + 3, url.length);
domain_i = remaining_url.indexOf('/');
domain_i = domain_i == -1 ? remaining_url.length - 1 : domain_i;
parsed_url.domain = remaining_url.substr(0, domain_i);
parsed_url.path = domain_i == -1 || domain_i + 1 == remaining_url.length ? null : remaining_url.substr(domain_i + 1, remaining_url.length);
domain_parts = parsed_url.domain.split('.');
switch ( domain_parts.length ){
case 2:
parsed_url.subdomain = null;
parsed_url.host = domain_parts[0];
parsed_url.tld = domain_parts[1];
break;
case 3:
parsed_url.subdomain = domain_parts[0];
parsed_url.host = domain_parts[1];
parsed_url.tld = domain_parts[2];
break;
case 4:
parsed_url.subdomain = domain_parts[0];
parsed_url.host = domain_parts[1];
parsed_url.tld = domain_parts[2] + '.' + domain_parts[3];
break;
}
parsed_url.parent_domain = parsed_url.host + '.' + parsed_url.tld;
return parsed_url;
}
Running this:
parseURL('https://www.facebook.com/100003379429021_356001651189146');
Result:
Object {
domain : "www.facebook.com",
host : "facebook",
path : "100003379429021_356001651189146",
protocol : "https",
subdomain : "www",
tld : "com"
}
If you end up on this page and you are looking for the best REGEX of URLS try this one:
^(?:https?:)?(?:\/\/)?([^\/\?]+)
https://regex101.com/r/pX5dL9/1
You can use it like below and also with case insensitive manner to match with HTTPS and HTTP as well.:
const match = str.match(/^(?:https?:)?(?:\/\/)?([^\/\?]+)/i);
const hostname = match && match[1];
It works for urls without http:// , with http, with https, with just // and dont grab the path and query path as well.
Good Luck
All url properties, no dependencies, no JQuery, easy to understand
This solution gives your answer plus additional properties. No JQuery or other dependencies required, paste and go.
Usage
getUrlParts("https://news.google.com/news/headlines/technology.html?ned=us&hl=en")
Output
{
"origin": "https://news.google.com",
"domain": "news.google.com",
"subdomain": "news",
"domainroot": "google.com",
"domainpath": "news.google.com/news/headlines",
"tld": ".com",
"path": "news/headlines/technology.html",
"query": "ned=us&hl=en",
"protocol": "https",
"port": 443,
"parts": [
"news",
"google",
"com"
],
"segments": [
"news",
"headlines",
"technology.html"
],
"params": [
{
"key": "ned",
"val": "us"
},
{
"key": "hl",
"val": "en"
}
]
}
Code
The code is designed to be easy to understand rather than super fast. It can be called easily 100 times per second, so it's great for front end or a few server usages, but not for high volume throughput.
function getUrlParts(fullyQualifiedUrl) {
var url = {},
tempProtocol
var a = document.createElement('a')
// if doesn't start with something like https:// it's not a url, but try to work around that
if (fullyQualifiedUrl.indexOf('://') == -1) {
tempProtocol = 'https://'
a.href = tempProtocol + fullyQualifiedUrl
} else
a.href = fullyQualifiedUrl
var parts = a.hostname.split('.')
url.origin = tempProtocol ? "" : a.origin
url.domain = a.hostname
url.subdomain = parts[0]
url.domainroot = ''
url.domainpath = ''
url.tld = '.' + parts[parts.length - 1]
url.path = a.pathname.substring(1)
url.query = a.search.substr(1)
url.protocol = tempProtocol ? "" : a.protocol.substr(0, a.protocol.length - 1)
url.port = tempProtocol ? "" : a.port ? a.port : a.protocol === 'http:' ? 80 : a.protocol === 'https:' ? 443 : a.port
url.parts = parts
url.segments = a.pathname === '/' ? [] : a.pathname.split('/').slice(1)
url.params = url.query === '' ? [] : url.query.split('&')
for (var j = 0; j < url.params.length; j++) {
var param = url.params[j];
var keyval = param.split('=')
url.params[j] = {
'key': keyval[0],
'val': keyval[1]
}
}
// domainroot
if (parts.length > 2) {
url.domainroot = parts[parts.length - 2] + '.' + parts[parts.length - 1];
// check for country code top level domain
if (parts[parts.length - 1].length == 2 && parts[parts.length - 1].length == 2)
url.domainroot = parts[parts.length - 3] + '.' + url.domainroot;
}
// domainpath (domain+path without filenames)
if (url.segments.length > 0) {
var lastSegment = url.segments[url.segments.length - 1]
var endsWithFile = lastSegment.indexOf('.') != -1
if (endsWithFile) {
var fileSegment = url.path.indexOf(lastSegment)
var pathNoFile = url.path.substr(0, fileSegment - 1)
url.domainpath = url.domain
if (pathNoFile)
url.domainpath = url.domainpath + '/' + pathNoFile
} else
url.domainpath = url.domain + '/' + url.path
} else
url.domainpath = url.domain
return url
}
Just use the URL() constructor:
new URL(url).host
function hostname(url) {
var match = url.match(/:\/\/(www[0-9]?\.)?(.[^/:]+)/i);
if ( match != null && match.length > 2 && typeof match[2] === 'string' && match[2].length > 0 ) return match[2];
}
The above code will successfully parse the hostnames for the following example urls:
http://WWW.first.com/folder/page.html
first.com
http://mail.google.com/folder/page.html
mail.google.com
https://mail.google.com/folder/page.html
mail.google.com
http://www2.somewhere.com/folder/page.html?q=1
somewhere.com
https://www.another.eu/folder/page.html?q=1
another.eu
Original credit goes to: http://www.primaryobjects.com/CMS/Article145
Was looking for a solution to this problem today. None of the above answers seemed to satisfy. I wanted a solution that could be a one liner, no conditional logic and nothing that had to be wrapped in a function.
Here's what I came up with, seems to work really well:
hostname="http://www.example.com:1234"
hostname.split("//").slice(-1)[0].split(":")[0].split('.').slice(-2).join('.') // gives "example.com"
May look complicated at first glance, but it works pretty simply; the key is using 'slice(-n)' in a couple of places where the good part has to be pulled from the end of the split array (and [0] to get from the front of the split array).
Each of these tests return "example.com":
"http://example.com".split("//").slice(-1)[0].split(":")[0].split('.').slice(-2).join('.')
"http://example.com:1234".split("//").slice(-1)[0].split(":")[0].split('.').slice(-2).join('.')
"http://www.example.com:1234".split("//").slice(-1)[0].split(":")[0].split('.').slice(-2).join('.')
"http://foo.www.example.com:1234".split("//").slice(-1)[0].split(":")[0].split('.').slice(-2).join('.')
Here's the jQuery one-liner:
$('<a>').attr('href', url).prop('hostname');
This is not a full answer, but the below code should help you:
function myFunction() {
var str = "https://www.123rf.com/photo_10965738_lots-oop.html";
matches = str.split('/');
return matches[2];
}
I would like some one to create code faster than mine. It help to improve my-self also.
Okay, I know this is an old question, but I made a super-efficient url parser so I thought I'd share it.
As you can see, the structure of the function is very odd, but it's for efficiency. No prototype functions are used, the string doesn't get iterated more than once, and no character is processed more than necessary.
function getDomain(url) {
var dom = "", v, step = 0;
for(var i=0,l=url.length; i<l; i++) {
v = url[i]; if(step == 0) {
//First, skip 0 to 5 characters ending in ':' (ex: 'https://')
if(i > 5) { i=-1; step=1; } else if(v == ':') { i+=2; step=1; }
} else if(step == 1) {
//Skip 0 or 4 characters 'www.'
//(Note: Doesn't work with www.com, but that domain isn't claimed anyway.)
if(v == 'w' && url[i+1] == 'w' && url[i+2] == 'w' && url[i+3] == '.') i+=4;
dom+=url[i]; step=2;
} else if(step == 2) {
//Stop at subpages, queries, and hashes.
if(v == '/' || v == '?' || v == '#') break; dom += v;
}
}
return dom;
}
oneline with jquery
$('<a>').attr('href', document.location.href).prop('hostname');
// use this if you know you have a subdomain
// www.domain.com -> domain.com
function getDomain() {
return window.location.hostname.replace(/([a-zA-Z0-9]+.)/,"");
}
String.prototype.trim = function(){return his.replace(/^\s+|\s+$/g,"");}
function getHost(url){
if("undefined"==typeof(url)||null==url) return "";
url = url.trim(); if(""==url) return "";
var _host,_arr;
if(-1<url.indexOf("://")){
_arr = url.split('://');
if(-1<_arr[0].indexOf("/")||-1<_arr[0].indexOf(".")||-1<_arr[0].indexOf("\?")||-1<_arr[0].indexOf("\&")){
_arr[0] = _arr[0].trim();
if(0==_arr[0].indexOf("//")) _host = _arr[0].split("//")[1].split("/")[0].trim().split("\?")[0].split("\&")[0];
else return "";
}
else{
_arr[1] = _arr[1].trim();
_host = _arr[1].split("/")[0].trim().split("\?")[0].split("\&")[0];
}
}
else{
if(0==url.indexOf("//")) _host = url.split("//")[1].split("/")[0].trim().split("\?")[0].split("\&")[0];
else return "";
}
return _host;
}
function getHostname(url){
if("undefined"==typeof(url)||null==url) return "";
url = url.trim(); if(""==url) return "";
return getHost(url).split(':')[0];
}
function getDomain(url){
if("undefined"==typeof(url)||null==url) return "";
url = url.trim(); if(""==url) return "";
return getHostname(url).replace(/([a-zA-Z0-9]+.)/,"");
}
I personally researched a lot for this solution, and the best one I could find is actually from CloudFlare's "browser check":
function getHostname(){
secretDiv = document.createElement('div');
secretDiv.innerHTML = "<a href='/'>x</a>";
secretDiv = secretDiv.firstChild.href;
var HasHTTPS = secretDiv.match(/https?:\/\//)[0];
secretDiv = secretDiv.substr(HasHTTPS.length);
secretDiv = secretDiv.substr(0, secretDiv.length - 1);
return(secretDiv);
}
getHostname();
I rewritten variables so it is more "human" readable, but it does the job better than expected.
Well, doing using an regular expression will be a lot easier:
mainUrl = "http://www.mywebsite.com/mypath/to/folder";
urlParts = /^(?:\w+\:\/\/)?([^\/]+)(.*)$/.exec(mainUrl);
host = Fragment[1]; // www.mywebsite.com
in short way you can do like this
var url = "http://www.someurl.com/support/feature"
function getDomain(url){
domain=url.split("//")[1];
return domain.split("/")[0];
}
eg:
getDomain("http://www.example.com/page/1")
output:
"www.example.com"
Use above function to get domain name
This solution works well and you can also use if URL contains a lot of invalid characters.
install psl package
npm install --save psl
implementation
const psl = require('psl');
const url= new URL('http://www.youtube.com/watch?v=ClkQA2Lb_iE').hostname;
const parsed = psl.parse(url);
console.log(parsed)
output:
{
input: 'www.youtube.com',
tld: 'com',
sld: 'youtube',
domain: 'youtube.com',
subdomain: 'www',
listed: true
}
import URL from 'url';
const pathname = URL.parse(url).path;
console.log(url.replace(pathname, ''));
this takes care of both the protocol.
Code:
var regex = /\w+.(com|co\.kr|be)/ig;
var urls = ['http://www.youtube.com/watch?v=ClkQA2Lb_iE',
'http://youtu.be/ClkQA2Lb_iE',
'http://www.example.com/12xy45',
'http://example.com/random'];
$.each(urls, function(index, url) {
var convertedUrl = url.match(regex);
console.log(convertedUrl);
});
Result:
youtube.com
youtu.be
example.com
example.com
Parse-Urls appears to be the JavaScript library with the most robust patterns
Here is a rundown of the features:
Chapter 1. Normalize or parse one URL
Chapter 2. Extract all URLs
Chapter 3. Extract URIs with certain names
Chapter 4. Extract all fuzzy URLs
Chapter 5. Highlight all URLs in texts
Chapter 6. Extract all URLs in raw HTML or XML
parse-domain - a very solid lightweight library
npm install parse-domain
const { fromUrl, parseDomain } = require("parse-domain");
Example 1
parseDomain(fromUrl("http://www.example.com/12xy45"))
{ type: 'LISTED',
hostname: 'www.example.com',
labels: [ 'www', 'example', 'com' ],
icann:
{ subDomains: [ 'www' ],
domain: 'example',
topLevelDomains: [ 'com' ] },
subDomains: [ 'www' ],
domain: 'example',
topLevelDomains: [ 'com' ] }
Example 2
parseDomain(fromUrl("http://subsub.sub.test.ExAmPlE.coM/12xy45"))
{ type: 'LISTED',
hostname: 'subsub.sub.test.example.com',
labels: [ 'subsub', 'sub', 'test', 'example', 'com' ],
icann:
{ subDomains: [ 'subsub', 'sub', 'test' ],
domain: 'example',
topLevelDomains: [ 'com' ] },
subDomains: [ 'subsub', 'sub', 'test' ],
domain: 'example',
topLevelDomains: [ 'com' ] }
Why?
Depending on the use case and volume I strongly recommend against solving this problem yourself using regex or other string manipulation means. The core of this problem is that you need to know all the gtld and cctld suffixes to properly parse url strings into domain and subdomains, these suffixes are regularly updated. This is a solved problem and not one you want to solve yourself (unless you are google or something). Unless you need the hostname or domain name in a pinch don't try and parse your way out of this one.
A URL is schema://domain/path/to/resource?key=value#fragment so you could split on /:
/**
* Get root of URL
* #param {string} url - string to parse
* #returns {string} url root or empty string
*/
function getUrlRoot(url) {
return String(url || '').split('/').slice(0, 3).join('/');
}
Example:
getUrlRoot('http://www.youtube.com/watch?v=ClkQA2Lb_iE');
// returns http://www.youtube.com
getUrlRoot('http://youtu.be/ClkQA2Lb_iE');
// returns http://youtu.be
getUrlRoot('http://www.example.com/12xy45');
// returns http://www.example.com
getUrlRoot('http://example.com/random');
// returns http://example.com
My code looks like this.
Regular expressions can come in many forms, and here are my test cases
I think it's more scalable.
function extractUrlInfo(url){
let reg = /^((?<protocol>http[s]?):\/\/)?(?<host>((\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])|[-a-zA-Z0-9#:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()#:%_\+.~#?&//=]*)))(\:(?<port>[0-9]|[1-9]\d|[1-9]\d{2}|[1-9]\d{3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5]))?$/
return reg.exec(url).groups
}
var url = "https://192.168.1.1:1234"
console.log(extractUrlInfo(url))
var url = "https://stackoverflow.com/questions/8498592/extract-hostname-name-from-string"
console.log(extractUrlInfo(url))
Try below code for exact domain name using regex,
String line = "http://www.youtube.com/watch?v=ClkQA2Lb_iE";
String pattern3="([\\w\\W]\\.)+(.*)?(\\.[\\w]+)";
Pattern r = Pattern.compile(pattern3);
Matcher m = r.matcher(line);
if (m.find( )) {
System.out.println("Found value: " + m.group(2) );
} else {
System.out.println("NO MATCH");
}

How to get search query from the hash in the URL

I am adding a Search feature, and if the user is on the search results and they refresh the page I want it to stay at that search. I already put the search query into the URL, but I don't know how to retrieve it. It will most likely require a regexp so anyone that is experienced with regexp, please help.
Here is how I put it into the URL:
function trimspace(str) {
str = str.replace(/ +(?= )/g, '');
return str;
}
function searchMail() {
var query = trimspace($('#search_input').val());
if (query == '' || !query || query == null) {
$('#search_input').focus();
}
else {
window.location.hash = '#!/search/' + query;
$('#loader').show();
$.get('tools.php?type=search', { q: query }, function(data, textStatus) {
$('#loader').hide();
if (textStatus == 'success') {
hideAllTabs();
$('#search_results').html(data).show();
document.title = "Search results - WeeBuild Mail";
}
else {
alertBox('Search failed. Please try again in a few minutes.', 2500);
}
});
}
}
And besides just retreiving the query I need to be able to detect if the hash is #!/search/query.
Thanks in advance!
Detect if query is #!/search/query:
location.hash==='#!/search/query'
Finding out about the query parts assuming the query is #!/search/query:
var parts = location.hash.split('/');
parts[0] is #!. parts[1] is search and parts[2] is test. Doing with the parts want you want should now be trivial enough.
In response to the comments:
function getPartAndRemainder(){
var parts = location.hash.split('/');
if(parts[0]!=='#!' || parts.length<2){
// Value after # does not follow the expected pattern #!/part/any_parameters
throw new Error('Cannot parse application part.');
}
return {
// Contains the part of your application (word after first slash after #)
part: parts[1],
// Contains everything after second slash after # or the empty string
remainder: location.hash.substring(
// Length of #! (or whatever somebody might use instead)
parts[0].length
// Length of first slash
+1
// Length of your application part's name
+parts[1].length
// Length of the second slash
+1)
};
}
The function gives back an object that contains the part of your application at the key part and the remainder key will contain the rest. So, if your URI would be something#!/search/test the function would return {part:'search', remainder:'test'}.
In case the URI can't be parsed, you'll get an error and you should then use a sensible default instead.
You would use the method as follows whenever the hash changes or at some other event (point in time) where you are interested in the hash value:
try {
var hashValue = getPartAndRemainder();
if(hashValue.part==='search'){
var query = hashValue.remainder;
alert('You searched: '+query)
}
if(hashValue.part==='inbox'){
alert('You are in inbox, now');
}
} catch(e){
// It was not possible to parse the value after the # from URI according to our expected pattern.
alert('No idea what to do with: '+location.hash);
}
This regex will match anything after the search param, is this what you are looking for?
/#1/search/(.*)/
var match = /\/.*\/.*\/(.*)\//i.exec(location.hash)
//match contain the query

Trying to Validate URL Using JavaScript

I want to validate a URL and display message. Below is my code:
$("#pageUrl").keydown(function(){
$(".status").show();
var url = $("#pageUrl").val();
if(isValidURL(url)){
$.ajax({
type: "POST",
url: "demo.php",
data: "pageUrl="+ url,
success: function(msg){
if(msg == 1 ){
$(".status").html('<img src="images/success.gif"/><span><strong>SiteID:</strong>12345678901234456</span>');
}else{
$(".status").html('<img src="images/failure.gif"/>');
}
}
});
}else{
$(".status").html('<img src="images/failure.gif"/>');
}
});
function isValidURL(url){
var RegExp = /(ftp|http|https):\/\/(\w+:{0,1}\w*#)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%#!\-\/]))?/;
if(RegExp.test(url)){
return true;
}else{
return false;
}
}
My problem is now it will show an error message even when entering a proper URL until it matches regular expression, and it return true even if the URL is something like "http://wwww".
I appreciate your suggestions.
Someone mentioned the Jquery Validation plugin, seems overkill if you just want to validate the url, here is the line of regex from the plugin:
return this.optional(element) || /^(https?|ftp):\/\/(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*#)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|\/|\?)*)?$/i.test(value);
Here is where they got it from: http://projects.scottsplayground.com/iri/
Pointed out by #nhahtdh This has been updated to:
// Copyright (c) 2010-2013 Diego Perini, MIT licensed
// https://gist.github.com/dperini/729294
// see also https://mathiasbynens.be/demo/url-regex
// modified to allow protocol-relative URLs
return this.optional( element ) || /^(?:(?:(?:https?|ftp):)?\/\/)(?:\S+(?::\S*)?#)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})).?)(?::\d{2,5})?(?:[/?#]\S*)?$/i.test( value );
source: https://github.com/jzaefferer/jquery-validation/blob/c1db10a34c0847c28a5bd30e3ee1117e137ca834/src/core.js#L1349
It's not practical to parse URLs using regex. A full implementation of the RFC1738 rules would result in an enormously long regex (assuming it's even possible). Certainly your current expression fails many valid URLs, and passes invalid ones.
Instead:
a. use a proper URL parser that actually follows the real rules. (I don't know of one for JavaScript; it would probably be overkill. You could do it on the server side though). Or,
b. just trim away any leading or trailing spaces, then check it has one of your preferred schemes on the front (typically ‘http://’ or ‘https://’), and leave it at that. Or,
c. attempt to use the URL and see what lies at the end, for example by sending it am HTTP HEAD request from the server-side. If you get a 404 or connection error, it's probably wrong.
it return true even if url is something like "http://wwww".
Well, that is indeed a perfectly valid URL.
If you want to check whether a hostname such as ‘wwww’ actually exists, you have no choice but to look it up in the DNS. Again, this would be server-side code.
function validateURL(textval) {
var urlregex = /^(https?|ftp):\/\/([a-zA-Z0-9.-]+(:[a-zA-Z0-9.&%$-]+)*#)*((25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])){3}|([a-zA-Z0-9-]+\.)*[a-zA-Z0-9-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(:[0-9]+)*(\/($|[a-zA-Z0-9.,?'\\+&%$#=~_-]+))*$/;
return urlregex.test(textval);
}
This can return true for URLs like:
http://stackoverflow.com/questions/1303872/url-validation-using-javascript
or:
http://regexlib.com/DisplayPatterns.aspx?cattabindex=1&categoryId=2
I written also a URL validation function base on rfc1738 and rfc3986 to check http and https urls. I try to hold this modular, so it can be better maintained and adapted to own requirements.
The RegExp in one line is show at end of this post.
The RegExp accept HTTP and HTTPS URLs with some international domain or IPv4 number. IPv6 is not supported yet.
window.isValidURL = (function() {// wrapped in self calling function to prevent global pollution
//URL pattern based on rfc1738 and rfc3986
var rg_pctEncoded = "%[0-9a-fA-F]{2}";
var rg_protocol = "(http|https):\\/\\/";
var rg_userinfo = "([a-zA-Z0-9$\\-_.+!*'(),;:&=]|" + rg_pctEncoded + ")+" + "#";
var rg_decOctet = "(25[0-5]|2[0-4][0-9]|[0-1][0-9][0-9]|[1-9][0-9]|[0-9])"; // 0-255
var rg_ipv4address = "(" + rg_decOctet + "(\\." + rg_decOctet + "){3}" + ")";
var rg_hostname = "([a-zA-Z0-9\\-\\u00C0-\\u017F]+\\.)+([a-zA-Z]{2,})";
var rg_port = "[0-9]+";
var rg_hostport = "(" + rg_ipv4address + "|localhost|" + rg_hostname + ")(:" + rg_port + ")?";
// chars sets
// safe = "$" | "-" | "_" | "." | "+"
// extra = "!" | "*" | "'" | "(" | ")" | ","
// hsegment = *[ alpha | digit | safe | extra | ";" | ":" | "#" | "&" | "=" | escape ]
var rg_pchar = "a-zA-Z0-9$\\-_.+!*'(),;:#&=";
var rg_segment = "([" + rg_pchar + "]|" + rg_pctEncoded + ")*";
var rg_path = rg_segment + "(\\/" + rg_segment + ")*";
var rg_query = "\\?" + "([" + rg_pchar + "/?]|" + rg_pctEncoded + ")*";
var rg_fragment = "\\#" + "([" + rg_pchar + "/?]|" + rg_pctEncoded + ")*";
var rgHttpUrl = new RegExp(
"^"
+ rg_protocol
+ "(" + rg_userinfo + ")?"
+ rg_hostport
+ "(\\/"
+ "(" + rg_path + ")?"
+ "(" + rg_query + ")?"
+ "(" + rg_fragment + ")?"
+ ")?"
+ "$"
);
// export public function
return function (url) {
if (rgHttpUrl.test(url)) {
return true;
} else {
return false;
}
};
})();
RegExp in one line:
var rg = /^(http|https):\/\/(([a-zA-Z0-9$\-_.+!*'(),;:&=]|%[0-9a-fA-F]{2})+#)?(((25[0-5]|2[0-4][0-9]|[0-1][0-9][0-9]|[1-9][0-9]|[0-9])(\.(25[0-5]|2[0-4][0-9]|[0-1][0-9][0-9]|[1-9][0-9]|[0-9])){3})|localhost|([a-zA-Z0-9\-\u00C0-\u017F]+\.)+([a-zA-Z]{2,}))(:[0-9]+)?(\/(([a-zA-Z0-9$\-_.+!*'(),;:#&=]|%[0-9a-fA-F]{2})*(\/([a-zA-Z0-9$\-_.+!*'(),;:#&=]|%[0-9a-fA-F]{2})*)*)?(\?([a-zA-Z0-9$\-_.+!*'(),;:#&=\/?]|%[0-9a-fA-F]{2})*)?(\#([a-zA-Z0-9$\-_.+!*'(),;:#&=\/?]|%[0-9a-fA-F]{2})*)?)?$/;
In a similar situation I got away with this:
someUtils.validateURL = function(url) {
var parser = document.createElement('a');
try {
parser.href = url;
return !!parser.hostname;
} catch (e) {
return false;
}
};
i.e. why invent the wheel if browsers can do it for you? But, of course, this will only work in the browser.
there are various parts of parsed URL exactly how browser would interpret it:
parser.protocol; // => "http:"
parser.hostname; // => "example.com"
parser.port; // => "8080"
parser.pathname; // => "/path/"
parser.search; // => "?search=test"
parser.hash; // => "#hash"
parser.host; // => "example.com:3000"
Using these you can improve your validating function depending on the requirements. The only drawback is that it will accept relative URLs and use current page server's host and port. But you can use it for your advantage, by re-assembling the URL from parts and always passing it in full to your AJAX service.
What validateURL won't accept is invalid URL, e.g. http:\:8883 will return false, but :1234 is valid and is interpreted as http://pagehost.example.com/:1234 i.e. as a relative path.
UPDATE
This approach is no longer working with Chrome and other WebKit browsers. Even when URL is invalid, hostname is filled with some value, e.g. taken from base. It still helps to parse parts of URL, but will not allow to validate one.
Possible better no-own-parser approach is to use var parsedURL = new URL(url) and catch exceptions. See e.g. URL API. Supported by all major browsers and NodeJS, although still marked experimental.
best regex I found from http://angularjs.org/
var urlregex = /^(ftp|http|https):\/\/(\w+:{0,1}\w*#)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%#!\-\/]))?$/;
This is what worked for me:
function validateURL(value) {
return /^(https?|ftp):\/\/(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*#)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|\/|\?)*)?$/i.test(value);
}
from there is is just a matter of calling the function to get a true or false back:
validateURL(urltovalidate);
I know it's quite an old question but since it does not have any accepted answer, I suggest you to use the URI.js framework: https://github.com/medialize/URI.js
You can use it to check for malformed URI using a try/catch block:
function isValidURL(url)
{
try {
(new URI(url));
return true;
}
catch (e) {
// Malformed URI
return false;
}
}
Of course it will consider something like "%#" as a well formed relative URI... So I suggest you read the URI.js API to perform more checks, for example if you want to make sure that the user entered a well formed absolute URL you may do like this:
function isValidURL(url)
{
try {
var uri = new URI(url);
// URI has a scheme and a host
return (!!uri.scheme() && !!uri.host());
}
catch (e) {
// Malformed URI
return false;
}
}
Import in an npm package like
https://www.npmjs.com/package/valid-url
and use it to validate your url.
You can use the URL API that is recently standard. Browser support is sketchy at best, see the link. new URL(str) is guaranteed to throw TypeError for invalid URLs.
As stated above, http://wwww is a valid URL.
The URL API can be used to validate the structure of a URL string.
An error is thrown when trying to serialise an invalid URL string into a URL object. This could be abstracted into a helper function (Typescript snippet below):
function isValidURL(URL: string) : boolean {
try {
new URL(string);
return true;
} catch (err) { return false; }
}
isValidURL('https://www.google.com'); // returns true
isValidURL('localhost:3000'); // returns true
isValidURL('not-a-valid-url'); // returns false
isValidURL('google.com'); // returns false (see footnote)
If you strictly want HTTP / web links to be valid, we can simply add a condition to the return statement:
...
const url = new URL(string);
return url.protocol === 'https:' || url.protocol === 'http:';
...
Granted, this approach comes with a few caveats:
No support for the URL API in Internet Explorer (could be fixed with a polyfill)
Without additional checks, URLs without either a protocol or port are seen as invalid (e.g. google.com is invalid but google.com:3000 is OK). This may be an unintended behaviour for some usecases.
If you're looking for a more reliable regex, check out RegexLib. Here's the page you'd probably be interested in:
http://regexlib.com/Search.aspx?k=url
As for the error messages showing while the person is still typing, change the event from keydown to blur and then it will only check once the person moves to the next element.
var RegExp = (/^HTTP|HTTP|http(s)?:\/\/(www\.)?[A-Za-z0-9]+([\-\.]{1}[A-Za-z0-9]+)*\.[A-Za-z]{2,40}(:[0-9]{1,40})?(\/.*)?$/);
My solution:
function isValidUrl(t)
{
return t.match(/^(http|https|ftp):\/\/(([A-Z0-9][A-Z0-9_-]*)(\.[A-Z0-9][A-Z0-9_-]*)+)(:(\d+))?\/?/i)
}
Demo : http://jsbin.com/uzimeb/1/edit
function checkURL(value) {
var urlregex = new RegExp("^(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*#)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*$");
if (urlregex.test(value)) {
return (true);
}
return (false);
}
I have found a great resource for comparing different solutions:
https://mathiasbynens.be/demo/url-regex
According to that page, only solution from diegoperini passes all tests. Here is that regex:
_^(?:(?:https?|ftp)://)(?:\S+(?::\S*)?#)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?$_iuS
I checked a lot of url validators in google and no one works for me. For example I'd like to see valid on links like 'aa.com'. I like silly check for dot sign in string.
function isValidUri(str) {
var dotIndex = str.indexOf('.');
return (dotIndex > 0 && dotIndex < str.length - 2);
}
It should not stay on beginning and end of string (for now we don't have top level domain names with one character).
Here's a regular expression which might fit the bill (it's very long):
/^(?:\u0066\u0069\u006C\u0065\u003A\u002F{2}(?:\u002F{2}(?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*\u0040)?(?:\u005B(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){6}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){5}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){4}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A)?[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){3}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,3}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,4}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,5}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,6}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2})\u005D|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?\u002E)+[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?))(?:\u003A(?:\u0030-\u0035\u0030-\u0039{0,4}|\u0036\u0030-\u0034\u0030-\u0039{3}|\u0036\u0035\u0030-\u0034\u0030-\u0039{2}|\u0036\u0035\u0035\u0030-\u0032\u0030-\u0039|\u0036\u0035\u0035\u0033\u0030-\u0035))?(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*|\u002F(?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])+(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*)?|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])+(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*)|[\u0041-\u005A\u0061-\u007A][\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002B\u002D\u002E]*\u003A(?:\u002F{2}(?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*\u0040)?(?:\u005B(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){6}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){5}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){4}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A)?[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){3}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,3}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,4}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035]))|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,5}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}|(?:(?:[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4}\u003A){0,6}[\u0030-\u0039\u0041-\u0046\u0061-\u0066]{1,4})?\u003A{2})\u005D|(?:(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])\u002E){3}(?:[\u0030-\u0039]|[\u0031-\u0039][\u0030-\u0039]|\u0031[\u0030-\u0039]{2}|\u0032[\u0030-\u0034][\u0030-\u0039]|\u0032\u0035[\u0030-\u0035])|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?\u002E)+[\u0041-\u005A\u0061-\u007A\u0030-\u0039](?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D]+)?[\u0041-\u005A\u0061-\u007A\u0030-\u0039])?))(?:\u003A(?:\u0030-\u0035\u0030-\u0039{0,4}|\u0036\u0030-\u0034\u0030-\u0039{3}|\u0036\u0035\u0030-\u0034\u0030-\u0039{2}|\u0036\u0035\u0035\u0030-\u0032\u0030-\u0039|\u0036\u0035\u0035\u0033\u0030-\u0035))?(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*|\u002F(?:(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])+(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*)?|(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])+(?:\u002F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)*)(?:\u003F(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040\u002F\u003F]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)?(?:\u0023(?:[\u0041-\u005A\u0061-\u007A\u0030-\u0039\u002D\u002E\u005F\u007E\u0021\u0024\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u003B\u003D\u003A\u0040\u002F\u003F]|\u0025[\u0030-\u0039\u0041-\u0046\u0061-\u0066][\u0030-\u0039\u0041-\u0046\u0061-\u0066])*)?)$/
There are some caveats to its usage, namely it does not validate URIs which contain additional information after the user name (e.g. "username:password"). Also, only IPv6 addresses can be contained within the IP literal syntax and the "IPvFuture" syntax is currently ignored and will not validate against this regular expression. Port numbers are also constrained to be between 0 and 65,535. Also, only the file scheme can use triple slashes (e.g. "file:///etc/sysconfig") and can ignore both the query and fragment parts of a URI. Finally, it is geared towards regular URIs and not IRIs, hence the extensive focus on the ASCII character set.
This regular expression could be expanded upon, but it's already complex and long enough as it is. I also cannot guarantee it's going to be "100% accurate" or "bug free", but it should correctly validate URIs for all schemes.
You will need to do additional verification for any scheme-specific requirements or do URI normalization as this regular expression will validate a very broad range of URIs.
Try edit your isValidURL function as follows:
function isValidURL(url) {
var encodedURL = encodeURIComponent(url);
var isValid = false;
$.ajax({
url: "http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20html%20where%20url%3D%22" + encodedURL + "%22&format=json",
type: "get",
async: false,
dataType: "json",
success: function(data) {
isValid = data.query.results != null;
},
error: function(){
isValid = false;
}
});
return isValid;
}
This should do the trick.

Categories

Resources