Javascript regular expression to add protocol to url string

Javascript regular expression to add protocol to url string - javascript

I have an application to list some website details using JavaScript. There will be a link to website which is generated using JavaScript itself. Sometimes I will get my link as,
Website
But sometimes it will be,
Website
In the second time the link is not working, there is no protocol.
So I am looking for a JavaScript regular expression function to add http:// if there in no protocol.
My code looks like,
var website_link = document.createElement("a");
website_link.innerHTML = "Website";
website_link.href = my_JSON_object.website;
website_link.target = "_blank";
profile.appendChild(website_link);
And no local links will come.

See this link.
function setHttp(link) {
if (link.search(/^http[s]?\:\/\//) == -1) {
link = 'http://' + link;
}
return link;
}
alert(setHttp("www.google.com"));
alert(setHttp("http://www.google.com/"));
In your code it will be like:
var website_link = document.createElement("a");
website_link.innerHTML = "Website";
if (my_JSON_object.website.search(/^http[s]?\:\/\//) == -1) {
my_JSON_object.website = 'http://' + my_JSON_object.website;
}
website_link.href = my_JSON_object.website;
website_link.target = "_blank";
profile.appendChild(website_link);

For example, using negative lookahead:
your_string.replace(/href="(?!http)/, 'href="http://');
Example:
> 'Website'.replace(/href="(?!http)/, 'href="http://');
"Website"
> 'Website'.replace(/href="(?!http)/, 'href="http://');
"Website"

I've wrapped this functionality into the NPM module url-schemify:
var schemify = require('url-schemify');
var assert = require('assert');
// url-schemify adds default scheme (http) to the URLs that miss it
assert.equal(schemify('google.com'), 'http://google.com');
assert.equal(schemify('www.example.com'), 'http://www.example.com');
// default scheme could be configured through the options parameter
assert.equal(schemify('google.com', { scheme: 'https' }), 'https://google.com');
// { scheme: '' } will produce protocol-related URL
assert.equal(schemify('www.example.com', { scheme: '' }), '//www.example.com');
// url-schemify doesn't modify URLs that already have scheme or protocol-related ones:
assert.equal(schemify('http://google.com'), 'http://google.com');
assert.equal(schemify('https://www.example.com'), 'https://www.example.com');
assert.equal(schemify('ftp://example.com'), 'ftp://example.com');
assert.equal(schemify('//example.com'), '//example.com');

Related

How to get avatar file type [duplicate]

How do I find the file extension of a URL using javascript?
example URL:
http://www.adobe.com/products/flashplayer/include/marquee/design.swf?width=792&height=294
I just want the 'swf' of the entire URL.
I need it to find the extension if the url was also in the following format
http://www.adobe.com/products/flashplayer/include/marquee/design.swf
Obviously this URL does not have the parameters behind it.
Anybody know?
Thanks in advance

function get_url_extension( url ) {
return url.split(/[#?]/)[0].split('.').pop().trim();
}
example:
get_url_extension('https://example.com/folder/file.jpg');
get_url_extension('https://example.com/fold.er/fil.e.jpg?param.eter#hash=12.345');
outputs ------> jpg

Something like this maybe?
var fileName = 'http://localhost/assets/images/main.jpg';
var extension = fileName.split('.').pop();
console.log(extension, extension === 'jpg');
The result you see in the console is.
jpg true
if for some reason you have a url like this something.jpg?name=blah or something.jpg#blah then you could do
extension = extension.split(/\#|\?/g)[0];
drop in
var fileExtension = function( url ) {
return url.split('.').pop().split(/\#|\?/)[0];
}

For the extension you could use this function:
function ext(url) {
// Remove everything to the last slash in URL
url = url.substr(1 + url.lastIndexOf("/"));
// Break URL at ? and take first part (file name, extension)
url = url.split('?')[0];
// Sometimes URL doesn't have ? but #, so we should aslo do the same for #
url = url.split('#')[0];
// Now we have only extension
return url;
}
Or shorter:
function ext(url) {
return (url = url.substr(1 + url.lastIndexOf("/")).split('?')[0]).split('#')[0].substr(url.lastIndexOf("."))
}
Examples:
ext("design.swf")
ext("/design.swf")
ext("http://www.adobe.com/products/flashplayer/include/marquee/design.swf")
ext("/marquee/design.swf?width=792&height=294")
ext("design.swf?f=aa.bb")
ext("../?design.swf?width=792&height=294&.XXX")
ext("http://www.example.com/some/page.html#fragment1")
ext("http://www.example.com/some/dynamic.php?foo=bar#fragment1")
Note:
File extension is provided with dot (.) at the beginning. So if result.charat(0) != "." there is no extension.

This is the answer:
var extension = path.match(/\.([^\./\?]+)($|\?)/)[1];

Take a look at regular expressions. Specifically, something like /([^.]+.[^?])\?/.

// Gets file extension from URL, or return false if there's no extension
function getExtension(url) {
// Extension starts after the first dot after the last slash
var extStart = url.indexOf('.',url.lastIndexOf('/')+1);
if (extStart==-1) return false;
var ext = url.substr(extStart+1),
// end of extension must be one of: end-of-string or question-mark or hash-mark
extEnd = ext.search(/$|[?#]/);
return ext.substring (0,extEnd);
}

url.split('?')[0].split('.').pop()
usually #hash is not part of the url but treated separately

This method works fine :
function getUrlExtension(url) {
try {
return url.match(/^https?:\/\/.*[\\\/][^\?#]*\.([a-zA-Z0-9]+)\??#?/)[1]
} catch (ignored) {
return false;
}
}

You can use the (relatively) new URL object to help you parse your url. The property pathname is especially useful because it returns the url path without the hostname and parameters.
let url = new URL('http://www.adobe.com/products/flashplayer/include/marquee/design.swf?width=792&height=294');
// the .pathname method returns the path
url.pathname; // returns "/products/flashplayer/include/marquee/design.swf"
// now get the file name
let filename = url.pathname.split('/').reverse()[0]
// returns "design.swf"
let ext = filename.split('.')[1];
// returns 'swf'

var doc = document.location.toString().substring(document.location.toString().lastIndexOf("/"))
alert(doc.substring(doc.lastIndexOf(".")))

const getUrlFileType = (url: string) => {
const u = new URL(url)
const ext = u.pathname.split(".").pop()
return ext === "/"
? undefined
: ext.toLowerCase()
}

function ext(url){
var ext = url.substr(url.lastIndexOf('/') + 1),
ext = ext.split('?')[0],
ext = ext.split('#')[0],
dot = ext.lastIndexOf('.');
return dot > -1 ? ext.substring(dot + 1) : '';
}

If you can use npm packages, File-type is another option.
They have browser support, so you can do this (taken from their docs):
const FileType = require('file-type/browser');
const url = 'https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg';
(async () => {
const response = await fetch(url);
const fileType = await FileType.fromStream(response.body);
console.log(fileType);
//=> {ext: 'jpg', mime: 'image/jpeg'}
})();
It works for gifs too!

Actually, I like to imporve this answer, it means my answer will support # too:
const extExtractor = (url: string): string =>
url.split('?')[0].split('#')[0].split('.').pop() || '';
This function returns the file extension in any case.

If you wanna use this solution. these packages are using latest import/export method.
in case you wanna use const/require bcz your project is using commonJS you should downgrade to older version.
i used
"got": "11.8.5","file-type": "16.5.4",
const FileType = require('file-type');
const got = require('got');
const url ='https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg';
(async () => {
const stream = got.stream(url);
console.log(await FileType.fromStream(stream));
})();

var fileExtension = function( url ) {
var length=url.split(?,1);
return length
}
document.write("the url is :"+length);

URL transformation in Chrome javascript bookmarklet showing "undefined"

I'm trying to create a Chrome bookmarklet that will take a part of the pathname from one URL and navigate to a new URL using that variable as a parameter (the variable is 1234567 in the example below).
From: 'https://example.com/reporting-dashboard/#/dashboard/1234567?pageId=Page_3a7c73c6-34c9-4ab3-8d1f-5bd437c07115'
To: 'https://example.com/tool/permissions/resources?namespace=1234567'
The hostname differs depending on the environment I'm working in but will always stay the same when I transform it with the bookmarklet so I'm trying to pull that info when I compose the new URL. This is what I've got so far, but I keep getting "undefined" in the transformed URL (below) when I run the code. Any ideas on what I've got wrong here?
'https://example.com/tool/permissions/resources?namespace=undefined'
My code:
//Sample URL: https://example.com/reporting-dashboard/#/dashboard/1234567?pageId=Page_3a7c73c6-34c9-4ab3-8d1f-5bd437c07115
var pathArray = location.pathname.split('/');
let secondLevelLocation = pathArray[3];
var newUrl = location.protocol + '//' + location.hostname + '/tool/permissions/resources?namespace=' + secondLevelLocation;
var w=window.open();w.location=newUrl;w.document.close();

In the code you've shown, there is an assumption that the URL hash (fragment identifier) will be included when accessing the pathname:
//Sample URL: https://example.com/reporting-dashboard/#/dashboard/1234567?pageId=Page_3a7c73c6-34c9-4ab3-8d1f-5bd437c07115
var pathArray = location.pathname.split('/');
let secondLevelLocation = pathArray[3];
This is where the problem occurs. In a URL, the pathname ends when one of the following characters are first encountered:
? (which begins the query string), or
# (which begins the fragment identifier)
The format of the hash / fragment identifier portion of the URL in your example is that of a fully-resolved URL without the origin (starting at the pathname).
Using this knowledge, you can use the native URL class to help you select the desired part of the input URL, then use it again to construct the target URL, as shown in the code below. Once you have the target URL, you can use it to navigate, etc.
function parseNamespace (url) {
const fragment = url.hash.slice(1);
if (!fragment.startsWith('/')) throw new Error('Path fragment not found');
url = new URL(fragment, url);
const namespace = url.pathname.split('/').at(-1);
return namespace;
}
function createUrl (address = window.location.href) {
let url = new URL(address);
const namespace = parseNamespace(url);
const pathname = '/tool/permissions/resources';
url = new URL(pathname, url.origin);
url.searchParams.set('namespace', namespace);
return url;
}
const url = createUrl('https://example.com/reporting-dashboard/#/dashboard/1234567?pageId=Page_3a7c73c6-34c9-4ab3-8d1f-5bd437c07115');
// You can omit the argument when you want to get the address from the current document:
// const url = createUrl();
console.log(url.href); // "https://example.com/tool/permissions/resources?namespace=1234567"

In vanilla JavaScript, turn relative path + base URL into absolute URL

In Ruby, it’s simple to do this, but in JavaScript, I’m not sure.
Given a starting page, such as http://example.org/foo/bar, I want to be able to take any link on the page, which can have any sort of href such as /x.php, ?p=3, y.html, etc., and turn it into a fully qualified absolute URL, such as (in the last example) http://example.org/foo/y.html.
Is there any sort of simple way to do this? If it helps, we can assume these paths do live in an actual web page as actual <a href> elements.

The URL constructor takes a second, base argument, which does exactly what you want:
const base = 'http://example.org/foo/bar';
[ '/x.php',
'?p=3',
'y.html'
].forEach(urlPart => {
const url = new URL(urlPart, base);
console.log(url.href);
});
.as-console-wrapper{min-height:100%}
<script src="//rawgit.com/github/url-polyfill/0.5.6/url.js"></script>
The URL API works in all major browsers except IE. If you need to support IE, there are polyfills available. Node.js also has it built in (const { URL } = require('url');).

If your baseURL is equal to the current page, try this:
var getAbsoluteUrl = (function() {
var a;
return function(url) {
if(!a) a = document.createElement('a');
a.href = url;
return a.href;
};
})();
Found here: https://davidwalsh.name/get-absolute-url
Tried it and it worked well for relative as well as absolute URLs (it makes them all absolute) - assuming your basePath is actually your own page.

Use this script (but test it first for the various cases, I just wrote it and wouldn't guarantee I haven't overlooked any case). Note that if the path of the URL specifies a directory and not a file, it always ends in a /, even though the browser might not show that.
var getAbsoluteURL = function (url, href) {
var path = url.split(/[#?]/)[0];
var basePath = path.slice(0, path.lastIndexOf('/'));
var domain = url.split('/').slice(0,3).join('/');
var protocol = url.split('/')[0];
switch (href.charAt(0)) {
case '/':
{
if (href.length > 1 && href.charAt(1) == '/')
return protocol + href;
else
return domain + href;
}
case '#':
case '?':
return path + href;
default:
return basePath + '/' + href;
}
}

How to find all the links in a webpage that has a specific file extension?

Is it possible to find a href in a website that has a certain file extension. for example it would print http://www.test.com/something.mp3 http://www.test.com/somelinktoamuscifile.mp3 http://www.test.com/music.mp3.
It would show all of links, with a file extension of .mp3 for example.
would you do
var extension = ".mp3"
var checker = url + extension
if(url == checker){console.log(url);}

So you want to extract all links that contain a certain string from any given url?
Maybe this script will help you:
var request = require('request');
var cheerio = require('cheerio');
var url = "http://www.stackoverflow.com";
var toFind = "delete" //use file extension or whatever you want to find
request(url, function(err, resp, body) {
if (err) throw err;
var $ = cheerio.load(body);
$('a').each(function (i, element) {
var a = $(this);
//console.log(a.attr('href'));
var href = a.attr('href');
if (href && href.indexOf(toFind) != -1) {
console.log(href);
}
})
})
Output:
$ node scraping.js
http://ux.stackexchange.com/questions/49991/should-yes-delete-it-be-red-or-green
Just change the content of url and toFind. There is a good tutorial on web scraping here and here. Of course this can be done in a lot of different programming languages. I merely used javascript because you tagged it that way.

Here is a native javascript solution that works in current browsers (IE8+, Chrome, Firefox) without jQuery.
function getLinksWithExtension(extension) {
var links = document.querySelectorAll('a[href$="' + extension + '"]'),
i;
for (i=0; i<links.length; i++){
console.log(links[i]);
}
}

I think it goes like this:
var mp3_extension = '.mp3';
var url_string = url.split('.');
var url_extension = url_string[url_string.length-1];
if(url_extension === mp3_extension){
//go go go!!!
}

JavaScript: Completing relative URL to own site

Is there a better way to write this procedure that completes a relative url for my website?
if (!url.startsWith('http')) {
url = + location.protocol + '//' + location.host + (url.startsWith('/') ? '' : '/') + url
});

You might want to consider having the server side supply the base url to your web site. The reason being, that it is typically easier to get access to the base url of the site on the server. All you need to do is have a server-side script/action that generates a script that looks like:
var siteBaseUrl = 'http://example.com/';
// use string replacement to remove any leading slash on the incoming url.
function makeAbsoluteUrl( url )
{
if (!url.match(/^http/)) {
url = siteBaseUrl + url.replace(/^\//,'');
})
return url;
}
You can refer to it in your web page as:
<script type="text/javscript" src="/scripts/baseUrl.php"> // adjust for platform
</script>
And use it as
url = makeAbsoluteUrl( url );

i think the following would handle all possible urls correctly
lstrip = function(str, prefix) {
return str.indexOf(prefix) == 0 ?
str.substring(prefix.length) :
str;
}
completeURL = function(url, host) {
url = lstrip(url, "http://");
url = lstrip(url, host);
url = lstrip(url, "/");
return "http://" + host + "/" + url
}
//test
urls = [
"http://host.com/foo/bar",
"host.com/foo/bar",
"/foo/bar",
"foo/bar",
"foo.php",
"host.com",
""
]
for(var n = 0; n < urls.length; n++)
console.log(completeURL(urls[n], "host.com"))

On first there can be for example ftp://
You should check if there is any // in url.
And instead of 'location.host' you should use 'location' with cut off the last word after the last: "/". I mean www.page.com/file.html -> www.page.com

Develop Reference

JavaScript is the programming language of the Web.

Javascript regular expression to add protocol to url string - javascript

For example, using negative lookahead: your_string.replace(/href="(?!http)/, 'href="http://'); Example: > 'Website'.replace(/href="(?!http)/, 'href="http://'); "Website" > 'Website'.replace(/href="(?!http)/, 'href="http://'); "Website"

Related

How to get avatar file type [duplicate]

URL transformation in Chrome javascript bookmarklet showing "undefined"

In vanilla JavaScript, turn relative path + base URL into absolute URL

How to find all the links in a webpage that has a specific file extension?

JavaScript: Completing relative URL to own site

Categories

Resources