Node.js, parse filename from URL

Node.js, parse filename from URL - javascript

How can I parse a url?
site.com:8080/someFile.txt?attr=100
or
site.com:8080/someFile.txt/?attr=100
I need to get someFile.txt, where is a file name I set by myself as the format (txt or some other).
UPDATE
I tried
var path = url.parse(req.url).path;
But I still cannot get the path (someFile.txt).

Something like this..
var url = require("url");
var path = require("path");
var parsed = url.parse("http://example.com:8080/test/someFile.txt/?attr=100");
console.log(path.basename(parsed.pathname));

here's my working code, hope it helps
import path from 'path'
const getBasenameFormUrl = (urlStr) => {
const url = new URL(urlStr)
return path.basename(url.pathname)
}

decodeURIComponent(path.basename('http://localhost/node-v18.12.1-x64.msi'))
node-v18.12.1-x64.msi

Your example can easily be dealt with using Node.js’s url module:
var URL = require('url').parse('site.com:8080/someFile.txt?attr=100');
console.log(URL.pathname.replace(/(^\/|\/$)/g,'')); // "someFile.txt"
However, this doesn’t work with Node.js’s exemplary URL (’cause it’s got more path).
By truncanting the complete path starting at its rightmost slash, it’ll yield the file name:
var URL = require('url').parse('http://user:pass#host.com:8080/p/a/t/h?query=string#hash');
console.log(URL.pathname.substring(URL.pathname.lastIndexOf('/')+1)); // "h"
And if that idea is considered safe enough for the appliance, we can do it plain:
var file = url.substring(url.lastIndexOf('/')+1).replace(/((\?|#).*)?$/,'');
/* hashes and query strings ----^ */

Related

Javascript return true if a string contains subdomain

I have an API that returns a domain to my front end.
This domain is the string format.
For eg: "google.com" / "google.co.ok"
or "test.google.com"/ "test.google.co.ok:
Notice that the string does not contain any protocol.
I want to write a method that parses the string and returns true if the string contains a subdomain.
In the above 2 examples, the method should return true for test.google.com or test.google.co.ok
EDIT: If it were python, i would write something like below. But hoping something similat was available in JS.
from tld import get_tld, get_fld
get_tld("www.google.co.uk", fix_protocol=True)
# 'co.uk'
get_fld("www.google.co.uk", fix_protocol=True)
# 'google.co.uk'

There are multiple JavaScript libraries available that can be used the same way you're using tld. psl is older but still has millions of weekly downloads.
You could use psl and implement something like this:
import { parse } from "psl";
function hasSubdomain(str) {
const { subdomain } = parse(str);
return subdomain !== null;
}
hasSubdomain("www.google.com") // true
hasSubdomain("google.co.uk") // false
Feel free to clone and edit this example on RunKit as you see fit.

Sure thing. Since there's no protocol, maybe something like:
"word.domain.com"
.split(".").length > 2 // true
"domain.com"
.split(".").length > 2 // false
"www.domain.co.uk"
.split(".").length > 2 // uh-oh
You'll likely need to parse out "www" and second-level domains (".co", ".gc", etc).

You can use RegExp to perform string manipulation. Please take a look at the following snippet and run the code and see the results from different test cases covering most of the possibilities. Let me know if it's helpful.
function subDomain(url) {
// REMOVE LEADING AND TRAILING WHITE SPACE
url = url.replace(new RegExp(/^\s+/), ""); // START
url = url.replace(new RegExp(/\s+$/), ""); // END
// CONVERT BACK SLASHES TO FORWARD SLASHES
url = url.replace(new RegExp(/\\/g), "/");
// REMOVES 'www.' FROM THE START OF THE STRING
url = url.replace(new RegExp(/^www\./i), "");
// REMOVE STRING FROM FIRST FORWARD SLASH ON
url = url.replace(new RegExp(/\/(.*)/), "");
// REMOVES '.??.??' OR '.???.??' FROM END - e.g. '.CO.UK', '.COM.AU'
if (url.match(new RegExp(/\.[a-z]{2,3}\.[a-z]{2}$/i))) {
url = url.replace(new RegExp(/\.[a-z]{2,3}\.[a-z]{2}$/i), "");
// REMOVES '.??' or '.???' or '.????' FROM END - e.g. '.US', '.COM', '.INFO'
} else if (url.match(new RegExp(/\.[a-z]{2,4}$/i))) {
url = url.replace(new RegExp(/\.[a-z]{2,4}$/i), "");
}
// CHECK TO SEE IF THERE IS A DOT '.' LEFT
var subDomain = url.match(new RegExp(/\./g)) ? true : false;
return subDomain;
}
const subdomainInput = "test.google.com";
const subdomainInputWithPath = "test.google.com/test";
const subdomainInputWithPathWithWS = " test.google.com ";
const subdomainInputWithWS = " test.google.com ";
const subdomainInputWithQueryString = "test.google.com/test?token=33333";
const noSubInput = "google.com"
const noSubInputWithPath = "google.com/search"
const noSubInputWithPathWithQueryString = "google.com/search?token=ttttttt"
console.log("Test Run\n")
conosle.log("With subdomain test cases")
console.log(`subdomainInput: ${subDomain(subdomainInput)}`);
console.log(`subdomainInputWithPath: ${subDomain(subdomainInputWithPath)}`);
console.log(`subdomainInputWithWS: ${subDomain(subdomainInputWithWS)}`);
console.log(`subdomainInputWithPathWithWS: ${subDomain(subdomainInputWithPathWithWS)}`);
console.log(`subdomainInputWithQueryString: ${subDomain(subdomainInputWithQueryString)}`);
conosle.log("Without subdomain test cases")
console.log(`noSubInput: ${subDomain(noSubInput)}`);
console.log(`noSubInput: ${subDomain(noSubInput)}`);
console.log(`noSubInputWithPath: ${subDomain(noSubInputWithPath)}`);
console.log(`noSubInputWithPathWithQueryString: ${subDomain(noSubInputWithPathWithQueryString)}`);
return(subDomain);
}

Javascript getParams breaking JSON

I have a URL with JSON in it.
This is the URL:
http://localhost:1234/asset/view-details.action?id=5&assetJson=%%7B%0A%20%20%22assetDesc%22%3A%20%22AMERICAN%20HARDWARE%20%26%20TOOLS%22%0A%7D
The JSON file can be retrieved with:
const assetJson = getParams.get('assetJson');
const assetDetail = JSON.parse(assetJson);
assetDetail should be equal to:
{
"assetDesc": "AMERICAN HARDWARE & TOOLS"
}
In JavaScript, every time I try getParams.get('assetJson'), I get this error "Unexpected end of JSON input". It's because the returned value is cut off at the ampersand( & ). I end up with:
{
"assetDesc": "AMERICAN HARDWARE
& TOOLS" } is completely cut off.
Is there a way to fix this? I appreciate any help you can give.

You can always use URLSearchParams and it's get function along with JSON.parse.
The substring(1) that I'm using exists to get rid of a % sign that is coming up before the JSON start {.
var str = "http://localhost:1234/asset/view-details.action?id=5&assetJson=%%7B%0A%20%20%22assetDesc%22%3A%20%22AMERICAN%20HARDWARE%20%26%20TOOLS%22%0A%7D";
const urlParams = new URLSearchParams(str);
myParam = urlParams.get('assetJson').substring(1);
myParam = JSON.parse(myParam)
console.log(myParam.assetDesc)

This is how I solved it.
const parseResult = new DOMParser().parseFromString(window.location.search, 'text/html');
// replace "&" with it's URL encoding
const parsed = parseResult.documentElement.textContent.replace('%20&', '%20%26');
const urlParams = new URLSearchParams(parsed);
const assetJson = urlParams.get('assetJson');
const assetDetail = JSON.parse(assetJson);

How to split url to get url path in JavaScript

I have constructed a url path that are pointing to different hostname www.mysite.com, so for example:
var myMainSite = 'www.mymainsite.com' + '/somepath';
so this is equivalent to www.mymainsite.com/path/path/needthispath/somepath.
How I'm doing it now is like the code below and this gives me a bunch of indexes of the url in the console.log.
var splitUrl = myMainSite.split('/');
console.log looks like:
0: http://
1: www.
2: mysite.com
3: path
4: path
5: needthispath
6: somepath
and I concat them like splitUrl[5]+'/'+splitUrl[6] and it doesn't look pretty at all.
So my question is how to split/remove url location http://www.mymainsite.com/ to get the url path needthispath/somepath in js? Is there a quicker and cleaner way of doing this?

First solution (URL object)
The URL object can be used for parsing, constructing, normalizing, encoding URLs, and so on.
var url = 'http://www.mymainsite.com/somepath/path2/path3/path4';
var pathname = new URL(url).pathname;
console.log(pathname);
The URL interface represents an object providing static methods used
for creating object URLs.
See the documentation for URL interface on Mozilla MDN
The Browser support is pretty good in 2017 (~ 90% but not IE11 nor below)
Second solution (a kind of a hack)
var urlHack = document.createElement('a');
urlHack.href = 'http://www.mymainsite.com/somepath/path2/path3/path4';
console.log(urlHack.pathname);
// you can even call this object with these properties:
// protocol, host, hostname, port, pathname, hash, search, origin

Why don't you use the split function and work from there.
The split function will break your URL out fully and from there you just need to look for the second last and last items.
Here is an example:
var initial_url = 'http://www.mymainsite.com/path/path/needthispath/somepath';
var url = initial_url .split( '/' );
var updated_url= document.location.hostname + '/' + url[ url.length - 2 ] + '/' + url[ url.length - 1 ];

You can use the URL API, though support is variable.
Alternatively, you could use URI.js.
Both allow you to get different parts of an URL, as well as build new URLs from parts.

function url($url) {
var url = $url.split( '//' );
if (url[0] === "http:" || url[0] === "https:") {
var protocol = url[0] + "//";
var host = url[1].split( '/' )[0];
url = protocol + host;
var path = $url.split(url)[1];
return {
protocol: protocol,
host: host,
path: path
};
}
}
var $url = url("http://www.mymainsite.com/path/path/needthispath/somepath");
console.log($url.protocol); // http://
console.log($url.host); // www.mymainsite.com
console.log($url.path); // /path/path/needthispath/somepath

Check if a string starts with http using Javascript

I've been searching all over for an answer to this and all of the answers I've found haven't been in JavaScript.
I need a way, in javascript, to check if a string starts with http, https, or ftp. If it doesn't start with one of those I need to prepend the string with http://. indexOf won't work for me I don't think as I need either http, https or ftp. Also I don't want something like google.com/?q=http://google.com to trigger that as being valid as it doesn't start with an http whereas indexOf would trigger that as being true (if I'm not entirely mistaken).
The closest PHP regex I've found is this:
function addhttp($url) {
if (!preg_match("~^(?:f|ht)tps?://~i", $url)) {
$url = "http://" . $url;
}
return $url;
}
Source: How to add http if its not exists in the url
I just don't know how to convert that to javascript. Any help would be greatly appreciated.

export const getValidUrl = (url = "") => {
let newUrl = window.decodeURIComponent(url);
newUrl = newUrl.trim().replace(/\s/g, "");
if(/^(:\/\/)/.test(newUrl)){
return `http${newUrl}`;
}
if(!/^(f|ht)tps?:\/\//i.test(newUrl)){
return `http://${newUrl}`;
}
return newUrl;
};
Tests:
expect(getValidUrl('https://www.test.com')).toBe('https://www.test.com');
expect(getValidUrl('http://www.test.com')).toBe('http://www.test.com');
expect(getValidUrl(' http : / / www.test.com')).toBe('http://www.test.com');
expect(getValidUrl('ftp://www.test.com')).toBe('ftp://www.test.com');
expect(getValidUrl('www.test.com')).toBe('http://www.test.com');
expect(getValidUrl('://www.test.com')).toBe('http://www.test.com');
expect(getValidUrl('http%3A%2F%2Fwww.test.com')).toBe('http://www.test.com');
expect(getValidUrl('www . test.com')).toBe('http://www.test.com');

This should work:
var pattern = /^((http|https|ftp):\/\/)/;
if(!pattern.test(url)) {
url = "http://" + url;
}
jsFiddle

var url = "http://something.com"
if( url.indexOf("http") == 0 ) {
alert("yeah!");
} else {
alert("No no no!");
}

Non-Regex declarative way:
const hasValidUrlProtocol = (url = '') =>
['http://', 'https://', 'ftp://'].some(protocol => url.startsWith(protocol))

This should work:
var re = /^(http|https|ftp)/

Refining previous answers a bit more, I used new RegExp(...) to avoid messy escapes, and also added an optional s.
var pattern = new RegExp('^(https?|ftp)://');
if(!pattern.test(url)) {
url = "http://" + url;
}
var pattern = new RegExp('^(https?|ftp)://');
console.log(pattern.test('http://foo'));
console.log(pattern.test('https://foo'));
console.log(pattern.test('ftp://foo'));
console.log(pattern.test('bar'));

Best readability I'd say is to use .startsWith('theString').
The code below checks for both http://, https:// and ftp:// and sets okUrl to a boolean true if any of them comply, by using the || which means or.
When you know if the url contains none of those strings, then just just add http:// to the start of the string either with literals (${})
let url = 'google.com'
const urlOK = url.startsWith('http://') || url.startsWith('https://') || url.startsWith('ftp://')
if (!urlOk) url = `http://${url}`
// => 'http://google.com'
or with simple string concat, which can be done in various ways, for example:
url = 'http://' + url
Read more about it, and test it, here:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith

remove url parameters with javascript or jquery

I am trying to use the youtube data api to generate a video playlist.
However, the video urls require a format of:
youtube.com/watch?v=3sZOD3xKL0Y
but what the api generates is:
youtube.com/watch?v=3sZOD3xKL0Y&feature=youtube_gdata
So what I need to do is be able to select everything after and including the ampersand(&) and remove it from the url.
Any way to do this with javascript and some sort of regular expression?

What am I missing?
Why not:
url.split('?')[0]

Hmm... Looking for better way... here it is
var onlyUrl = window.location.href.replace(window.location.search,'');

Example: http://jsfiddle.net/SjrqF/
var url = 'youtube.com/watch?v=3sZOD3xKL0Y&feature=youtube_gdata';
url = url.slice( 0, url.indexOf('&') );
or:
Example: http://jsfiddle.net/SjrqF/1/
var url = 'youtube.com/watch?v=3sZOD3xKL0Y&feature=youtube_gdata';
url = url.split( '&' )[0];

Use this function:
var getCleanUrl = function(url) {
return url.replace(/#.*$/, '').replace(/\?.*$/, '');
};
// get rid of hash and params
console.log(getCleanUrl('https://sidanmor.com/?firstname=idan&lastname=mor'));
If you want all the href parts, use this:
var url = document.createElement('a');
url.href = 'https://developer.mozilla.org/en-US/search?q=URL#search-results-close-container';
console.log(url.href); // https://developer.mozilla.org/en-US/search?q=URL#search-results-close-container
console.log(url.protocol); // https:
console.log(url.host); // developer.mozilla.org
console.log(url.hostname); // developer.mozilla.org
console.log(url.port); // (blank - https assumes port 443)
console.log(url.pathname); // /en-US/search
console.log(url.search); // ?q=URL
console.log(url.hash); // #search-results-close-container
console.log(url.origin); // https://developer.mozilla.org

//user113716 code is working but i altered as below. it will work if your URL contain "?" mark or not
//replace URL in browser
if(window.location.href.indexOf("?") > -1) {
var newUrl = refineUrl();
window.history.pushState("object or string", "Title", "/"+newUrl );
}
function refineUrl()
{
//get full url
var url = window.location.href;
//get url after/
var value = url = url.slice( 0, url.indexOf('?') );
//get the part after before ?
value = value.replace('#System.Web.Configuration.WebConfigurationManager.AppSettings["BaseURL"]','');
return value;
}

This worked for me:
window.location.replace(window.location.pathname)

No splits.. :) The correct/foolproof way is to let the native browser BUILT-IN functions do the heavy lifting using urlParams, the heavy lifting is done for you.
//summary answer - this one line will correctly replace in all current browsers
window.history.replaceState({}, '', `${location.pathname}?${params}`);
// 1 Get your URL
let url = new URL('https://tykt.org?unicorn=1&printer=2&scanner=3');
console.log("URL: "+ url.toString());
// 2 get your params
let params = new URLSearchParams(url.search);
console.log("querys: " + params.toString());
// 3 Delete the printer param, Query string is now gone
params.delete('printer');
console.log("Printer Removed: " + params.toString());
// BELOW = Add it back to the URL, DONE!
___________
NOW Putting it all together in your live browser
// Above is a breakdown of how to get your params
// 4 then you simply replace those in your current browser!!
window.history.replaceState({}, '', `${location.pathname}?${params}`);
Sample working Javascript Fiddle here

You could use a RegEx to match the value of v and build the URL yourself since you know the URL is youtube.com/watch?v=...
http://jsfiddle.net/akURz/
var url = 'http://youtube.com/watch?v=3sZOD3xKL0Y';
alert(url.match(/v\=([a-z0-9]+)/i));

Well, I am using this:
stripUrl(urlToStrip){
let stripped = urlToStrip.split('?')[0];
stripped = stripped.split('&')[0];
stripped = stripped.split('#')[0];
return stripped;
}
or:
stripUrl(urlToStrip){
return urlToStrip.split('?')[0].split('&')[0].split('#')[0];
}

For example we have:
example.com/list/search?q=Somethink
And you need use variable url like this by window.location.href:
example.com/list/edit
From url:
example.com/list/search?q=Somethink
example.com/list/
var url = (window.location.href);
url = url.split('/search')[0];
url = (url + '/edit');
This is simple solution:-)

Develop Reference

JavaScript is the programming language of the Web.

Node.js, parse filename from URL - javascript

How can I parse a url? site.com:8080/someFile.txt?attr=100 or site.com:8080/someFile.txt/?attr=100 I need to get someFile.txt, where is a file name I set by myself as the format (txt or some other). UPDATE I tried var path = url.parse(req.url).path; But I still cannot get the path (someFile.txt).

Something like this.. var url = require("url"); var path = require("path"); var parsed = url.parse("http://example.com:8080/test/someFile.txt/?attr=100"); console.log(path.basename(parsed.pathname));

here's my working code, hope it helps import path from 'path' const getBasenameFormUrl = (urlStr) => { const url = new URL(urlStr) return path.basename(url.pathname) }

decodeURIComponent(path.basename('http://localhost/node-v18.12.1-x64.msi')) node-v18.12.1-x64.msi

Related

Javascript return true if a string contains subdomain

Javascript getParams breaking JSON

How to split url to get url path in JavaScript

Check if a string starts with http using Javascript

remove url parameters with javascript or jquery

Categories

Resources