Matching escaped spaces using JavaScript indexOf - javascript

I have the following JavaScript code:
var matchArray = [];
var passedUrl = '/' + url;
var tabLink;
$('.uiAjaxTabs li a').each(function () {
if (passedUrl.indexOf($(this).attr('href')) == 0) {
boverlap_penalty = passedUrl.replace($(this).attr('href'), '').length;
matchArray.push({ 'score': boverlap_penalty, 'dom_obj': this });
}
});
if (matchArray.length) {
tabLink = matchArray.sort(function (a, b) {
return (a.score < b.score) ? -1 : 1
}).shift().dom_obj;
}
$(tabLink).parents('li').addClass('loading');
Which takes a passedUrl and then matches it up with a set of links to see which most closely matches the url, and then adds a class of loading to it.
This works fine EXCEPT if the link has a space in it e.g. domain.com/People?Name=John Doe because the browser sees it as domain.com/People?Name=John%20Doe and therefore doesn't match it correctly when the passedUrl has the escaped spaces and the link does not.
Any ideas on how to fix this?

Any ideas on how to fix this?
Use
var passedUrl = decodeURI('/' + url);
See MDN docs.

Try JavaScript's unescape function, it seem to decode URL-encoded strings.

Related

Escape user-generated chat messages but render links

User enter chat messages, which gets rendered directly to the page using Mustache templates. Obviously, HTML should be escaped to prevent HTML injection, but then again links should be rendered as <a href='...'>.
There are different approaches I've tried to use {{{ ... }}} to return the unescaped HTML content, which means the link would get rendered and I need to take care of HTML escaping myself. Is there a safe way of doing that without relying on a half-baked solution I write myself?
jQuery.text() would be great, but I guess it will render the <a> again as text.
What else can I do here?
If you don't want to write your own escaping or parsing solution there is a jQuery plugin to handle links called Linkify. You could simply escape messages and then parse them client-side.
Example of how it works:
var text = "<div>Test<br>Test<br>Test http://stackoverflow.com</div>";
$('div').text(text);
// Before: <div>Test<br>Test<br>Test http://stackoverflow.com</div>
$('div').linkify();
// After: lt;div>Test<br>Test<br>Test http://stackoverflow.com</div>
Just an idea: You could build your own escaping function
escape : function () {
return function(val, render) {
var $s = $(val);
var $elements = $s.find("*").not("a"); //add other white-listed elements seperated by comma
for (var i = $elements.length - 1; i >= 0; i--) {
var e = $elements[i];
$(e).replaceWith(e.innerHTML);
}
return $s.html();
}
}
You can call the function by
{{#escape}}{{{YOUR_TEXT}}}{{/escape}}
I have not tested this. This solution needs jQuery. The code above is based on this solution: https://stackoverflow.com/a/27764431/1479486
try inserting first in .text() and then use regexp for render the link with .html(). Here you can see a vanilla example:
var a="see formula a<b>c in http://test.com or https://x.com?p=3";
var hold=document.createElement('div');
hold.textContent=a;
hold.innerHTML=hold.innerHTML.replace(
/(https?:\/\/[-$A-Za-z0-9%_?&.~+\/=]+)/g,
'$1'
);
window.addEventListener('load',function(){
document.body.appendChild(hold);
});
For a more acurate regexp you can see here
If you end up going down the regex route the following filter and regex was the most aggressive one i found for picking up all kinds of urls that your users will try to type.
Heres a regexr to play around with it: http://regexr.com/3bjk9
(function () {
'use strict';
angular
.module('core.filters')
.filter('urlToA', urlToA);
// --------------------
function urlToA () {
return function (string, noClick) {
var urlPattern = /((?:(http|https|Http|Https|rtsp|Rtsp):\/\/(?:(?:[a-zA-Z0-9\$\-\_\.\+\!\*\'\(\)\,\;\?\&\=]|(?:\%[a-fA-F0-9]{2})){1,64}(?:\:(?:[a-zA-Z0-9\$\-\_\.\+\!\*\'\(\)\,\;\?\&\=]|(?:\%[a-fA-F0-9]{2})){1,25})?\#)?)?((?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]{0,64}\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|(?:jobs|j[emop])|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\:\d{1,5})?)(\/(?:(?:[a-zA-Z0-9\;\/\?\:\#\&\=\#\~\-\.\+\!\*\'\(\)\,\_])|(?:\%[a-fA-F0-9]{2}))*)?(?:\b|$)/gi; // jshint ignore:line
return string ? string.replace(urlPattern, replace) : string;
function replace (url) {
var httpUrl = url.indexOf('http') === -1 ? 'http://' + url : url;
if (noClick) {
return '<a>' + url + '</a>';
} else {
return '' + url + '';
}
}
};
}
})();

JavaScript regexp parsing an array of URLs

String
url(image1.png), url('image2.png'), url("image3.png")
Note the ' and " delimiters. It would be nice to handle them as well, but I'm happy if the first form is captured.
Result
var results = 'image1.png', 'image2.png', 'image3.png';
How?
I would like to use regular expressions in javascript to parse a string that is used as the CSS property of an element with multiple backgrounds. The aim is to get the url of the images that are put in the background of the element using CSS.
See more: http://www.css3.info/preview/multiple-backgrounds/
JSFiddle
http://jsfiddle.net/fcx9x59r/1/
For this particular string [^'"()]+(?=['")]) seems to work fine:
css = "url(image1.png), url('image2.png'), url(\"image3.png\")";
m = css.match(/[^'"()]+(?=['")])/g)
document.write(m)
In the general case, you have to resort to a loop, because JS doesn't provide a way to return all matching groups from a single call:
css = "url(image1.png), something else, url('image2.png'), url(\"image3.png\")";
urls = []
css.replace(/url\(['"]*([^'")]+)/g, function(_, $1) { urls.push($1) });
document.write(urls)
/url\(([^"']+?)\)|'([^'"]+?)'\)|"([^'"]+?)"\)/g
Try this.Grab the captures.See demo.
http://regex101.com/r/qQ3kG7/1
You could try the below code also.
> var s = 'url(image1.png), url(\'image2.png\'), url("image3.png")';
undefined
> var re = /url\((["'])?((?:(?!\1).|[^'"])*?)\1\)/g;
undefined
> var matches = [];
undefined
> var m;
undefined
> while ((m = re.exec(s)) != null) {
......... matches.push(m[2]);
......... }
3
> console.log(matches)
[ 'image1.png', 'image2.png', 'image3.png' ]
DEMO
Try this:
var results = str.match(/url\(['"]?[^\s'")]+['"]?\)/g)
.map(function(s) {
return s.match(/url\(['"]?([^\s'")]+)['"]?\)/)[1];
});
Demo

Regex to match urls but not urls in hyperlinks

I am trying to wrap any url that is in some text and turn it into a hyperlink... but I do not want to wrap a url that is already wrapped by a hyperlink.
For example:
Go To Twitter
here is a url http://anotherurl.com
The following code:
function replaceURLWithHTMLLinks(text) {
var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return text.replace(exp, "<a href='$1'>$1</a>");
}
Gives the following output:
#BIR
http://anotherurl.com
How can I modify the regex to exclude already hyperlinked urls?
Thanks
Answer:
The new method is:
function replaceURLWithHTMLLinks(text) {
var exp = /(?:^|[^"'])((ftp|http|https|file):\/\/[\S]+(\b|$))/gi
return text.replace(exp, " <a href='$1'>$1</a>");
}
The above code functions as required. I modified the regex from a link in the comments because it contained a bug where it would include the full stop, it now excludes any full stops that come after a full url.
Since javascript doesn't seem to support negative look-behind, you will have to trick it by using a replace function.
Capture the href (maybe you should also also consider src) :
function repl(text) {
var exp = /((href|src)=["']|)(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return text.replace(exp, function() {
return arguments[1] ?
arguments[0] :
"" + arguments[3] + ""
});
}
See the demo
EDIT
A "better" version which will only replace links in actual text nodes:
function repl(node) {
var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/i;
var nodes=node.childNodes;
for (var i=0, m=nodes.length; i<m; i++){
var n=nodes[i];
if (n.nodeType==n.TEXT_NODE) {
var g=n.textContent.match(exp);
while(g) {
var idx=n.textContent.indexOf(g[0]);
var pre=n.textContent.substring(0,idx);
var t=document.createTextNode(pre);
var a=document.createElement("a");
a.href=g[0];
a.innerText=g[0];
n.textContent = n.textContent.substring(idx+g[0].length);
n.parentElement.insertBefore(t,n);
n.parentElement.insertBefore(a,n);
g=n.textContent.match(exp);
}
}
else {
repl(n);
}
}
}
var r=repl(document.getElementById("t"))
​
See the demo

Javascript to extract *.com

I am looking for a javascript function/regex to extract *.com from a URI... (to be done on client side)
It should work for the following cases:
siphone.com = siphone.com
qwr.siphone.com = siphone.com
www.qwr.siphone.com = siphone.com
qw.rock.siphone.com = siphone.com
<http://www.qwr.siphone.com> = siphone.com
Much appreciated!
Edit: Sorry, I missed a case:
http://www.qwr.siphone.com/default.htm = siphone.com
I guess this regex should work for a few cases:
/[\w]+\.(com|ca|org|net)/
I'm not good with JavaScript, but there should be a library for splitting URIs out there, right?
According to that link, here's a "strict" regex:
/^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:#]*)(?::([^:#]*))?)?#)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/
As you can see, you're better off just using the "library". :)
This should do it. I added a few cases for some nonmatches.
var cases = [
"siphone.com",
"qwr.siphone.com",
"www.qwr.siphone.com",
"qw.rock.siphone.com",
"<http://www.qwr.siphone.com>",
"hamstar.corm",
"cheese.net",
"bro.at.me.come",
"http://www.qwr.siphone.com/default.htm"];
var grabCom = function(str) {
var result = str.match("(\\w+\\.com)\\W?|$");
if(result !== null)
return result[1];
return null;
};
for(var i = 0; i < cases.length; i++) {
console.log(grabCom(cases[i]));
}
var myStrings = [
'siphone.com',
'qwr.siphone.com',
'www.qwr.siphone.com',
'qw.rock.siphone.com',
'<http://www.qwr.siphone.com>'
];
for (var i = 0; i < myStrings.length; i++) {
document.write( myStrings[i] + '=' + myStrings[i].match(/[\w]+\.(com)/gi) + '<br><br>');
}
I've placed given demo strings to the myStrings array.
i - is index to iterate through this array. The following line does the matching trick:
myStrings[i].match(/[\w]+\.(com)/gi)
and returns the value of siphone.com. If you'd like to match .net and etc. - add (com|net|other) instead of just (com).
Also you may find the following link useful: Regular expressions Cheat Sheet
update: missed case works too %)
You could split the string then search for the .com string like so
var url = 'music.google.com'
var parts = url.split('.');
for(part in parts) {
if(part == 'com') {
return true;
}
{
uri = "foo.bar.baz.com"
uri.split(".").slice(-2).join(".") // returns baz.com
This assumes that you want just the hostname and tld. It also assumes that there is no path information either.
Updated now that you also need to handle uris with paths you could do:
uri.split(".").slice(-2).join(".").split("/")[0]
Use regexp to do that. This way modifications to the detections are quite easy.
var url = 'www.siphone.com';
var domain = url.match(/[^.]\.com/i)[0];
If you use url.match(/(([^.]+)\.com)[^a-z]/i)[1] instead. You can assure that the ".com" is not followed by any other characters.

How to find if a text contains url string

How can I find if text contains a url string. I mean if I have
Sometexthttp://daasddas some text
I want http://daasddas to be achored or maked as a link wit javascript
function replaceURLWithHTMLLinks(text)
{
var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return text.replace(exp,"<a href='$1'>$1</a>");
}
While the code above works good if all given URLs are full (http://mydomain.com), I had problems parsing a URL like:
www.mydomain.com
i.e. without a protocol.
So I added some simple code to the function:
var exp = /(\b(((https?|ftp|file|):\/\/)|www[.])[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
var temp = text.replace(exp,"$1");
var result = "";
while (temp.length > 0) {
var pos = temp.indexOf("href=\"");
if (pos == -1) {
result += temp;
break;
}
result += temp.substring(0, pos + 6);
temp = temp.substring(pos + 6, temp.length);
if ((temp.indexOf("://") > 8) || (temp.indexOf("://") == -1)) {
result += "http://";
}
}
return result;
If someone should fine a more optimal solution to add a default protocol to URLs, let me know!
You have to use regex(Regular expressions) to find URL patterns in blocks of text.
Here's a link to same question and answers:
Regular Expression to find URLs in block of Text (Javascript)
I tweaked dperinis regex-url script so that a URL embedded in a string can be found. It will not find google.com, this is necessary if it's a user input field, the user might leave out the whitespace after a period/full stop. It will also find www.google.com, since hardly anyone types the protocol.
(?:((?:https?|ftp):\/\/)|ww)(?:\S+(?::\S*)?#)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?
I tested it on www.regextester.com, it worked for me, if you encounter a problem, please comment.
you can use a regular expression to find an URL and replace it by the same with a leading and a trailing tag
Many of the solutions start getting very complex and hard to work with a variety of situations. Here's a function I created to capture any URL beginning with http/https/ftp/file/www. This is working like a charm for me, the only thing it doesn't add a link to is user entered URL's without an http or www at the beginning (i.e. google.com). I hope this solution is helpful for somebody.
function convertText(txtData) {
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
txtData = txtData.replace(urlRegex, '$1');
var urlRegex =/(\b(\swww).[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
txtData = txtData.replace(urlRegex, ' $1');
var urlRegex =/(>\swww)/ig;
txtData = txtData.replace(urlRegex, '>www');
var urlRegex =/(\"\swww)/ig;
txtData = txtData.replace(urlRegex, '"http://www');
return txtData;
}
function replaceURLWithHTMLLinksHere(text)
{
var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return text.replace(exp,"<a href='$1'>$1</a>");
}
Okay we got this regular expresion here in function.
/(\b(https?|ftp|file)://[-A-Z0-9+&##/%?=~|!:,.;]*[-A-Z0-9+&##/%=~|])/ig
Lets understand this.
/ / this is how a regex starts.
\b > is maching https or ftp or file that is unique and is in the start of string. these keywords should not have any character attatched to them in
begining like bbhttps or bbhttp it will not match these otherwise.
https? > here ? means zero or one of preceding character or group. In this case s is optional.
| > match one out of given just like OR.
() > create group to be matched
/ > means the next character is special and is not to be interpreted literally. For example, a 'b' without a preceding '\' generally matches lowercase
'b's wherever they occur. But a '\b' by itself doesn't match any character
[] > this is Character Classes or Character Sets. It is used to have a group of characters and only one character out of all will be present at a time.
[-A-Z0-9+&##/%?=~_|!:,.;]* > zero or more occurrences of the preceding element. For example, b*c matches "c", "bc", "bbc", "bbbc", and so on.
[-A-Z0-9+&##/%=~_|] > means one charactor out of these all.
i > Case-insensitive search.
g > Global search.
function replaceURLWithLinks(text){
var text = "";
text= text.replace(/\r?\n/g, '<br />');
var result = URI.withinString(text, function(url) {
return "<a href='"+url+"' target='_blank'>" + url + "</a>";
});
}

Categories

Resources