Regex to match urls but not urls in hyperlinks - javascript

I am trying to wrap any url that is in some text and turn it into a hyperlink... but I do not want to wrap a url that is already wrapped by a hyperlink.
For example:
Go To Twitter
here is a url http://anotherurl.com
The following code:
/**
 * Wrap every http(s)/ftp/file URL found in `text` in an HTML anchor.
 *
 * The pattern knows nothing about surrounding markup, so a URL that already
 * sits inside an `href="..."` attribute or an existing link is wrapped again.
 *
 * @param {string} text - Text to scan for URLs.
 * @returns {string} `text` with each URL replaced by `<a href='URL'>URL</a>`.
 */
function replaceURLWithHTMLLinks(text) {
  // `@` is restored in both character classes (it had been garbled into a
  // second `#`), so URLs such as http://example.com/@user are not cut short.
  var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
  return text.replace(exp, "<a href='$1'>$1</a>");
}
Gives the following output:
#BIR
http://anotherurl.com
How can I modify the regex to exclude already hyperlinked urls?
Thanks
Answer:
The new method is:
/**
 * Wrap URLs in `text` in anchors, skipping any URL that directly follows a
 * quote character (i.e. one that is already the value of an attribute such
 * as `href="..."`).
 *
 * @param {string} text - Text (possibly containing markup) to scan.
 * @returns {string} `text` with each unquoted URL replaced by
 *   `<a href='URL'>URL</a>`.
 */
function replaceURLWithHTMLLinks(text) {
  // Group 1 is the character in front of the URL (or nothing at the start of
  // the string). It is captured and written back so that characters such as
  // "(" or a newline are not swallowed and replaced by a space.
  // Group 2 is the URL; a trailing full stop mid-text is excluded by the \b.
  var exp = /(^|[^"'])((?:ftp|https?|file):\/\/\S+(?:\b|$))/gi;
  return text.replace(exp, "$1<a href='$2'>$2</a>");
}
The above code functions as required. I modified the regex from a link in the comments because it contained a bug where it would include the full stop, it now excludes any full stops that come after a full url.

Since javascript doesn't seem to support negative look-behind, you will have to trick it by using a replace function.
Capture the href (maybe you should also consider src):
/**
 * Wrap plain URLs in `text` in anchors while leaving URLs that are the value
 * of an `href=` or `src=` attribute untouched.
 *
 * JavaScript regexes of that era had no look-behind, so the optional
 * attribute prefix is captured and the replacer decides what to do.
 *
 * @param {string} text - Text or markup to scan.
 * @returns {string} `text` with each non-attribute URL replaced by
 *   `<a href="URL">URL</a>`.
 */
function repl(text) {
  // Group 1: `href="` / `src='` prefix, or the empty string when absent.
  // Group 3: the URL itself (`@` restored in the character classes).
  var exp = /((href|src)=["']|)(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
  return text.replace(exp, function (match, attrPrefix, attrName, url) {
    // A non-empty prefix means the URL is already an attribute value.
    return attrPrefix ? match : '<a href="' + url + '">' + url + '</a>';
  });
}
See the demo
EDIT
A "better" version which will only replace links in actual text nodes:
/**
 * Walk the DOM subtree under `node` and turn every URL found in a text node
 * into an <a> element, leaving existing links alone.
 *
 * @param {Node} node - Root whose descendants are scanned (modified in place).
 * @returns {undefined}
 */
function repl(node) {
  var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/i;
  // childNodes is live and this function inserts siblings while iterating,
  // so work on a snapshot: every original child is visited exactly once and
  // the anchors created below are never visited.
  var children = Array.prototype.slice.call(node.childNodes);
  for (var i = 0; i < children.length; i++) {
    var n = children[i];
    if (n.nodeType === n.TEXT_NODE) {
      var g = n.textContent.match(exp);
      while (g) {
        var content = n.textContent;
        // Text in front of the URL becomes its own text node ...
        var t = document.createTextNode(content.substring(0, g.index));
        // ... the URL becomes a link ...
        var a = document.createElement("a");
        a.href = g[0];
        a.textContent = g[0];
        // ... and `n` keeps whatever follows, which is scanned again.
        n.textContent = content.substring(g.index + g[0].length);
        node.insertBefore(t, n);
        node.insertBefore(a, n);
        g = n.textContent.match(exp);
      }
    } else if (n.nodeName.toUpperCase() !== "A") {
      // Do not descend into existing links: their text is already linked.
      repl(n);
    }
  }
}
// Run the linkifier on the element with id "t".
// NOTE(review): the repl(node) shown above has no return statement, so `r`
// ends up undefined; the call matters only for its effect on the DOM.
var r=repl(document.getElementById("t"))
​
See the demo

Related

Handlebar Helper to Replace URL in String with HTML String Containing the Matching URL?

There is a similar question, but the result is comparatively specific..
I have a string of paragraph lines that looks like this (saved from a textarea):
"Here are some links\n\nhttp://www.example.com is one of them\n\nand http://duckduckgo.com is another"
How would I replace each URL, http://www.example.com and http://duckduckgo.com, with:
www.example.com
and
duckduckgo.com
Such that all URLs become rendered as links, the text of which excludes the http://..
"Here are some links\n\nwww.example.com is one of them\n\nand duckduckgo.com is another"
To render this:
Here are some links
www.example.com is one of them
and duckduckgo.com is another
From a handlebars helper..
// Question stub: registers a Handlebars helper named 'linkingplaintext' that
// receives the raw text as `plaintext` and should return it with URLs linked.
Handlebars.registerHelper('linkingplaintext', function(plaintext) {
// some code to replace the URLs with their equivalent links
// NOTE(review): `linkedplainText` is never assigned in this stub; it stands
// for the result the missing code above is supposed to produce.
return new Handlebars.SafeString(linkedplainText);
});
This works with the example string you provided:
// Sample input: one paragraph per line, some of which are bare URLs.
var text = "Here are some links\n\nhttp://www.example.com\n\nLorem ipsum dolor\n\nhttp://www.example.com";

// Keep only the lines that contain "http" somewhere.
var urls = text.split('\n').filter(function (line) {
  return line.indexOf('http') !== -1;
});

// Append one anchor per kept line to the page, each followed by a line break.
urls.forEach(function (link) {
  var anchor = $('<a></a>');
  anchor.attr('href', link);
  anchor.html(link);
  $('body').append(anchor);
  $('body').append('<br />');
});
Example on JSFiddle.
--edit--
Now that you've updated the question, here is a handlebars helper you can use:
/**
 * Handlebars helper `wrapURL`: HTML-escapes `str`, then wraps every
 * whitespace-delimited token starting at "http" in an anchor.
 *
 * @param {string} str - Plain text that may contain URLs.
 * @returns {Handlebars.SafeString} Escaped text with URLs turned into links.
 */
Handlebars.registerHelper('wrapURL', function(str) {
  // Escape first so that only the anchors added below are emitted as markup.
  var escaped = Handlebars.Utils.escapeExpression(str);
  // A single global replace handles zero, one or many URLs, and repeated
  // URLs are each wrapped once (a match/replace loop would re-hit the first
  // occurrence, and match() returns null when nothing is found).
  var linked = escaped.replace(/http\S+/g, function (url) {
    return '<a href="' + url + '">' + url + '</a>';
  });
  return new Handlebars.SafeString(linked);
});
And a working example on JSFIddle.

get value of href tag through javascript

How do i get the value of the text between >testhere</a> in an href tag using javascript?
<a style="color:#39485;" href="/index.html">Hello, 112</a>
112 and /index.html will always be different, the only thing that will be the same is Hello, so having that in mind how do i get the value which in this case is 112? I don't want the Hello, part, just the 112. The number of digits may vary, it is not necessarily 3 digits long such as this one.
The line above that is:
<div id="nna">
and the one below is:
</span>
Another one is:
<a href="/index/page?number=33" class="page_number">
/index/page?number= will always be the same, and so will the class value.
The line above that is always:
<div id="mla">
the line below that always is:
<div class="friend_block_avatar">
Hopefully that will make it easier.
I want to get the number which will always be different which in this case is 33.
I will be running the javascript in my browser which is chrome through the url bar like this:javascript:javascriptcodegoeshere. So i cannot use jquery. The number of digits for this one may vary too, it is not necessarily 2 digits long.
Both of these will be in source code of the same page.
The first scenario:
Find all the anchors, then if the text starts with 'Hello, ' extract it:
// Collect the part after "Hello, " from every anchor whose text starts with it.
var anchors = document.getElementsByTagName('a'),
    values = [],
    // Anchored with ^ so that only text that *starts* with "Hello, " counts;
    // unanchored, "Oh Hello, 5" would match and yield "Oh 5".
    str = /^Hello, /,
    i;
for (i = 0; i < anchors.length; i += 1) {
  if (str.test(anchors[i].text)) {
    values.push(anchors[i].text.replace(str, ''));
  }
}
console.log(values); // an array containing all the values
Demo1
The second scenario:
Find all the anchors whose href begins with '/index/page?number=' and extract the value:
// Select only the anchors whose href attribute starts with the page-number path.
var anchors = document.querySelectorAll('a[href^="/index/page?number="]');
var values = [];
var i = 0;
while (i < anchors.length) {
  // pathname + search rebuilds "/index/page?number=NN"; dropping the fixed
  // prefix leaves just the number.
  values.push((anchors[i].pathname + anchors[i].search).replace('/index/page?number=', ''));
  i += 1;
}
console.log(values); // an array containing all the values
Demo2
Find all of the anchors first:
// Take the page's links from the built-in document.links collection.
var anchors = document.links;
Then parse through them and split up by what makes them unique: Hello,:
// Log what follows "Hello, " for every link whose text starts with it.
// An index loop is used instead of for...in, which also enumerates
// non-element keys of the collection such as "length" and "item".
for (var i = 0; i < anchors.length; i++) {
  var str = anchors[i].innerText;
  if (typeof str !== 'string') {
    continue;
  }
  var parts = str.split('Hello, ');
  // parts[0] === '' means the text began with the separator; requiring a
  // second part keeps empty-text links from logging `undefined`.
  if (parts.length > 1 && parts[0] === '') {
    console.log(parts[1]);
  }
}
With this sample data:
<a style="color:#39485;" href="/index.html">Hello, 112</a>
<a style="color:#39485;" href="/index1.html">Hello, 113</a>
<a style="color:#39485;" href="/index2.html">Hello, 114</a>
<a style="color:#39485;" href="/index3.html">113</a>
Hello, 123
...your console will show 112, 113, 114 and 123.
Fiddle here
Since your href and innerHTML prefixes will always be the same, and you are simply looking for a number following, you can accomplish this with a simple regular expression. Example:
HTML:
<a style="color:#39485;" href="/index.html">Hello, 112</a>
some link
Should not be included
JS:
// One pattern, two alternatives: group 1 is the number after "number=" (for
// hrefs), group 2 is the number after "Hello, " (for link contents).
var regex = /.*number=(\d+)|Hello,\s(\d+)/;
var links = document.getElementsByTagName("a");
var innerHtmlNumbers = [];
var hrefNumbers = [];
for (var index = 0; index < links.length; index++) {
  var link = links[index];
  var match = regex.exec(link.getAttribute("href")); // Check the href first
  // Test the group as well as the match: the pattern can succeed through
  // the other alternative, which would leave this group undefined.
  if (match && match[1] !== undefined) {
    // Found an href like "/index/page?number=12381239"
    hrefNumbers.push(match[1]);
  } else {
    // Check innerHTML for something like "Hello, 123813912"
    match = regex.exec(link.innerHTML);
    if (match && match[2] !== undefined) {
      innerHtmlNumbers.push(match[2]);
    }
  }
}
console.log(hrefNumbers);
console.log(innerHtmlNumbers); // was terminated with ":" (a syntax error)
Take a look at this jsfiddle to see a working example. With this, you can simply run through all of your <a> tags at once.
Assumptions: you want any number of digits in the href and html of all anchor tags on the page.
You can execute the following from the OmniBar in chrome (you do know that you can use Chrome's built in console to execute javascript on the current page right?).
// Bookmarklet form (kept on one line so it can be pasted into the URL bar):
// for every <a> on the page it logs the first run of digits in the link's
// innerHTML and in its href. grabDigits returns the match array, or undefined
// when there are no digits, in which case "undefined" is logged. The trailing
// /* ... */ holds a shorter variant that logs only the innerHTML digits.
javascript: var anchors = document.querySelectorAll('a'); anchors.forEach = [].forEach; function grabDigits (string) { var match = string.match(/\d+/); if (match && match[0]) { return match; } } anchors.forEach(function(anchor){ console.log('text: ' + grabDigits(anchor.innerHTML)); console.log('href: ' + grabDigits(anchor.href)); }); /* run with: javascript: var anchors = document.querySelectorAll('a'); anchors.forEach = [].forEach; anchors.forEach(function(anchor){ match = anchor.innerHTML.match(/\d+/); if (match && match[0]) { console.log(match[0]); } }); */
Chrome may strip the initial javascript: prefix when you paste, so you may need to type that part into the omnibox manually and then paste the rest of the script.
I'm freewheeling a little bit here, but it seems to be working ^^:
JS
// Select every anchor on the page, then borrow Array's forEach so the
// resulting list can be iterated with a callback.
var anchors = document.querySelectorAll('a');
anchors.forEach = Array.prototype.forEach;
/**
 * Find the first run of digits in `string`.
 *
 * @param {string} string - Text to search.
 * @returns {Array|undefined} The match array (element 0 is the digit run),
 *   or undefined when `string` contains no digits.
 */
function grabDigits (string) {
  var digits = string.match(/\d+/);
  if (!digits || !digits[0]) {
    return;
  }
  return digits;
}
// For each anchor, report the digits found in its markup and in its href.
anchors.forEach(function (link) {
  var fromText = grabDigits(link.innerHTML);
  var fromHref = grabDigits(link.href);
  console.log('text: ' + fromText);
  console.log('href: ' + fromHref);
});
jsbin
I'm not sure I understood your question correctly - do you want to separate the number from the rest of the content? You could do this with string.split when the string has a known separator, for example:
a = "Hello,112"
You could do something like this:
a.split(",") will return an array with:
["Hello", "112"]
I hope i understood it right ^^

Matching escaped spaces using JavaScript indexOf

I have the following JavaScript code:
// Find the tab link whose href is the longest prefix of the requested URL
// and mark its <li> as loading.
var matchArray = [];
var passedUrl = '/' + url;
var tabLink;
$('.uiAjaxTabs li a').each(function () {
  var href = $(this).attr('href');
  if (passedUrl.indexOf(href) === 0) {
    // Score = number of characters of the URL the href does not cover;
    // lower is a closer match. Declared locally (it used to leak as an
    // implicit global).
    var boverlap_penalty = passedUrl.replace(href, '').length;
    matchArray.push({ 'score': boverlap_penalty, 'dom_obj': this });
  }
});
if (matchArray.length) {
  // Numeric comparator that returns 0 for ties, as Array#sort expects.
  tabLink = matchArray.sort(function (a, b) {
    return a.score - b.score;
  }).shift().dom_obj;
}
$(tabLink).parents('li').addClass('loading');
Which takes a passedUrl and then matches it up with a set of links to see which most closely matches the url, and then adds a class of loading to it.
This works fine EXCEPT if the link has a space in it e.g. domain.com/People?Name=John Doe because the browser sees it as domain.com/People?Name=John%20Doe and therefore doesn't match it correctly when the passedUrl has the escaped spaces and the link does not.
Any ideas on how to fix this?
Any ideas on how to fix this?
Use
// Decode percent-escapes (e.g. %20 back to a space) in the passed URL before
// it is compared with the link hrefs.
var passedUrl = decodeURI('/' + url);
See MDN docs.
Try JavaScript's unescape function; it seems to decode URL-encoded strings.

Replace url with link but with an added difficulty

In an editable iframe I am replacing the plain urls with links thanks to this regex:
/**
 * Replace each http/https/ftp URL in `text` with an anchor pointing at it.
 *
 * @param {string} text - Text or markup to scan.
 * @returns {string} `text` with every match replaced by `<a href='URL'>URL</a>`.
 */
function linkify(text) {
  var urlPattern = /(http|https|ftp)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?\/?([a-zA-Z0-9\-\._\?\,\'\/\\\+&%\$#\=~])*/g;
  // The replacer's first argument is the whole match.
  return text.replace(urlPattern, function (url) {
    return "<a href='" + url + "'>" + url + "</a>";
  });
}
// Linkify the HTML of the '.div' selection. The input is markup rather than
// plain text, so URLs inside existing links are matched as well.
var content = linkify($('.div').html());
My problem comes when I try to 'linkify' a string which already contains links; it creates a monster like this:
http://google.com
I think this problem can be solved if I iterate over the DOM instead of analyzing the HTML string, but before trying that I want to ask here whether anyone has a better idea.
Thanks!
I had a similar problem, and solved it by first stripping the html tags:
/**
 * Remove HTML tags, HTML comments and PHP blocks from `str`.
 *
 * @param {string} str - Markup to clean.
 * @param {string} [allow] - Tags to keep, written like "<b><i>"; both the
 *   opening and closing forms of a listed tag are kept.
 * @returns {string} `str` without comments/PHP blocks and without any tag
 *   that is not listed in `allow`.
 */
function strip_tags(str, allow) {
  // Normalise the allow-list to a lowercase string such as "<b><i>".
  var listedTags = ((allow || "") + "").toLowerCase().match(/<[a-z][a-z0-9]*>/g);
  var whitelist = listedTags === null ? '' : listedTags.join('');

  var commentOrPhp = /<!--[\s\S]*?-->|<\?(?:php)?[\s\S]*?\?>/gi;
  var htmlTag = /<\/?([a-z][a-z0-9]*)\b[^>]*>/gi;

  var withoutComments = str.replace(commentOrPhp, '');
  return withoutComments.replace(htmlTag, function (wholeTag, tagName) {
    var isAllowed = whitelist.indexOf('<' + tagName.toLowerCase() + '>') !== -1;
    return isAllowed ? wholeTag : '';
  });
}
And then running the urlify function:
/**
 * Wrap every http or https URL found in `text` in an HTML anchor.
 * Meant to run on text whose tags have already been stripped.
 *
 * @param {string} text - Plain text to scan.
 * @returns {string} `text` with each URL replaced by `<a href='URL'>URL</a>`.
 */
function urlify(text) {
  // `https?` so secure URLs are linked too (a bare `http` group never
  // matches "https://"); `@` is restored in both character classes.
  var exp = /(\b(https?):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
  return text.replace(exp, "<a href='$1'>$1</a>");
}
See my fiddle to try it out: http://jsfiddle.net/gerbenzomp/UJMeR/3/

Extract values from href attribute string using JQuery

I would like to extract values from href attribute string using JQuery
$(this).attr("href")
will give me
?sortdir=ASC&sort=Vendor_Name
What i need is these values parsed into an array
myArray['sort']
myArray['sortdir']
Any ideas?
Thanks!
BTW , I saw somewhere else on SO the following similar idea to be used with a query string.
I could not tweak it for my needs just yet.
// Map of decoded query-string parameters of the current page, filled once.
var urlParams = {};
(function () {
  var plus = /\+/g; // "+" stands for a space in a query string
  var pairPattern = /([^&=]+)=?([^&]*)/g;
  var query = window.location.search.substring(1); // drop the leading "?"

  function decode(s) {
    return decodeURIComponent(s.replace(plus, " "));
  }

  // The /g flag makes exec() resume after the previous key=value pair.
  var match = pairPattern.exec(query);
  while (match !== null) {
    urlParams[decode(match[1])] = decode(match[2]);
    match = pairPattern.exec(query);
  }
})();
Try the following:
// Split the link's query string into its "key=value" pieces.
var href = $(this).attr("href").replace('?', "").split('&');
// Build a key -> value lookup from those pieces.
var myArr = {};
href.forEach(function (pair) {
  var keyAndValue = pair.split('=');
  myArr[keyAndValue[0]] = keyAndValue[1];
});
DEMO
Try this
/**
 * Read one parameter from a query string.
 *
 * @param {string} name - Exact parameter name to look up.
 * @param {string} string - Query string or URL, e.g. "?sortdir=ASC&sort=x".
 * @returns {string|null} The decoded value, or null when the parameter is
 *   absent (previously the string "null" was returned).
 */
function getURLParameter(name, string) {
  // Escape regex metacharacters so names like "a.b" or "x[]" match literally.
  var escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Require start-of-string, "?" or "&" in front of the name so that "dir"
  // does not match inside "sortdir".
  var match = new RegExp('(?:^|[?&])' + escapedName + '=([^&]*)').exec(string);
  // decodeURIComponent, unlike decodeURI, also decodes %26, %3D and friends.
  return match ? decodeURIComponent(match[1]) : null;
}
// Take the clicked element's href and show the value of its "sort" parameter.
var string = $(this).attr("href");
alert(getURLParameter("sort",string));
Demo here http://jsfiddle.net/jbHa6/ You can change the var string value and play around.
EDIT
Removed the second example, since that code is not that good and does not serve the purpose.
Perhaps there is a better solution, but as a quick fix I would do something like this:
// Strip the two fixed parameter names, leaving "VALUE_OF_SORTDIR&VALUE_OF_SORT".
var url = $(this).attr("href").replace("?sortdir=", "").replace("sort=", "");
// Split once and store the two halves under their parameter names.
var pieces = url.split("&");
myArray.sort = pieces[1];
myArray.sortdir = pieces[0];
That solution only works if your URL always has the form ?sortdir=ASC&sort=Vendor_Name (same parameters, same order).
You could use jQuery BBQ's deparam function from here:
http://benalman.com/code/projects/jquery-bbq/examples/deparam/

Categories

Resources