Javascript regex urls that are not image extensions - javascript

I've got the following JavaScript which matches and replaces URLS with html links, however I need this to exclude urls which end in .png|.jpg|.jpeg|.gif
// Replace every http(s)/ftp URL found in `text`.
// The pattern is global, case-insensitive and multiline; its final character
// class omits ? ! : , . ; so trailing punctuation is not captured as part of a URL.
var urlMatcher = /(\b(?:https?|ftp):\/\/[a-z0-9-+&##\/%?=~_|!:,.;]*[a-z0-9-+&##\/%=~_|])/gim;
text = text.replace(urlMatcher, '$1');

You could just do a test beforehand:
// Skip the replacement entirely when `text` ends with an image extension.
// The `$` anchor means only the very end of `text` is inspected, so this
// guard suits a `text` that holds a single URL.
if (!text.match(/\.(png|jpg|jpeg|gif)$/)) {
    text = text.replace(
        /(\b(?:https?|ftp):\/\/[a-z0-9-+&##\/%?=~_|!:,.;]*[a-z0-9-+&##\/%=~_|])/gim,
        '$1'
    );
}
If you need to do multiple replacements, then you could use a custom replace function that checks the match against the image endings and acts accordingly. That would work like this:
// Extensions that mark a URL as an image; anchored so only the end of the URL is checked.
var imageRegex = /\.(png|jpg|jpeg|gif)$/;
// Visit every URL in `text`; the callback decides per match what to emit.
text = text.replace(
    /(\b(?:https?|ftp):\/\/[a-z0-9-+&##\/%?=~_|!:,.;]*[a-z0-9-+&##\/%=~_|])/gim,
    function (matchedUrl) {
        // Image URLs are returned untouched; every other URL gets the wrapper strings.
        return matchedUrl.match(imageRegex) ? matchedUrl : '' + matchedUrl + '';
    }
);

Related

Convert Text into Hyperlinks <a> and Image tags <img>

I am using this function to convert text into clickable links (<a href=""></a>) and image URLs into img tags (<img src="" />).
/**
 * Rewrites http(s) URLs in `text` that sit at the start of the input or
 * directly after a whitespace character.
 *
 * @param {string} text - Content to scan; a falsy value is treated as "".
 * @returns {string} The text with image URLs (png/jpg/gif/jpeg) turned into
 *     <img> tags and every other URL emitted between the wrapper strings.
 */
const parsecontent = (text) => {
    // Group 1 is the whitespace (or start of input) in front of the URL,
    // group 2 is the URL itself, which runs up to the next whitespace.
    const ishyperlink = /([^\S]|^)((https?\:\/\/)(\S+))/gi;
    const isimglink = /https?:\/\/.*\.(?:png|jpg|gif|jpeg)/i;
    const input = text || "";
    return input.replace(ishyperlink, (match, space, url) => {
        const markup = isimglink.test(url)
            ? '<img src="' + url + '" />'
            : '' + url + '';
        // Put the consumed leading whitespace back in front of the markup.
        return space + markup;
    });
};
Using like this
// Look up the element with id "content".
const div = document.querySelector("#content");
// Feed the element's current HTML through parsecontent and write the result straight back.
div.innerHTML = parsecontent(div.innerHTML);
It works fine if the content has proper spaces. But when links or image URLs are not separated by spaces, it fails. Can you please help me fix this?
Codepen here https://codepen.io/dagalti/pen/GRqpYLp
I came up with a regex that's tailored to satisfy your test cases.
This regex is how we will match for a URL:
((https?\:\/\/)([\w!"#$%&\'()*+,-./#:;=\^_`{|}~]*))
In your example, we have 2 urls together with no whitespace in between. What happens if we have a url and a non-url string with no whitespace between? We could terminate the match when it reaches a TLD (such as .com, .org, etc). To satisfy your example however, I am making the assumption that we are dealing with 2 urls separated by no whitespace. In that case, we want to terminate the match when we notice the start of a new url (?=https?):
([^\S]|^)((https?\:\/\/)([\w!"#$%&\'()*+,-./#:;=\^_`{|}~]*))(?=https?)
Next we want to match the urls that are surrounded by whitespace:
((https?\:\/\/)([\w!"#$%&\'()*+,-./#:;=\^_`{|}~]*))
Putting all this together we get this regex:
([^\S]|^)((https?\:\/\/)([\w!"#$%&\'()*+,-./#:;=\^_`{|}~]*))(?=https?)|((https?\:\/\/)([\w!"#$%&\'()*+,-./#:;=\^_`{|}~]*))
We have to adjust your replace logic a bit too:
/**
 * Converts http(s) URLs in `text` to markup, including URLs that directly
 * follow one another with no whitespace in between.
 *
 * @param {string} text - Content to scan; a falsy value is treated as "".
 * @returns {string} The text with image URLs (png/jpg/gif/jpeg) turned into
 *     <img> tags and every other URL emitted between the wrapper strings.
 */
const parsecontent = (text) => {
    // A URL is the scheme followed by URL characters, but it stops as soon as
    // another "http://" or "https://" begins, so back-to-back URLs are split.
    // Requiring "://" in the lookahead keeps a path such as "/https-guide"
    // in one piece. Whitespace before the URL is not part of the match, so it
    // stays in the text and never ends up inside the src attribute.
    const ishyperlink = /https?:\/\/(?:(?!https?:\/\/)[\w!"#$%&'()*+,\-./:;=^_`{|}~])*/gi;
    const isimglink = /https?:\/\/.*\.(?:png|jpg|gif|jpeg)/i;
    return (text || "").replace(ishyperlink, (url) => {
        if (isimglink.test(url)) {
            return '<img src="' + url + '" />';
        }
        // NOTE(review): the wrapper strings are empty here; presumably link
        // markup was intended around the URL — confirm.
        return '' + url + '';
    });
};
This regex is quite verbose, there may be more succinct ways of satisfying your test cases (I'm not a regexpert though)

How to clean string from word characters, Javascript

I'm trying to clean strings which has been transformed from word text but I'm stuck on removing special character '…'
By click on button "clean", script removes all dots and only one special character, however I need to remove all of them
Where is my mistake?
Here is my code and plunker with struggles
// Sample input: a mix of "…" (horizontal ellipsis) characters and plain periods.
$scope.string = "My transformed string ………….........…...."
// Cleans the text of every <em> element in the document.
$scope.removeDots = function () {
var em = document.getElementsByTagName('em');
var reg = /\./g;
var hellip = /…/g
angular.forEach(em, function (item) {
// Periods are handled by process().
if(item.innerText.match(reg)){
item.innerText = process(item.innerText)
}
// The pattern passed to replace() below is the string "…", not the `hellip`
// regex, so only the first "…" is removed on each call.
if (item.innerText.match(hellip)){
item.innerText = item.innerText.replace("…", "")
}
});
};
/**
 * Keeps the first period in `str` and strips every period after it.
 * A string without a period is returned unchanged.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The text with all but the first period removed.
 */
function process( str ) {
    // Group 1 is everything up to and including the first period,
    // group 2 is the remainder of the string.
    return str.replace( /^([^.]*\.)(.*)$/, ( whole, head, tail ) => {
        const tailWithoutDots = tail.split( '.' ).join( '' );
        return head + tailWithoutDots;
    });
}
There are a few problems here, but they can all be resolved by reducing the code to a single regex replace within process that handles both periods and … (ellipsis) characters:
// Runs process() over the text of every <em> element in the document.
$scope.removeDots = function () {
    var emphasized = document.getElementsByTagName('em');
    angular.forEach(emphasized, function (node) {
        var cleaned = process(node.innerText);
        node.innerText = cleaned;
    });
};
/**
 * Removes every period and every "…" character from `str`.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The text without periods or ellipsis characters.
 */
function process( str ) {
    // The g flag makes replace() act on every occurrence, not just the first.
    var dotsAndEllipses = /\.|…/g;
    return str.replace( dotsAndEllipses, '' );
}
});
Plunker demo
You replace every occurrence of . in process, but only replace … once.
I don't see why you don't just do something like .replace(/(\.|…)/g, ''); the g modifier makes sure every match is replaced.
You can do both replacements by first replacing the occurrences of … with one point (because it might be the only thing you find), and then replacing any sequence of points by one:
/**
 * Collapses any mix of "…" characters and periods into single periods.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The text with each run of points reduced to one ".".
 */
function process( str ) {
    // First turn each ellipsis into one point, since it may be the only thing found...
    var withPoints = str.replace(/…/g, '.');
    // ...then squeeze every run of two or more points down to one.
    return withPoints.replace(/\.\.+/g, '.');
}
var test = "My transformed string ………….........…....";
console.log(process(test));
One of the reasons your code did not replace everything, is that you used a string as find argument, which will result in one replacement only. By using the regular expression as find argument you can get the effect of the g modifier.

regular expression to replace links

I have following code to create html links in a plain text. This works fine but the link should not contain .png or .jpg
Any suggestions in adapting the regexp?
// Matches http, ftp and https URLs: scheme, dotted host, then an optional tail
// whose last character may not be ".", "," or ":".
var urlPattern = /(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?/gi;
// Returned function: wraps every URL found in `text` in an anchor whose target
// attribute is `target`. "$&" in the replacement string stands for the matched URL.
return function(text, target) {
var replace = text.replace(urlPattern, '<a target="' + target + '" href="$&">$&</a>');
return replace
};
You can add anchors and a look-ahead with alternatives to add the restriction:
// Matches a string that is, in its entirety, one http/ftp/https URL — unless
// it ends in ".png" or ".jpg". The negative lookahead scans to the end of the
// string; the escaped dot rejects only a real file extension, not a path that
// merely ends in the letters "png" or "jpg".
var urlPattern = /^(?!.*\.(?:png|jpg)$)(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?$/gi;

/**
 * Wraps `text` in an anchor tag when it is a non-image URL.
 *
 * @param {string} text - A string holding a single URL.
 * @param {string} target - Value for the anchor's target attribute.
 * @returns {string} The anchor markup, or `text` unchanged when it does not match.
 */
function repl (text, target) {
    // "$&" in the replacement string stands for the whole matched URL.
    return text.replace(urlPattern, '<a target="' + target + '" href="$&">$&</a>');
}
// A .gif URL matches the pattern, so this shows the anchor markup.
alert(repl("http://some.site.com/new/1.gif", "_blank"));
// A .png URL is rejected by the lookahead, so this shows the URL unchanged.
alert(repl("http://some.site.com/new/1.png", "_blank"));
The crucial part here is ^(?!.*(?:png|jpg)$): it makes the check start at the beginning of a string, and makes sure there is no png nor jpg at the end.
If you pass longer strings with URLs inside, you can use the following regex that assumes you have no spaces in your URLs:
// For URLs embedded in longer text (URLs are assumed to contain no spaces):
// the lookahead rejects a URL whose run of non-whitespace characters ends in
// ".png" or ".jpg". The escaped dot keeps URLs that merely end in the letters
// "png" or "jpg" eligible for linking.
var urlPattern = /(?!\S*\.(?:png|jpg)(?:$|\s))(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?/gi;

Replace Regex Multiple Matches between two strings

I'm trying to turn some text into html, but I'm having trouble building a table. Each cell is surrounded by brackets [].
I've already got the rows set:
<tr>[blue][red][yellow][purple][white]</tr>
Now I need to turn each cell into a <td>.
This would work fine:
.replace(/\[(.*?)\]/g,'<td>$1</td>')
but I don't want this to happen in other parts of the document, only in between <tr> tags.
This makes sense to me, but just doesn't work:
.replace(/(<tr>.*?)\[(.*?)\](.*?\<\/tr\>)/g,'$1<td>$2</td>$3')
here is a full code if you want to try it:
alert('<tr>[blue][red][yellow][purple][white]</tr>'.replace(/(<tr>.*?)\[(.*?)\](.*?\<\/tr\>)/g,'$1<td>$2</td>$3'));
it outputs:
<tr><td>blue</td>[red][yellow][purple][white]</tr>
expected output:
<tr><td>blue</td><td>red</td><td>yellow</td><td>purple</td><td>white</td></tr>
You can do this easily in two steps:
var str = "<div>[do not replace]</div><table><tr>[blue][red][yellow][purple][white]</tr></table>";
// Step 1: isolate each <tr ...>...</tr> span.
// Step 2: convert [cell] to <td>cell</td> only inside that span, so brackets
// elsewhere in the document are left alone.
str = str.replace(/(<tr[\s\S]*?>)([\s\S]*?)(<\/tr>)/g, (whole, openTag, rowBody, closeTag) => {
    const cells = rowBody.replace(/\[([\s\S]*?)\]/g, "<td>$1</td>");
    return openTag + cells + closeTag;
});
alert(str);
Note that I replaced what should have been . with [\s\S] to simulate the s regex option that JS unfortunately lacks.
A way that use a split/join in the replacement function:
var str = '<tr>[blue][red][yellow][purple][white]</tr>';
// Match a whole row made only of [cell] groups, then rebuild it with <td> cells.
var res = str.replace(/<tr>(?:\[[^\]]*\])+<\/tr>/g,
    function (m) {
        // Drop the leading "<tr>[" and the trailing "]</tr>" so that only
        // "blue][red]...][white" remains, then split on the "][" separators.
        var inner = m.slice('<tr>['.length, -']</tr>'.length);
        return '<tr><td>'
            + inner.split('][').join('</td><td>')
            + '</td></tr>'; // close the row with </tr>
    });
console.log(res);

Live replacement for regular expressions with Javascript

I'm writing a code for live replacement of specific words in a text field as the user types.
I'm using regex and javascript:
The first array has the regular expressions to be found, and the second array has the words that should replace them.
// Patterns to look for. Each abbreviation is wrapped in \s on both sides, and
// the entry at index i pairs with after[i]. Only the "p/" pattern carries the g flag.
source = new Array(/\srsrs\s/,/\sñ\s/,/\snaum\s/,/\svc\s/,/\scd\s/,/\sOq\s/,/\soke\s/,/\so\sq\s/,
/\soque\s/,/\soqe\s/,/\spq\s/,/\sq\s/,/\sp\/\s/g,/\spra\s/,/\sp\s/,/\stbm\s/,
/\stb\s/,/\std\s/,/\sblz\s/,/\saki\s/,/\svlw\s/,/\smara\s/,/\sqlq\s/,/\sqq\s/,
/\srpz\s/,/\smsm\s/,/\smto\s/,/\smtu\s/,/\sqro\s/,/\sqdo\s/,/\sqd\s/,/\sqnd\s/,
/\sqto\s/,/\sqm\s/,/\sjah\s/, /\sc\/\s/,/\scmg\s/,/\s\+\sou\s\-\s/,/\sflw\s/,
/\sxau\s/,/\sto\s/,/\sta\s/);
// Replacement words, index-aligned with `source` (both arrays hold 42 entries).
after = new Array("risos","não","não","você","cadê","o que","o que","o que","o que","o que","porque",
"que","para","para","para","também","também","tudo","beleza","aqui","valeu","maravilhoso",
"qualquer","qualquer","rapaz","mesmo","muito","muito","quero","quando","quando","quando",
"quanto","quem","Já","com","comego","mais ou menos","falow","tchau","estou","está");
This is the function that does the replacement:
// Walks every pattern in `source` and, when one matches, substitutes the word
// at the same index in `after` into the "translation" field.
function replacement(){
for(i=0; i<source.length; i++){
// Re-read the field on every iteration and pad it with one space on each side
// (presumably so the \s-delimited patterns can match at the edges — confirm).
newtext = " "+document.getElementById("translation").value+" ";
console.log(newtext);
// match() returns an array or null; the assignment's result doubles as the condition.
if(myregex = newtext.match(source[i])){
newafter = after[i];
// Concatenation coerces the match array to a string before the space is appended.
rafael = myregex+" ";
// replace() is given a string pattern here and is applied to the unpadded field value,
// so it changes only the first occurrence of that string.
document.getElementById("translation").value = document.getElementById("translation").value.replace(rafael, newafter);
}
}
}
My problem is every time the function is called to replace an expression with only one letter, the replacement is being made on the first occurrence of that letter, even within a word. I thought looking for that letter with \s before and after would solve it, but it didn't.
If you're looking only to match a word, you should put \b before and after (word boundary). This will ensure that you don't match parts of words. Also note that you are corrupting your regex by concatenating a string. Try this instead:
// `in` is a reserved word in JavaScript and cannot name a variable,
// so the field value is held in `input`.
var input = document.getElementById("translation").value;
if (input.charAt(input.length - 1) === " ") { // user has just finished typing a word
    // this avoids interrupting the word being typed
    for (var i = 0, l = source.length; i < l; i++) {
        // Pass the regex itself to replace(); do not concatenate it with a string.
        input = input.replace(source[i], after[i]);
    }
    document.getElementById("translation").value = input;
}
You need to add a g (global) modifier to the regexes so that they replace all occurrences, and use \b instead of \s to mark word boundaries.
source = new Array(/\brsrs\b/g,/\bñ\b/g, etc
On a side note, since all your regexes follow the same pattern it might be easier to just do:
source = [ 'rsrs', 'ñ', 'naum' /* , etc */ ];
// The constructor is spelled RegExp, and inside a string literal the word
// boundary must be written "\\b" ("\b" alone is a backspace character).
// NOTE: \b only treats ASCII letters, digits and "_" as word characters, so an
// entry such as 'ñ' will not match with this pattern.
if( myregex = newtext.match( new RegExp( "\\b"+source[i]+"\\b", 'g' ) ) ) {
    // ...
}
If by "live replacement" you mean calling the function replacement at each keystroke, then \b at the end will not help you; you should indeed use \s. However, in your replacement function you are adding a space to the text field value, so your single-character words are triggering the replacement.
Here is my refactoring of your code :
(function () { // wrap in immediate function to hide local variables
    // Each entry places a regexp next to its replacement. Every pattern ends in
    // \s$ so it only fires on a word that has just been completed at the very
    // end of the text. The lookbehind stands in for a leading \b: \b treats
    // only ASCII letters, digits and "_" as word characters, so it can never
    // match in front of a standalone "ñ".
    var source = [
        [/(?<![\p{L}\p{N}_])rsrs\s$/u, "risos"],
        [/(?<![\p{L}\p{N}_])(ñ|naum)\s$/u, "não"], // note combined regexps
        [/(?<![\p{L}\p{N}_])vc\s$/u, "você"]
        // ...
    ]; // note also the use of array literals in place of new Array

    document.getElementById("translation").addEventListener('keyup', function (ev) {
        var t = this.value; // fetch text area value
        var m;
        var i = source.length;
        while (i--) { // for each possible match
            if ((m = t.match(source[i][0]))) { // does this one match ?
                // replace match : first remove the match string (m[0]) from the end of
                // the text string, then add the replacement word followed by a space
                this.value = t.slice(0, -m[0].length) + source[i][1] + ' ';
                return; // done
            }
        }
    }, false);
})();
And the fiddle is : http://jsfiddle.net/jFYuV
In a somewhat different style, you could create a function that encapsulated the list of substitutions:
// Abbreviation -> full word.
var substitutions = {
    "rsrs": "risos",
    "ñ": "não",
    "naum": "não",
    "vc": "você",
    // ...
};

/**
 * Builds a function that replaces every whole-word occurrence of a key of
 * `subs` with the value stored under that key.
 *
 * @param {Object<string, string>} subs - Map from abbreviation to replacement.
 *     Only the object's own enumerable keys are used.
 * @returns {function(string): string} A function that applies all substitutions to a text.
 */
var createSubstitutionFunction = function(subs) {
    var keys = Object.keys(subs);
    // With no keys the alternation would be empty and match everywhere; do nothing.
    if (keys.length === 0) {
        return function(text) {
            return text;
        };
    }
    var alternatives = keys
        .slice()
        // Longest first, so a key that is a prefix of another cannot win.
        .sort(function(a, b) { return b.length - a.length; })
        // Escape regex metacharacters so keys such as "c/" or "+ ou -" are literal.
        .map(function(key) { return key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); });
    // \b only treats ASCII letters, digits and "_" as word characters, so it
    // never matches around a standalone "ñ". Unicode-aware lookarounds give
    // the same whole-word behaviour for any letter.
    var regex = new RegExp(
        "(?<![\\p{L}\\p{N}_])(?:" + alternatives.join("|") + ")(?![\\p{L}\\p{N}_])",
        "gu"
    );
    return function(text) {
        return text.replace(regex, function(match) {
            return subs[match];
        });
    };
};
var replacer = createSubstitutionFunction(substitutions);
You would use it like this:
replacer("Some text with rsrs and naum and more rsrs and vc")
// ==> "Some text with risos and não and more risos and você"

Categories

Resources