Regex to validate a texarea input which must be URLs separated by new lines

Regex to validate a texarea input which must be URLs separated by new lines - javascript

I am trying to create a regex which will ultimately be used with Google Forms to validate a texarea input.
The rule is,
Input area can have one or more URLs (http or https)
Each URL must be separated either by one or more new lines
Each line which has text, must be a single valid URL
Last URL may have or may not have new line character/s after it
Till now, I have written this regex ^(https?://.+[\r\n]+)*(https?://.+[\r\n]+?)$ but the problem is that if a line has more than 1 url, it validates that too.
Here is my testing playground: http://goo.gl/YPdvBH.

Here is what you are looking for
Demo , Demo with your URLS
function validate(ele) {
str = ele.value;
str = str.replace(/\r/g, "");
while (/\s\n/.test(str)) {
str = str.replace(/\s\n/g, "\n");
}
while (/\n\n/.test(str)) {
str = str.replace(/\n\n/g, "\n");
}
ele.value = str;
str = str.replace(/\n/g, "_!_&_!_").split("_!_&_!_")
var result = [], counter = 0;
for (var i = 0; i < str.length; i++) {
str[i] = str[i].replace(/(?:(?:^|\n)\s+|\s+(?:$|\n))/g, '').replace(/\s+/g, ' ');
if(str[i].length !== 0){
if (isValidAddress(str[i])) {
result.push(str[i]);
}
counter += 1;
}
}
function isValidAddress(s) {
return /^(https?|ftp):\/\/(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*#)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|\/|\?)*)?$/i.test(s)
}
return (result.length === str.length);
}
var ele = document.getElementById('urls');
validate(ele);

This is closer to the regex you are looking for:
^(https?://[\S]+[\r\n]+)*(https?://[\S]+[\r\n]+?)$
The difference between your regex and this one is that you use .+ which will match all characters except newline whereas I use [\S]+ (note it is a capital S) which will match all non-whitespace characters. So, this doesn't match more than one token on one line. Hence, on each line you can match at max one token and that must be of the form that you have defined.
For a regex to match a single URL, look at this question on StackOverflow:
What is the best regular expression to check if a string is a valid URL?
I don't know whether google-forms have a length limit. But if they have, it is sure to almost bounce into it.

If i understand right - in your regexp missing m flag for multiline, so you need something like this
/^(https?://.+this your reg exp for one url)$/m
sample with regexp from Javascript URL validation regex
/^(ht|f)tps?:\/\/[a-z0-9-\.]+\.[a-z]{2,4}\/?([^\s<>\#%"\,\{\}\\|\\\^\[\]`]+)?$/m

Related

String replace a url with part of the same url in javascript

I have string that contains a random url:
http://google.com/vocab/prefix#Billy
That needs to be transformed so that everything up to, and including the first # is replaced with the value between the last / and the first # followed by a :.
The result would be:
prefix:Billy
More examples:
http://some.url/a/path#elephant --> path:elephant
http://random.com/cool/black/beans/thing#Bob --> thing:bob
I understand how to capture the prefix part /([^\/]+(?=#))/, but I'm struggling to do a string replace because I can't figure out how to capture the part I need to replace.
myString.replace(/([^\/]+(?=#))/, '$1:')
I would prefer to use string.replace with regex if at all possible

When using replace method you need to match all the patterns you want to replace instead of just the part you need to keep; Here are two options:
let s = 'http://google.com/vocab/prefix#Billy'
// using greedy regex
console.log(s.replace(/.*\/([^#]+)#/, '$1:'))
// adapted from OP's attempt
console.log(s.replace(/.*?([^\/]+?)#/, '$1:'))
Note .* part to match the substring you want to discard, () to capture the pattern you want to keep, then reformat the output.

Try this code:
var myString = "http://google.com/vocab/prefix#Billy";
var hashIndex = myString.indexOf("#"); // find where the "#" is
for(var i = hashIndex; i > 0; i--) { // loop from "#" index *back* to closest "/" symbol
if(myString[i] == "/") break; // break loop when "/" is found
}
myString = myString.replace("#", ":"); // replace "#" with ":" as in your example
console.log(myString.substring(i, hashIndex); // output
Shortened:
var myString = "http://google.com/vocab/prefix#Billy".replace("#",":");
for(var i = myString.indexOf(":"); i > 0; i--) { if(myString[i] == "/") break; }
console.log(myString.substring(i, myString.indexOf(":");

How to remove string between two characters every time they occur [duplicate]

This question already has answers here:
Strip HTML from Text JavaScript
(44 answers)
removing html tags from string
(3 answers)
Closed 7 years ago.
I need to get rid of any text inside < and >, including the two delimiters themselves.
So for example, from string
<brev-y>th</brev-y><sw-ex>a</sw-ex><sl>t</sl>
I would like to get this one
that
This is what i've tried so far:
var str = annotation.split(' ');
str.substring(str.lastIndexOf("<") + 1, str.lastIndexOf(">"))
But it doesn't work for every < and >.
I'd rather not use RegEx if possible, but I'm happy to hear if it's the only option.

You can simply use the replace method with /<[^>]*>/g.It matches < followed by [^>]* any amount of non> until > globally.
var str = '<brev-y>th</brev-y><sw-ex>a</sw-ex><sl>t</sl>';
str = str.replace(/<[^>]*>/g, "");
alert(str);

For string removal you can use RegExp, it is ok.
"<brev-y>th</brev-y><sw-ex>a</sw-ex><sl>t</sl>".replace(/<\/?[^>]+>/g, "")

Since the text you want is always after a > character, you could split it at that point, and then the first character in each String of the array would be the character you need. For example:
String[] strings = stringName.split("<");
String word = "";
for(int i = 0; i < strings.length; i++) {
word += strings[i].charAt(0);
}
This is probably glitchy right now, but I think this would work. You don't need to actually remove the text between the "<>"- just get the character right after a '>'

Using a regular expression is not the only option, but it's a pretty good option.
You can easily parse the string to remove the tags, for example by using a state machine where the < and > characters turns on and off a state of ignoring characters. There are other methods of course, some shorter, some more efficient, but they will all be a few lines of code, while a regular expression solution is just a single replace.
Example:
function removeHtml1(str) {
return str.replace(/<[^>]*>/g, '');
}
function removeHtml2(str) {
var result = '';
var ignore = false;
for (var i = 0; i < str.length; i++) {
var c = str.charAt(i);
switch (c) {
case '<': ignore = true; break;
case '>': ignore = false; break;
default: if (!ignore) result += c;
}
}
return result;
}
var s = "<brev-y>th</brev-y><sw-ex>a</sw-ex><sl>t</sl>";
console.log(removeHtml1(s));
console.log(removeHtml2(s));

There are several ways to do this. Some are better than others. I haven't done one lately for these two specific characters, so I took a minute and wrote some code that may work. I will describe how it works. Create a function with a loop that copies an incoming string, character by character, to an outgoing string. Make the function a string type so it will return your modified string. Create the loop to scan from incoming from string[0] and while less than string.length(). Within the loop, add an if statement. When the if statement sees a "<" character in the incoming string it stops copying, but continues to look at every character in the incoming string until it sees the ">" character. When the ">" is found, it starts copying again. It's that simple.
The following code may need some refinement, but it should get you started on the method described above. It's not the fastest and not the most elegant but the basic idea is there. This did compile, and it ran correctly, here, with no errors. In my test program it produced the correct output. However, you may need to test it further in the context of your program.
string filter_on_brackets(string str1)
{
string str2 = "";
int copy_flag = 1;
for (size_t i = 0 ; i < str1.length();i++)
{
if(str1[i] == '<')
{
copy_flag = 0;
}
if(str1[i] == '>')
{
copy_flag = 2;
}
if(copy_flag == 1)
{
str2 += str1[i];
}
if(copy_flag == 2)
{
copy_flag = 1;
}
}
return str2;
}

Javascript Remove strings in beginning and end

base on the following string
...here..
..there...
.their.here.
How can i remove the . on the beginning and end of string like the trim that removes all spaces, using javascript
the output should be
here
there
their.here

These are the reasons why the RegEx for this task is /(^\.+|\.+$)/mg:
Inside /()/ is where you write the pattern of the substring you want to find in the string:
/(ol)/ This will find the substring ol in the string.
var x = "colt".replace(/(ol)/, 'a'); will give you x == "cat";
The ^\.+|\.+$ in /()/ is separated into 2 parts by the symbol | [means or]
^\.+ and \.+$
^\.+ means to find as many . as possible at the start.
^ means at the start; \ is to escape the character; adding + behind a character means to match any string containing one or more that character
\.+$ means to find as many . as possible at the end.
$ means at the end.
The m behind /()/ is used to specify that if the string has newline or carriage return characters, the ^ and $ operators will now match against a newline boundary, instead of a string boundary.
The g behind /()/ is used to perform a global match: so it find all matches rather than stopping after the first match.
To learn more about RegEx you can check out this guide.

Try to use the following regex
var text = '...here..\n..there...\n.their.here.';
var replaced = text.replace(/(^\.+|\.+$)/mg, '');

Here is working Demo
Use Regex /(^\.+|\.+$)/mg
^ represent at start
\.+ one or many full stops
$ represents at end
so:
var text = '...here..\n..there...\n.their.here.';
alert(text.replace(/(^\.+|\.+$)/mg, ''));

Here is an non regular expression answer which utilizes String.prototype
String.prototype.strim = function(needle){
var first_pos = 0;
var last_pos = this.length-1;
//find first non needle char position
for(var i = 0; i<this.length;i++){
if(this.charAt(i) !== needle){
first_pos = (i == 0? 0:i);
break;
}
}
//find last non needle char position
for(var i = this.length-1; i>0;i--){
if(this.charAt(i) !== needle){
last_pos = (i == this.length? this.length:i+1);
break;
}
}
return this.substring(first_pos,last_pos);
}
alert("...here..".strim('.'));
alert("..there...".strim('.'))
alert(".their.here.".strim('.'))
alert("hereagain..".strim('.'))
and see it working here : http://jsfiddle.net/cettox/VQPbp/

Slightly more code-golfy, if not readable, non-regexp prototype extension:
String.prototype.strim = function(needle) {
var out = this;
while (0 === out.indexOf(needle))
out = out.substr(needle.length);
while (out.length === out.lastIndexOf(needle) + needle.length)
out = out.slice(0,out.length-needle.length);
return out;
}
var spam = "this is a string that ends with thisthis";
alert("#" + spam.strim("this") + "#");
Fiddle-ige

Use RegEx with javaScript Replace
var res = s.replace(/(^\.+|\.+$)/mg, '');

We can use replace() method to remove the unwanted string in a string
Example:
var str = '<pre>I'm big fan of Stackoverflow</pre>'
str.replace(/<pre>/g, '').replace(/<\/pre>/g, '')
console.log(str)
output:
Check rules on RULES blotter

Why is my RegExp ignoring start and end of strings?

I made this helper function to find single words, that are not part of bigger expressions
it works fine on any word that is NOT first or last in a sentence, why is that?
is there a way to add "" to regexp?
String.prototype.findWord = function(word) {
var startsWith = /[\[\]\.,-\/#!$%\^&\*;:{}=\-_~()\s]/ ;
var endsWith = /[^A-Za-z0-9]/ ;
var wordIndex = this.indexOf(word);
if (startsWith.test(this.charAt(wordIndex - 1)) &&
endsWith.test(this.charAt(wordIndex + word.length))) {
return wordIndex;
}
else {return -1;}
}
Also, any improvement suggestions for the function itself are welcome!
UPDATE: example: I want to find the word able in a string, I waht it to work in cases like [able] able, #able1 etc.. but not in cases that it is part of another word like disable, enable etc

A different version:
String.prototype.findWord = function(word) {
return this.search(new RegExp("\\b"+word+"\\b"));
}
Your if will only evaluate to true if endsWith matches after the word. But the last word of a sentence ends with a full stop, which won't match your alphanumeric expression.

Did you try word boundary -- \b?
There is also \w which match one word character ([a-zA-Z_]) -- this could help you too (depends on your word definition).
See RegExp docs for more details.

If you want your endsWith regexp also matches the empty string, you just need to append |^$ to it:
var endsWith = /[^A-Za-z0-9]|^$/ ;
Anyway, you can easily check if it is the beginning of the text with if (wordIndex == 0), and if it is the end with if (wordIndex + word.length == this.length).
It is also possible to eliminate this issue by operating on a copy of the input string, surrounded with non-alphanumerical characters. For example:
var s = "#" + this + "#";
var wordIndex = this.indexOf(word) - 1;
But I'm afraid there is another problems with your function:
it would never match "able" in a string like "disable able enable" since the call to indexOf would return 3, then startsWith.test(wordIndex) would return false and the function would exit with -1 without searching further.
So you could try:
String.prototype.findWord = function (word) {
var startsWith = "[\\[\\]\\.,-\\/#!$%\\^&\*;:{}=\\-_~()\\s]";
var endsWith = "[^A-Za-z0-9]";
var wordIndex = ("#"+this+"#").search(new RegExp(startsWith + word + endsWith)) - 1;
if (wordIndex == -1) { return -1; }
return wordIndex;
}

How to find if a text contains url string

How can I find if text contains a url string. I mean if I have
Sometexthttp://daasddas some text
I want http://daasddas to be achored or maked as a link wit javascript

function replaceURLWithHTMLLinks(text)
{
var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return text.replace(exp,"<a href='$1'>$1</a>");
}

While the code above works good if all given URLs are full (http://mydomain.com), I had problems parsing a URL like:
www.mydomain.com
i.e. without a protocol.
So I added some simple code to the function:
var exp = /(\b(((https?|ftp|file|):\/\/)|www[.])[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
var temp = text.replace(exp,"$1");
var result = "";
while (temp.length > 0) {
var pos = temp.indexOf("href=\"");
if (pos == -1) {
result += temp;
break;
}
result += temp.substring(0, pos + 6);
temp = temp.substring(pos + 6, temp.length);
if ((temp.indexOf("://") > 8) || (temp.indexOf("://") == -1)) {
result += "http://";
}
}
return result;
If someone should fine a more optimal solution to add a default protocol to URLs, let me know!

You have to use regex(Regular expressions) to find URL patterns in blocks of text.
Here's a link to same question and answers:
Regular Expression to find URLs in block of Text (Javascript)

I tweaked dperinis regex-url script so that a URL embedded in a string can be found. It will not find google.com, this is necessary if it's a user input field, the user might leave out the whitespace after a period/full stop. It will also find www.google.com, since hardly anyone types the protocol.
(?:((?:https?|ftp):\/\/)|ww)(?:\S+(?::\S*)?#)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?
I tested it on www.regextester.com, it worked for me, if you encounter a problem, please comment.

you can use a regular expression to find an URL and replace it by the same with a leading and a trailing tag

Many of the solutions start getting very complex and hard to work with a variety of situations. Here's a function I created to capture any URL beginning with http/https/ftp/file/www. This is working like a charm for me, the only thing it doesn't add a link to is user entered URL's without an http or www at the beginning (i.e. google.com). I hope this solution is helpful for somebody.
function convertText(txtData) {
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
txtData = txtData.replace(urlRegex, '$1');
var urlRegex =/(\b(\swww).[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
txtData = txtData.replace(urlRegex, ' $1');
var urlRegex =/(>\swww)/ig;
txtData = txtData.replace(urlRegex, '>www');
var urlRegex =/(\"\swww)/ig;
txtData = txtData.replace(urlRegex, '"http://www');
return txtData;
}

function replaceURLWithHTMLLinksHere(text)
{
var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return text.replace(exp,"<a href='$1'>$1</a>");
}
Okay we got this regular expresion here in function.
/(\b(https?|ftp|file)://[-A-Z0-9+&##/%?=~|!:,.;]*[-A-Z0-9+&##/%=~|])/ig
Lets understand this.
/ / this is how a regex starts.
\b > is maching https or ftp or file that is unique and is in the start of string. these keywords should not have any character attatched to them in
begining like bbhttps or bbhttp it will not match these otherwise.
https? > here ? means zero or one of preceding character or group. In this case s is optional.
| > match one out of given just like OR.
() > create group to be matched
/ > means the next character is special and is not to be interpreted literally. For example, a 'b' without a preceding '\' generally matches lowercase
'b's wherever they occur. But a '\b' by itself doesn't match any character
[] > this is Character Classes or Character Sets. It is used to have a group of characters and only one character out of all will be present at a time.
[-A-Z0-9+&##/%?=~_|!:,.;]* > zero or more occurrences of the preceding element. For example, b*c matches "c", "bc", "bbc", "bbbc", and so on.
[-A-Z0-9+&##/%=~_|] > means one charactor out of these all.
i > Case-insensitive search.
g > Global search.

function replaceURLWithLinks(text){
var text = "";
text= text.replace(/\r?\n/g, '<br />');
var result = URI.withinString(text, function(url) {
return "<a href='"+url+"' target='_blank'>" + url + "</a>";
});
}

Develop Reference

JavaScript is the programming language of the Web.

Regex to validate a texarea input which must be URLs separated by new lines - javascript

If i understand right - in your regexp missing m flag for multiline, so you need something like this /^(https?://.+this your reg exp for one url)$/m sample with regexp from Javascript URL validation regex /^(ht|f)tps?:\/\/[a-z0-9-\.]+\.[a-z]{2,4}\/?([^\s<>\#%"\,\{\}\\|\\\^\[\]`]+)?$/m

Related

String replace a url with part of the same url in javascript

How to remove string between two characters every time they occur [duplicate]

Javascript Remove strings in beginning and end

Why is my RegExp ignoring start and end of strings?

How to find if a text contains url string

Categories

Resources