Regex to validate a comma separated list of unique numbers - javascript

I am trying to validate a comma separated list of numbers 1-7 unique (not repeating).
i.e.
2,4,6,7,1 is valid input.
2,2,6 is invalid
2 is valid
2, is invalid
1,2,3,4,5,6,7,8 is invalid ( only 7 number)
I tried ^[1-7](?:,[1-7])*$ but it's accepting repeating numbers
var data = [
'2,4,6,7,1',
'2,2,6',
'2',
'2,',
'1,2,3,2',
'1,2,2,3',
'1,2,3,4,5,6,7,8'
];
data.forEach(function(str) {
document.write(str + ' gives ' + /(?!([1-7])(?:(?!\1).)\1)^((?:^|,)[1-7]){1,7}$/.test(str) + '<br/>');
});

Regex are not suited for this. You should split the list into an array and try the different conditions:
function isValid(list) {
var arrList = list.split(",");
if (arrList.length > 7) { // if more than 7, there are duplicates
return false;
}
var temp = {};
for (var i in arrList) {
if (arrList[i] === "") return false; // if empty element, not valid
temp[arrList[i]] = "";
}
if (Object.keys(temp).length !== arrList.length) { // if they're not of same length, there are duplicates
return false;
}
return true;
}
console.log(isValid("2,4,6,7,1")); // true
console.log(isValid("2,2,6")); // false
console.log(isValid("2")); // true
console.log(isValid("2,")); // false
console.log(isValid("1,2,3,4,5,6,7,8")); // false
console.log(isValid("1,2,3")); // true
console.log(isValid("1,2,3,7,7")); // false

No RegEx is needed:
This is much more maintainable and explicit than a convoluted regular expression would be.
function isValid(a) {
var s = new Set(a);
s.delete(''); // for the hanging comma case ie:"2,"
return a.length < 7 && a.length == s.size;
}
var a = '2,4,6,7,1'.split(',');
alert(isValid(a)); // true
a = '2,2,6'.split(',');
alert(isValid(a)); // false
a = '2'.split(',');
alert(isValid(a)); // true
a = '2,'.split(',');
alert(isValid(a)); // false
'1,2,3,4,5,6,7,8'.split(',');
alert(isValid(a)); // false

You were pretty close.
^ # BOS
(?! # Validate no dups
.*
( [1-7] ) # (1)
.*
\1
)
[1-7] # Unrolled-loop, match 1 to 7 numb's
(?:
,
[1-7]
){0,6}
$ # EOS
var data = [
'2,4,6,7,1',
'2,2,6',
'2',
'2,',
'1,2,3,2',
'1,2,2,3',
'1,2,3,4,5,6,7,8'
];
data.forEach(function(str) {
document.write(str + ' gives ' + /^(?!.*([1-7]).*\1)[1-7](?:,[1-7]){0,6}$/.test(str) + '<br/>');
});
Output
2,4,6,7,1 gives true
2,2,6 gives false
2 gives true
2, gives false
1,2,3,2 gives false
1,2,2,3 gives false
1,2,3,4,5,6,7,8 gives false
For a number range that exceeds 1 digit, just add word boundary's around
the capture group and the back reference.
This isolates a complete number.
This particular one is numb range 1-31
^ # BOS
(?! # Validate no dups
.*
( # (1 start)
\b
(?: [1-9] | [1-2] \d | 3 [0-1] ) # number range 1-31
\b
) # (1 end)
.*
\b \1 \b
)
(?: [1-9] | [1-2] \d | 3 [0-1] ) # Unrolled-loop, match 1 to 7 numb's
(?: # in the number range 1-31
,
(?: [1-9] | [1-2] \d | 3 [0-1] )
){0,6}
$ # EOS
var data = [
'2,4,6,7,1',
'2,2,6',
'2,30,16,3',
'2,',
'1,2,3,2',
'1,2,2,3',
'1,2,3,4,5,6,7,8'
];
data.forEach(function(str) {
document.write(str + ' gives ' + /^(?!.*(\b(?:[1-9]|[1-2]\d|3[0-1])\b).*\b\1\b)(?:[1-9]|[1-2]\d|3[0-1])(?:,(?:[1-9]|[1-2]\d|3[0-1])){0,6}$/.test(str) + '<br/>');
});

Like other commenters, I recommend you to use something other than regular expressions to solve your problem.
I have a solution, but it is too long to be a valid answer here (answers are limited to 30k characters). My solution is actually a regular expression in the language-theory sense, and is 60616 characters long. I will show you here the code I used to generate the regular expression, it is written in Python, but easily translated in any language you desire. I confirmed that it is working in principle with a smaller example (that uses only the numbers 1 to 3):
^(2(,(3(,1)?|1(,3)?))?|3(,(1(,2)?|2(,1)?))?|1(,(3(,2)?|2(,3)?))?)$
Here's the code used to generate the regex:
def build_regex(chars):
if len(chars) == 1:
return list(chars)[0]
return ('('
+
'|'.join('{}(,{})?'.format(c, build_regex(chars - {c})) for c in chars)
+
')')
Call it like this:
'^' + build_regex(set("1234567")) + "$"
The concept is the following:
To match a single number a, we can use the simple regex /a/.
To match two numbers a and b, we can match the disjunction /(a(,b)?|b(,a)?)/
Similarily, to match n numbers, we match the disjunction of all elements, each followed by the optional match for the subset of size n-1 not containing that element.
Finally, we wrap the expression in ^...$ in order to match the entire text.

Edit:
Fixed error when repeating digit wasn't the first one.
One way of doing it is:
^(?:(?:^|,)([1-7])(?=(?:,(?!\1)[1-7])*$))+$
It captures a digit and then uses a uses a look-ahead to make sure it doesn't repeats itself.
^ # Start of line
(?: # Non capturing group
(?: # Non capturing group matching:
^ # Start of line
| # or
, # comma
) #
([1-7]) # Capture digit being between 1 and 7
(?= # Positive look-ahead
(?: # Non capturing group
, # Comma
(?!\1)[1-7] # Digit 1-7 **not** being the one captured earlier
)* # Repeat group any number of times
$ # Up to end of line
) # End of positive look-ahead
)+ # Repeat group (must be present at least once)
$ # End of line
var data = [
'2,4,6,7,1',
'2,2,6',
'2',
'2,',
'1,2,3,4,5,6,7,8',
'1,2,3,3,6',
'3,1,5,1,8',
'3,2,1'
];
data.forEach(function(str) {
document.write(str + ' gives ' + /^(?:(?:^|,)([1-7])(?=(?:,(?!\1)[1-7])*$))+$/.test(str) + '<br/>');
});
Note! Don't know if performance is an issue, but this does it in almost half the number of steps compared to sln's solution ;)

Related

Struggling with RegEx validation and formating for specfici ID format

I have couple specific string formatting i want to achieve for different entities:
Entity 1: 1111-abcd-1111 or 1111-abcd-111111
Entity 2: [10 any symbol or letter(all cap) or number]-[3 letters]
Entity 3: [3 letters all cap]-[3 any]-[5 number]
Not sure if Regex is best approach, because i also want to use this as validator when user starts typing the char's it will check against that Entity selected and then against it's RegEx
Here is a regex with some input strings:
const strings = [
'1111-abcd-1111', // match
'1111-abcd-111111', // match
'1111-abcd-1111111', // no match
'ABCS#!%!3!-ABC', // match
'ABCS#!%!3!-ABCD', // nomatch
'ABC-#A3-12345', // match
'ABC-#A3-1234' // no match
];
const re = /^([0-9]{4}-[a-z]{4}-[0-9]{4,6}|.{10}-[A-Za-z]{3}|[A-Z]{3}-.{3}-[0-9]{5})$/;
strings.forEach(str => {
console.log(str + ' => ' + re.test(str));
});
Result:
1111-abcd-1111 => true
1111-abcd-111111 => true
1111-abcd-1111111 => false
ABCS#!%!3!-ABC => true
ABCS#!%!3!-ABCD => false
ABC-#A3-12345 => true
ABC-#A3-1234 => false
Explanation of regex:
^ - anchor text at beginning, e.g. what follows must be at the beginning of the string
( - group start
[0-9]{4}-[a-z]{4}-[0-9]{4,6} - 4 digits, -, 4 lowercase letters, -, 4-6 digits
| - logical OR
.{10}-[A-Za-z]{3} - any 10 chars, -, 3 letters
| - logical OR
[A-Z]{3}-.{3}-[0-9]{5} - 3 uppercase letters, -, any 3 chars, -, 5 digits
) - group end
$ - anchor at end of string
Your definition is not clear; you can tweak the regex as needed.

What will be the best regex Expression for censoring email?

Hello I am stuck on a problem for censoring email in a specific format, but I am not getting how to do that, please help me!
Email : exampleEmail#example.com
Required : e***********#e******.com
Help me getting this in javascript,
Current code I am using to censor :
const email = exampleEmail#example.com;
const regex = /(?<!^).(?!$)/g;
const censoredEmail = email.replace(regex, '*');
Output: e**********************m
Please help me getting e***********#e******.com
You can use
const email = 'exampleEmail#example.com';
const regex = /(^.|#[^#](?=[^#]*$)|\.[^.]+$)|./g;
const censoredEmail = email.replace(regex, (x, y) => y || '*');
console.log(censoredEmail );
// => e***********#e******.com
Details:
( - start of Group 1:
^.| - start of string and any one char, or
#[^#](?=[^#]*$)| - a # and any one char other than # that are followed with any chars other than # till end of string, or
\.[^.]+$ - a . and then any one or more chars other than . till end of string
) - end of group
| - or
. - any one char.
The (x, y) => y || '*' replacement means the matches are replaced with Group 1 value if it matched (participated in the match) or with *.
If there should be a single # present in the string, you can capture all the parts of the string and do the replacement on the specific groups.
^ Start of string
([^\s#]) Capture the first char other than a whitespace char or # that should be unmodified
([^\s#]*) Capture optional repetitions of the same
# Match literally
([^\s#]) Capture the first char other than a whitespace char or # after it that should be unmodified
([^\s#]*) Capture optional repetitions of the same
(\.[^\s.#]+) Capture a dot and 1+ other chars than a dot, # or whitespace char that should be unmodified
$ End of string
Regex demo
In the replacement use all 5 capture groups, where you replace group 2 and 4 with *.
const regex = /^([^\s#])([^\s#]*)#([^\s#])([^\s#]*)(\.[^\s.#]+)$/;
[
"exampleEmail#example.com",
"test"
].forEach(email =>
console.log(
email.replace(regex, (_, g1, g2, g3, g4, g5) =>
`${g1}${"*".repeat(g2.length)}#${g3}${"*".repeat(g4.length)}${g5}`)
)
);

Input string test with a regex in JavaScript

I am trying to test a input string of format AWT=10:15;, based on this regex that I wrote: (([AWT]\w\S=)?(\d{0,9}:)?(\d{0,9});).
Problem 1: I am receiving this error: Uncaught TypeError: Cannot read property '0' of undefined when I try to match the string with the regex.
Problem 2: The string seems to come off as valid even after I enter this: AWT=10:15;12 which shouldn't be the case.
Here is my code:
var reg = new RegExp('(([AWT]\w\S=)?(\d{0,9}:)?(\d{0,9});)');
var x = $('td').find('.awt')[0].value;
console.log(x); // AWT=10:15;
console.log(String(x).match(reg)); // [";", ";", undefined, undefined, "", index: 9, input: "AWT=10:15;", groups: undefined]
if(String(x).match(reg)){
console.log("valid");
}else{
console.log("Invalid")
}
I was wondering if anyone can help me figure out the right regex for the string.
PS: The string needs to be in that exact format: (AWT=[0,9]:[0,9];).
Your regex (([AWT]\w\S=)?(\d{0,9}:)?(\d{0,9});) means:
( # start group 1
( # start group 2
[AWT] # 1 character A or W or T
\w # 1 word character <=> [A-Za-z0-9_]
\S # 1 non space character
= # equal sign
)? # end group 2, optional
( # start group 3
\d{0,9} # from 0 upto 9 digits
: # colon
)? # end group 3, optional
( # start group 4
\d{0,9} # from 0 upto 9 digits
) # end group 4
; # semicolon
) # end group 1
It matches AWT=10:15; as well as:
blahAWT=123456789:123456789;blah
Wx!=123;
12345;
Not sure it's what you want!
According to your requierement “The string needs to be in that exact format: AWT=[0,9]:[0,9];”, this regex will do the job: ^AWT=\d\d:\d\d;$
Explanation:
^ # beginning of string
AWT= # literally AWT=
\d\d # 2 digits
: # a colon
\d\d # 2 digits
; # a semicolon
$ # end of string
Use it this way:
var reg = new RegExp('^AWT=\\d\\d:\\d\\d;$');
or, better:
var reg = /^AWT=\d\d:\d\d;$/;
#user120242 is totally right about the cause of your bug.
I just want to add another option instead of escaping the characters. If you use /someRegex/ this is not necessary.
Moreover, if you are always expecting AWT at the beginning, this ([AWT]\w\S=) is not optimal. It means: either A, W or T, then a word character, then a non space character and then =. If it always starts with AWT= use (AWT=).
var reg = /(([AWT]\w\S=)?(\d{0,9}:)?(\d{0,9});)/
var x = $('td').find('.awt')[0].value;
console.log(x); // AWT=10:15;
console.log(String(x).match(reg));
if (String(x).match(reg)) {
console.log("valid");
} else {
console.log("Invalid")
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<table>
<tr>
<td><input class="awt" value="AWT=10:15;"></td>
</tr>
</table>
You need to escape backslashes in the string, or \w becomes w
You need to use ^$ to match on beginning and end, so it only matches if the full string matches
Another option depending on what you are trying to do, is to use negative lookahead (?!.*)
or a capture group behind the last (;.*) where you would check matches for the final match group for content and reject based on that.
console.log('(([AWT]\w\S=)?(\d{0,9}:)?(\d{0,9});)'); // this is not what you want
function matcher(){
var reg = new RegExp('^(([AWT]\\w\\S=)?(\\d{1,9}:)?(\\d{1,9});)$');
var x = $('td').find('input')[0].value;
console.log(x); // AWT=10:15;
console.log(x.match(reg)); // [";", ";", undefined, undefined, "", index: 9, input: "AWT=10:15;", groups: undefined]
if (reg.test(x)) {
console.log("valid");
} else {
console.log("Invalid")
}
}
matcher();
$('td').find('input').on('input',matcher);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<table><tr>
<td><input class="awt" value="AWT=10:15;"></td></tr></table>

Inverting a rather complex set of regexes

I'm sort of new to regular expressions, and none of the solutions I found online helped/worked.
I'm dealing with a one-line String in JavaScript, it'll contain five types of data mixed in.
A "#" followed by six numbers/letters (HTML color) (/#....../g)
A forward slash followed by any of a few specific characters (/\/(\+|\^|\-|#|!\+|_|#|\*|%|&|~)/g)
A "$" followed by a sequence of letters and a "|" (/\$([^\|]+)/g)
A "|" alone (/\|/g)
Alphanumeric characters that do not fall under any of these categories
The thing is, I have regexes to match the first four categories, that are important.
The problem is that I need a single Regex that I'll use to replace all the characters that DO NOT match for the first four regexes with a single character, such as "§".
Example:
This#00CC00 is green$Courier| and /^mono|spaced
§§§§#00CC00§§§§§§§§§$Courier|§§§§§/^§§§§|§§§§§§
I know I may be attacking this problem the wrong way, I'm rather new to regular expressions.
Essentially, how do I make a regex that means "anything that doesn't have any matches for regexes x, y, or z"?
Thank you for your time.
use this pattern
((#\w{6}|\/[\/\(\+\^\-]|\$\w+\||\|)*).
and replace w/ $1§
Downside is your preserved pattern has to be followed by at least one character
Demo
( # Capturing Group (1)
( # Capturing Group (2)
# # "#"
\w # <ASCII letter, digit or underscore>
{6} # (repeated {6} times)
| # OR
\/ # "/"
[\/\(\+\^\-] # Character Class [\/\(\+\^\-]
| # OR
\$ # "$"
\w # <ASCII letter, digit or underscore>
+ # (one or more)(greedy)
\| # "|"
| # OR
\| # "|"
) # End of Capturing Group (2)
* # (zero or more)(greedy)
) # End of Capturing Group (1)
. # Any character except line break
Code copied from Regex101
var re = /((#\w{6}|\/[\/\(\+\^\-]|\$\w+\||\|)*)./gm;
var str = 'This#00CC00 is green$Courier| and /^mono|spaced|\n';
var subst = '$1§';
var result = str.replace(re, subst);
This isn't as efficient as a working regular expression but it works. Basically it gets all of the matches and fills the parts between with § characters. One nice thing is you don't have to be a regular expression genius to update it, so hopefully more people can use it.
var str = 'This#00CC00 is green$Courier| and /^mono|spaced';
var patt=/#(\d|\w){6}|\/(\+|\^|\-|#|!\+|_|#|\*|%|&|~)|\$([^\|]+)\||\|/g;
var ret = "";
pos = [];
while (match=patt.exec(str)) {
pos.push(match.index);
pos.push(patt.lastIndex);
console.log(match.index + ' ' + patt.lastIndex);
}
for (var i=0; i<pos.length; i+=2) {
ret += Array(1+pos[i]- (i==0 ? 0 : pos[i-1])).join("§");
ret += str.substring(pos[i], pos[i+1]);
}
ret += Array(1+str.length-pos[pos.length-1]).join("§");
document.body.innerHTML = str +"<br>"+ret;
console.log(str);
console.log(ret);
demo here

Javascript regex assistance

I have the following javascript regex...
.replace("/^(a-zA-Z\-)/gi", '');
It isn't complete... in fact it's wrong. Essentially what I need to do is take a string like "XJ FC-X35 (1492)" and remove the () and it's contents and the whitespace before the parenthesis.
replace(/^(.*?)\s?\(([^)]+)\)$/gi, "$1$2")
Takes XJ FC-X35 (1492) and returns XJ FC-X351492.
Remove the $2 to turn XJ FC-X35 (1492) into XJ FC-X35, if that's what you wanted instead.
Long explanation
^ // From the start of the string
( // Capture a group ($1)
.*? // That contains 0 or more elements (non-greedy)
) // Finish group $1
\s? // Followed by 0 or 1 whitespace characters
\( // Followed by a "("
( // Capture a group ($2)
[ // That contains any characters in the following set
^) // Not a ")"
]+ // One or more times
) // Finish group $2
\)$ // Followed by a ")" followed by the end of the string.
Try this:
x = "XJ FC-X35 (1492)"
x.replace(/\s*\(.*?\)/,'');

Categories

Resources