Parsing BBCode in Javascript - javascript

I am using this (http://coursesweb.net/javascript/convert-bbcode-html-javascript_cs) as my script for parsing BBCode. I have extended the BBCodes that it can process, however I am encountering a problem when a newline immediately follows an opening tag, e.g.
[code]
code....
[/code]
The problem does not occur if the code is 'inline'
[code]code....[/code]
The regex being used to match what's inside these tags is (.*?) which I know does not match newlines. I have tried ([^\r\n]) to match newlines but this hasn't worked either.
I imagine it's a simple issue but I have little experience with regex so any help would be appreciated
EDIT: this is the full list of regex's I am using
// Maps a placeholder name (the text between the braces of e.g. {TEXT} or
// {URL}) to the regular-expression source that is substituted for it.
// Every entry is a string wrapped in exactly one outer capturing group.
// NOTE(review): these are string literals, so only doubled backslashes
// (e.g. "\\d") reach the regex engine as a backslash; single ones such as
// "\." or "\-" in LINK and EMAIL are consumed by the string literal — confirm
// that the resulting patterns are what was intended.
var tokens = {
// Either "<scheme>://..." or a host starting with "www.".
'URL' : '((?:(?:[a-z][a-z\\d+\\-.]*:\\/{2}(?:(?:[a-z0-9\\-._~\\!$&\'*+,;=:#|]+|%[\\dA-F]{2})+|[0-9.]+|\\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\\])(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:#|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:#\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:#\\/?|]+|%[\\dA-F]{2})*)?)|(?:www\\.(?:[a-z0-9\\-._~\\!$&\'*+,;=:#|]+|%[\\dA-F]{2})+(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:#|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:#\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:#\\/?|]+|%[\\dA-F]{2})*)?)))',
// One or more of a-z, 0-9, '-', '.', '/', then anything up to a quote or space.
'LINK' : '([a-z0-9\-\./]+[^"\' ]*)',
'EMAIL' : '((?:[\\w\!\#$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+\.)*(?:[\\w\!\#$\%\'\*\+\-\/\=\?\^\`{\|\}\~]|&)+#(?:(?:(?:(?:(?:[a-z0-9]{1}[a-z0-9\-]{0,62}[a-z0-9]{1})|[a-z])\.)+[a-z]{2,6})|(?:\\d{1,3}\.){3}\\d{1,3}(?:\:\\d{1,5})?))',
// Lazy run of any characters; "." does not match line terminators, so a
// value that spans several lines is not matched by this token.
'TEXT' : '(.*?)',
// One or more of: ASCII letters, digits, '-', '+', '.', ',', '_' and space.
'SIMPLETEXT' : '([a-zA-Z0-9-+.,_ ]+)',
// Same character set as SIMPLETEXT.
'INTTEXT' : '([a-zA-Z0-9-+,_. ]+)',
// One or more of: ASCII letters, digits, '-' and '_'.
'IDENTIFIER' : '([a-zA-Z0-9-_]+)',
// A run of lower-case letters, or '#' followed by characters from 0-9 / a-f.
'COLOR' : '([a-z]+|#[0-9abcdef]+)',
// One or more decimal digits.
'NUMBER' : '([0-9]+)',
// Exactly ONE character that is neither CR nor LF (there is no quantifier).
'ALL' : '([^\r\n])',
};
EDIT 2: Full JS for matching
// Matches placeholders such as {TEXT}, {URL} or {TEXT1}: letters/underscores
// followed by optional digits, wrapped in braces (case-insensitive, global).
var token_match = /{[A-Z_]+[0-9]*}/ig;

/**
 * Builds the regular expression that recognises one pattern string.
 *
 * Literal parts of `str` are escaped with `preg_quote`; every placeholder whose
 * name is a key of `tokens` is replaced by that token's regex source. Both
 * `preg_quote` and `tokens` are expected to be in scope.
 *
 * @param {string} str Pattern such as "[b]{TEXT}[/b]".
 * @returns {RegExp} A 'g' regex when `str` has no placeholder, otherwise 'gi'.
 */
var _getRegEx = function(str) {
  // String.prototype.match returns null (not an empty array) when nothing
  // matches, so fall back to [] instead of reading .length of null.
  var matches = str.match(token_match) || [];
  var replacement = '';
  var i, token, at;

  if (matches.length === 0) {
    return new RegExp(preg_quote(str), 'g'); // no tokens so return the escaped string
  }

  for (i = 0; i < matches.length; i += 1) {
    // Remove {, } and numbers from the token so it can match the keys in tokens
    token = matches[i].replace(/[{}0-9]/g, '');
    // Own-property check: names like "constructor" must not pick up
    // members inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(tokens, token) && tokens[token]) {
      at = str.indexOf(matches[i]);
      // Escape everything before the token, then append the token's regex
      replacement += preg_quote(str.slice(0, at)) + tokens[token];
      // Drop what has been consumed so the next token is searched in the rest
      str = str.slice(at + matches[i].length);
    }
  }

  replacement += preg_quote(str);
  return new RegExp(replacement, 'gi');
};
/**
 * Turns a template string into a String.prototype.replace replacement string
 * by swapping each known placeholder for a "$n" capture reference.
 *
 * The same placeholder text (e.g. "{TEXT1}") always gets the same number.
 * Placeholders whose name is not a key of `tokens` are left as literal text
 * and do not consume a number, because `_getRegEx` creates no capture group
 * for them either. `token_match` and `tokens` are expected to be in scope.
 *
 * @param {string} str Template such as '<a href="{URL}">{TEXT}</a>'.
 * @returns {string} The template with "$1", "$2", ... in place of placeholders.
 */
var _getTpls = function(str) {
  // match() yields null, not [], when there is no placeholder at all.
  var matches = str.match(token_match) || [];
  var replacement = '';
  var positions = {}; // placeholder text -> capture group number
  var next_position = 0;
  var hasOwn = Object.prototype.hasOwnProperty;
  var i, token, at;

  if (matches.length === 0) {
    return str; // no tokens so return the string
  }

  for (i = 0; i < matches.length; i += 1) {
    // Remove {, } and numbers from the token so it can match the keys in tokens
    token = matches[i].replace(/[{}0-9]/g, '');
    if (!(hasOwn.call(tokens, token) && tokens[token])) {
      continue; // unknown placeholder: keep it verbatim, assign no number
    }
    // figure out what $# to use ($1, $2)
    if (!hasOwn.call(positions, matches[i])) {
      next_position += 1;
      positions[matches[i]] = next_position;
    }
    at = str.indexOf(matches[i]);
    replacement += str.slice(0, at) + '$' + positions[matches[i]];
    str = str.slice(at + matches[i].length);
  }

  return replacement + str;
};

This does the trick for me: (updated this one too to avoid confusion)
\[code\]([\s\S]*?)\[\/code\]
See regexpal and enter the following:
[code]
code....
[/code]
[code]code.... [/code]
Update:
Fixed the regex to the following and this works in the Chrome Console for me:
/\[code\]([\s\S]*?)\[\/code\]/g.exec("[code]hello world \n[/code]")

In JavaScript regular expressions, `.` does not match newline characters, so `(.*?)` cannot match across lines. Instead you have to use the [\s\S] trick described in this SO answer. Perhaps?
// [\s\S] matches any character including newlines; the lazy *? stops at the
// first closing tag, and the closing tag needs the escaped slash.
/\[code\][\s\S]*?\[\/code\]/
Also, regular expressions probably aren't the best choice for parsing syntax; it gets extremely overcomplicated. I would suggest parsing the string and building an Abstract Syntax Tree, then rendering the HTML from that.

Related

Extract domain from a string using Javascript?

I have a string which includes many website URLs, but I want to extract only the URL that is outside the brackets [ ].
Can someone correct this ?
Note: the output must be www.google.com, and the domain name outside [ ] will not necessarily come at the end of the string.
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
// String can include https and instead of .com there can be .in
var arr = str.split("|");
/**
 * Runs the domain pattern against the start of `str`.
 *
 * The pattern allows an optional "http://" / "https://" and an optional
 * "www." prefix; capture group 1 is the name in front of the final
 * ".<2-10 letters or dots>" suffix.
 *
 * @param {string} str Text expected to begin with a host name.
 * @returns {?Array} The match array from String.prototype.match, or null.
 */
function domainName(str) {
  var pattern = /^(?:https?:\/\/)?(?:w{3}\.)?([a-z\d\.-]+)\.(?:[a-z\.]{2,10})(?:[\w\.-]*)*/;
  var found = str.match(pattern);
  if (found == null || found.length <= 0) {
    return null;
  }
  return found;
}
var domainname = domainName(str);
var domain = domainname;
console.log(domain);
Replace all occurrences of [[, followed by non-brackets, followed by ]] with a space:
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
const result = str.replace(/\[\[[^[\]]*\]\]/g, ' ');
console.log(result);
Then you can search for URLs in the replaced string.
As CertainPerformance suggests, you can exclude the URLs that are inside [ ] using replace, and then extract the domain name with a regex. Below is the code:
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
// Drop every [[...]] group so only the text outside the brackets remains.
str = str.replace(/\[\[[^[\]]*\]\]/g, '');
var ptrn = /^(?:https?:\/\/)?(?:w{3}\.)?([a-z\d\.-]+)\.(?:[a-z\.]{2,10})(?:[\w\.-]*)*/g;
var i, value, domain, len, array;
array = str.split("|");
len = array.length;
// Default result; only replaced when some segment actually matches.
domain = "Not Found";
for (i = 0; i < len; i++) {
  value = array[i].match(ptrn);
  if (value !== null) {
    domain = value[0];
    // Stop at the first hit: without this, a later non-matching segment
    // would overwrite a domain found in an earlier one.
    break;
  }
}
document.write("Domain is = ", domain);
Two main steps:
Create a regular expression that matches your desired pattern.
Use String.match()
Example:
// match all URLs
// let regex = /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+#)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+#)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%#\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/g;
// match only the URL outside of the brackets
let regex = /(((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+#)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+#)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%#\.\w_]*)#?(?:[\.\!\/\\\w]*))?))(([^[\]]+)(?:$|\[))/g;
/**
 * Applies the surrounding `regex` to `input`.
 *
 * @param {string} input Text to scan.
 * @returns {?Array<string>} Whatever String.prototype.match returns for
 *     `regex`: an array of matches, or null when nothing matches.
 */
function getUrlsFromText(input) {
  const found = input.match(regex);
  return found;
}
console.log(getUrlsFromText('[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3'));
Note that I borrowed the URL matching part of the regular expression from here. If you don't want the query string to be included (as it is on the google.com match), you can modify the regex as desired.
This can be achieved with split:
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
let ans=str.split("]")[6]
let finalAns=ans.split("&")[0]
console.log(finalAns)
// Extract the host part of a URL-like string.
var dirtySource = "https://example.com/subdirectory";
var dirtyPos = dirtySource.indexOf("://"); // works for http and https
// indexOf returns -1 when there is no scheme; in that case keep the whole
// string instead of cutting off its first two characters.
var cleanSource = dirtyPos === -1 ? dirtySource : dirtySource.slice(dirtyPos + 3);
cleanSource = cleanSource.split("/")[0]; // trim off subdirectories

Match or search string by comma separated values by jquery or js

I want to partially match a search string against the values of a comma-separated string (split into a JS array). My example string and search value are below:
var str = "host, gmail, yahoo";
var search = 'new#gmail.com';
i already tried as below:
if( str.split(',').indexOf(search) > -1 ) {
console.log('Found');
}
It should match with gmail for string new#gmail.com
i am using this with reference of: https://stackoverflow.com/a/13313857/2384642
There's a few issues here. Firstly, your input string has spaces after the comma, yet you're splitting by just the comma hence you'd get ' gmail' as a value, which would break the indexOf() result. Either remove the spaces, or use split(', ').
Secondly, you need to loop through the resulting array from the split() operation and check each value in the search string individually. You're also currently using indexOf() backwards, ie. you're looking for new#gmail.com within gmail. With these issues in mind, try this:
var str = "host,gmail,yahoo";
var search = 'new#gmail.com';
str.split(',').forEach(function(host) {
if (search.indexOf(host) != -1) {
console.log('Found');
}
});
Also note that you could define the array of hosts explicitly and avoid the need to split():
var hosts = ['host', 'gmail', 'yahoo'];
var search = 'new#gmail.com';
hosts.forEach(function(host) {
if (search.indexOf(host) != -1) {
console.log('Found');
}
});
As the split method returns an array, you have to iterate through that array and check for matches.
Here's a demo:
// added gmail.com to the string so you can see more matched results(gmail and gmail.com).
var str = "host, gmail, yahoo, gmail.com",
search = 'new#gmail.com',
splitArr = str.replace(/(\,\s+)/g, ',').split(','),
/* the replace method above is used to remove whitespace(s) after the comma. The str variable stays the same as the 'replace' method doesn't change the original strings, it returns the replaced one. */
l = splitArr.length,
i = 0;
for(; i < l; i++) {
if(search.indexOf(splitArr[i]) > -1 ) {
console.log('Found a match: "' + splitArr[i] + '" at the ' + i + ' index.\n');
}
}
As you can see, no substring in str contains the search value, so you need to invert the logic. Something like this:
var str = "host, gmail, yahoo";
var search = 'new#gmail.com';
var res = str.split(', ').filter(function(el) {
return search.indexOf(el) > -1;
});
console.log(res);
Declare the Array with this code so you don't need to separate it with the ','
var str = new Array ("host","gmail","yahoo");
To find the element, use something this
for (i = 0; i < str.length; ++i)
{
val = str[i];
if (val.substring(0) === "gmail")
{
res = val;
break;
}
}
//Use res (result) here
Note: This is my first answer, so please forgive me if there are some errors...
It should match with gmail for string new#gmail.com
In order to achieve your result you need to extract gmail from the search string.
You can achieve this with regex:
search.match( /\S+#(\S+)\.\S+/)[1]
var str = "host, gmail, yahoo, qwegmail";
var search = 'new#gmail.com';
if( str.split(', ').indexOf(search.match( /\S+#(\S+)\.\S+/)[1]) > -1 ) {
console.log(search + ': Found');
} else {
console.log(search + ': Not found');
}
search = 'asd#qwegmail.com';
if( str.split(', ').indexOf(search.match( /\S+#(\S+)\.\S+/)[1]) > -1 ) {
console.log(search + ': Found');
} else {
console.log(search + ': Not found');
}

encode parenthesis underscore tilt star with encodeURIComponent

encodeURIComponent escapes all characters except the following: - _ . ! ~ * ' ( )
But is it possible to extend the functionality to encode the above special characters as well?
I know i can do something like this:
encodeURIComponent(str).replace(/\(/g, "%28").replace(/\)/g, "%29");
but I want functionality like this, without using additional functions on the encodeURIComponent
encodeURIComponent(str);
You should create your own function.
You should create your own function, really.
If you really know what you're doing, go to step 1.
Don't say I didn't warn you; there be dragons here:
(function() {
var _fn = encodeURIComponent;
window.encodeURIComponent = function(str) {
return _fn(str).replace(/\(/g, "%28").replace(/\)/g, "%29");
};
}());
you can write your custom encoding function
/**
 * Percent-encodes only the characters listed in `iChars`; every other
 * character of `str` is copied through unchanged. Logs the encoded string and
 * its decodeURIComponent() round trip, then returns the encoded string.
 *
 * NOTE(review): this is method syntax with a type annotation; the enclosing
 * class is not visible here.
 * NOTE(review): decodeURIComponent throws URIError on malformed escapes, and
 * characters outside `iChars` (including a literal '%') are copied unencoded,
 * so the second log line can throw for such input — confirm this is acceptable.
 */
customEncodeURI(str : string){
var iChars = ':",_{}/\\'; // provide all the set of chars that you want
var encodedStr = ''
for (var i = 0; i < str.length; i++) {
if (iChars.indexOf(str.charAt(i)) != -1) {
// toString(16) is not zero-padded; every character currently in iChars has
// a code of 0x22 or above, so two hex digits are always produced.
var hex = (str.charCodeAt(i)).toString(16);
encodedStr += '%' + hex;
}else {
encodedStr += str[i];
}
}
console.log("Final encoded string is "+encodedStr);
console.log("Final decoded string is "+decodeURIComponent(encodedStr));
return encodedStr;
}

Live replacement for regular expressions with Javascript

I'm writing a code for live replacement of specific words in a text field as the user types.
I'm using regex and javascript:
The first array has the regular expressions to be found, and the second array has the words that should replace them.
source = new Array(/\srsrs\s/,/\sñ\s/,/\snaum\s/,/\svc\s/,/\scd\s/,/\sOq\s/,/\soke\s/,/\so\sq\s/,
/\soque\s/,/\soqe\s/,/\spq\s/,/\sq\s/,/\sp\/\s/g,/\spra\s/,/\sp\s/,/\stbm\s/,
/\stb\s/,/\std\s/,/\sblz\s/,/\saki\s/,/\svlw\s/,/\smara\s/,/\sqlq\s/,/\sqq\s/,
/\srpz\s/,/\smsm\s/,/\smto\s/,/\smtu\s/,/\sqro\s/,/\sqdo\s/,/\sqd\s/,/\sqnd\s/,
/\sqto\s/,/\sqm\s/,/\sjah\s/, /\sc\/\s/,/\scmg\s/,/\s\+\sou\s\-\s/,/\sflw\s/,
/\sxau\s/,/\sto\s/,/\sta\s/);
after = new Array("risos","não","não","você","cadê","o que","o que","o que","o que","o que","porque",
"que","para","para","para","também","também","tudo","beleza","aqui","valeu","maravilhoso",
"qualquer","qualquer","rapaz","mesmo","muito","muito","quero","quando","quando","quando",
"quanto","quem","Já","com","comego","mais ou menos","falow","tchau","estou","está");
This is the function that does the replacement:
function replacement(){
for(i=0; i<source.length; i++){
newtext = " "+document.getElementById("translation").value+" ";
console.log(newtext);
if(myregex = newtext.match(source[i])){
newafter = after[i];
rafael = myregex+" ";
document.getElementById("translation").value = document.getElementById("translation").value.replace(rafael, newafter);
}
}
}
My problem is every time the function is called to replace an expression with only one letter, the replacement is being made on the first occurrence of that letter, even within a word. I thought looking for that letter with \s before and after would solve it, but it didn't.
If you're looking only to match a word, you should put \b before and after (word boundary). This will ensure that you don't match parts of words. Also note that you are corrupting your regex by concatenating a string. Try this instead:
// `in` is a reserved word and cannot be used as a variable name, hence `text`.
var field = document.getElementById("translation");
var text = field.value;
if (text.charAt(text.length - 1) == " ") { // user has just finished typing a word
  // this avoids interrupting the word being typed
  var l = source.length, i;
  // source[i] is the regex to look for, after[i] its replacement word
  for (i = 0; i < l; i++) text = text.replace(source[i], after[i]);
  field.value = text;
}
You need to add the g (global) modifier to the regexes so that all occurrences are replaced, and use \b instead of \s to mark word boundaries.
source = new Array(/\brsrs\b/g,/\bñ\b/g, etc
On a side note, since all your regexes follow the same pattern it might be easier to just do:
source = new Array( 'rsr', 'ñ', 'naum', etc );
if( myregex = newtext.match( new Regexp( "\b"+source[i]+"\b", 'g' ) ) ) {
...
If by "live replacement" you mean calling the replacement function at each keystroke, then \b at the end will not help you; you should indeed use \s. However, in your replacement function you are adding a space to the text field value, so your single-character words are triggering the replacement.
Here is my refactoring of your code :
(function () { // wrap in immediate function to hide local variables
  // Each entry pairs a regex with its replacement word. Every regex is
  // anchored with \s$ so it only fires right after the user typed whitespace
  // behind the abbreviation.
  // `var` keeps the list local; a bare assignment would create a global.
  // NOTE(review): \b is ASCII-only, so a standalone "ñ" preceded by a space or
  // the start of the text has no word boundary in front of it — verify that
  // entry against real input.
  var source = [
    [/\brsrs\s$/, "risos"], // place reg exp and replacement next to each other
    [/\b(ñ|naum)\s$/, "não"], // note combined regexps
    [/\bvc\s$/, "você"]
    // ...
  ]; // note also use of array literals in place of new Array

  document.getElementById("translation").addEventListener('keyup', function (ev) {
    var t = this.value; // fetch text area value
    var m;
    var i = source.length;
    while (i--) { // for each possible match
      if ((m = t.match(source[i][0]))) { // does this one match ?
        // replace match : first remove the match string (m[0]) from the end of
        // the text string, then add the replacement word followed by a space
        this.value = t.slice(0, -m[0].length) + source[i][1] + ' ';
        return; // done
      }
    }
  }, false);
}) ();
And the fiddle is : http://jsfiddle.net/jFYuV
In a somewhat different style, you could create a function that encapsulated the list of substitutions:
var substitutions = {
"rsrs": "risos",
"ñ": "não",
"naum": "não",
"vc": "você",
// ...
};
/**
 * Creates a function that replaces every whole-word occurrence of a key of
 * `subs` with the corresponding value.
 *
 * "Whole word" means the key is neither preceded nor followed by a letter,
 * digit or underscore, where letters include accented Latin letters. The
 * built-in \b only knows ASCII word characters, so it cannot delimit keys
 * such as "ñ". Keys are escaped, so they may contain regex metacharacters
 * (e.g. "p/"), and longer keys win over shorter ones that share a prefix.
 *
 * @param {Object<string, string>} subs Map of abbreviation -> replacement.
 * @returns {function(string): string} The replacer.
 */
var createSubstitutionFunction = function(subs) {
  var keys = Object.keys(subs);
  if (keys.length === 0) {
    // An empty alternation would match empty strings and insert "undefined".
    return function(text) {
      return text;
    };
  }

  // Characters treated as part of a word: ASCII word characters plus the
  // Latin-1 / Latin Extended letters (× U+00D7 and ÷ U+00F7 excluded).
  var WORD = "A-Za-z0-9_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u024F";

  var alternatives = keys
    .slice()
    .sort(function(a, b) {
      return b.length - a.length; // longest first, so "p/" beats "p"
    })
    .map(function(key) {
      return key.replace(/[.*+?^${}()|[\]\\\/-]/g, "\\$&");
    })
    .join("|");

  // Group 1 captures the delimiter in front of the key (or the start of the
  // text) so it can be put back; the lookahead leaves the trailing delimiter
  // available for the next match.
  var regex = new RegExp("(^|[^" + WORD + "])(" + alternatives + ")(?![" + WORD + "])", "g");

  return function(text) {
    return text.replace(regex, function(match, lead, key) {
      return lead + subs[key];
    });
  };
};
var replacer = createSubstitutionFunction(substitutions);
You would use it like this:
replacer("Some text with rsrs and naum and more rsrs and vc")
// ==> "Some text with risos and não and more risos and você"

JavaScript Regex Help!

I am trying to match # tags within this string:
#sam #gt #channel:sam dfgfdh sam#sam
Now in regex testers this works #[\S]+ (with settings on JS testing) to pick out all strings starting with # so in them I get:
#sam #gt #channel:sam #sam
But then in browsers using this code:
function detect_extractStatusUsers(status){
var e = new RegExp('#[\S]+', 'i');
m = e.exec(status);
var s= "";
if (m != null) {
for (i = 0; i < m.length; i++) {
s = s + m[i] + "\n";
}
alert(s);
}
return true;
}
I can only get one single match of # (if I'm lucky, normally no match).
I must be missing something here and my eyes have just been looking at this for too long to see what it is.
Can anyone see what's wrong in this function?
Thanks,
You need to:
use the global search g setting
escape your \
use match instead of exec
var e = new RegExp('#[\\S]+', 'gi');
m = status.match(e);
You need to call exec repeatedly, until it returns null. Each time it will return a match object, containing all the captures for that match.
I've taken the liberty of rewriting your function the way I would have written it:
/**
 * Collects every "#tag" (a '#' followed by one or more non-whitespace
 * characters) found in `status` and, if there is at least one, shows them in
 * a single alert, one per line.
 *
 * @param {string} status Text to scan.
 */
function detect_extractStatusUsers(status){
  var rx = /(#[\S]+)/gi,
      match,
      tags = [];
  // exec() on a global regex resumes after the previous match and returns
  // null once there are no more. The regex variable is `rx`.
  while ((match = rx.exec(status)) !== null) {
    tags.push(match[1]);
  }
  if (tags.length > 0) {
    alert(tags.join('\n'));
  }
}

Categories

Resources