String manipulations with Javascript - javascript

I have this string:
Table {is {red|blue|orange|white|green-{yellow|black}} |has {{twenty|thirty}two|{{two hundered and |three hundered and }fourty |fifty }three|four} legs} and is placed { in corner | in the middle } of office and {printer|phone} is {{gray-|}black|white}.
I want some data structure that I would be able to work with it, Can you suggest something?
This is my try:
var matches = $scope.fileContent.match(/{([^}]+)}/g);
for (var i = 0; i < matches.length; i++) {
console.log(matches[i]);
}
I want random sentences
Possible results:
- Table is blue and is placed in corner of office and printer is black.
- Table has three hundered and fourty three legs and is placed in the middle of office and phone is gray-black.

A grammar for this sort of sentence structure would be:
SENTENCE := PARTIAL optional SENTENCE
PARTIAL := TEXT or BRANCH
BRANCH := '{' SENTENCE ALTERNATIVES '}'
ALTERNATIVES := '|' SENTENCE optional ALTERNATIVES
Maybe I could have used clearer names for the different stages, but you get the point. Any of your sentences can be broken down using the rules of this grammar. After parsing, you will get your sentence in a tree structure.
Once you have your string parsed into this tree structure, you can traverse it, and pick randomly as to which branch you take. An example in JavaScript:
var string = "A table {is {red|blue|green}|has {four|five} legs}"
var index = 0
var root = new Node()
var current = root
function Node() {
this.text = ""
this.options = []
this.next = null
}
Node.prototype.toString = function(){
var string = this.text;
if (this.options.length > 0) {
var rnd = Math.floor(Math.random() * this.options.length)
string += this.options[rnd].toString()
}
if (this.next != null)
string += this.next.toString()
return string
}
function parse() {
text()
if (index == string.length)
return
if (string[index] == "{") {
index++
options()
var next = new Node()
current.next = next
current = next
parse()
}
}
function options() {
var parent = current
while(true) {
current = new Node()
parent.options.push(current)
parse()
index++
if (string[index - 1] != '|')
break
}
current = new Node()
parent.next = current
}
function text() {
while (index < string.length && "{|}".indexOf(string[index]) == -1) {
current.text += string[index]
index++
}
}
parse()
alert(root.toString())
Just a heads up - this doesn't handle these strings:
"some text { without a closing curly brace"
"an unexpected | pipe symbol"
"an unexpected } closing curly brace"
I'll let you add that in yourself.

Related

How to get odd and even position characters from a string?

I'm trying to figure out how to remove every second character (starting from the first one) from a string in Javascript.
For example, the string "This is a test!" should become "hsi etTi sats!"
I also want to save every deleted character into another array.
I have tried using replace method and splice method, but wasn't able to get them to work properly. Mostly because replace only replaces the first character.
function encrypt(text, n) {
if (text === "NULL") return n;
if (n <= 0) return text;
var encArr = [];
var newString = text.split("");
var j = 0;
for (var i = 0; i < text.length; i += 2) {
encArr[j++] = text[i];
newString.splice(i, 1); // this line doesn't work properly
}
}
You could reduce the characters of the string and group them to separate arrays using the % operator. Use destructuring to get the 2D array returned to separate variables
let str = "This is a test!";
const [even, odd] = [...str].reduce((r,char,i) => (r[i%2].push(char), r), [[],[]])
console.log(odd.join(''))
console.log(even.join(''))
Using a for loop:
let str = "This is a test!",
odd = [],
even = [];
for (var i = 0; i < str.length; i++) {
i % 2 === 0
? even.push(str[i])
: odd.push(str[i])
}
console.log(odd.join(''))
console.log(even.join(''))
It would probably be easier to use a regular expression and .replace: capture two characters in separate capturing groups, add the first character to a string, and replace with the second character. Then, you'll have first half of the output you need in one string, and the second in another: just concatenate them together and return:
function encrypt(text) {
let removedText = '';
const replacedText1 = text.replace(/(.)(.)?/g, (_, firstChar, secondChar) => {
// in case the match was at the end of the string,
// and the string has an odd number of characters:
if (!secondChar) secondChar = '';
// remove the firstChar from the string, while adding it to removedText:
removedText += firstChar;
return secondChar;
});
return replacedText1 + removedText;
}
console.log(encrypt('This is a test!'));
Pretty simple with .reduce() to create the two arrays you seem to want.
function encrypt(text) {
return text.split("")
.reduce(({odd, even}, c, i) =>
i % 2 ? {odd: [...odd, c], even} : {odd, even: [...even, c]}
, {odd: [], even: []})
}
console.log(encrypt("This is a test!"));
They can be converted to strings by using .join("") if you desire.
I think you were on the right track. What you missed is replace is using either a string or RegExp.
The replace() method returns a new string with some or all matches of a pattern replaced by a replacement. The pattern can be a string or a RegExp, and the replacement can be a string or a function to be called for each match. If pattern is a string, only the first occurrence will be replaced.
Source: String.prototype.replace()
If you are replacing a value (and not a regular expression), only the first instance of the value will be replaced. To replace all occurrences of a specified value, use the global (g) modifier
Source: JavaScript String replace() Method
So my suggestion would be to continue still with replace and pass the right RegExp to the function, I guess you can figure out from this example - this removes every second occurrence for char 't':
let count = 0;
let testString = 'test test test test';
console.log('original', testString);
// global modifier in RegExp
let result = testString.replace(/t/g, function (match) {
count++;
return (count % 2 === 0) ? '' : match;
});
console.log('removed', result);
like this?
var text = "This is a test!"
var result = ""
var rest = ""
for(var i = 0; i < text.length; i++){
if( (i%2) != 0 ){
result += text[i]
} else{
rest += text[i]
}
}
console.log(result+rest)
Maybe with split, filter and join:
const remaining = myString.split('').filter((char, i) => i % 2 !== 0).join('');
const deleted = myString.split('').filter((char, i) => i % 2 === 0).join('');
You could take an array and splice and push each second item to the end of the array.
function encrypt(string) {
var array = [...string],
i = 0,
l = array.length >> 1;
while (i <= l) array.push(array.splice(i++, 1)[0]);
return array.join('');
}
console.log(encrypt("This is a test!"));
function encrypt(text) {
text = text.split("");
var removed = []
var encrypted = text.filter((letter, index) => {
if(index % 2 == 0){
removed.push(letter)
return false;
}
return true
}).join("")
return {
full: encrypted + removed.join(""),
encrypted: encrypted,
removed: removed
}
}
console.log(encrypt("This is a test!"))
Splice does not work, because if you remove an element from an array in for loop indexes most probably will be wrong when removing another element.
I don't know how much you care about performance, but using regex is not very efficient.
Simple test for quite a long string shows that using filter function is on average about 3 times faster, which can make quite a difference when performed on very long strings or on many, many shorts ones.
function test(func, n){
var text = "";
for(var i = 0; i < n; ++i){
text += "a";
}
var start = new Date().getTime();
func(text);
var end = new Date().getTime();
var time = (end-start) / 1000.0;
console.log(func.name, " took ", time, " seconds")
return time;
}
function encryptREGEX(text) {
let removedText = '';
const replacedText1 = text.replace(/(.)(.)?/g, (_, firstChar, secondChar) => {
// in case the match was at the end of the string,
// and the string has an odd number of characters:
if (!secondChar) secondChar = '';
// remove the firstChar from the string, while adding it to removedText:
removedText += firstChar;
return secondChar;
});
return replacedText1 + removedText;
}
function encrypt(text) {
text = text.split("");
var removed = "";
var encrypted = text.filter((letter, index) => {
if(index % 2 == 0){
removed += letter;
return false;
}
return true
}).join("")
return encrypted + removed
}
var timeREGEX = test(encryptREGEX, 10000000);
var timeFilter = test(encrypt, 10000000);
console.log("Using filter is faster ", timeREGEX/timeFilter, " times")
Using actually an array for storing removed letters and then joining them is much more efficient, than using a string and concatenating letters to it.
I changed an array to string in filter solution to make it the same like in regex solution, so they are more comparable.

Regex split on comma don't split on comma between double quotes [duplicate]

I'm looking for [a, b, c, "d, e, f", g, h]to turn into an array of 6 elements: a, b, c, "d,e,f", g, h. I'm trying to do this through Javascript. This is what I have so far:
str = str.split(/,+|"[^"]+"/g);
But right now it's splitting out everything that's in the double-quotes, which is incorrect.
Edit: Okay sorry I worded this question really poorly. I'm being given a string not an array.
var str = 'a, b, c, "d, e, f", g, h';
And I want to turn that into an array using something like the "split" function.
Here's what I would do.
var str = 'a, b, c, "d, e, f", g, h';
var arr = str.match(/(".*?"|[^",\s]+)(?=\s*,|\s*$)/g);
/* will match:
(
".*?" double quotes + anything but double quotes + double quotes
| OR
[^",\s]+ 1 or more characters excl. double quotes, comma or spaces of any kind
)
(?= FOLLOWED BY
\s*, 0 or more empty spaces and a comma
| OR
\s*$ 0 or more empty spaces and nothing else (end of string)
)
*/
arr = arr || [];
// this will prevent JS from throwing an error in
// the below loop when there are no matches
for (var i = 0; i < arr.length; i++) console.log('arr['+i+'] =',arr[i]);
regex: /,(?=(?:(?:[^"]*"){2})*[^"]*$)/
const input_line = '"2C95699FFC68","201 S BOULEVARDRICHMOND, VA 23220","8299600062754882","2018-09-23"'
let my_split = input_line.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/)[4]
Output:
my_split[0]: "2C95699FFC68",
my_split[1]: "201 S BOULEVARDRICHMOND, VA 23220",
my_split[2]: "8299600062754882",
my_split[3]: "2018-09-23"
Reference following link for an explanation: regexr.com/44u6o
Here is a JavaScript function to do it:
function splitCSVButIgnoreCommasInDoublequotes(str) {
//split the str first
//then merge the elments between two double quotes
var delimiter = ',';
var quotes = '"';
var elements = str.split(delimiter);
var newElements = [];
for (var i = 0; i < elements.length; ++i) {
if (elements[i].indexOf(quotes) >= 0) {//the left double quotes is found
var indexOfRightQuotes = -1;
var tmp = elements[i];
//find the right double quotes
for (var j = i + 1; j < elements.length; ++j) {
if (elements[j].indexOf(quotes) >= 0) {
indexOfRightQuotes = j;
break;
}
}
//found the right double quotes
//merge all the elements between double quotes
if (-1 != indexOfRightQuotes) {
for (var j = i + 1; j <= indexOfRightQuotes; ++j) {
tmp = tmp + delimiter + elements[j];
}
newElements.push(tmp);
i = indexOfRightQuotes;
}
else { //right double quotes is not found
newElements.push(elements[i]);
}
}
else {//no left double quotes is found
newElements.push(elements[i]);
}
}
return newElements;
}
Here's a non-regex one that assumes doublequotes will come in pairs:
function splitCsv(str) {
return str.split(',').reduce((accum,curr)=>{
if(accum.isConcatting) {
accum.soFar[accum.soFar.length-1] += ','+curr
} else {
accum.soFar.push(curr)
}
if(curr.split('"').length % 2 == 0) {
accum.isConcatting= !accum.isConcatting
}
return accum;
},{soFar:[],isConcatting:false}).soFar
}
console.log(splitCsv('asdf,"a,d",fdsa'),' should be ',['asdf','"a,d"','fdsa'])
console.log(splitCsv(',asdf,,fds,'),' should be ',['','asdf','','fds',''])
console.log(splitCsv('asdf,"a,,,d",fdsa'),' should be ',['asdf','"a,,,d"','fdsa'])
This works well for me. (I used semicolons so the alert message would show the difference between commas added when turning the array into a string and the actual captured values.)
REGEX
/("[^"]*")|[^;]+/
var str = 'a; b; c; "d; e; f"; g; h; "i"';
var array = str.match(/("[^"]*")|[^;]+/g);
alert(array);
Here's the regex we're using to extract valid arguments from a comma-separated argument list, supporting double-quoted arguments. It works for the outlined edge cases. E.g.
doesn't include quotes in the matches
works with white spaces in matches
works with empty fields
(?<=")[^"]+?(?="(?:\s*?,|\s*?$))|(?<=(?:^|,)\s*?)(?:[^,"\s][^,"]*[^,"\s])|(?:[^,"\s])(?![^"]*?"(?:\s*?,|\s*?$))(?=\s*?(?:,|$))
Proof: https://regex101.com/r/UL8kyy/3/tests (Note: currently only works in Chrome because the regex uses lookbehinds which are only supported in ECMA2018)
According to our guidelines it avoids non-capturing groups and greedy matching.
I'm sure it can be simplified, I'm open to suggestions / additional test cases.
For anyone interested, the first part matches double-quoted, comma-delimited arguments:
(?<=")[^"]+?(?="(?:\s*?,|\s*?$))
And the second part matches comma-delimited arguments by themselves:
(?<=(?:^|,)\s*?)(?:[^,"\s][^,"]*[^,"\s])|(?:[^,"\s])(?![^"]*?"(?:\s*?,|\s*?$))(?=\s*?(?:,|$))
I almost liked the accepted answer, but it didn't parse the space correctly, and/or it left the double quotes untrimmed, so here is my function:
/**
* Splits the given string into components, and returns the components array.
* Each component must be separated by a comma.
* If the component contains one or more comma(s), it must be wrapped with double quotes.
* The double quote must not be used inside components (replace it with a special string like __double__quotes__ for instance, then transform it again into double quotes later...).
*
* https://stackoverflow.com/questions/11456850/split-a-string-by-commas-but-ignore-commas-within-double-quotes-using-javascript
*/
function splitComponentsByComma(str){
var ret = [];
var arr = str.match(/(".*?"|[^",]+)(?=\s*,|\s*$)/g);
for (let i in arr) {
let element = arr[i];
if ('"' === element[0]) {
element = element.substr(1, element.length - 2);
} else {
element = arr[i].trim();
}
ret.push(element);
}
return ret;
}
console.log(splitComponentsByComma('Hello World, b, c, "d, e, f", c')); // [ 'Hello World', 'b', 'c', 'd, e, f', 'c' ]
Parse any CSV or CSV-String code based on TYPESCRIPT
public parseCSV(content:string):any[string]{
return content.split("\n").map(ar=>ar.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/).map(refi=>refi.replace(/[\x00-\x08\x0E-\x1F\x7F-\uFFFF]/g, "").trim()));
}
var str='"abc",jkl,1000,qwerty6000';
parseCSV(str);
output :
[
"abc","jkl","1000","qwerty6000"
]
I know it's a bit long, but here's my take:
var sample="[a, b, c, \"d, e, f\", g, h]";
var inQuotes = false, items = [], currentItem = '';
for(var i = 0; i < sample.length; i++) {
if (sample[i] == '"') {
inQuotes = !inQuotes;
if (!inQuotes) {
if (currentItem.length) items.push(currentItem);
currentItem = '';
}
continue;
}
if ((/^[\"\[\]\,\s]$/gi).test(sample[i]) && !inQuotes) {
if (currentItem.length) items.push(currentItem);
currentItem = '';
continue;
}
currentItem += sample[i];
}
if (currentItem.length) items.push(currentItem);
console.log(items);
As a side note, it will work both with, and without the braces in the start and end.
This takes a csv file one line at a time and spits back an array with commas inside speech marks intact. if there are no speech marks detected it just .split(",")s as normal... could probs replace that second loop with something but it does the job as is
function parseCSVLine(str){
if(str.indexOf("\"")>-1){
var aInputSplit = str.split(",");
var aOutput = [];
var iMatch = 0;
//var adding = 0;
for(var i=0;i<aInputSplit.length;i++){
if(aInputSplit[i].indexOf("\"")>-1){
var sWithCommas = aInputSplit[i];
for(var z=i;z<aInputSplit.length;z++){
if(z !== i && aInputSplit[z].indexOf("\"") === -1){
sWithCommas+= ","+aInputSplit[z];
}else if(z !== i && aInputSplit[z].indexOf("\"") > -1){
sWithCommas+= ","+aInputSplit[z];
sWithCommas.replace(new RegExp("\"", 'g'), "");
aOutput.push(sWithCommas);
i=z;
z=aInputSplit.length+1;
iMatch++;
}
if(z === aInputSplit.length-1){
if(iMatch === 0){
aOutput.push(aInputSplit[z]);
}
iMatch = 0;
}
}
}else{
aOutput.push(aInputSplit[i]);
}
}
return aOutput
}else{
return str.split(",")
}
}
Use the npm library csv-string to parse the strings instead of split: https://www.npmjs.com/package/csv-string
This will handle the empty entries
Something like a stack should do the trick. Here I vaguely use marker boolean as stack (just getting my purpose served with it).
var str = "a,b,c,blah\"d,=,f\"blah,\"g,h,";
var getAttributes = function(str){
var result = [];
var strBuf = '';
var start = 0 ;
var marker = false;
for (var i = 0; i< str.length; i++){
if (str[i] === '"'){
marker = !marker;
}
if (str[i] === ',' && !marker){
result.push(str.substr(start, i - start));
start = i+1;
}
}
if (start <= str.length){
result.push(str.substr(start, i - start));
}
return result;
};
console.log(getAttributes(str));
jsfiddle setting image code output image
The code works if your input string in the format of stringTocompare.
Run the code on https://jsfiddle.net/ to see output for fiddlejs setting.
Please refer to the screenshot.
You can either use split function for the same for the code below it and tweak the code according to you need.
Remove the bold or word with in ** from the code if you dont want to have comma after split attach=attach**+","**+actualString[t+1].
var stringTocompare='"Manufacturer","12345","6001","00",,"Calfe,eto,lin","Calfe,edin","4","20","10","07/01/2018","01/01/2006",,,,,,,,"03/31/2004"';
console.log(stringTocompare);
var actualString=stringTocompare.split(',');
console.log("Before");
for(var i=0;i<actualString.length;i++){
console.log(actualString[i]);
}
//var actualString=stringTocompare.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/);
for(var i=0;i<actualString.length;i++){
var flag=0;
var x=actualString[i];
if(x!==null)
{
if(x[0]=='"' && x[x.length-1]!=='"'){
var p=0;
var t=i;
var b=i;
for(var k=i;k<actualString.length;k++){
var y=actualString[k];
if(y[y.length-1]!=='"'){
p++;
}
if(y[y.length-1]=='"'){
flag=1;
}
if(flag==1)
break;
}
var attach=actualString[t];
for(var s=p;s>0;s--){
attach=attach+","+actualString[t+1];
t++;
}
actualString[i]=attach;
actualString.splice(b+1,p);
}
}
}
console.log("After");
for(var i=0;i<actualString.length;i++){
console.log(actualString[i]);
}
[1]: https://i.stack.imgur.com/3FcxM.png
I solved this with a simple parser.
It simply goes through the string char by char, splitting off a segment when it finds the split_char (e.g. comma), but also has an on/off flag which is switched by finding the encapsulator_char (e.g. quote). It doesn't require the encapsulator to be at the start of the field/segment (a,b","c,d would produce 3 segments, with 'b","c' as the second), but it should work for a well formed CSV with escaped encapsulator chars.
function split_except_within(text, split_char, encapsulator_char, escape_char) {
var start = 0
var encapsulated = false
var fields = []
for (var c = 0; c < text.length; c++) {
var char = text[c]
if (char === split_char && ! encapsulated) {
fields.push(text.substring(start, c))
start = c+1
}
if (char === encapsulator_char && (c === 0 || text[c-1] !== escape_char) )
encapsulated = ! encapsulated
}
fields.push(text.substring(start))
return fields
}
https://jsfiddle.net/7hty8Lvr/1/
const csvSplit = (line) => {
let splitLine = [];
var quotesplit = line.split('"');
var lastindex = quotesplit.length - 1;
// split evens removing outside quotes, push odds
quotesplit.forEach((val, index) => {
if (index % 2 === 0) {
var firstchar = (index == 0) ? 0 : 1;
var trimmed = (index == lastindex)
? val.substring(firstchar)
: val.slice(firstchar, -1);
trimmed.split(",").forEach(v => splitLine.push(v));
} else {
splitLine.push(val);
}
});
return splitLine;
}
this works as long as quotes always come on the outside of values that contain the commas that need to be excluded (i.e. a csv file).
if you have stuff like '1,2,4"2,6",8'
it will not work.
Assuming your string really looks like '[a, b, c, "d, e, f", g, h]', I believe this would be 'an acceptable use case for eval():
myString = 'var myArr ' + myString;
eval(myString);
console.log(myArr); // will now be an array of elements: a, b, c, "d, e, f", g, h
Edit: As Rocket pointed out, strict mode removes eval's ability to inject variables into the local scope, meaning you'd want to do this:
var myArr = eval(myString);
I've had similar issues with this, and I've found no good .net solution so went DIY. NOTE: This was also used to reply to
Splitting comma separated string, ignore commas in quotes, but allow strings with one double quotation
but seems more applicable here (but useful over there)
In my application I'm parsing a csv so my split credential is ",". this method I suppose only works for where you have a single char split argument.
So, I've written a function that ignores commas within double quotes. it does it by converting the input string into a character array and parsing char by char
public static string[] Splitter_IgnoreQuotes(string stringToSplit)
{
char[] CharsOfData = stringToSplit.ToCharArray();
//enter your expected array size here or alloc.
string[] dataArray = new string[37];
int arrayIndex = 0;
bool DoubleQuotesJustSeen = false;
foreach (char theChar in CharsOfData)
{
//did we just see double quotes, and no command? dont split then. you could make ',' a variable for your split parameters I'm working with a csv.
if ((theChar != ',' || DoubleQuotesJustSeen) && theChar != '"')
{
dataArray[arrayIndex] = dataArray[arrayIndex] + theChar;
}
else if (theChar == '"')
{
if (DoubleQuotesJustSeen)
{
DoubleQuotesJustSeen = false;
}
else
{
DoubleQuotesJustSeen = true;
}
}
else if (theChar == ',' && !DoubleQuotesJustSeen)
{
arrayIndex++;
}
}
return dataArray;
}
This function, to my application taste also ignores ("") in any input as these are unneeded and present in my input.

Pure Javascript - Find/Replace a list of words in DOM

I have a list of words and I want to replace them in DOM.
The replace is working but it has an issue. Before and after the replaced text, it puts an undesired slash bar (i.e. reduced to a brigade > reduced/ OTHER WORD 1 /a brigade)
Also, I need to wrap the replaced word with an html tag, like .
(i.e. reduced to a brigade > reduced OTHER WORD 1 a brigade)
You can see the code here:
https://jsfiddle.net/realgrillo/9uL6ofge/
var elements = document.getElementsByTagName('*');
var words = [
[/(^|\s)([Tt]o)(\s|$)/ig, 'OTHER WORD 1'],
[/(^|\s)([Aa]nd)(\s|$)/ig, 'OTHER WORD 2']
];
for (var i = 0; i < elements.length; i++) {
var element = elements[i];
for (var j = 0; j < element.childNodes.length; j++) {
var node = element.childNodes[j];
if (node.nodeType === 3) {
var text = node.data;
for (var k = 0; k < words.length; k++)
{
var from = new RegExp(words[k][0]);
var to = new RegExp('$1' + words[k][1] + '$3');
var replacedText = text.replace(from, to);
//console.log("from: " + from);
//console.log("to: " + to);
//console.log("toDo: " + toDo);
if (replacedText !== text)
{
element.innerHTML = element.textContent.replace(from, to);
console.log("text: " + text);
console.log("replacedText: " + replacedText);
}
}
/*
//
//!!!!THIS CODE FOR 1 WORD WORKS!!!! I need to do this for the list of words.
//
var replacedText = text.replace(/(^|\s)([Tt]o)(\s|$)/ig, '$1OTHER WORD$3');
if (replacedText !== text) {
element.innerHTML = element.textContent.replace(/(^|\s)([Tt]o)(\s|$)/ig, '$1<b class=modified>OTHER WORD</b>$3');
}
*/
}
}
}
Masters of JS, can you help me? Pure javascript, not jQuery please.
Thanks.
I can give you some much more simple function to use. First define a function for replacing words like this:
String.prototype.replaceAll = function (search, replacement) {
try {
var target = this;
return target.split(search).join(replacement);
} catch (e) {
return "";
}
};
That's why the pure javascript replace function only replace one time in a whole case. After that you can use it in your code like here:
var replacedText = text.replaceAll(/(^|\s)([Tt]o)(\s|$)/ig, '$1OTHER WORD$3');
You don't need to parse words[k][0] into a RegExp as it is already one, even though it won't break because the constructor will handle a RegExp well.
Also for the words[k][1], you don't need to parse it into a RegExp either because the second parameter of String.prototype.replace is expected to be a plain string. The slashes are there because you cast your RegExp to a string (and will end up output).
So this is a change that will fix it:
var to = '$1' + words[k][1] + '$3';
And this is optional but probably a good practice:
var from = words[k][0];
For getting an html tag, you can either change it directly in each words[k][1] or in the to var declaration, depending on what you want.
Extra:
In the for loop, the code assumes that there are always at least three groups (because of the '$3' reference), an alternative could be to delegate this replacement directly in words[k][1], but it is up to you to decide.

Javascript Markdown Parsing

I'm working on a markdown to html parser. I understand this is a big project and there are third party libraries, but none the less I want to roll a simple solution on my own that doesn't have to handle every single aspect of markdown.
So far the process is to take an input (in my case the value of a textarea) and parse it line by line.
var html = [];
var lines = txt.split('\n'); //Convert string to array
//Remove empty lines
for(var index = lines.length-1; index >= 0; index--) {
if(lines[index] == '') lines.splice(index, 1);
}
//Parse line by line
for(var index = 0; index <= lines.length-1; index++) {
var str = lines[index];
if(str.match(/^#[^#]/)) {
//Header
str = str.replace(/#(.*?)$/g, '<h1>$1</h1>');
} else if(str.match(/^##[^#]/)) {
//Header 2
str = str.replace(/##(.*?)$/g, '<h2>$1</h2>');
} else if(str.match(/^###[^#]/)) {
//Header 3
str = str.replace(/###(.*?)$/g, '<h3>$1</h3>');
} else if(str.trim().startsWith('+')) {
//Unordered List
var orig = str;
str = str.replace(/\+(.*?)$/, '<li>$1</li>');
var previous, next;
if(index > 0) previous = lines[index-1];
if(!previous || previous && previous.indexOf('+') < orig.indexOf('+')) {
str = '<ul>' + str;
}
if(index < lines.length-1) next = lines[index+1];
if(!next || next && next.indexOf('+') < orig.indexOf('+')) {
var count = Math.max(0, orig.indexOf('+') / 4);
if(next) count = count - Math.max(0, next.indexOf('+') / 4);
for(var i=1; i<=count; i++) {
str = str + '</ul>';
}
}
if(next && next.trim().indexOf('+') == -1) str = str + '</ul>';
} else if(str.match(/^[0-9a-zA-Z]/)) {
//Paragraph
str = str.replace(/^([0-9a-zA-Z].*?)$/g, '<p>$1</p>');
}
//Inline formatting
str = str.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>'); //Bold
str = str.replace(/\_\_(.*?)\_\_/g, '<strong>$1</strong>'); //Another bold
str = str.replace(/\*(.*?)\*/g, '<em>$1</em>'); //Italics
str = str.replace(/\_(.*?)\_/g, '<em>$1</em>'); //Another italics
//Append formatted to return string
html.push(str);
}
Where I run into problems is with nested blocks such as ul. Currently the code looks at a line that starts with a + and wraps it in an li. Great, but these list items never get placed within a ul. I could run through the output again after the line by line and just wrap every group of li's, but that screws me up when I have nested li's that require their own ul.
Any thoughts on how to apply these additional wrapper tags? I've considered using my own special characters around list type elements so I know where to add the wrapper tags, but that breaks traditional markdown. I wouldn't be able to pass the raw markdown to someone other than myself and know they'd understand what was going on.
Edit I updated my code sample to include a working sample. The working sample also supports nested lists.
You need a very simple state machine.
When you encounter the first + you add <ul> and raise a flag.
If you don't see a line that starts with + and your flag is raised, then close the </ul>

Return url for the first item for which the content part matches pattern

How can I return the url part of the first page in pages for which the content part matches pattern (in the style of function 1, case insensitive) but return an empty string if no page is found?
e.g.
url1(pages,"GREAT") returns "www.xyz.ac.uk"
url1(pages,"xyz") returns ""
Here is my code so far:
var pg = [ "|www.cam.ac.uk|Cambridge University offers degree programmes and world class research." , "!www.xyz.ac.uk!An great University" , "%www%Yet another University" ];
var pt = "great";
function url1(pages, pattern) {
var result = "";
for (x in pages) {
current = pages[x].split(pattern);
result = current[1];
}
return result;
}
alert(url1(pg, pt));
Try this:
var pg = [ "|www.cam.ac.uk|Cambridge University offers degree programmes and world class research." , "!www.xyz.ac.uk!An great University" , "%www%Yet another University" ];
function find(pages, pattern) {
var i, l, page, arr;
pattern = pattern.toLowerCase();
for(i=0, l=pages.length; i<l; i++) {
page = pages[i];
arr = page.split(page[0]);
if(arr.slice(2).join(page[0]).toLowerCase().indexOf(pattern) >=0) {
return arr[1];
}
}
return '';
}
console.log(find(pg,'great')); // 'www.xyz.ac.uk'
console.log(find(pg,'xyz')); // ''
Fiddle here: http://jsbin.com/uGebeQi/2/edit
Here's another solution using a regular expression. I feel this is slightly better than blindly splitting on the first character since the content could always contain the first character as well.
Note: The accepted answer was corrected.
function url1(pages, pattern) {
var rx = /^(.)(.+?)\1(.+)/,
i = 0,
len = pages.length,
matches;
for (; i < len; i++) {
if ((matches = pages[i].match(rx)) && matches[3].indexOf(pattern) !== -1)
return matches[2];
}
return '';
}
//www.xyz.ac.uk
url1(['!www.xyz.ac.uk!An great University! The best in the world!'], 'best');
You can always lowercase the pattern and the content if you want case-insensitive searches.

Categories

Resources