Improving combinations from abc[d[e,f],gh] pattern algorithm - javascript

I wrote an algorithm that is inadequate, namely because it does not handle [,abc] cases (see the string variations and conditions below), and would like to know how it can be improved so it covers those cases:
Given
A pattern that describes string variations: abc[de[f,g],hk], which gives
abcdef
abcdeg
abchk
A pattern consists of "arrays" preceded by strings, abc[...], and of plain strings: adj, kg, q.
Another, more complex, example: utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]].
Conditions
Strings themselves can contain only letters and numbers. There cannot be abc[h\,k,b] or abc[h\[k,b] giving abch,k or abch[k.
"Arrays" are never empty and always have at least 2 elements.
"Array" and string-only values can appear in any order, e.g. abc[a,b[c,d]] or abc[a[b,c],d]. The order is strictly left to right: the pattern abc[d,e] cannot produce the combinations eabc or dabc.
abc[d,e] gives neither abcde nor abced, only abcd and abce.
A pattern always starts with a string followed by an array: something[...].
There can be a string without an array: abc[a,bc[d,f]], but an array without a string is not allowed: abc[a,[d,f]].
There can be an empty string, e.g. a[,b], which gives a and ab.
My solution
function getStrings(pat) {
if(pat.indexOf('[') == -1)
return pat;
String.prototype.insert = function(index, string) {
if (index > 0) {
return this.substring(0, index) + string + this.substr(index);
}
return string + this;
};
function getArray(str, start, isSource = false) {
if (start < 0) return null;
var n = 0;
var ret = "";
var i = start;
for (; i < str.length; i++) {
if (str[i] == "[") n++;
else if (str[i] == "]") n--;
if (n == 0) break;
}
var ret = {
str: "",
arr: "",
end: 0,
};
ret.arr = str.slice(start, i) + "]";
ret.end = i;
start--;
var end = start;
for (
;
start > 0 &&
str[start] != "," &&
str[start] != "]" &&
str[start] != "[";
start--
) {}
if(!isSource)
start++;
end++;
ret.str = str.slice(start, end);
return ret;
}
function getElement(source, start) {
var ret = [];
start++;
for (
;
start < source.length && source[start] != "," && source[start] != "]";
start++
)
ret[ret.length] = source[start];
return ret;
}
var source = getArray(pat, pat.indexOf("["), true); // parsing
var ar = source.arr;
source.arrs = getArrays(source); // parsing
source.source = true;
var fi = "";
var temp_out = [];
var tol = 0;
return getVariations(source); // getting variations of parsed
function getVariations(source) {
if (source.arrs == undefined) {
} else
for (var i = 0; i < source.arrs.length; i++) {
if (source.source) fi = source.str;
if (!source.arrs[i].arrs) {
temp_out[tol] = fi + source.arrs[i].str;
tol++;
} else {
var lafi = fi;
fi += source.arrs[i].str;
getVariations(source.arrs[i]);
if(i != source.arrs.length - 1)
fi = lafi;
}
if (source.source && i == source.arrs.length - 1) {
var temp = temp_out;
temp_out = [];
tol = 0;
return temp;
}
}
}
function getArrays(source) {
var la = 1;
var start = 0;
var arrs = [];
if (!source.arr) return;
while (start != -1) {
start = source.arr.indexOf("[", la);
var qstart = source.arr.indexOf(",", la);
if(source.arr[la] == ',')
qstart = source.arr.indexOf(",", la+1);
var pu = false;
if(qstart != la && qstart != -1 && qstart < start && start != -1)
{
pu = true;
var str = source.arr;
var buf = [];
qstart--;
var i = -1;
for(i = qstart; i > 0 && str[i] != '[' && str[i] != ','; i--)
{}
i++;
for(; i < str.length && str[i]!= ','; i++)
{
buf[buf.length] = str[i];
}
if(buf.length == 0)
{
la = start;
alert("1!")
}
else
{
buf = buf.join('');
arrs[arrs.length] = {str:buf};
la += buf.length+1;
}
}
else
if (start != -1) {
arrs[arrs.length] = getArray(source.arr, start);
la = arrs[arrs.length - 1].end + 1;
} else {
start = source.arr.indexOf(",", la);
if (start != -1) {
var ret = getElement(source.arr, start);
arrs[arrs.length] = ret;
la += ret.length;
}
}
}
for (var i = 0; i < arrs.length; i++)
if (typeof arrs[i] != "string" && arrs[i].arr) {
arrs[i].arrs = getArrays(arrs[i]);
var st = arrs[i].arr;
if (occ(arrs[i].arr, "[") == 1 && occ(arrs[i].arr, "]") == 1) {
st = st.replaceAll("[", '["');
st = st.replaceAll("]", '"]');
st = st.replaceAll(",", '","');
st = JSON.parse(st);
for (var j = 0; j < st.length; j++) st[j] = { str: st[j] };
arrs[i].arrs = st;
}
} else if (typeof arrs[i] == "string") {
arrs[i] = { str: arrs[i] };
}
RecursArrs(arrs);
return arrs;
}
function RecursArrs(arrs) {
for (var i = 0; i < arrs.length; i++) {
if (!arrs[i].source)
if (arrs[i].arr) {
delete arrs[i].arr;
delete arrs[i].end;
}
if (!arrs[i].str) {
try{
arrs[i] = { str: arrs[i].join("") };
}catch(er)
{
arrs[i] = {str:''};
}
if (i && arrs[i - 1].str == arrs[i].str) {
arrs.splice(i, 1);
i--;
}
} else if (arrs[i].arrs) RecursArrs(arrs[i].arrs);
}
}
function occ(string, word) {
return string.split(word).length - 1;
}
}
// getStrings('IE5E[COR[R[,G[A,E,I]],S,T,U,V,W,X,Y,Z],EORRG[I,M]]')

I would use a regular expression to break up the input into tokens. In this case I chose to take pairs of (letters, delimiter), where the delimiter is one of "[", "]", ",". The letters part could be empty.
Then I would use a recursive function like you did, but I went for a recursive generator function.
Here is the suggested implementation:
function* getStrings(pattern) {
const tokens = pattern.matchAll(/([^[\],]*)([[\],])?/g);
function* dfs(recur=false) {
let expectToken = true;
while (true) {
const [, token, delim] = tokens.next().value;
if (delim === "[") {
for (const deep of dfs(true)) yield token + deep;
} else {
if (token || expectToken) yield token;
if (delim === "]" && !recur) throw "Invalid pattern: too many `]`";
if (!delim && recur) throw "Invalid pattern: missing `]`";
if (delim !== ",") return;
}
expectToken = delim !== "["; // After [...] we don't expect a letter
}
}
yield* dfs();
}
const input = 'IE5E[COR[R[,G[A,E,I]],S,T,U,V,W,X,Y,Z],EORRG[I,M]]';
for (const s of getStrings(input))
console.log(s);
This implementation should match the patterns according to the given restrictions, but it will also allow the following:
An "array" can start without a prefix of letters. So [a,b] is allowed and will produce the same output as a,b.
An "array" may be followed immediately by letters or a new "array", but this will be interpreted as if they were separated by a comma. So x[a,b]c will be interpreted as x[a,b],c
An "array" can be empty. In that case the array is ignored. So x[] is the same as x.
There is some basic error checking: an error will be generated when the brackets are not balanced.
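As a quick check (these inputs are only meant to illustrate the relaxed cases just described), running the generator above gives:
console.log([...getStrings('[a,b]')]);   // -> ['a', 'b']        (same as 'a,b')
console.log([...getStrings('x[a,b]c')]); // -> ['xa', 'xb', 'c']  (as if 'x[a,b],c')
console.log([...getStrings('x[]')]);     // -> ['x']              (empty array ignored)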

We can do this in an inside-out fashion. If we replace the innermost group (e.g. 'de[f,g]') with its expansion, 'def,deg', and recur until there are no more groups remaining, we will have created a comma-separated list of final strings, which we can simply split apart and return.
const _expand = (
s,
match = s .match (/(.*?)(\w*)\[([^\[\]]+)\](.*)/),
[_, a, b, c, d] = match || []
) => match ? _expand (a + c .split (',') .map (x => b + x) .join (',') + d) : s
const expand = (s) => _expand (s) .split (',')
console .log (expand ('abc[de[f,g],hk]'))
console .log (expand ('utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'))
Our main recursive function -- _expand -- uses a regular expression that extracts the first group, and breaks it into constituent parts, and puts it back together by mapping over the parts of the array. Then our public function, expand simply calls the recursive one and splits the result into an array.
For example, this is how the recursive calls would be handled for the string, 'utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]':
'utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]' //-->
// ^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,q[t[ij,lo[z,x]],bm]]' //-->
// ^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,q[t[ij,loz,lox],bm]]' //-->
// ^^^^^^^^^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,q[tij,tloz,tlox,bm]]' //-->
// ^^^^^^^^^^^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,qtij,qtloz,qtlox,qbm]' //-->
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
'utvkfvu,utvkgnu,utvkgnk,utvkgnr,utvknl,utvkqtij,utvkqtloz,utvkqtlox,utvkqbm'
Update: Regex explanation:
The regex used here can be broken down into six sections:
(.*?): captures (non-greedy) an initial set of characters, stored as a
(\w*): captures our letters before an opening bracket, stored as b
\[: matches an opening bracket ([)
([^\[\]]+): captures everything but brackets ([ or ]), stored as c
\]: matches a closing bracket (])
(.*): captures everything after the closing bracket, stored as d
The point is for the group inside the brackets to include no other brackets. An example might look like this:
utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]

a: (.*?)      -> 'utvk[fvu,'
b: (\w*)      -> 'gn'
[: \[         -> '['
c: ([^\[\]]+) -> 'u,k,r'
]: \]         -> ']'
d: (.*)       -> ',nl,q[t[ij,lo[z,x]],bm]]'

Vanilla solution without recursion:
const expander = /([^,[\]]*?)\[([^[\]]*?)]/;
const parse = (fields) => {
let result = fields;
while (result.match(expander)) {
result = result.replace(expander, (m, p1, p2) => p2.split(',').map((e) => `${p1}${e}`).join(','));
}
return result.split(',');
};
console.log(parse('abc[de[f,g],hk]'));
// => [ 'abcdef', 'abcdeg', 'abchk' ]
console.log(parse('utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'));
// => [ 'utvkfvu', 'utvkgnu', 'utvkgnk', 'utvkgnr', 'utvknl', 'utvkqtij', 'utvkqtloz', 'utvkqtlox', 'utvkqbm' ]
Basically I just took the code from object-fields, which one could use as follows
// const objectFields = require('object-fields');
const parse = (input) => objectFields.split(input.replace(/\[/g, '(').replace(/]/g, ')')).map((e) => e.replace(/\./g, ''));
console.log(parse('abc[de[f,g],hk]'));
// => [ 'abcdef', 'abcdeg', 'abchk' ]
console.log(parse('utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'));
// => [ 'utvkfvu', 'utvkgnu', 'utvkgnk', 'utvkgnr', 'utvknl', 'utvkqtij', 'utvkqtloz', 'utvkqtlox', 'utvkqbm' ]
<script src="https://bundle.run/object-fields@3.0.1"></script>
Disclaimer: I'm the author of object-fields

Let's describe an algorithm in words. Let's define word as a group of consecutive letters without a comma or bracket, which can also be an empty string. Then one way to think about this process is as a stack with two types of entries:
A word.
An opening bracket, [.
As we traverse the string,
(1) push words and opening brackets onto the stack, not commas.
(2a) when we reach a closing bracket, ], we start a list and keep popping the stack, adding words to that list until we pop an opening bracket from the stack. We then (2b) pop the next entry in the stack, which is the prefix for our current list, and (2c) push each entry from the list onto the stack with the prefix prepended.
Finally, return the stack.
Here's an implementation of the algorithm described above.
function f(s) {
if (s.length == 0) {
return [];
}
const stack = [""];
let i = 0;
while (i < s.length) {
if (s[i] == "[") {
i += 1;
stack.push("[", "");
} else if (s[i] == "]") {
i += 1;
const suffixes = [];
while (true) {
const word = stack.pop();
if (word == "[") {
const prefix = stack.pop();
for (let j = suffixes.length - 1; j >= 0; j--) {
stack.push(prefix + suffixes[j]);
}
break;
} else {
suffixes.push(word);
}
}
} else if (s[i] == ",") {
i += 1;
stack.push("");
} else {
stack[stack.length - 1] += s[i];
i += 1;
}
}
return stack;
}
// Output
var s = "a[bp,c[,d]],b[yx,]"
console.log(s);
for (const w of f(s)) {
console.log(w);
}
console.log("");
s = "abc[de[f,g],hk]"
console.log(s);
for (const w of f(s)) {
console.log(w);
}
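For reference, the two test strings above should log:
a[bp,c[,d]],b[yx,]  ->  abp, ac, acd, byx, b
abc[de[f,g],hk]     ->  abcdef, abcdeg, abchk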

Here is a recursion-free solution using object-scan.
This solution is probably more of academic interest, since it uses library internals and I wrote it to satisfy my curiosity as to whether it could be done this way. It also serves as a head scratcher for @ScottSauyet - payback for his answer, which took me a while to figure out =)
Anyways, enjoy!
<script type="module">
import objectScan from 'https://cdn.jsdelivr.net/npm/object-scan@18.4.0/lib/index.min.js';
import { compile } from 'https://cdn.jsdelivr.net/npm/object-scan@18.4.0/lib/core/compiler.js';
const parse = (input) => {
const compiled = compile([input.replace(/\[/g, '.{').replace(/]/g, '}')], {});
return objectScan(['++{children[*]}.value'], {
filterFn: ({ parent }) => parent.children.length === 0,
rtn: ({ parents }) => parents.filter((e) => !Array.isArray(e)).map(({ value }) => value).reverse().slice(1).join('')
})(compiled);
};
console.log(parse('abc[de[f,g],hk]'));
// => [ 'abcdef', 'abcdeg', 'abchk' ]
console.log(parse('utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'));
// => [ 'utvkfvu', 'utvkgnu', 'utvkgnk', 'utvkgnr', 'utvknl', 'utvkqtij', 'utvkqtloz', 'utvkqtlox', 'utvkqbm' ]
</script>
Disclaimer: I'm the author of object-scan

Related

Return the first non-repeating character of a string

In the first chunk of my code I have an 'if' statement that is not working as it should, and I can't figure out why.
When using the argument 'hous', it should enter the first 'if' statement and return 0. It returns -1 instead.
var firstUniqChar = function(s) {
for (let i = 0; i < s.length; i++){
let letter = s[i];
// console.log('s[i]: ' + letter);
// console.log(s.slice(1));
// console.log( 'i: ' + i);
if ((i = 0) && !(s.slice(1).includes(letter))) {
return 0;
}
if ((i = s.length - 1) && !(s.slice(0, i).includes(letter))) {
return 1;
}
if(!(s.slice(0, i).includes(letter)) && !(s.slice(i + 1).includes(letter))) {
return 2;
}
}
return -1;
};
console.log(firstUniqChar("hous"));
This is another way you can write your function:
const firstUniqChar = s => [...s].filter(c=>!(s.split(c).length-2))[0] || -1;
console.log(firstUniqChar("hous"));
console.log(firstUniqChar("hhoous"));
console.log(firstUniqChar("hhoouuss"));
Look up method for scattered repeated characters and functional find()-based approach
You may break your input string into an array of characters (e.g. using spread syntax ...) and make use of Array.prototype.find() (to get the character itself) or Array.prototype.findIndex() (to get the non-repeating character's position) by finding the character that is different from its neighbors:
const src = 'hhoous',
getFirstNonRepeating = str =>
[...str].find((c,i,s) =>
(!i && c != s[i+1]) ||
(c != s[i-1] && (!s[i+1] || c != s[i+1])))
console.log(getFirstNonRepeating(src))
The above will work perfectly when your repeating characters are grouped together; if not, I'd recommend doing two passes over the array of characters - one to count the occurrences of each character, and one more to find the first unique one:
const src = 'hohuso',
getFirstUnique = str => {
const hashMap = [...str].reduce((r,c,i) =>
(r[c]=r[c]||{position:i, count:0}, r[c].count++, r), {})
return Object
.entries(hashMap)
.reduce((r,[char,{position,count}]) =>
((count == 1 && (!r.char || position < r.position)) &&
(r = {char, position}),
r), {})
}
console.log(getFirstUnique(src))
function nonRepeat(str) {
return Array
.from(str)
.find((char) => str.match(new RegExp(char, 'g')).length === 1);
}
console.log(nonRepeat('abacddbec')); // e
console.log(nonRepeat('1691992933')); // 6
console.log(nonRepeat('thhinkninw')); // t

Read text file and convert to JSON in JavaScript [duplicate]

Where could I find some JavaScript code to parse CSV data?
You can use the CSVToArray() function mentioned in this blog entry.
<script type="text/javascript">
// ref: http://stackoverflow.com/a/1293163/2343
// This will parse a delimited string into an array of
// arrays. The default delimiter is the comma, but this
// can be overridden in the second argument.
function CSVToArray( strData, strDelimiter ){
// Check to see if the delimiter is defined. If not,
// then default to comma.
strDelimiter = (strDelimiter || ",");
// Create a regular expression to parse the CSV values.
var objPattern = new RegExp(
(
// Delimiters.
"(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
// Quoted fields.
"(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
// Standard fields.
"([^\"\\" + strDelimiter + "\\r\\n]*))"
),
"gi"
);
// Create an array to hold our data. Give the array
// a default empty first row.
var arrData = [[]];
// Create an array to hold our individual pattern
// matching groups.
var arrMatches = null;
// Keep looping over the regular expression matches
// until we can no longer find a match.
while (arrMatches = objPattern.exec( strData )){
// Get the delimiter that was found.
var strMatchedDelimiter = arrMatches[ 1 ];
// Check to see if the given delimiter has a length
// (is not the start of string) and if it matches
// field delimiter. If it does not, then we know
// that this delimiter is a row delimiter.
if (
strMatchedDelimiter.length &&
strMatchedDelimiter !== strDelimiter
){
// Since we have reached a new row of data,
// add an empty row to our data array.
arrData.push( [] );
}
var strMatchedValue;
// Now that we have our delimiter out of the way,
// let's check to see which kind of value we
// captured (quoted or unquoted).
if (arrMatches[ 2 ]){
// We found a quoted value. When we capture
// this value, unescape any double quotes.
strMatchedValue = arrMatches[ 2 ].replace(
new RegExp( "\"\"", "g" ),
"\""
);
} else {
// We found a non-quoted value.
strMatchedValue = arrMatches[ 3 ];
}
// Now that we have our value string, let's add
// it to the data array.
arrData[ arrData.length - 1 ].push( strMatchedValue );
}
// Return the parsed data.
return( arrData );
}
</script>
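As a quick illustration (this sample call is not from the original blog entry), CSVToArray can be used directly on a CSV string:
var rows = CSVToArray('a,b\n"c,d",e');
console.log(rows); // -> [ [ 'a', 'b' ], [ 'c,d', 'e' ] ]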
jQuery-CSV
It's a jQuery plugin designed to work as an end-to-end solution for parsing CSV into JavaScript data. It handles every single edge case presented in RFC 4180, as well as some that pop up for Excel/Google spreadsheet exports (i.e., mostly involving null values) that the specification is missing.
Example:
track,artist,album,year
Dangerous,'Busta Rhymes','When Disaster Strikes',1997
// Calling this
music = $.csv.toArrays(csv)
// Outputs...
[
["track", "artist", "album", "year"],
["Dangerous", "Busta Rhymes", "When Disaster Strikes", "1997"]
]
console.log(music[1][2]) // Outputs: 'When Disaster Strikes'
Update:
Oh yeah, I should also probably mention that it's completely configurable.
music = $.csv.toArrays(csv, {
delimiter: "'", // Sets a custom value delimiter character
separator: ';', // Sets a custom field separator character
});
Update 2:
It now works with jQuery on Node.js too. So you have the option of doing either client-side or server-side parsing with the same library.
Update 3:
Since the Google Code shutdown, jquery-csv has been migrated to GitHub.
Disclaimer: I am also the author of jQuery-CSV.
Here's an extremely simple CSV parser that handles quoted fields with commas, new lines, and escaped double quotation marks. There's no splitting or regular expression. It scans the input string 1-2 characters at a time and builds an array.
Test it at http://jsfiddle.net/vHKYH/.
function parseCSV(str) {
var arr = [];
var quote = false; // 'true' means we're inside a quoted field
// Iterate over each character, keep track of current row and column (of the returned array)
for (var row = 0, col = 0, c = 0; c < str.length; c++) {
var cc = str[c], nc = str[c+1]; // Current character, next character
arr[row] = arr[row] || []; // Create a new row if necessary
arr[row][col] = arr[row][col] || ''; // Create a new column (start with empty string) if necessary
// If the current character is a quotation mark, and we're inside a
// quoted field, and the next character is also a quotation mark,
// add a quotation mark to the current column and skip the next character
if (cc == '"' && quote && nc == '"') { arr[row][col] += cc; ++c; continue; }
// If it's just one quotation mark, begin/end quoted field
if (cc == '"') { quote = !quote; continue; }
// If it's a comma and we're not in a quoted field, move on to the next column
if (cc == ',' && !quote) { ++col; continue; }
// If it's a newline (CRLF) and we're not in a quoted field, skip the next character
// and move on to the next row and move to column 0 of that new row
if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; }
// If it's a newline (LF or CR) and we're not in a quoted field,
// move on to the next row and move to column 0 of that new row
if (cc == '\n' && !quote) { ++row; col = 0; continue; }
if (cc == '\r' && !quote) { ++row; col = 0; continue; }
// Otherwise, append the current character to the current column
arr[row][col] += cc;
}
return arr;
}
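A small usage sketch for the parser above (the sample data is made up for illustration):
var rows = parseCSV('name,comment\r\n"Doe, John","She said ""hi"""\r\nBob,ok');
console.log(rows);
// -> [ [ 'name', 'comment' ], [ 'Doe, John', 'She said "hi"' ], [ 'Bob', 'ok' ] ]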
I have an implementation as part of a spreadsheet project.
This code is not yet tested thoroughly, but anyone is welcome to use it.
As some of the answers noted though, your implementation can be much simpler if you actually have a DSV or TSV file, as they disallow the use of the record and field separators in the values. CSV, on the other hand, can actually have commas and newlines inside a field, which breaks most regular-expression and split-based approaches.
var CSV = {
parse: function(csv, reviver) {
reviver = reviver || function(r, c, v) { return v; };
var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;
while (c < cc) {
table.push(row = []);
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c]) {
start = end = c;
if ('"' === chars[c]){
start = end = ++c;
while (c < cc) {
if ('"' === chars[c]) {
if ('"' !== chars[c+1]) {
break;
}
else {
chars[++c] = ''; // unescape ""
}
}
end = ++c;
}
if ('"' === chars[c]) {
++c;
}
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
++c;
}
} else {
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
end = ++c;
}
}
row.push(reviver(table.length-1, row.length, chars.slice(start, end).join('')));
if (',' === chars[c]) {
++c;
}
}
if ('\r' === chars[c]) {
++c;
}
if ('\n' === chars[c]) {
++c;
}
}
return table;
},
stringify: function(table, replacer) {
replacer = replacer || function(r, c, v) { return v; };
var csv = '', c, cc, r, rr = table.length, cell;
for (r = 0; r < rr; ++r) {
if (r) {
csv += '\r\n';
}
for (c = 0, cc = table[r].length; c < cc; ++c) {
if (c) {
csv += ',';
}
cell = replacer(r, c, table[r][c]);
if (/[,\r\n"]/.test(cell)) {
cell = '"' + cell.replace(/"/g, '""') + '"';
}
csv += (cell || 0 === cell) ? cell : '';
}
}
return csv;
}
};
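A minimal usage sketch for the CSV helper above (the data is illustrative only):
var table = CSV.parse('a,b\r\n"1,5",2');
console.log(table);                // -> [ [ 'a', 'b' ], [ '1,5', '2' ] ]
console.log(CSV.stringify(table)); // -> 'a,b\r\n"1,5",2'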
csvToArray v1.3
A compact (645 bytes), but compliant function to convert a CSV string into a 2D array, conforming to the RFC4180 standard.
https://code.google.com/archive/p/csv-to-array/downloads
Common Usage: jQuery
$.ajax({
url: "test.csv",
dataType: 'text',
cache: false
}).done(function(csvAsString){
csvAsArray=csvAsString.csvToArray();
});
Common usage: JavaScript
csvAsArray = csvAsString.csvToArray();
Override field separator
csvAsArray = csvAsString.csvToArray("|");
Override record separator
csvAsArray = csvAsString.csvToArray("", "#");
Override Skip Header
csvAsArray = csvAsString.csvToArray("", "", 1);
Override all
csvAsArray = csvAsString.csvToArray("|", "#", 1);
Here's my PEG(.js) grammar that seems to do ok at RFC 4180 (i.e. it handles the examples at http://en.wikipedia.org/wiki/Comma-separated_values):
start
= [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }
line
= first:field rest:("," text:field { return text; })*
& { return !!first || rest.length; } // ignore blank lines
{ rest.unshift(first); return rest; }
field
= '"' text:char* '"' { return text.join(''); }
/ text:[^\n\r,]* { return text.join(''); }
char
= '"' '"' { return '"'; }
/ [^"]
Try it out at http://jsfiddle.net/knvzk/10 or http://pegjs.majda.cz/online. Download the generated parser at https://gist.github.com/3362830.
Here's another solution. This uses:
a coarse global regular expression for splitting the CSV string (which includes surrounding quotes and trailing commas)
a fine-grained regular expression for cleaning up the surrounding quotes and trailing commas
it also has type correction, differentiating strings, numbers, boolean values and null values
For the following input string:
"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,
The code outputs:
[
"This is, a value",
"Hello",
4,
-123,
3.1415,
"This is also, possible",
true,
null
]
Here's my implementation of parseCSVLine() in a runnable code snippet:
function parseCSVLine(text) {
return text.match( /\s*(\"[^"]*\"|'[^']*'|[^,]*)\s*(,|$)/g ).map( function (text) {
let m;
if (m = text.match(/^\s*,?$/)) return null; // null value
if (m = text.match(/^\s*\"([^"]*)\"\s*,?$/)) return m[1]; // Double Quoted Text
if (m = text.match(/^\s*'([^']*)'\s*,?$/)) return m[1]; // Single Quoted Text
if (m = text.match(/^\s*(true|false)\s*,?$/)) return m[1] === "true"; // Boolean
if (m = text.match(/^\s*((?:\+|\-)?\d+)\s*,?$/)) return parseInt(m[1]); // Integer Number
if (m = text.match(/^\s*((?:\+|\-)?\d*\.\d*)\s*,?$/)) return parseFloat(m[1]); // Floating Number
if (m = text.match(/^\s*(.*?)\s*,?$/)) return m[1]; // Unquoted Text
return text;
} );
}
let data = `"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,`;
let obj = parseCSVLine(data);
console.log( JSON.stringify( obj, undefined, 2 ) );
Here's my simple vanilla JavaScript code:
let a = 'one,two,"three, but with a comma",four,"five, with ""quotes"" in it.."'
console.log(splitQuotes(a))
function splitQuotes(line) {
if(line.indexOf('"') < 0)
return line.split(',')
let result = [], cell = '', quote = false;
for(let i = 0; i < line.length; i++) {
char = line[i]
if(char == '"' && line[i+1] == '"') {
cell += char
i++
} else if(char == '"') {
quote = !quote;
} else if(!quote && char == ',') {
result.push(cell)
cell = ''
} else {
cell += char
}
if ( i == line.length-1 && cell) {
result.push(cell)
}
}
return result
}
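For reference, splitQuotes(a) above should return:
[ 'one', 'two', 'three, but with a comma', 'four', 'five, with "quotes" in it..' ]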
I'm not sure why I couldn't get Kirtan's example to work for me. It seemed to be failing on empty fields or maybe fields with trailing commas...
This one seems to handle both.
I did not write the parser code, just a wrapper around the parser function to make this work for a file. See attribution.
var Strings = {
/**
* Wrapped CSV line parser
* #param s String delimited CSV string
* #param sep Separator override
* #attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
*/
parseCSV : function(s,sep) {
// http://stackoverflow.com/questions/1155678/javascript-string-newline-character
var universalNewline = /\r\n|\r|\n/g;
var a = s.split(universalNewline);
for(var i in a){
for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
} else if (x) {
f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
} else f = f.shift().split(sep).concat(f);
} else f[x].replace(/""/g, '"');
} a[i] = f;
}
return a;
}
}
Regular expressions to the rescue! These few lines of code handle properly quoted fields with embedded commas, quotes, and newlines based on the RFC 4180 standard.
function parseCsv(data, fieldSep, newLine) {
fieldSep = fieldSep || ',';
newLine = newLine || '\n';
var nSep = '\x1D';
var qSep = '\x1E';
var cSep = '\x1F';
var nSepRe = new RegExp(nSep, 'g');
var qSepRe = new RegExp(qSep, 'g');
var cSepRe = new RegExp(cSep, 'g');
var fieldRe = new RegExp('(?<=(^|[' + fieldSep + '\\n]))"(|[\\s\\S]+?(?<![^"]"))"(?=($|[' + fieldSep + '\\n]))', 'g');
var grid = [];
data.replace(/\r/g, '').replace(/\n+$/, '').replace(fieldRe, function(match, p1, p2) {
return p2.replace(/\n/g, nSep).replace(/""/g, qSep).replace(/,/g, cSep);
}).split(/\n/).forEach(function(line) {
var row = line.split(fieldSep).map(function(cell) {
return cell.replace(nSepRe, newLine).replace(qSepRe, '"').replace(cSepRe, ',');
});
grid.push(row);
});
return grid;
}
const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const separator = ','; // field separator, default: ','
const newline = ' <br /> '; // newline representation in case a field contains newlines, default: '\n'
var grid = parseCsv(csv, separator, newline);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C <br /> 2' ] ]
You don't need a parser-generator such as lex/yacc. The regular expression handles RFC 4180 properly thanks to positive lookbehind, negative lookbehind, and positive lookahead.
Clone/download code at https://github.com/peterthoeny/parse-csv-js
Just throwing this out there.. I recently ran into the need to parse CSV columns with Javascript, and I opted for my own simple solution. It works for my needs, and may help someone else.
const csvString = '"Some text, some text",,"",true,false,"more text","more,text, more, text ",true';
const parseCSV = text => {
const lines = text.split('\n');
const output = [];
lines.forEach(line => {
line = line.trim();
if (line.length === 0) return;
const skipIndexes = {};
const columns = line.split(',');
output.push(columns.reduce((result, item, index) => {
if (skipIndexes[index]) return result;
if (item.startsWith('"') && !item.endsWith('"')) {
while (!columns[index + 1].endsWith('"')) {
index++;
item += `,${columns[index]}`;
skipIndexes[index] = true;
}
index++;
skipIndexes[index] = true;
item += `,${columns[index]}`;
}
result.push(item);
return result;
}, []));
});
return output;
};
console.log(parseCSV(csvString));
Personally, I like to use the Deno std library, since most modules are officially compatible with the browser.
The problem is that the std is written in TypeScript, but an official solution might happen in the future: https://github.com/denoland/deno_std/issues/641 https://github.com/denoland/dotland/issues/1728
For now there is an actively maintained on-the-fly transpiler, https://bundle.deno.dev/,
so you can use it simply like this:
<script type="module">
import { parse } from "https://bundle.deno.dev/https://deno.land/std@0.126.0/encoding/csv.ts"
console.log(await parse("a,b,c\n1,2,3"))
</script>
I have constructed this JavaScript to parse a CSV string into an array. I find it better to break the whole CSV down into lines and fields and process them accordingly. I think that will make it easy for you to change the code to suit your needs.
//
//
// CSV to object
//
//
const new_line_char = '\n';
const field_separator_char = ',';
function parse_csv(csv_str) {
var result = [];
let line_end_index_moved = false;
let line_start_index = 0;
let line_end_index = 0;
let csr_index = 0;
let cursor_val = csv_str[csr_index];
let found_new_line_char = get_new_line_char(csv_str);
let in_quote = false;
// Handle \r\n
if (found_new_line_char == '\r\n') {
csv_str = csv_str.split(found_new_line_char).join(new_line_char);
}
// Handle the last character is not \n
if (csv_str[csv_str.length - 1] !== new_line_char) {
csv_str += new_line_char;
}
while (csr_index < csv_str.length) {
if (cursor_val === '"') {
in_quote = !in_quote;
} else if (cursor_val === new_line_char) {
if (in_quote === false) {
if (line_end_index_moved && (line_start_index <= line_end_index)) {
result.push(parse_csv_line(csv_str.substring(line_start_index, line_end_index)));
line_start_index = csr_index + 1;
} // Else: just ignore line_end_index has not moved or line has not been sliced for parsing the line
} // Else: just ignore because we are in a quote
}
csr_index++;
cursor_val = csv_str[csr_index];
line_end_index = csr_index;
line_end_index_moved = true;
}
// Handle \r\n
if (found_new_line_char == '\r\n') {
let new_result = [];
let curr_row;
for (var i = 0; i < result.length; i++) {
curr_row = [];
for (var j = 0; j < result[i].length; j++) {
curr_row.push(result[i][j].split(new_line_char).join('\r\n'));
}
new_result.push(curr_row);
}
result = new_result;
}
return result;
}
function parse_csv_line(csv_line_str) {
var result = [];
let field_end_index_moved = false;
let field_start_index = 0;
let field_end_index = 0;
let csr_index = 0;
let cursor_val = csv_line_str[csr_index];
let in_quote = false;
// Pretend that the last char is the separator_char to complete the loop
csv_line_str += field_separator_char;
while (csr_index < csv_line_str.length) {
if (cursor_val === '"') {
in_quote = !in_quote;
} else if (cursor_val === field_separator_char) {
if (in_quote === false) {
if (field_start_index <= field_end_index) {
result.push(parse_csv_field(csv_line_str.substring(field_start_index, field_end_index)));
field_start_index = csr_index + 1;
} // Else: just ignore field_end_index has not moved or field has not been sliced for parsing the field
} // Else: just ignore because we are in quote
}
csr_index++;
cursor_val = csv_line_str[csr_index];
field_end_index = csr_index;
field_end_index_moved = true;
}
return result;
}
function parse_csv_field(csv_field_str) {
const with_quote = (csv_field_str[0] === '"');
if (with_quote) {
csv_field_str = csv_field_str.substring(1, csv_field_str.length - 1); // remove the start and end quotes
csv_field_str = csv_field_str.split('""').join('"'); // handle double quotes
}
return csv_field_str;
}
// Initial method: check the first newline character only
function get_new_line_char(csv_str) {
if (csv_str.indexOf('\r\n') > -1) {
return '\r\n';
} else {
return '\n'
}
}
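A minimal usage sketch for parse_csv above (illustrative data):
console.log(parse_csv('a,b\n"1,5",2'));
// -> [ [ 'a', 'b' ], [ '1,5', '2' ] ]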
Just use .split(','):
var str = "a,b,c,d";
var arr = str.split(",");

Get function parameter length including default params

If you make use of the Function.length property, you get the number of arguments that the function expects.
However, according to the documentation (as well as actually trying it out), it does not include default parameters in the count.
This number excludes the rest parameter and only includes parameters before the first one with a default value
- Function.length
Is it possible for me to somehow get a count (from outside the function) which includes Default parameters as well?
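To illustrate the behaviour being asked about:
// Function.length counts only the parameters before the first default value
// and excludes rest parameters:
function f(a, b, c = 1, ...rest) {}
console.log(f.length); // 2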
Maybe you can parse it yourself, something like:
function getNumArguments(func) {
var s = func.toString();
var index1 = s.indexOf('(');
var index2 = s.indexOf(')');
return s.substr(index1 + 1, index2 - index1 - 1).split(',').length;
}
console.log(getNumArguments(function(param1, param3 = 'test', ...param2) {})); //3
Copying my answer over to here from a duplicate question:
Well, it's a bit of a mess but I believe this should cover most edge cases.
It works by converting the function to a string and counting the commas, but ignoring commas that are in strings, in function calls, or in objects/arrays. I can't think of any scenarios where this won't return the proper amount, but I'm sure there is one, so this is in no way foolproof, but should work in most cases.
UPDATE: It's been pointed out to me that this won't work for cases such as getNumArgs(a => {}) or getNumArgs(function(a){}.bind(null)), so be aware of that if you try to use this.
function getNumArgs(func) {
var funcStr = func.toString();
var commaCount = 0;
var bracketCount = 0;
var lastParen = 0;
var inStrSingle = false;
var inStrDouble = false;
for (var i = 0; i < funcStr.length; i++) {
if (['(', '[', '{'].includes(funcStr[i]) && !inStrSingle && !inStrDouble) {
bracketCount++;
lastParen = i;
} else if ([')', ']', '}'].includes(funcStr[i]) && !inStrSingle && !inStrDouble) {
bracketCount--;
if (bracketCount < 1) {
break;
}
} else if (funcStr[i] === "'" && !inStrDouble && funcStr[i - 1] !== '\\') {
inStrSingle = !inStrSingle;
} else if (funcStr[i] === '"' && !inStrSingle && funcStr[i - 1] !== '\\') {
inStrDouble = !inStrDouble;
} else if (funcStr[i] === ',' && bracketCount === 1 && !inStrSingle && !inStrDouble) {
commaCount++;
}
}
// Handle no arguments (last opening parenthesis to the last closing one is empty)
if (commaCount === 0 && funcStr.substring(lastParen + 1, i).trim().length === 0) {
return 0;
}
return commaCount + 1;
}
Here are a few tests I tried it on: https://jsfiddle.net/ekzuvL0c/
Here is a function to retrieve the 'length' of a function (expression or declaration) or an arrow function expression (afe). It uses a regular expression to extract the arguments part from the stringified function/afe (the part between () or before =>) and another regular expression to clean up default values that are strings. After the cleanups, it counts the commas, taking the brackets within the arguments string into account.
Note: this will always be an approximation. There are edge cases that won't be covered. See the tests in this Stackblitz snippet.
const determineFnLength = fnLenFactory();
console.log(`fnTest.length: ${determineFnLength(fnTest)}`);
function fnTest(a,
b,
c = 'with escaped \' quote and, comma',
d = "and double \" quotes, too!" ) { console.log(`test123`); }
function fnLenFactory() {
const fnExtractArgsRE = /(^[a-z_](?=(=>|=>{)))|((^\([^)].+\)|\(\))(?=(=>|{)))/g;
const valueParamsCleanupRE = /(?<=[`"'])([^\`,].+?)(?=[`"'])/g;
const countArgumentsByBrackets = params => {
let [commaCount, bracketCount, bOpen, bClose] = [0, 0, [...`([{`], [...`)]}`]];
[...params].forEach( chr => {
bracketCount += bOpen.includes(chr) ? 1 : bClose.includes(chr) ? -1 : 0;
commaCount += chr === ',' && bracketCount === 1 ? 1 : 0; } );
return commaCount + 1; };
const extractArgumentsPartFromFunction = fn => {
let fnStr = fn.toString().replace(RegExp(`\\s|function|${fn.name}`, `g`), ``);
fnStr = (fnStr.match(fnExtractArgsRE) || [fn])[0]
.replace(valueParamsCleanupRE, ``);
return !fnStr.startsWith(`(`) ? `(${fnStr})` : fnStr; };
return (func, forTest = false) => {
const params = extractArgumentsPartFromFunction(func);
const nParams = params === `()` ? 0 : countArgumentsByBrackets(params);
return forTest ? [params, nParams] : nParams;
};
}

Comma separated values except for those inside of double quotes [duplicate]

I have a service that formats strings in certain fields. Basically, when a user clicks out of the input box (on blur), the string is cleansed of illegal characters, and whitespace is replaced with commas. This is fine, but I would like to allow a user to add double quotes around grouped words. On blur, this should remove the quotes, but maintain the space in between the words, and then add a comma afterwards. I have tried everything but I can't get this to work. Here is how my service is currently set up:
angular.module('testApp')
.factory('formatStringService', [
function () {
return {
formatString: function (string) {
var styleStr = string;
if (styleStr === undefined) {
return;
}
styleStr = this.stringReplace(styleStr, '\n', ',');
styleStr = this.stringReplace(styleStr, '\t', ',');
styleStr = this.stringReplace(styleStr, ' ', ',');
styleStr = this.stringReplace(styleStr, ';', ',');
styleStr = this.newLine(styleStr);
styleStr = this.validated(styleStr);
for (var g = 0; g < 9; g++) {
styleStr = this.stringReplace(styleStr, ',,', ',');
}
if (styleStr.charAt(styleStr.length - 1) === ',') {
styleStr = styleStr.substr(0, (styleStr.length - 1));
}
if (styleStr.charAt(0) === '*') {
styleStr = styleStr.substr(1, (styleStr.length - 1));
}
if (styleStr.charAt(styleStr.length - 1) === '*') {
styleStr = styleStr.substr(0, (styleStr.length - 1));
}
return styleStr;
},
stringReplace: function (string, text, by) {
var strLength = string.length,
txtLength = text.length;
if ((strLength === 0) || (txtLength === 0)) {
return string;
}
var i = string.indexOf(text);
if ((!i) && (text !== string.substring(0, txtLength))) {
return string;
}
if (i === -1) {
return string;
}
var newstr = string.substring(0, i) + by;
if (i + txtLength < strLength) {
newstr += this.stringReplace(string.substring(i + txtLength, strLength), text, by);
}
return newstr;
},
validated: function (string) {
for (var i = 0, output = '', valid = '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,~#+/\\*- '; i < string.length; i++) {
if (valid.indexOf(string.charAt(i)) !== -1) {
output += string.charAt(i);
}
}
return output;
},
newLine: function (string) {
for (var i = 0, output = ''; i < string.length; i++) {
if (string.charCodeAt(i) !== 10) {
output += string.charAt(i);
}
}
return output;
}
};
}
]);
Input string example: 1 2 3 "test test" 7 8
Should output: 1,2,3,test test,7,8
Here's a neat regex trick that you can use for this purpose:
var indices = [], match,
re = /"[^"]*"|( )/g,
str = '1 2 3 "test test" 7 8';
while ((match = re.exec(str)) !== null) {
if (match[1] !== undefined) indices.push(match.index);
}
var split = [], prevIndex = -1;
indices.forEach(function(index) {
split.push(str.slice(prevIndex + 1, index));
prevIndex = index;
});
split.push(str.slice(prevIndex + 1)); // include the final piece after the last space
document.getElementById('output').innerText = split.join('\n');
<pre id='output'></pre>
What we're doing here is matching on the regex /"[^"]*"|( )/—that is, either "stuff between quotes" or "a single space." So if we find a quote, we immediately start matching "stuff between quotes" (because regex is greedy), and hence any spaces between quotes are just gobbled up in that section of the regex.
Then we know that the ( ) will only be matched if we're not inside double quotes. So we stick the space into a capture group, and then for every match we can simply check whether the capture group exists.
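The same trick also works directly with String.prototype.replace to build the comma-separated result the question asks for (a small variant, not part of the original answer): spaces outside quotes become commas, and quoted chunks are replaced by their unquoted contents.
var out = '1 2 3 "test test" 7 8'
  .replace(/"([^"]*)"|( )/g, (m, quoted, space) => space ? ',' : quoted);
console.log(out); // -> 1,2,3,test test,7,8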
Using a positive lookahead, something like this should do it:
'1 2 3 "test test" 7 8'.match(/(".*?"|[^"\s]+)(?=\s|$)/g)
Reg Exp Visualizer
