non-recursive JavaScript JSON parser - javascript

I have a very large JSON string that I need to parse with in-browser JavaScript. Right now, in a few browsers, I run out of stack space. Unfortunately, my JSON can contain user strings, so I can't use eval or otherwise let the browser parse it.
I've looked at a few of the standard JavaScript JSON parsers, and they are recursive. Wondering if anyone knows of any JSON parser that is safe and non-recursive. I'd be willing for it to have fewer features -- I just have a giant array of objects.
Alternatively, if someone knows of one that might be easy to modify, that would be a big help too.
EDIT: On closer inspection the stack overflow is thrown by eval() used inside the parser. So, it must be recursive.

If eval throws a stack overflow, you can use json-sans-eval:
http://code.google.com/p/json-sans-eval/
It's a JSON parser that doesn't use eval() at all.
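If I remember right it exposes a single jsonParse() function once the script is included; treat the exact name as an assumption and check the build you download:
// Assumes json-sans-eval has been loaded via a <script> tag and
// exposes a global jsonParse() function (verify against the download).
var data = jsonParse('[{"id": 1}, {"id": 2}]');
console.log(data.length); // 2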

I have written JSON parsers that are not recursive in several languages, but until now not in JavaScript. Instead of being recursive, this one uses a local array named stack. In ActionScript this was substantially faster and more memory efficient than recursion, and I assume JavaScript would be similar.
This implementation uses eval only for quoted strings with backslash escapes, as both an optimization and simplification. That could easily be replaced with the string handling from any other parser. The escape handling code is long and not related to recursion.
This implementation is not strict in (at least) the following ways. It treats 8 bit characters as whitespace. It allows leading "+" and "0" in numbers. It allows trailing "," in arrays and objects. It ignores input after the first result. So "[+09,]2" returns [9] and ignores "2".
function parseJSON( inJSON ) {
var result;
var parent;
var string;
var depth = 0;
var stack = new Array();
var state = 0;
var began , place = 0 , limit = inJSON.length;
var letter;
while ( place < limit ) {
letter = inJSON.charCodeAt( place++ );
if ( letter <= 0x20 || letter >= 0x7F ) { // whitespace or control
} else if ( letter === 0x22 ) { // " string
var slash = 0;
var plain = true;
began = place - 1;
while ( place < limit ) {
letter = inJSON.charCodeAt( place++ );
if ( slash !== 0 ) {
slash = 0;
} else if ( letter === 0x5C ) { // \ escape
slash = 1;
plain = false;
} else if ( letter === 0x22 ) { // " string
if ( plain ) {
result = inJSON.substring( began + 1 , place - 1 );
} else {
string = inJSON.substring( began , place );
result = eval( string ); // eval to unescape
}
break;
}
}
} else if ( letter === 0x7B ) { // { object
stack[depth++] = state;
stack[depth++] = parent;
parent = new Object();
result = undefined;
state = letter;
} else if ( letter === 0x7D ) { // } object
if ( state === 0x3A ) {
parent[stack[--depth]] = result;
state = stack[--depth];
}
if ( state === 0x7B ) {
result = parent;
parent = stack[--depth];
state = stack[--depth];
} else {
// error got } expected state {
result = undefined;
break;
}
} else if ( letter === 0x5B ) { // [ array
stack[depth++] = state;
stack[depth++] = parent;
parent = new Array();
result = undefined;
state = letter;
} else if ( letter === 0x5D ) { // ] array
if ( state === 0x5B ) {
if ( undefined !== result ) parent.push( result );
result = parent;
parent = stack[--depth];
state = stack[--depth];
} else {
// error got ] expected state [
result = undefined;
break;
}
} else if ( letter === 0x2C ) { // , delimiter
if ( undefined === result ) {
// error got , expected previous value
break;
} else if ( state === 0x3A ) {
parent[stack[--depth]] = result;
state = stack[--depth];
result = undefined;
} else if ( state === 0x5B ) {
parent.push( result );
result = undefined;
} else {
// error got , expected state [ or :
result = undefined;
break;
}
} else if ( letter === 0x3A ) { // : assignment
if ( state === 0x7B ) {
// could verify result is string
stack[depth++] = state;
stack[depth++] = result;
state = letter;
result = undefined;
} else {
// error got : expected state {
result = undefined;
break;
}
} else {
if ( ( letter >= 0x30 && letter <= 0x39 ) || letter === 0x2B || letter === 0x2D || letter === 0x2E ) {
var exponent = -2;
var real = ( letter === 0x2E );
var digits = ( letter >= 0x30 && letter <= 0x39 ) ? 1 : 0;
began = place - 1;
while ( place < limit ) {
letter = inJSON.charCodeAt( place++ );
if ( letter >= 0x30 && letter <= 0x39 ) { // digit
digits += 1;
} else if ( letter === 0x2E ) { // .
if ( real ) break;
else real = true;
} else if ( letter === 0x45 || letter === 0x65 ) { // e E
if ( exponent > began || 0 === digits ) break;
else exponent = place - 1;
real = true;
} else if ( letter === 0x2B || letter === 0x2D ) { // + -
if ( place != exponent + 2 ) break;
} else {
break;
}
}
place -= 1;
string = inJSON.substring( began , place );
if ( 0 === digits ) break; // error expected digits
if ( real ) result = parseFloat( string );
else result = parseInt( string , 10 );
} else if ( letter === 0x6E && 'ull' === inJSON.substr( place , 3 ) ) {
result = null;
place += 3;
} else if ( letter === 0x74 && 'rue' === inJSON.substr( place , 3 ) ) {
result = true;
place += 3;
} else if ( letter === 0x66 && 'alse' === inJSON.substr( place , 4 ) ) {
result = false;
place += 4;
} else {
// error unrecognized literal
result = undefined;
break;
}
}
if ( 0 === depth ) break;
}
return result;
}
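For example, on a small input it behaves as you'd expect:
var parsed = parseJSON('[{"id":1,"label":"a"},{"id":2,"label":"b"}]');
console.log(parsed.length);   // 2
console.log(parsed[1].label); // "b"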

I recommend dividing the JSON string into chunks and bringing them in on demand, perhaps via AJAX; that way you can put together a recipe that fits your needs (see the sketch below).
Using a "divide and conquer" approach, I think you can still use the common JSON parsing methods.
Hope that helps,

JSON parsing in the browser is usually done with plain eval, but with a regular-expression "lint" pass run first, which is supposed to make the JSON safe to evaluate.
There is an example on this on wikipedia:
http://en.wikipedia.org/wiki/JSON#Security_issues
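For reference, the classic safety check from RFC 4627 (which that article describes) strips string literals and then verifies that only JSON punctuation, numbers and the literal keywords remain before calling eval; roughly:
function safeEvalJSON(text) {
  // Remove string literals first, then make sure nothing but JSON
  // punctuation, digits and the true/false/null keywords is left.
  var stripped = text.replace(/"(\\.|[^"\\])*"/g, '');
  if (!/[^,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]/.test(stripped)) {
    return eval('(' + text + ')');
  }
  throw new Error('Input is not safe to eval as JSON');
}
Note that this still goes through eval, so it addresses injection but not the original stack-depth problem.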

Related

check the alphabetical order

I am a newbie who is trying hard to get a grip on JavaScript. Please help me consolidate my fundamentals.
The input will be a string of letters.
Following are the requirements.
The function should return true if either of the following conditions is satisfied:
the letters are in alphabetical order (case insensitive);
only one letter is passed as input. Examples:
isAlphabet ('abc') === true
isAlphabet ('aBc') === true
isAlphabet ('a') === true
isAlphabet ('mnoprqst') === false
isAlphabet ('') === false
isAlphabet ('tt') === false
function isAlphabet(letters) {
const string = letters.toLowerCase();
for (let i = 0; i < string.length; i++) {
const diff = string.charCodeAt(i + 1) - string.charCodeAt(i);
if (diff === 1) {
continue;
} else if (string === '') {
return false;
} else if (string.length === 1) {
return true;
} else {
return false;
}
}
return true;
}
It's generally better practice to start your function off by dealing with the edge cases rather than putting them somewhere in the middle. That way, the function returns as soon as it can, and it's a lot easier to read than a waterfall of if..else statements.
function isAlphabet(letters) {
if ("" == letters) {
return false;
}
if (1 == letters.length) {
return true;
}
const string = letters.toLowerCase();
// carry on with your loop here.
}
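For completeness, one way to carry on the loop under that structure, keeping the consecutive-letter check from the question (just a sketch, not the only way to finish it):
function isAlphabet(letters) {
  if ("" == letters) {
    return false;
  }
  if (1 == letters.length) {
    return true;
  }
  const string = letters.toLowerCase();
  // Every adjacent pair must differ by exactly 1 ('a' -> 'b' -> 'c' ...)
  for (let i = 0; i < string.length - 1; i++) {
    if (string.charCodeAt(i + 1) - string.charCodeAt(i) !== 1) {
      return false;
    }
  }
  return true;
}
console.log(isAlphabet('abc')); // true
console.log(isAlphabet('tt'));  // false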
You've got the right idea, but it can be simplified to just fail on a particular error condition, i.e. when a smaller character follows a larger one:
function isAlphabet(letters) {
const string = letters.toLowerCase();
let lastChar;
for (let i = 0; i < string.length; i++) {
// Grab a character
let thisChar = string.charCodeAt(i);
// Check for the failure case, when a lower character follows a higher one
if (i && (thisChar < lastChar)) {
return false;
}
// Store this character to check the next one
lastChar = thisChar;
}
// If it got this far then input is valid
return true;
}
console.log(isAlphabet("abc"));
console.log(isAlphabet("aBc"));
console.log(isAlphabet("acb"));
You can use this simpler approach to achieve the same thing:
function isAlphabet(inputString)
{
var sortedString = inputString.toLowerCase().split("").sort().join("");
return sortedString == inputString.toLowerCase();
}
console.log("abc = " + isAlphabet("abc"));
console.log("aBc = " + isAlphabet("aBc"));
console.log("acb = " + isAlphabet("acb"));
console.log("mnoprqst = " + isAlphabet("mnoprqst"));
Note: Mark the answer if it resolves your problem.

Read text file and convert to JSON in JavaScript [duplicate]

Where could I find some JavaScript code to parse CSV data?
You can use the CSVToArray() function mentioned in this blog entry.
<script type="text/javascript">
// ref: http://stackoverflow.com/a/1293163/2343
// This will parse a delimited string into an array of
// arrays. The default delimiter is the comma, but this
// can be overridden in the second argument.
function CSVToArray( strData, strDelimiter ){
// Check to see if the delimiter is defined. If not,
// then default to comma.
strDelimiter = (strDelimiter || ",");
// Create a regular expression to parse the CSV values.
var objPattern = new RegExp(
(
// Delimiters.
"(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
// Quoted fields.
"(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
// Standard fields.
"([^\"\\" + strDelimiter + "\\r\\n]*))"
),
"gi"
);
// Create an array to hold our data. Give the array
// a default empty first row.
var arrData = [[]];
// Create an array to hold our individual pattern
// matching groups.
var arrMatches = null;
// Keep looping over the regular expression matches
// until we can no longer find a match.
while (arrMatches = objPattern.exec( strData )){
// Get the delimiter that was found.
var strMatchedDelimiter = arrMatches[ 1 ];
// Check to see if the given delimiter has a length
// (is not the start of string) and if it matches
// field delimiter. If it does not, then we know
// that this delimiter is a row delimiter.
if (
strMatchedDelimiter.length &&
strMatchedDelimiter !== strDelimiter
){
// Since we have reached a new row of data,
// add an empty row to our data array.
arrData.push( [] );
}
var strMatchedValue;
// Now that we have our delimiter out of the way,
// let's check to see which kind of value we
// captured (quoted or unquoted).
if (arrMatches[ 2 ]){
// We found a quoted value. When we capture
// this value, unescape any double quotes.
strMatchedValue = arrMatches[ 2 ].replace(
new RegExp( "\"\"", "g" ),
"\""
);
} else {
// We found a non-quoted value.
strMatchedValue = arrMatches[ 3 ];
}
// Now that we have our value string, let's add
// it to the data array.
arrData[ arrData.length - 1 ].push( strMatchedValue );
}
// Return the parsed data.
return( arrData );
}
</script>
jQuery-CSV
It's a jQuery plugin designed to work as an end-to-end solution for parsing CSV into JavaScript data. It handles every single edge case presented in RFC 4180, as well as some that pop up for Excel/Google spreadsheet exports (i.e., mostly involving null values) that the specification is missing.
Example:
track,artist,album,year
Dangerous,'Busta Rhymes','When Disaster Strikes',1997
// Calling this
music = $.csv.toArrays(csv)
// Outputs...
[
["track", "artist", "album", "year"],
["Dangerous", "Busta Rhymes", "When Disaster Strikes", "1997"]
]
console.log(music[1][2]) // Outputs: 'When Disaster Strikes'
Update:
Oh yeah, I should also probably mention that it's completely configurable.
music = $.csv.toArrays(csv, {
delimiter: "'", // Sets a custom value delimiter character
separator: ';', // Sets a custom field separator character
});
Update 2:
It now works with jQuery on Node.js too. So you have the option of doing either client-side or server-side parsing with the same library.
Update 3:
Since the Google Code shutdown, jquery-csv has been migrated to GitHub.
Disclaimer: I am also the author of jQuery-CSV.
Here's an extremely simple CSV parser that handles quoted fields with commas, new lines, and escaped double quotation marks. There's no splitting or regular expression. It scans the input string 1-2 characters at a time and builds an array.
Test it at http://jsfiddle.net/vHKYH/.
function parseCSV(str) {
var arr = [];
var quote = false; // 'true' means we're inside a quoted field
// Iterate over each character, keep track of current row and column (of the returned array)
for (var row = 0, col = 0, c = 0; c < str.length; c++) {
var cc = str[c], nc = str[c+1]; // Current character, next character
arr[row] = arr[row] || []; // Create a new row if necessary
arr[row][col] = arr[row][col] || ''; // Create a new column (start with empty string) if necessary
// If the current character is a quotation mark, and we're inside a
// quoted field, and the next character is also a quotation mark,
// add a quotation mark to the current column and skip the next character
if (cc == '"' && quote && nc == '"') { arr[row][col] += cc; ++c; continue; }
// If it's just one quotation mark, begin/end quoted field
if (cc == '"') { quote = !quote; continue; }
// If it's a comma and we're not in a quoted field, move on to the next column
if (cc == ',' && !quote) { ++col; continue; }
// If it's a newline (CRLF) and we're not in a quoted field, skip the next character
// and move on to the next row and move to column 0 of that new row
if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; }
// If it's a newline (LF or CR) and we're not in a quoted field,
// move on to the next row and move to column 0 of that new row
if (cc == '\n' && !quote) { ++row; col = 0; continue; }
if (cc == '\r' && !quote) { ++row; col = 0; continue; }
// Otherwise, append the current character to the current column
arr[row][col] += cc;
}
return arr;
}
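For example, a quoted field containing a comma and an escaped quote, plus a CRLF row break:
var rows = parseCSV('a,"b ""quoted"", c"\r\nd,e');
console.log(rows); // [ [ 'a', 'b "quoted", c' ], [ 'd', 'e' ] ]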
I have an implementation as part of a spreadsheet project.
This code is not yet tested thoroughly, but anyone is welcome to use it.
As some of the answers noted, though, your implementation can be much simpler if you actually have a DSV or TSV file, as they disallow the use of the record and field separators in the values. CSV, on the other hand, can actually have commas and newlines inside a field, which breaks most regular-expression and split-based approaches.
var CSV = {
parse: function(csv, reviver) {
reviver = reviver || function(r, c, v) { return v; };
var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;
while (c < cc) {
table.push(row = []);
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c]) {
start = end = c;
if ('"' === chars[c]){
start = end = ++c;
while (c < cc) {
if ('"' === chars[c]) {
if ('"' !== chars[c+1]) {
break;
}
else {
chars[++c] = ''; // unescape ""
}
}
end = ++c;
}
if ('"' === chars[c]) {
++c;
}
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
++c;
}
} else {
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
end = ++c;
}
}
row.push(reviver(table.length-1, row.length, chars.slice(start, end).join('')));
if (',' === chars[c]) {
++c;
}
}
if ('\r' === chars[c]) {
++c;
}
if ('\n' === chars[c]) {
++c;
}
}
return table;
},
stringify: function(table, replacer) {
replacer = replacer || function(r, c, v) { return v; };
var csv = '', c, cc, r, rr = table.length, cell;
for (r = 0; r < rr; ++r) {
if (r) {
csv += '\r\n';
}
for (c = 0, cc = table[r].length; c < cc; ++c) {
if (c) {
csv += ',';
}
cell = replacer(r, c, table[r][c]);
if (/[,\r\n"]/.test(cell)) {
cell = '"' + cell.replace(/"/g, '""') + '"';
}
csv += (cell || 0 === cell) ? cell : '';
}
}
return csv;
}
};
csvToArray v1.3
A compact (645 bytes), but compliant function to convert a CSV string into a 2D array, conforming to the RFC4180 standard.
https://code.google.com/archive/p/csv-to-array/downloads
Common Usage: jQuery
$.ajax({
url: "test.csv",
dataType: 'text',
cache: false
}).done(function(csvAsString){
csvAsArray=csvAsString.csvToArray();
});
Common usage: JavaScript
csvAsArray = csvAsString.csvToArray();
Override field separator
csvAsArray = csvAsString.csvToArray("|");
Override record separator
csvAsArray = csvAsString.csvToArray("", "#");
Override Skip Header
csvAsArray = csvAsString.csvToArray("", "", 1);
Override all
csvAsArray = csvAsString.csvToArray("|", "#", 1);
Here's my PEG(.js) grammar that seems to do ok at RFC 4180 (i.e. it handles the examples at http://en.wikipedia.org/wiki/Comma-separated_values):
start
= [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }
line
= first:field rest:("," text:field { return text; })*
& { return !!first || rest.length; } // ignore blank lines
{ rest.unshift(first); return rest; }
field
= '"' text:char* '"' { return text.join(''); }
/ text:[^\n\r,]* { return text.join(''); }
char
= '"' '"' { return '"'; }
/ [^"]
Try it out at http://jsfiddle.net/knvzk/10 or http://pegjs.majda.cz/online. Download the generated parser at https://gist.github.com/3362830.
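Generating and using the parser would look roughly like this (a sketch only: it assumes Node and a pegjs release whose API is peg.generate, older builds exposed PEG.buildParser instead, with the grammar above saved to csv.pegjs):
const fs = require('fs');
const peg = require('pegjs');
const grammar = fs.readFileSync('csv.pegjs', 'utf8'); // the rules shown above
const parser = peg.generate(grammar);                 // PEG.buildParser(grammar) in older versions
const rows = parser.parse('a,b,"c,1"\r\nd,e,f');
console.log(rows); // [ [ 'a', 'b', 'c,1' ], [ 'd', 'e', 'f' ] ]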
Here's another solution. This uses:
a coarse global regular expression for splitting the CSV string (which includes surrounding quotes and trailing commas)
a fine-grained regular expression for cleaning up the surrounding quotes and trailing commas
type correction that differentiates strings, numbers, boolean values and null values
For the following input string:
"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,
The code outputs:
[
"This is, a value",
"Hello",
4,
-123,
3.1415,
"This is also, possible",
true,
null
]
Here's my implementation of parseCSVLine() in a runnable code snippet:
function parseCSVLine(text) {
return text.match( /\s*(\"[^"]*\"|'[^']*'|[^,]*)\s*(,|$)/g ).map( function (text) {
let m;
if (m = text.match(/^\s*,?$/)) return null; // null value
if (m = text.match(/^\s*\"([^"]*)\"\s*,?$/)) return m[1]; // Double Quoted Text
if (m = text.match(/^\s*'([^']*)'\s*,?$/)) return m[1]; // Single Quoted Text
if (m = text.match(/^\s*(true|false)\s*,?$/)) return m[1] === "true"; // Boolean
if (m = text.match(/^\s*((?:\+|\-)?\d+)\s*,?$/)) return parseInt(m[1]); // Integer Number
if (m = text.match(/^\s*((?:\+|\-)?\d*\.\d*)\s*,?$/)) return parseFloat(m[1]); // Floating Number
if (m = text.match(/^\s*(.*?)\s*,?$/)) return m[1]; // Unquoted Text
return text;
} );
}
let data = `"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,`;
let obj = parseCSVLine(data);
console.log( JSON.stringify( obj, undefined, 2 ) );
Here's my simple vanilla JavaScript code:
let a = 'one,two,"three, but with a comma",four,"five, with ""quotes"" in it.."'
console.log(splitQuotes(a))
function splitQuotes(line) {
if(line.indexOf('"') < 0)
return line.split(',')
let result = [], cell = '', quote = false;
for(let i = 0; i < line.length; i++) {
let char = line[i]
if(char == '"' && line[i+1] == '"') {
cell += char
i++
} else if(char == '"') {
quote = !quote;
} else if(!quote && char == ',') {
result.push(cell)
cell = ''
} else {
cell += char
}
if ( i == line.length-1 && cell) {
result.push(cell)
}
}
return result
}
I'm not sure why I couldn't get Kirtan's example to work for me. It seemed to be failing on empty fields or maybe fields with trailing commas...
This one seems to handle both.
I did not write the parser code, just a wrapper around the parser function to make this work for a file. See attribution.
var Strings = {
/**
* Wrapped CSV line parser
* @param s String delimited CSV string
* @param sep Separator override
* @attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
*/
parseCSV : function(s,sep) {
// http://stackoverflow.com/questions/1155678/javascript-string-newline-character
var universalNewline = /\r\n|\r|\n/g;
var a = s.split(universalNewline);
for(var i in a){
for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
} else if (x) {
f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
} else f = f.shift().split(sep).concat(f);
} else f[x] = f[x].replace(/""/g, '"');
} a[i] = f;
}
return a;
}
}
Regular expressions to the rescue! These few lines of code handle properly quoted fields with embedded commas, quotes, and newlines based on the RFC 4180 standard.
function parseCsv(data, fieldSep, newLine) {
fieldSep = fieldSep || ',';
newLine = newLine || '\n';
var nSep = '\x1D';
var qSep = '\x1E';
var cSep = '\x1F';
var nSepRe = new RegExp(nSep, 'g');
var qSepRe = new RegExp(qSep, 'g');
var cSepRe = new RegExp(cSep, 'g');
var fieldRe = new RegExp('(?<=(^|[' + fieldSep + '\\n]))"(|[\\s\\S]+?(?<![^"]"))"(?=($|[' + fieldSep + '\\n]))', 'g');
var grid = [];
data.replace(/\r/g, '').replace(/\n+$/, '').replace(fieldRe, function(match, p1, p2) {
return p2.replace(/\n/g, nSep).replace(/""/g, qSep).replace(/,/g, cSep);
}).split(/\n/).forEach(function(line) {
var row = line.split(fieldSep).map(function(cell) {
return cell.replace(nSepRe, newLine).replace(qSepRe, '"').replace(cSepRe, ',');
});
grid.push(row);
});
return grid;
}
const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const separator = ','; // field separator, default: ','
const newline = ' <br /> '; // newline representation in case a field contains newlines, default: '\n'
var grid = parseCsv(csv, separator, newline);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C <br /> 2' ] ]
You don't need a parser-generator such as lex/yacc. The regular expression handles RFC 4180 properly thanks to positive lookbehind, negative lookbehind, and positive lookahead.
Clone/download code at https://github.com/peterthoeny/parse-csv-js
Just throwing this out there... I recently ran into the need to parse CSV columns with JavaScript, and I opted for my own simple solution. It works for my needs, and may help someone else.
const csvString = '"Some text, some text",,"",true,false,"more text","more,text, more, text ",true';
const parseCSV = text => {
const lines = text.split('\n');
const output = [];
lines.forEach(line => {
line = line.trim();
if (line.length === 0) return;
const skipIndexes = {};
const columns = line.split(',');
output.push(columns.reduce((result, item, index) => {
if (skipIndexes[index]) return result;
if (item.startsWith('"') && !item.endsWith('"')) {
while (!columns[index + 1].endsWith('"')) {
index++;
item += `,${columns[index]}`;
skipIndexes[index] = true;
}
index++;
skipIndexes[index] = true;
item += `,${columns[index]}`;
}
result.push(item);
return result;
}, []));
});
return output;
};
console.log(parseCSV(csvString));
Personally I like to use the Deno std library, since most of its modules are officially compatible with the browser.
The problem is that the std library is written in TypeScript, but an official solution might happen in the future: https://github.com/denoland/deno_std/issues/641 https://github.com/denoland/dotland/issues/1728
For now there is an actively maintained on-the-fly transpiler, https://bundle.deno.dev/,
so you can use it simply like this:
<script type="module">
import { parse } from "https://bundle.deno.dev/https://deno.land/std@0.126.0/encoding/csv.ts"
console.log(await parse("a,b,c\n1,2,3"))
</script>
I have written this JavaScript to parse a CSV string into an array of arrays. I find it better to break the whole CSV down into lines and fields and process them accordingly; I think that will make it easy for you to change the code to suit your needs.
//
//
// CSV to object
//
//
const new_line_char = '\n';
const field_separator_char = ',';
function parse_csv(csv_str) {
var result = [];
let line_end_index_moved = false;
let line_start_index = 0;
let line_end_index = 0;
let csr_index = 0;
let cursor_val = csv_str[csr_index];
let found_new_line_char = get_new_line_char(csv_str);
let in_quote = false;
// Handle \r\n
if (found_new_line_char == '\r\n') {
csv_str = csv_str.split(found_new_line_char).join(new_line_char);
}
// Handle the last character is not \n
if (csv_str[csv_str.length - 1] !== new_line_char) {
csv_str += new_line_char;
}
while (csr_index < csv_str.length) {
if (cursor_val === '"') {
in_quote = !in_quote;
} else if (cursor_val === new_line_char) {
if (in_quote === false) {
if (line_end_index_moved && (line_start_index <= line_end_index)) {
result.push(parse_csv_line(csv_str.substring(line_start_index, line_end_index)));
line_start_index = csr_index + 1;
} // Else: just ignore line_end_index has not moved or line has not been sliced for parsing the line
} // Else: just ignore because we are in a quote
}
csr_index++;
cursor_val = csv_str[csr_index];
line_end_index = csr_index;
line_end_index_moved = true;
}
// Handle \r\n
if (found_new_line_char == '\r\n') {
let new_result = [];
let curr_row;
for (var i = 0; i < result.length; i++) {
curr_row = [];
for (var j = 0; j < result[i].length; j++) {
curr_row.push(result[i][j].split(new_line_char).join('\r\n'));
}
new_result.push(curr_row);
}
result = new_result;
}
return result;
}
function parse_csv_line(csv_line_str) {
var result = [];
let field_end_index_moved = false; // assigned below, currently unused
let field_start_index = 0;
let field_end_index = 0;
let csr_index = 0;
let cursor_val = csv_line_str[csr_index];
let in_quote = false;
// Pretend that the last char is the separator_char to complete the loop
csv_line_str += field_separator_char;
while (csr_index < csv_line_str.length) {
if (cursor_val === '"') {
in_quote = !in_quote;
} else if (cursor_val === field_separator_char) {
if (in_quote === false) {
if (field_start_index <= field_end_index) {
result.push(parse_csv_field(csv_line_str.substring(field_start_index, field_end_index)));
field_start_index = csr_index + 1;
} // Else: just ignore field_end_index has not moved or field has not been sliced for parsing the field
} // Else: just ignore because we are in quote
}
csr_index++;
cursor_val = csv_line_str[csr_index];
field_end_index = csr_index;
field_end_index_moved = true;
}
return result;
}
function parse_csv_field(csv_field_str) {
const with_quote = (csv_field_str[0] === '"');
if (with_quote) {
csv_field_str = csv_field_str.substring(1, csv_field_str.length - 1); // remove the start and end quotes
csv_field_str = csv_field_str.split('""').join('"'); // handle double quotes
}
return csv_field_str;
}
// Initial method: check the first newline character only
function get_new_line_char(csv_str) {
if (csv_str.indexOf('\r\n') > -1) {
return '\r\n';
} else {
return '\n'
}
}
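For example, a quoted field containing the separator, plus an escaped quote and CRLF line endings:
var table = parse_csv('name,comment\r\n"Doe, John","said ""hi"""\r\n');
console.log(table); // [ [ 'name', 'comment' ], [ 'Doe, John', 'said "hi"' ] ]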
Just use .split(','):
var str = "How,are,you,doing,today?";
var n = str.split(","); // ["How", "are", "you", "doing", "today?"]

Get function parameter length including default params

If you make use of the Function.length property, you get the total amount of arguments that function expects.
However, according to the documentation (as well as actually trying it out), it does not include Default parameters in the count.
This number excludes the rest parameter and only includes parameters before the first one with a default value
- Function.length
Is it possible for me to somehow get a count (from outside the function) which includes Default parameters as well?
Maybe you can parse it yourself, something like:
function getNumArguments(func) {
var s = func.toString();
var index1 = s.indexOf('(');
var index2 = s.indexOf(')');
return s.substr(index1 + 1, index2 - index1 - 1).split(',').length;
}
console.log(getNumArguments(function(param1, param3 = 'test', ...param2) {})); //3
Copying my answer over to here from a duplicate question:
Well, it's a bit of a mess but I believe this should cover most edge cases.
It works by converting the function to a string and counting the commas, but ignoring commas that are in strings, in function calls, or in objects/arrays. I can't think of any scenarios where this won't return the proper amount, but I'm sure there is one, so this is in no way foolproof, but should work in most cases.
UPDATE: It's been pointed out to me that this won't work for cases such as getNumArgs(a => {}) or getNumArgs(function(a){}.bind(null)), so be aware of that if you try to use this.
function getNumArgs(func) {
var funcStr = func.toString();
var commaCount = 0;
var bracketCount = 0;
var lastParen = 0;
var inStrSingle = false;
var inStrDouble = false;
for (var i = 0; i < funcStr.length; i++) {
if (['(', '[', '{'].includes(funcStr[i]) && !inStrSingle && !inStrDouble) {
bracketCount++;
lastParen = i;
} else if ([')', ']', '}'].includes(funcStr[i]) && !inStrSingle && !inStrDouble) {
bracketCount--;
if (bracketCount < 1) {
break;
}
} else if (funcStr[i] === "'" && !inStrDouble && funcStr[i - 1] !== '\\') {
inStrSingle = !inStrSingle;
} else if (funcStr[i] === '"' && !inStrSingle && funcStr[i - 1] !== '\\') {
inStrDouble = !inStrDouble;
} else if (funcStr[i] === ',' && bracketCount === 1 && !inStrSingle && !inStrDouble) {
commaCount++;
}
}
// Handle no arguments (last opening parenthesis to the last closing one is empty)
if (commaCount === 0 && funcStr.substring(lastParen + 1, i).trim().length === 0) {
return 0;
}
return commaCount + 1;
}
Here are a few tests I tried it on: https://jsfiddle.net/ekzuvL0c/
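A couple of quick checks against the function above:
console.log(getNumArgs(function (a, b = 'x,y', c) {}));          // 3
console.log(getNumArgs(function (a, b = { x: 1 }, ...rest) {})); // 3
console.log(getNumArgs(function () {}));                         // 0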
Here is a function to retrieve the 'length' of a function (expression or object) or an arrow function expression (afe). It uses a regular expression to extract the arguments part from the stringified function/afe (the part between () or before =>) and a regular expression to clean up default values that are strings. After the cleanup, it counts the commas, taking the brackets within the arguments string into account.
Note: this will always be an approximation. There are edge cases that won't be covered. See the tests in this Stackblitz snippet.
const determineFnLength = fnLenFactory();
console.log(`fnTest.length: ${determineFnLength(fnTest)}`);
function fnTest(a,
b,
c = 'with escaped \' quote and, comma',
d = "and double \" quotes, too!" ) { console.log(`test123`); }
function fnLenFactory() {
const fnExtractArgsRE = /(^[a-z_](?=(=>|=>{)))|((^\([^)].+\)|\(\))(?=(=>|{)))/g;
const valueParamsCleanupRE = /(?<=[`"'])([^\`,].+?)(?=[`"'])/g;
const countArgumentsByBrackets = params => {
let [commaCount, bracketCount, bOpen, bClose] = [0, 0, [...`([{`], [...`)]}`]];
[...params].forEach( chr => {
bracketCount += bOpen.includes(chr) ? 1 : bClose.includes(chr) ? -1 : 0;
commaCount += chr === ',' && bracketCount === 1 ? 1 : 0; } );
return commaCount + 1; };
const extractArgumentsPartFromFunction = fn => {
let fnStr = fn.toString().replace(RegExp(`\\s|function|${fn.name}`, `g`), ``);
fnStr = (fnStr.match(fnExtractArgsRE) || [fn])[0]
.replace(valueParamsCleanupRE, ``);
return !fnStr.startsWith(`(`) ? `(${fnStr})` : fnStr; };
return (func, forTest = false) => {
const params = extractArgumentsPartFromFunction(func);
const nParams = params === `()` ? 0 : countArgumentsByBrackets(params);
return forTest ? [params, nParams] : nParams;
};
}

splitting a string based on AND OR logic in javascript

My problem is to split a string which contains a logical operation.
For example, here is my sample string:
var rule = "device2.temperature > 20 || device2.humidity>68 && device3.temperature >10"
I need to parse that string in a way that lets me easily apply my logic, and I am not sure which approach would be better.
PS: Please keep in mind that those rule strings can have 10 or more different condition combinations, like 4 ANDs and 6 ORs.
Assuming no parentheses, I might go with something like this (JavaScript code):
function f(v,op,w){
var ops = {
'>': function(a,b){ return a > b; },
'<': function(a,b){ return a < b; },
'||': function(a,b){ return a || b; },
'&&': function(a,b){ return a && b; },
'==': function(a,b){ return a == b;}
}
if (ops[op]){
return ops[op](v,w);
} else alert('Could not recognize the operator, "' + op + '".');
}
Now if you can manage to get a list of expressions, you can evaluate them in series:
var exps = [[6,'>',7],'||',[12,'<',22], '&&', [5,'==',5]];
var i = 0,
result = typeof exps[i] == 'object' ? f(exps[i][0],exps[i][1],exps[i][2]) : exps[i];
i++;
while (exps[i] !== undefined){
var op = exps[i++],
b = typeof exps[i] == 'object' ? f(exps[i][0],exps[i][1],exps[i][2]) : exps[i];
result = f(result,op,b);
i++;
}
console.log(result);
If you are absolutely sure that the input is always going to be valid JavaScript:
var rule = "device2.temperature > 20 || device2.humidity>68 && device3.temperature >10"
var rulePassed = eval(rule);
Keep in mind that in most cases "eval" is "evil" and has the potential to introduce more problems than it solves.
function parse(rule){
return Function("ctx", "return("+rule.replace(/[a-z$_][a-z0-9$_\.]*/gi, "ctx.$&")+")");
}
This is a little bit better than eval, since it will most likely throw errors when somebody tries to inject some code,
because it will try to access those properties on the ctx object instead of the window object.
var rule = parse("device2.temperature > 20 || device2.humidity>68 && device3.temperature >10");
var data = {
device2: {
temperature: 18,
humidity: 70
},
device3: {
temperature: 15,
humidity: 75
}
};
console.log( rule.toString() );
console.log( rule(data) );
Overkill:
Beware: not fully tested, it may still contain errors.
Also, the code doesn't check whether the syntax is valid; it only throws on a few obvious errors.
var parse = (function(){
function parse(){
var cache = {};
//this may be as evil as eval, so take care how you use it.
function raw(v){ return cache[v] || (cache[v] = Function("return " + v)) }
//parses Strings and converts them to operator-tokens or functions
function parseStrings(v, prop, symbol, number, string){
if(!prop && !symbol && !number && !string){
throw new Error("unexpected/unhandled symbol", v);
}else{
var w;
switch(prop){
//keywords
case "true":
case "false":
case "null":
w = raw( v );
break;
}
tokens.push(
w ||
~unary.indexOf(prop) && v ||
prop && parse.fetch(v) ||
number && raw( number ) ||
string && raw( string ) ||
symbol
);
}
}
var tokens = [];
for(var i = 0; i < arguments.length; ++i){
var arg = arguments[i];
switch(typeof arg){
case "number":
case "boolean":
tokens.push(raw( arg ));
break;
case "function":
tokens.push( arg );
break;
case "string":
//abusing str.replace() as kind of a RegEx.forEach()
arg.replace(matchTokens, parseStrings);
break;
}
}
for(var i = tokens.lastIndexOf("("), j; i>=0; i = tokens.lastIndexOf("(")){
j = tokens.indexOf(")", i);
if(j > 0){
tokens.splice(i, j+1-i, process( tokens.slice( i+1, j ) ));
}else{
throw new Error("mismatching parantheses")
}
}
if(tokens.indexOf(")") >= 0) throw new Error("mismatching parentheses");
return process(tokens);
}
//combines tokens and functions until a single function is left
function process(tokens){
//unary operators like ! ~ + - typeof
unary.forEach(o => {
var i = -1;
while((i = tokens.indexOf(o, i+1)) >= 0){
if((o === "+" || o === "-") && typeof tokens[i-1] === "function") continue;
tokens.splice( i, 2, parse[ unaryMapping[o] || o ]( tokens[i+1] ));
}
})
//binary operators
binary.forEach(o => {
for(var i = tokens.lastIndexOf(o); i >= 0; i = tokens.lastIndexOf(o)){
tokens.splice( i-1, 3, parse[ o ]( tokens[i-1], tokens[i+1] ));
}
})
//ternary operator
for(var i = tokens.lastIndexOf("?"), j; i >= 0; i = tokens.lastIndexOf("?")){
if(tokens[i+2] === ":"){
tokens.splice(i-1, 5, parse.ternary(tokens[i-1], tokens[i+1], tokens[i+3] ));
}else{
throw new Error("unexpected symbol")
}
}
if(tokens.length !== 1){
throw new Error("unparsed tokens left");
}
return tokens[0];
}
var unary = "!,~,+,-,typeof".split(",");
var unaryMapping = { //to avoid collisions with the binary operators
"+": "plus",
"-": "minus"
}
var binary = "**,*,/,%,+,-,<<,>>,>>>,<,<=,>,>=,==,!=,===,!==,&,^,|,&&,||".split(",");
var matchTokens = /([a-z$_][\.a-z0-9$_]*)|([+\-*/!~^]=*|[\(\)?:]|[<>&|=]+)|(\d+(?:\.\d*)?|\.\d+)|(["](?:\\[\s\S]|[^"])+["]|['](?:\\[\s\S]|[^'])+['])|\S/gi;
(function(){
var def = { value: null };
var odp = (k,v) => { def.value = v; Object.defineProperty(parse, k, def) };
unary.forEach(o => {
var k = unaryMapping[o] || o;
k in parse || odp(k, Function("a", "return function(ctx){ return " + o + "(a(ctx)) }"));
})
//most browsers don't support this syntax yet, so I implement this manually
odp("**", (a,b) => (ctx) => Math.pow(a(ctx), b(ctx)));
binary.forEach(o => {
o in parse || odp(o, Function("a,b", "return function(ctx){ return a(ctx) "+o+" b(ctx) }"));
});
odp("ternary", (c,t,e) => ctx => c(ctx)? t(ctx): e(ctx));
odp("fetch", key => {
var a = key.split(".");
return ctx => {
//fetches a path, like devices.2.temperature
//does ctx["devices"][2]["temperature"];
for(var i=0, v = ctx /*|| window*/; i<a.length; ++i){
if(v == null) return void 0;
v = v[a[i]];
}
return v;
}
});
/* some sugar */
var aliases = {
"or": "||",
"and": "&&",
"not": "!"
}
for(var name in aliases) odp(name, parse[aliases[name]]);
})();
return parse;
})();
and your code:
var data = {
device2: {
temperature: 18,
humidity: 70
},
device3: {
temperature: 15,
humidity: 75
}
};
//you get back a function, that expects the context to work on (optional).
//aka. (in wich context/object is `device2` defined?)
var rule = parse("device2.temperature > 20 || device2.humidity>68 && device3.temperature >10");
console.log("your rule resolved:", rule(data));
sugar:
var rule1 = parse("device2.temperature > 20");
var rule2 = parse("device2.humidity>68 && device3.temperature >10");
//partials/combining rules to new ones
//only `and` (a && b), `or` (a || b), `plus` (+value), `minus` (-value) and 'not', (!value) have named aliases
var rule3 = parse.or(rule1, rule2);
//but you can access all operators like this
var rule3 = parse['||'](rule1, rule2);
//or you can combine functions and strings
var rule3 = parse(rule1, "||", rule2);
console.log( "(", rule1(data), "||", rule2(data), ") =", rule3(data) );
//ternary operator and Strings (' and " supported)
var example = parse(rule1, "? 'device2: ' + device2.temperature : 'device3: ' + device3.temperature");
console.log( example(data) )
What else to know:
The code handles operator precedence and supports round brackets.
If a path can't be fetched, the particular function returns undefined (no errors are thrown here).
Access to array keys in the paths: parse("devices.2.temperature") fetches devices[2].temperature.
Not implemented:
Parsing arrays, parsing function calls, and everything around value modification. This engine does some computation: it expects some value in and gives you a value out. No more, no less.

