I'm new to javaScript and am trying to load a CSV or TXT file into the browser.
When the file is selected an event handler displays the file name and details, once the user hits the load button the script should double check the file extension, load the file then carry out some further checks on the file.
My problem is that the file load function seems to always be called last meaning the other checks happen first.
The file is held here: http://bananamountain.net/project/20140703pm/file-loader2.html
Code pasted below:
</head>
<body>
<script>
// Check for the various File API support.
if (window.File && window.FileReader && window.FileList && window.Blob) {
// Great success! All the File APIs are supported.
} else {
alert('The File APIs are not fully supported in this browser.');
}
</script>
<h3>File Load test</h3>
<p>Use only test-data-csv.csv just now</p>
<input type="file" id="file" name="file" required="required" accept=".csv, .txt" />
<button onclick="handleFileLoad()">Load button</button>
<output id="list"></output>
<script>
// global variables
var content;
var fileName;
var splitString = ",";
var rows = new Array();
var headerRow = new Array();
var values = new Array();
function handleFileLoad() {
//var suitableFileType = checkFileType();
//document.write("<strong>Suitable file type: " + suitableFileType + "</strong><br />");
loadFile();
var suitableContent = checkFileContent();
document.write("<strong>Suitable file content: " + suitableContent + "</strong><br />");
}
function checkFileType() {
document.write("inside checkFileType<br/>");
// var testFile = fileName.split(".")[1].toUpperCase();
// document.write("file extension is '" + testFile+ "'<br />");
if ((fileName.split(".")[1].toUpperCase() === "CSV")) {
document.write('suitable file selected<br/>');
return (true);
} else if (fileName.split(".")[1].toUpperCase() === "TXT") {
document.write('suitable file selected<br/>');
return (true);
}else {
document.write('Invalid file format! \nPlease select a suitable .txt or .csv file<br/>');
return (false);
}
} // end of checkFileType - tested WORKING
function loadFile() {
// checkFileType();
var file = document.getElementById("file").files[0];
var reader = new FileReader();
var link_reg = /(http:\/\/|https:\/\/)/i;
reader.onload = function(file) {
// content = reader.result;
content = file.target.result;
rows = file.target.result.split(/[\r\n|\n]+/);
for (var i=0; i<rows.length; i++) {
document.write("row found at line " + i + " is " + rows[i] +".<br/>");
}
};
reader.readAsText(file);
/*
var suitableFileType = checkFileType();
document.write("<strong>Suitable file type: " + suitableFileType + "</strong><br />");
var suitableContent = checkFileContent();
document.write("<strong>Suitable file content: " + suitableContent + "</strong><br />");
var splitStringFound = getSplitString();
document.write("<strong>Split string found: " + splitStringFound + "</strong><br />");
document.write("<strong>Split String: " + splitString + "</strong><br/>");
var replacedHeaders = checkHeaderRow();
document.write("<strong>Header row complete<br />" + replacedHeaders +" headers replaced</strong><br/>");
document.write(content);
document.write(fileName);
document.write(splitString);
document.write(rows);
document.write( headerRow);
document.write(values);*/
return;
}
function checkFileContent() {
document.write("inside check file content<br/>");
// check for file content
// identifies blank lines and deletes them
// checking content of rows
for (var i=0;i<rows.length;i++) {
document.write ("Row " + i + " is " + rows[i]);
}
var filteredArr = rows.filter(function (val) {
return !(val === "" || typeof val == "undefined" || val === null || val === ",," || val === "\t\t");
});
// identifies empty file (e.g. all blank lines deleted)
if (filteredArr.length === 0) {
document.write("Empty file - no data found <br/>");
rows = filteredArr;
return false;
// check for row deletions
} else if (rows.length < filteredArr.length) {
rows = filteredArr;
document.write("blank rows deleted - " + (rows.length - filteredArr.length) + " rows remaining. <br/>");
return ("deletions");
} else {
document.write("No blank rows <br/>");
return true;
}
} // end of check file content - empty file tested, file with one line tested
function checkHeaderRow() {
// check for header row
// words in first non-empty row
var replaceCount = 0;
var checkArray = rows[0].split(splitString);
for (var i = 1; i < checkArray.length; i++) {
// start at array[1] as array[0] not likely to be a header value
// loop through inserting non numeric values into headerRow array
if (isNaN(checkArray[i])) {
headerRow[i - 1] = checkArray[i];
// need a flag to remove this from file once it has been done
replaceCount++;
} else {
headerRow[i - 1] = "Risk " + i;
}
}
// if non numeric values in array[1] delete rows[0]
// so the header row is not included with the data set
if (isNaN(checkArray[1])) {
rows[0] = null;
}
return (replaceCount);
} // end of checkHeaderRow works for all non-numeric, all numeric and mixed
function getSplitString() {
// call countCharacter to return count of comma and tab characters in first five lines
var tabCount = countCharacter("\t");
var commaCount = countCharacter(",");
// compare tabCount and commaCount values
if (tabCount === 0 && commaCount === 0) {
document.write("Cannot detect the value seperator,\n please ammend file to seperate values with tabs or commas");
return false;
}
else if (tabCount === commaCount) {
splitString = prompt("Cannot detect the value seperator,\n please input \"\\t\" for tabs or \",\" for commas");
if ((splitString === null) || (splitString != '\t') || (splitString != ',')) {
document.write("please check file and try again<br/>");
splitString = ',';
return false;
} // NOT WORKING
else {
return true;
}
} else if (tabCount>commaCount) {
splitString = "\t";
if (commaCount!=0) {
document.write("tab character selected as value seperator.<br/>");
// alert as this may not be the case
}
return true;
} else {
splitString=",";
if (tabCount!=0){
document.write("tab character selected as value seperator.<br/>");
// alert as this may not be the case
}
return true;
}
} // end of getSplitString - NOT FULLY WORKING
function splitRows() {
// what if rows is now empty? (e.g. header row only in file)
if (rows[0] != null) {
for (var i=0; i<rows.length;i++) {
values.push(rows[i].split(splitString));
}
return values;
} else {
return false;
}
} // end of splitRows fully working
function checkEmptyCells () {
for (var i=0; i<values.length; i++) {
for (var j=0; j<values[i].length; j++)
if (!((values[i][j] === "") || (typeof values[i][j] == "undefined") || (values[i][j] === null) || (values[i][j] === ",,") || (values[i][j] === "\t\t"))) {
// remove line values[i][j]
document.write("in here");
}
}
} // NOT FININSHED - STOPPED HERE
function countCharacter (character) {
// count the instances of a specified character over first 5 lines (or length of rows array)
// number of rows to loop through
var loopCount=0;
var characterCount=0;
if (rows.length < 5) {
loopCount = rows.length;
} else {
loopCount = 5;
}
for (var count=0; count < loopCount; count++) {
characterCount += rows[count].split(character).length-1;
}
return characterCount;
} // End of countCharacter - WORKING - TESTED
function handleFileSelect(evt) {
var files = evt.target.files; // FileList object
// files is a FileList of File objects. List some properties.
var output = [];
for (var i = 0, f; f = files[i]; i++) { // THIS IS NOT NEEDING TO BE IN A LOOP
output.push('<strong>', escape(f.name), '</strong> ', ' - ',
f.size, ' bytes, last modified: ',
f.lastModifiedDate ? f.lastModifiedDate.toLocaleDateString() : 'n/a',
'');
fileName = escape(f.name);
}
document.getElementById('list').innerHTML = '<div class="file-name">' + output.join('') + '</div>';
}
document.getElementById('file').addEventListener('change', handleFileSelect, false);
</script>
</body>
</html>
So first of all I think I asked the question wrong and it was for that reason that there were limited responses. I'm adding the answer so that if anyone has the same issue and comes across this that it can help them.
The problem was not in the html file loading but a file being loaded through javascript. The script carried out some checks on the file, loaded the file and then carried out further checks on the contents of the file.
This was all happening correctly however javascript does a thing called asyncronious loading where it calls the functions in turn but moves to the next function before the current function has finished doing what it is doing.
Imagine you go to a bar, order drinks, pay for your drinks and go to your table. Javascript would do this but without the normal pauses of waiting to get served, the useing of the drinks and getting change.
Essentially my code was going back to the table without drinks (or checking the contents of the file without it finishing loading).
To fix it I put a time out in, this probably isn't the best as the load speed will depend on the size of the file. However it works for just now and allows me to get on with other stuff.
A snippet of the working code is as follows:
{
function handleFileLoad() {
if (checkFileType()) {
values = [];
loadFile();
} else {
alert("Invalid file format! \nPlease select a suitable .txt or .csv file<br/>");
return;
}
//setTimeout(fileContentChecks(), 1000);
if (!setTimeout(fileContentChecks(), 1000)) {
return;
} else {
setData(); //PROBABLY PUT THESE IN A FUNCTION OR TWO
setComboLists(); //SO THESE CAN BE CALLED LATER TO UPDATE PAGE
UpdateAssetList();
UpdateXAxisList();
UpdateYAxisList();
UpdateTable();
}
}
function checkFileType() { // CHECK FILE NAME EXTENSION
if ((fileName.split(".")[1].toUpperCase() === "CSV")) {
return (true);
} else if (fileName.split(".")[1].toUpperCase() === "TXT") {
return (true);
} else {
return (false);
}
} // end of checkFileType - tested WORKING
function loadFile() { // LOADS FILE AND SPLITS INTO ROWS
var file = document.getElementById("file").files[0];
var reader = new FileReader();
var link_reg = /(http:\/\/|https:\/\/)/i;
reader.onload = function(file) {
content = file.target.result;
rows = file.target.result.split(/[\r\n|\n]+/);
};
reader.readAsText(file);
// NEEDS TIMEOUT HERE.....
return;
} // end of loadFile - TESTED WORKS WHEN STEPPING THROUGH - NEEDS TIMEOUT
function fileContentChecks() {
if (checkFileContent()) {
if (getSplitString()) {
checkHeaderRow();
} else {
alert("Seperator value not found"); // not sure if this is required?
return false;
}
} else {
alert("File contents not verified, please check file and try again.");
return false;
}
Related
I wonder if is possible to get the text inside of a PDF file by using only Javascript?
If yes, can anyone show me how?
I know there are some server-side java, c#, etc libraries but I would prefer not using a server.
thanks
Because pdf.js has been developing over the years, I would like to give a new answer. That is, it can be done locally without involving any server or external service. The new pdf.js has a function: page.getTextContent(). You can get the text content from that. I've done it successfully with the following code.
What you get in each step is a promise. You need to code this way: .then( function(){...}) to proceed to the next step.
PDFJS.getDocument( data ).then( function(pdf) {
pdf.getPage(i).then( function(page){
page.getTextContent().then( function(textContent){
What you finally get is an string array textContent.bidiTexts[]. You concatenate them to get the text of 1 page. Text blocks' coordinates are used to judge whether newline or space need to be inserted. (This may not be totally robust, but from my test it seems ok.)
The input parameter data needs to be either a URL or ArrayBuffer type data. I used the ReadAsArrayBuffer(file) function in FileReader API to get the data.
Note: According to some other user, the library has updated and caused the code to break. According to the comment by async5 below, you need to replace textContent.bidiTexts with textContent.items.
function Pdf2TextClass(){
var self = this;
this.complete = 0;
/**
*
* #param data ArrayBuffer of the pdf file content
* #param callbackPageDone To inform the progress each time
* when a page is finished. The callback function's input parameters are:
* 1) number of pages done;
* 2) total number of pages in file.
* #param callbackAllDone The input parameter of callback function is
* the result of extracted text from pdf file.
*
*/
this.pdfToText = function(data, callbackPageDone, callbackAllDone){
console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
PDFJS.getDocument( data ).then( function(pdf) {
var div = document.getElementById('viewer');
var total = pdf.numPages;
callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++){
pdf.getPage(i).then( function(page){
var n = page.pageNumber;
page.getTextContent().then( function(textContent){
if( null != textContent.bidiTexts ){
var page_text = "";
var last_block = null;
for( var k = 0; k < textContent.bidiTexts.length; k++ ){
var block = textContent.bidiTexts[k];
if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
if( block.x < last_block.x )
page_text += "\r\n";
else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++ self.complete;
callbackPageDone( self.complete, total );
if (self.complete == total){
window.setTimeout(function(){
var full_text = "";
var num_pages = Object.keys(layers).length;
for( var j = 1; j <= num_pages; j++)
full_text += layers[j] ;
callbackAllDone(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
I couldn't get gm2008's example to work (the internal data structure on pdf.js has changed apparently), so I wrote my own fully promise-based solution that doesn't use any DOM elements, queryselectors or canvas, using the updated pdf.js from the example at mozilla
It eats a file path for the upload since i'm using it with node-webkit.
You need to make sure you have the cmaps downloaded and pointed somewhere and you nee pdf.js and pdf.worker.js to get this working.
/**
* Extract text from PDFs with PDF.js
* Uses the demo pdf.js from https://mozilla.github.io/pdf.js/getting_started/
*/
this.pdfToText = function(data) {
PDFJS.workerSrc = 'js/vendor/pdf.worker.js';
PDFJS.cMapUrl = 'js/vendor/pdfjs/cmaps/';
PDFJS.cMapPacked = true;
return PDFJS.getDocument(data).then(function(pdf) {
var pages = [];
for (var i = 0; i < pdf.numPages; i++) {
pages.push(i);
}
return Promise.all(pages.map(function(pageNumber) {
return pdf.getPage(pageNumber + 1).then(function(page) {
return page.getTextContent().then(function(textContent) {
return textContent.items.map(function(item) {
return item.str;
}).join(' ');
});
});
})).then(function(pages) {
return pages.join("\r\n");
});
});
}
usage:
self.pdfToText(files[0].path).then(function(result) {
console.log("PDF done!", result);
})
Just leaving here a full working sample.
<html>
<head>
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
</head>
<body>
<input id="pdffile" name="pdffile" type="file" />
<button id="btn" onclick="convert()">Process</button>
<div id="result"></div>
</body>
</html>
<script>
function convert() {
var fr=new FileReader();
var pdff = new Pdf2TextClass();
fr.onload=function(){
pdff.pdfToText(fr.result, null, (text) => { document.getElementById('result').innerText += text; });
}
fr.readAsDataURL(document.getElementById('pdffile').files[0])
}
function Pdf2TextClass() {
var self = this;
this.complete = 0;
this.pdfToText = function (data, callbackPageDone, callbackAllDone) {
console.assert(data instanceof ArrayBuffer || typeof data == 'string');
var loadingTask = pdfjsLib.getDocument(data);
loadingTask.promise.then(function (pdf) {
var total = pdf._pdfInfo.numPages;
//callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++) {
pdf.getPage(i).then(function (page) {
var n = page.pageNumber;
page.getTextContent().then(function (textContent) {
//console.log(textContent.items[0]);0
if (null != textContent.items) {
var page_text = "";
var last_block = null;
for (var k = 0; k < textContent.items.length; k++) {
var block = textContent.items[k];
if (last_block != null && last_block.str[last_block.str.length - 1] != ' ') {
if (block.x < last_block.x)
page_text += "\r\n";
else if (last_block.y != block.y && (last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++self.complete;
//callbackPageDone( self.complete, total );
if (self.complete == total) {
window.setTimeout(function () {
var full_text = "";
var num_pages = Object.keys(layers).length;
for (var j = 1; j <= num_pages; j++)
full_text += layers[j];
callbackAllDone(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
</script>
Here's some JavaScript code that does what you want using Pdf.js from http://hublog.hubmed.org/archives/001948.html:
var input = document.getElementById("input");
var processor = document.getElementById("processor");
var output = document.getElementById("output");
// listen for messages from the processor
window.addEventListener("message", function(event){
if (event.source != processor.contentWindow) return;
switch (event.data){
// "ready" = the processor is ready, so fetch the PDF file
case "ready":
var xhr = new XMLHttpRequest;
xhr.open('GET', input.getAttribute("src"), true);
xhr.responseType = "arraybuffer";
xhr.onload = function(event) {
processor.contentWindow.postMessage(this.response, "*");
};
xhr.send();
break;
// anything else = the processor has returned the text of the PDF
default:
output.textContent = event.data.replace(/\s+/g, " ");
break;
}
}, true);
...and here's an example:
http://git.macropus.org/2011/11/pdftotext/example/
Note: This code assumes you're using nodejs. That means you're parsing a local file instead of one from a web page since the original question doesn't explicitly ask about parsing pdfs on a web page.
#gm2008's answer was a great starting point (please read it and its comments for more info), but needed some updates (08/19) and had some unused code. I also like examples that are more full. There's more refactoring and tweaking that could be done (e.g. with await), but for now it's as close to that original answer as it could be.
As before, this uses Mozilla's PDFjs library. The npmjs package is at https://www.npmjs.com/package/pdfjs-dist.
In my experience, this doesn't do well in finding where to put spaces, but that's a problem for another time.
[Edit: I believe the update to the use of .transform has restored the whitespace as it originally behaved.]
// This file is called myPDFfileToText.js and is in the root folder
let PDFJS = require('pdfjs-dist');
let pathToPDF = 'path/to/myPDFfileToText.pdf';
let toText = Pdf2TextObj();
let onPageDone = function() {}; // don't want to do anything between pages
let onFinish = function(fullText) { console.log(fullText) };
toText.pdfToText(pathToPDF, onPageDone, onFinish);
function Pdf2TextObj() {
let self = this;
this.complete = 0;
/**
*
* #param path Path to the pdf file.
* #param callbackPageDone To inform the progress each time
* when a page is finished. The callback function's input parameters are:
* 1) number of pages done.
* 2) total number of pages in file.
* 3) the `page` object itself or null.
* #param callbackAllDone Called after all text has been collected. Input parameters:
* 1) full text of parsed pdf.
*
*/
this.pdfToText = function(path, callbackPageDone, callbackAllDone) {
// console.assert(typeof path == 'string');
PDFJS.getDocument(path).promise.then(function(pdf) {
let total = pdf.numPages;
callbackPageDone(0, total, null);
let pages = {};
// For some (pdf?) reason these don't all come in consecutive
// order. That's why they're stored as an object and then
// processed one final time at the end.
for (let pagei = 1; pagei <= total; pagei++) {
pdf.getPage(pagei).then(function(page) {
let pageNumber = page.pageNumber;
page.getTextContent().then(function(textContent) {
if (null != textContent.items) {
let page_text = "";
let last_item = null;
for (let itemsi = 0; itemsi < textContent.items.length; itemsi++) {
let item = textContent.items[itemsi];
// I think to add whitespace properly would be more complex and
// would require two loops.
if (last_item != null && last_item.str[last_item.str.length - 1] != ' ') {
let itemX = item.transform[5]
let lastItemX = last_item.transform[5]
let itemY = item.transform[4]
let lastItemY = last_item.transform[4]
if (itemX < lastItemX)
page_text += "\r\n";
else if (itemY != lastItemY && (last_item.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
page_text += ' ';
} // ends if may need to add whitespace
page_text += item.str;
last_item = item;
} // ends for every item of text
textContent != null && console.log("page " + pageNumber + " finished.") // " content: \n" + page_text);
pages[pageNumber] = page_text + "\n\n";
} // ends if has items
++self.complete;
callbackPageDone(self.complete, total, page);
// If all done, put pages in order and combine all
// text, then pass that to the callback
if (self.complete == total) {
// Using `setTimeout()` isn't a stable way of making sure
// the process has finished. Watch out for missed pages.
// A future version might do this with promises.
setTimeout(function() {
let full_text = "";
let num_pages = Object.keys(pages).length;
for (let pageNum = 1; pageNum <= num_pages; pageNum++)
full_text += pages[pageNum];
callbackAllDone(full_text);
}, 1000);
}
}); // ends page.getTextContent().then
}); // ends page.then
} // ends for every page
});
}; // Ends pdfToText()
return self;
}; // Ends object factory
Run in the terminal:
node myPDFfileToText.js
Updated 02/2021
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
<script>
function Pdf2TextClass(){
var self = this;
this.complete = 0;
this.pdfToText = function(data, callbackPageDone, callbackAllDone){
console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
var loadingTask = pdfjsLib.getDocument(data);
loadingTask.promise.then(function(pdf) {
var total = pdf._pdfInfo.numPages;
//callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++){
pdf.getPage(i).then( function(page){
var n = page.pageNumber;
page.getTextContent().then( function(textContent){
//console.log(textContent.items[0]);0
if( null != textContent.items ){
var page_text = "";
var last_block = null;
for( var k = 0; k < textContent.items.length; k++ ){
var block = textContent.items[k];
if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
if( block.x < last_block.x )
page_text += "\r\n";
else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++ self.complete;
//callbackPageDone( self.complete, total );
if (self.complete == total){
window.setTimeout(function(){
var full_text = "";
var num_pages = Object.keys(layers).length;
for( var j = 1; j <= num_pages; j++)
full_text += layers[j] ;
console.log(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
var pdff = new Pdf2TextClass();
pdff.pdfToText('PDF_URL');
</script>
For all the people who actually want to use it on a node server:
/**
* Created by velten on 25.04.16.
*/
"use strict";
let pdfUrl = "http://example.com/example.pdf";
let request = require('request');
var pdfParser = require('pdf2json');
let pdfPipe = request({url: pdfUrl, encoding:null}).pipe(pdfParser);
pdfPipe.on("pdfParser_dataError", err => console.error(err) );
pdfPipe.on("pdfParser_dataReady", pdf => {
//optionally:
//let pdf = pdfParser.getMergedTextBlocksIfNeeded();
let count1 = 0;
//get text on a particular page
for (let page of pdf.formImage.Pages) {
count1 += page.Texts.length;
}
console.log(count1);
pdfParser.destroy();
});
It is possible but:
you would have to use the server anyway, there's no way you can get content of a file on user computer without transferring it to server and back
I don't thing anyone has written such library yet
So if you have some free time you can learn pdf format and write such a library yourself, or you can just use server side library of course.
I couldn't find anything on SO that matched my question. I'm using Sheetjs plugin to convert an excel sheet into json, and displaying it using jquery in the browser. I'm able to do the conversion and display, but I have a use-case where I need to validate each of the json rows with data returned from a jquery ajax 'GET' call.
I'm able to perform that validation as well. Once each excel json row is validated against the values from the ajax response, based on a set of rules, the excel json row is marked either a success row or an error row. For success rows, I perform no action. For error row, I need to add an additional key/value pair in the json element, denoting the error type, and the error description. Further, this error row, when displayed in the browser needs to have a css style with a color:red for red text, to indicate an error.
I haven't seen anything in Sheetjs documentation that might allow me to do this, but I'm pretty sure it can be done. In the code below, I have to modify the helper function called BindTable() in order to add the css style to set the text color to red IF it is an error row. I also have to somehow add a for each of the error rows in order to display the error type and error description.
In the below code, I need to be able to display the invalidRequests JSON object with the css style applied to display the text in red color. Or, if there is a way to directly manipulate the exceljson JSON object to somehow append the key/value pairs of MSG1/message to each of the error rows, that would be even better. I realize that due to the nature of this question, I can't create a jsfiddle, but any ideas/suggestion/comments would be extremely helpful, even if it doesn't provide the complete solution.
Expected format:
author1 JOHN DOE USA N.AMERICA
ERROR: THIS AUTHOR NAME ALREADY EXISTS IN THE SYSTEM!
This is the code that I currently have:
//Excel Reader
function ExcelToTable(event) {
event.preventDefault();
var regex = /^([a-zA-Z0-9\s_\\.\-:])+(.xlsx|.xls)$/;
/*Checks whether the file is a valid excel file*/
if (regex.test($("#excelfile").val().toLowerCase())) {
var xlsxflag = false; /*Flag for checking whether excel is .xls
format or .xlsx format*/
if ($("#excelfile").val().toLowerCase().indexOf(".xlsx") > 0) {
xlsxflag = true;
}
/*Checks whether the browser supports HTML5*/
if (typeof (FileReader) != "undefined") {
var reader = new FileReader();
reader.onload = function (e) {
var data = e.target.result;
//pre-process data
var binary = "";
var bytes = new Uint8Array(data);
var length = bytes.byteLength;
for(var i=0;i<length;i++){
binary += String.fromCharCode(bytes[i]);
}
// /pre-process data
/*Converts the excel data in to object*/
if (xlsxflag) {
// var workbook = XLSX.read(data, { type: 'binary' });
var workbook = XLSX.read(binary, {type: 'binary'});
}
else {
var workbook = XLS.read(binary, { type: 'binary' });
}
/*Gets all the sheetnames of excel in to a variable*/
var sheet_name_list = workbook.SheetNames;
// console.log('Sheet name list : ' + sheet_name_list);
var cnt = 0; /*This is used for restricting the script to
consider only first sheet of excel*/
// sheet_name_list.forEach(function (y) { /*Iterate through
all sheets*/
/*Convert the cell value to Json*/
if (xlsxflag) {
exceljson =
XLSX.utils.sheet_to_json(workbook.Sheets['CUSTOM_EXCEL_TAB'],{defval:
"NULL"});
var emptyAuthorCells =[];
var invalidCountryCells = [];
Object.keys(exceljson).forEach(function(value, key) {
if(exceljson[key].AUTHOR == 'ADD'){
}
else if(exceljson[key].AUTHOR == 'NULL'){
emptyAuthorCells.push({'MARKET':
exceljson[key].MARKET, 'REGION':exceljson[key].REGION,
'PARTNER':exceljson[key].PARTNER, 'AUTHOR': exceljson[key].AUTHOR });
}
//check effective end date
if((exceljson[key].DATE_ENDING != '') ||
(exceljson[key].DATE_ENDING <= getTodayDate())){
invalidCountryCells.push({
'MARKET': exceljson[key].MARKET,
'REGION':exceljson[key].REGION, 'PARTNER':exceljson[key].PARTNER, 'AUTHOR':
exceljson[key].AUTHOR
});
}
});
var emptyActionCellsMessage = "There were " +
emptyAuthorCells.length + " rows with Author=Null <br />";
var completedActionCellsMessage = " Success! There
were " + emptyAuthorCells.length + " rows with authro=Null <br />";
var invalidDateMsg = "There are missing or incorrect
date values.";
var validCompareDataMessage = "Success! All data has been successfully validated!";
var invalidCompareDataMessage = "Validation Failed!
Data does not match Rules.";
}
else {
var exceljson =
XLS.utils.sheet_to_row_object_array(workbook.Sheets[y]);
}
var conflictRows = [];
var returnedRows = [];
var errorReturnedRows = [];
if(emptyAuthorCells.length == 0){
var uniqueAuthor = $.unique(exceljson.map(function
(d){
return d.MARKET;
}));
var doAllValidations = function(){
var ajaxList = [];
var ajxIndex = 1;
$.each(uniqueAuthor, function (index, value){
var jqResponse =
$.ajax({
type: "get",
url: "authorlist.cfm?method=getlist&name=" +
value,
dataType: "json"
});
ajaxList.push(jqResponse);
jqResponse.then(
function( apiResponse ){
$.each (apiResponse, function (cc) {
if(apiResponse[cc].hasOwnProperty('SUCCESS')){
errorReturnedRows.push({
'success':
apiResponse[cc].SUCCESS,
'message':
apiResponse[cc].MESSAGE,
'country_code' : value
});
}
else{
returnedRows.push(apiResponse[cc]);
}
// }
// }
});
}
);
});
return ajaxList;
};
// /LOOP OVER country_code
}
var invalidRequests = [];
var validRequests = [];
$(function() {
var ajaxCalls = doAllValidations();
//begin apply
$.when.apply($, ajaxCalls).done(function(){
//console.log(ajaxList);
$('#hidReturnedRows').val();
$('#hidReturnedRows').val(JSON.stringify(returnedRows));
if (exceljson.length > 0 && cnt == 0) {
if((emptyAuthorCells.length != 0) ||
(errorReturnedRows.length!=0) ) {
//data is invalid
console.log("data is invalid");
$('#displayErrors tr
td.previewSuccessClass').html("");
$('#displayErrors tr
td.previewErrorsClass').html(emptyActionCellsMessage);
$('#export-file').addClass('hidebtn');
}
else{
//outer loop
var found = false;
var book_found = false;
var response_validation_errors = [];
var message = "The author's zone is
incorrect";
var message2 = "This book already
exists";
$.each(exceljson, function(x, ej){
// console.log("inside outer
loop");
found = false;
$.each(returnedRows, function(y,
rr){
//compare inner row with outer
row to make sure they're the same
if(rr.AUTHOR_ID == ej.ID &&
rr.AUTHOR_NAME == ej.NAME)
{
if((rr.AUTHOR ==
ej.NATIVE_AUTHOR) && (rr.BOOK_QUALITY == ej.AUTHOR_ZONE)){
// console.log("found!");
found = true;
}
}
});
if(found){
invalidRequests.push({
"AUTHOR": ej.NAME,
"AUTHOR_ZONE":
ej.AUTHOR_ZONE,
"COUNTRY": ej.COUNTRY
});
}
else{
validRequests.push(ej);
}
});
// /outer loop
}
BindTable(exceljson, '#exceltable');
cnt++;
}
})();
//end apply
});
};
if (xlsxflag) {/*If excel file is .xlsx extension than creates a
Array Buffer from excel*/
reader.readAsArrayBuffer($("#excelfile")[0].files[0]);
}
else {
reader.readAsBinaryString($("#excelfile")[0].files[0]);
}
}
else {
alert("Sorry! Your browser does not support HTML5!");
}
}
else {
alert("Please upload a valid Excel file!");
}
}
//Helper funcs
function BindTable(jsondata, tableid, invalidreqs) {/*Function used to convert the JSON
array to Html Table*/
var columns = BindTableHeader(jsondata, tableid); /*Gets all the column
headings of Excel*/
//ADDED .map() & .find() INSTEAD OF NESTED LOOPS
jsondata.map(a => {
// SEARCH FOR AN ELEMENT IN invalidreqs THAT MATCH THE
// CRITERIA TESTED FOR IN THE FUNCTION
if (invalidreqs.find(b => {
return a.AUTHOR == b.AUTHOR && a.BOOKNAME == b.BOOKNAME && a.COUNTRY ==
b.COUNTRY;
})) {
a.MSG = "THIS ROW ALREADY EXISTS";
}
});
console.log (jsondata);
//THE BELOW CODE NEEDS TO BE CHANGED
var row$ = $('<tr/>');
for (var colIndex = 0; colIndex < columns.length; colIndex++) {
var cellValue = jsondata[i][columns[colIndex]];
row$.append($('<td/>').html(cellValue));
}
//console.log("before table append");
$(tableid).append(row$);
if( has_error ){
row$.addClass( 'response-errors' );//add class to make text red
var error_row = $('<tr/>');
var error_cell = $('<td/>');
error_cell.attr('colspan', column.length); //set cols to span lenght of row
error_cell.html("SET ERROR MESSAGE TO DISPLAY BASED ON invalidreq object");
error_row.append( error_cell );
$( tableid ).append( error_row );
}
}
// /Outer loop
}
function BindTableHeader(jsondata, tableid) {/*Function used to get all
column names from JSON and bind the html table header*/
var columnSet = [];
var headerTr$ = $('<tr/>');
for (var i = 0; i < jsondata.length; i++) {
var rowHash = jsondata[i];
for (var key in rowHash) {
if (rowHash.hasOwnProperty(key)) {
if ($.inArray(key, columnSet) == -1) {/*Adding each unique
column names to a variable array*/
columnSet.push(key);
// console.log(key);
headerTr$.append($('<th/>').html(key));
}
}
}
}
$(tableid).append(headerTr$);
return columnSet;
}
Ok so what you want to do is:
1) Assign the row index to the invalidRequests object, on line 191 like this:
invalidRequests.push({
"AUTHOR": ej.NAME,
"AUTHOR_ZONE": ej.AUTHOR_ZONE,
"COUNTRY": ej.COUNTRY,
"index": x,
"MSG1": "Put the error message here"
});
Now it is very easy to determine which row has an error.
Since the invalidRequests is a private object of the ExcelTable function, you will need to
2) pass it on to the BindTable function like this:
BindTable(exceljson, '#exceltable', invalidRequests);
3) modify the BindTable function to check for invalidRequests and handle them:
function BindTable(jsondata, tableid, invalidreqs) {
var columns = BindTableHeader(jsondata, tableid);
for (var i = 0; i < jsondata.length; i++) {
//look for rows with error
var has_error = false
var invalidreq
for(var u=0;u<invalidreqs.length;u++){
if(i==invalidreqs[u].index){
//found invalid request belonging to current row, set flag
has_error = true
invalidreq = invalidreqs[u] // and store the current invalidrequest infos on temp obj
//break - not really needed
}
}
var row$ = $('<tr/>');
for (var colIndex = 0; colIndex < columns.length; colIndex++) {
var cellValue = jsondata[i][columns[colIndex]];
row$.append($('<td/>').html(cellValue));
}
$(tableid).append(row$);
if(has_error){
row$.addClass('error') // add css class which will make the text red or whatever
var error_row = $('<tr/>') // create error row
var error_cell = $('<td/>')
error_cell.attr('colspan',columns.length) // set column to span over all columns of table
error_cell.html(invalidreq.MSG1)
error_row.append(error_cell)
$(tableid).append(error_row);
}
}
}
Please note it is not clear, nor specified in your code, in which column the error should appear. Try to implement that yourself by pushing that info into the invalidRequests object and reading it out on BindTable.
I am using javascript to load in data from a XML file. The file is not being loaded in after an if statement that checks the ready state and the status. The ready state brings back 4 and the status brings back 200, so the last condition (the responseXML) should not be null, but for some reason, it remains null and the XML file is not loaded.
function load() {
try {
console.log("in load");
asyncRequest = new XMLHttpRequest();
asyncRequest.addEventListener("readystatechange", function() {
processResponse();
}, false);
asyncRequest.open('GET', 'Catalog.xml', true);
asyncRequest.send(null);
} catch (exception) {
alert("Request Failed");
console.log("failed");
}
}
function processResponse() {
console.log(asyncRequest.readyState + " response" + asyncRequest.status + asyncRequest.responseXML);
if (asyncRequest.readyState == 4 && asyncRequest.status == 200 && asyncRequest.responseXML) {
console.log("found");
var planets = asyncRequest.responseXML.getElementsByTagName("planet");
var name = document.getElementById("planetinfo").value;
console.log(name);
for (var i = 0; i < planets.length; ++i) {
var planet = planets.item(i);
var planetName = planet.getElementsByTagName("name").item(0).firstChild.nodeValue;
if (name == planetName) {
document.getElementById("name").innerHTML = planet.getElementsByTagName("name").item(0).firstChild.nodeValue;
document.getElementById("discovered").innerHTML = planet.getElementsByTagName("discovered").item(0).firstChild.nodeValue;
document.getElementById("distance").innerHTML = planet.getElementsByTagName("distance").item(0).firstChild.nodeValue;
document.getElementById("contact").innerHTML = planet.getElementsByTagName("contact").item(0).firstChild.nodeValue;
document.getElementById("image").innerHTML = "<img src='../images/" + planet.getElementsByTagName("image").item(0).firstChild.nodeValue + "' + '/ width = '250' height = '250'>";
}
}
}
}
This is the code from the javascript file that pertains to the loading of the XML. Opening up the console shows logs that tells me the code does not get past the if statement checking the asyncRequest.
I'm trying to create a parser to parse my coordinate data into json. The data is in a text file in a simple x,y format. I'm trying to get the the text before [i], is that possible with .split()?
Code:
function visualize()
{
if(currDoc == null)
{
var location = window.prompt("Please enter the name of the dataset file, and make sure it is in the data directory. Current supported formats txt.");
location = "data/" + location;
jQuery.get(location, function(data) {
data = data.replace(/\s/g, '');
var length = data.length;
var commaCount = 0;
for(var i=0;i<length;i++)
{
if(data[i] == ",")
{
commaCount += 1;
if(commaCount == 2)
{
//get text before [i]
}
}
}
}, "text").fail(function(){ alert("File not found. Did you enter the file name correctly?") });
}
else
{
alert("A dataset is already visualized");
}
}
If your data is delimited by commas like this x1,y1,x2,y2,...,xn,yn you can use the split function to split the string into tokens. Then you can iterate through them to collect whatever you need from the input.
For example if you need x and y pairs you would do something like this:
function visualize()
{
if(currDoc == null)
{
var location = window.prompt("Please enter the name of the dataset file, and make sure it is in the data directory. Current supported formats txt.");
location = "data/" + location;
jQuery.get(location, function(data) {
data = data.replace(/\s/g, '');
// split the string 'x1,y1,...,xn,yn' into tokens ['x1', 'y1', ... 'xn', 'yn']
var tokens = data.split(',');
// iterate over all tokens using a step of 2 (i += 2)
// Note: if you have an odd number of tokens the last one will be ignored
// (this is by design because you are expecting x,y pairs)
for(var i = 1; i < tokens.length; i += 2)
{
// print the (x,y) pair to the console
console.log("New pair (" + tokens[i-1] + "," + tokens[i] + ")");
}
}, "text").fail(function(){ alert("File not found. Did you enter the file name correctly?") });
}
else
{
alert("A dataset is already visualized");
}
}
I would add a pointer and a second array witch is filled by characters when you hit a comma join the array and you have the previews text,
Does it make any sense.
function visualize()
{
if(currDoc == null)
{
var location = window.prompt("Please enter the name of the dataset file, and make sure it is in the data directory. Current supported formats txt.");
location = "data/" + location;
jQuery.get(location, function(data) {
data = data.replace(/\s/g, '');
var length = data.length;
var commaCount = 0;
var charArray = [];
for(var i=0;i<length;i++)
{
if(data[i] == ",")
{
console.log('text' , charArray.join(''));
charArray = [];
commaCount += 1;
if(commaCount == 2)
{
//get text before [i]
}
}else {
charArry.puch(data[i]);
}
}
}, "text").fail(function(){ alert("File not found. Did you enter the file name correctly?") });
}
else
{
alert("A dataset is already visualized");
}
}
I got a csv-reader directive and let's user upload a csv file. I noticed that when I upload a file with spaces between words for example:
abc
abc
abc
abc
abc
this gets shown. I want to delete all the blank lines Not sure what to do.
var reader = new FileReader();
reader.onload = function(e) {
var contents = e.target.result;
var rows = contents.split('\n');
// Check if the last row is empty. This works
if(rows[rows.length-1] ===''){
rows.pop()
}
}
// this doesn't work for some reason. It doesn't detect the '' in the middle of the arrays.
for( var i=rows.length-1;i>0;i--){
if(rows[i] === ''){
rows.splice(i,1)
}
}
Try using Array.prototype.filter()
var rows = contents.split('\n').filter(function(str){
return str;
});
From what you have shown it looks like you want to check if each item in the csvModel is an empty string, rather than newValue
Something like:
for( var i=0 ;i< $scope.csvModel.length; i++){
if (csvModel[i] == "") {
$scope.csvModel.splice(i,1);
}
}
var text = [];
var target = $event.target || $event.srcElement;
var files = target.files;
if(Constants.validateHeaderAndRecordLengthFlag){
if(!this._fileUtil.isCSVFile(files[0])){
alert("Please import valid .csv file.");
this.fileReset();
}
}
var input = $event.target;
var reader = new FileReader();
reader.readAsText(input.files[0], 'UTF-8');
reader.onload = (data) => {
let csvData = reader.result;
let csvRecordsArray = csvData.split(/\r\n|\n/);
if (csvRecordsArray[csvRecordsArray.length - 1] === '') {
csvRecordsArray.pop();
}
var headerLength = -1;
if(Constants.isHeaderPresentFlag){
let headersRow = this._fileUtil.getHeaderArray(csvRecordsArray, Constants.tokenDelimeter);
headerLength = headersRow.length;
}
this.csvRecords = this._fileUtil.getDataRecordsArrayFromCSVFile(csvRecordsArray,
headerLength, Constants.validateHeaderAndRecordLengthFlag, Constants.tokenDelimeter);
if(this.csvRecords===null){
this.csvRecords=[];
}
else if(this.csvRecords!==null) {
if ((JSON.stringify(this.csvRecords[0])) === (JSON.stringify(this.csvFormate))) {
alert("format matches");
this.displayCsvContent = true;
for (let i = 0; i < this.csvRecords.length; i++) {
if (i !== 0) {
this.csvRecords[i].push(this.recordInsertedFlag);
}
}
}
else {
alert("format not matches");
}
}
if(this.csvRecords == null){
this.displayCsvContent=false;
//If control reached here it means csv file contains error, reset file.
this.fileReset();
}
};
reader.onerror = function () {
alert('Unable to read ' + input.files[0]);
};