I'm trying to create a parser to parse my coordinate data into JSON. The data is in a text file in a simple x,y format. I'm trying to get the text before [i]; is that possible with .split()?
Code:
/**
 * Asks the user for a dataset file name, requests "data/<name>" as text via
 * jQuery and walks the whitespace-stripped contents counting commas.
 * Shows an alert instead when a dataset is already loaded (currDoc is set),
 * and another alert when the request fails.
 */
function visualize()
{
    // Guard: only one dataset may be visualized at a time.
    if (currDoc != null)
    {
        alert("A dataset is already visualized");
        return;
    }

    var fileName = window.prompt("Please enter the name of the dataset file, and make sure it is in the data directory. Current supported formats txt.");
    var url = "data/" + fileName;

    var onLoaded = function(data) {
        var stripped = data.replace(/\s/g, '');
        var commasSeen = 0;
        var pos = 0;
        while (pos < stripped.length)
        {
            if (stripped[pos] == ",")
            {
                commasSeen += 1;
                if (commasSeen == 2)
                {
                    //get text before [pos]
                }
            }
            pos += 1;
        }
    };
    var onMissing = function(){ alert("File not found. Did you enter the file name correctly?") };

    jQuery.get(url, onLoaded, "text").fail(onMissing);
}
If your data is delimited by commas like this x1,y1,x2,y2,...,xn,yn you can use the split function to split the string into tokens. Then you can iterate through them to collect whatever you need from the input.
For example if you need x and y pairs you would do something like this:
/**
 * Asks the user for a dataset file name, requests "data/<name>" as text via
 * jQuery, splits the whitespace-stripped contents on commas and logs the
 * tokens two at a time as (x,y) pairs.
 * Shows an alert instead when a dataset is already loaded (currDoc is set),
 * and another alert when the request fails.
 */
function visualize()
{
    // Guard: only one dataset may be visualized at a time.
    if (currDoc != null)
    {
        alert("A dataset is already visualized");
        return;
    }

    var fileName = window.prompt("Please enter the name of the dataset file, and make sure it is in the data directory. Current supported formats txt.");
    var url = "data/" + fileName;

    var logPairs = function(data) {
        // 'x1,y1,...,xn,yn' -> ['x1', 'y1', ... 'xn', 'yn']
        var tokens = data.replace(/\s/g, '').split(',');
        // Only complete pairs are reported; a trailing unpaired token is ignored.
        var pairCount = Math.floor(tokens.length / 2);
        for (var p = 0; p < pairCount; p++)
        {
            var x = tokens[2 * p];
            var y = tokens[2 * p + 1];
            console.log("New pair (" + x + "," + y + ")");
        }
    };
    var onMissing = function(){ alert("File not found. Did you enter the file name correctly?") };

    jQuery.get(url, logPairs, "text").fail(onMissing);
}
I would add a pointer and a second array which is filled with characters; when you hit a comma, join the array and you have the previous text.
Does that make sense?
/**
 * Asks the user for a dataset file name, requests "data/<name>" as text via
 * jQuery and logs every comma-separated token of the whitespace-stripped
 * contents.
 *
 * Characters are collected in charArray until a comma is reached; the
 * collected characters are then joined into the token that precedes that
 * comma. Shows an alert instead when a dataset is already loaded (currDoc is
 * set), and another alert when the request fails.
 */
function visualize()
{
    if(currDoc == null)
    {
        var location = window.prompt("Please enter the name of the dataset file, and make sure it is in the data directory. Current supported formats txt.");
        if(location === null)
        {
            // Prompt was cancelled: nothing to load (avoids requesting "data/null").
            return;
        }
        location = "data/" + location;
        jQuery.get(location, function(data) {
            data = data.replace(/\s/g, '');
            var length = data.length;
            var commaCount = 0;
            var charArray = [];
            for(var i=0;i<length;i++)
            {
                if(data[i] == ",")
                {
                    // charArray holds exactly the text before this comma.
                    console.log('text' , charArray.join(''));
                    charArray = [];
                    commaCount += 1;
                    if(commaCount == 2)
                    {
                        //get text before [i]
                    }
                }else {
                    charArray.push(data[i]);
                }
            }
            // The last token has no comma after it, so flush it explicitly.
            if(charArray.length > 0)
            {
                console.log('text' , charArray.join(''));
            }
        }, "text").fail(function(){ alert("File not found. Did you enter the file name correctly?") });
    }
    else
    {
        alert("A dataset is already visualized");
    }
}
Related
I have implemented the following code to parse a csv file, convert to a JSON array and send the JSON result to apex controller, which invokes the batch class to process the DML operation for opportunityLineItem object. The code is working fine up to a maximum of 4000 rows (files have 22 columns with values). When there are 5000 records, the process throws an error and stops (it does not call the apex server). Why does it stop if there are 4000 records? Is there any limit for parsing the csv records in LWC?
Code:
if (!this.csvFile) {
console.log("file not found");
return;
}
this.showprogressbar = true;
let reader = new FileReader();
let ctx = this; // to control 'this' property in an event handler
reader.readAsText(this.csvFile, "Shift_JIS");
reader.onload = function (evt) {
console.log('reader:'+evt.target.result);
let payload = ctx.CSV2JSON(evt.target.result, ctx.CSVToArray);
let json = null;
let error = null;
console.log("payload:" + payload);
setJSON({
payload: payload,
crud: crud,
csvFile:ctx.csvFile
})
.then((result) => {
json = result;
var err = json.includes("Error");
console.log('err====='+err);
if(err)
{
console.log('json==###=='+json);
ctx.error = json;
console.log("error:"+ctx.error);
///s/ alert('error');
ctx.showloader=false;
ctx.hasError=true;
}
else{
ctx.jobinfo(json);
console.log("apex call setJSON() ===> success: " + json);
//ctx.error = undefined;
}
})
.catch((error) => {
error =ctx.error;
console.log("error:" + error.errorCode + ', message ' + error.message);
///s/ alert('error');
ctx.showloader=false;
ctx.hasError=true;
if (error && error.message) {
json = "{'no':1,'status':'Error', 'msg':'" + error.message + "'}";
} else {
json = "{'no':1,'status':'Error','msg':'Unknown error'}";
}
});
};
reader.onerror = function (evt) {
/*ctx.mycolumns = [
{ label: "no", fieldName: "no", type: "number", initialWidth: 50 },
{
label: "status",
fieldName: "status",
type: "text",
initialWidth: 100
},
{ label: "message", fieldName: "msg", type: "text" }
];
ctx.mydata = [{ no: "1", status: "Error", msg: "error reading file" }];
*/
//$A.util.toggleClass(spinner, "slds-hide"); // hide spinner
};
// ctx.showloader=false;
console.log("mydata:===" + ctx.mydata);
alert('onerror');
}
CSV2JSON(csv, csv2array) {
let array = csv2array(csv);
//console.log("csv:::"+csv);
//console.log("csv2array:"+csv2array);
let objArray = [];
//console.log("objArray:"+objArray);
var headervar = oppheader;//'Name,BillingCity,Type,Industry';
console.log('headervar:::'+headervar);
let headerArray = headervar.split(',');
for (let i = 1; i < array.length; i++) {
objArray[i - 1] = {};
/*for (let k = 0; k < array[0].length && k < array[i].length; k++) {
let key = array[0][k];
if(key === 'DW予定日')
elseif(key === 'DW予定日')
elseif(key === 'DW予定日')
console.log("key:"+key);
this.hearder=key;
objArray[i - 1][key] = array[i][k];
}*/
for (let k = 0; k < headerArray.length; k++) {
let key = headerArray[k];
console.log("key====:"+key);
this.hearder=key;
objArray[i - 1][key] = array[i][k];
}
}
objArray.pop();
//console.log("objArray:==="+objArray.length);
this.rowCount = objArray.length;
//console.log("rowCount+++++++" + this.rowCount);
let json = JSON.stringify(objArray);
//console.log("json:==="+json.length);
let str = json.replace("/},/g", "},\r\n");
//console.log("str:======="+str);
return str;
}
CSVToArray(strData, strDelimiter) {
console.log('CSVToArray');
// Check to see if the delimiter is defined. If not,
// then default to comma.
//console.log('strData:'+strData);
//console.log("strDelimiter::" + strDelimiter);
strDelimiter = strDelimiter || ",";
//console.log("strDelimiter:" + strDelimiter);
// Create a regular expression to parse the CSV values.
var objPattern = new RegExp(
// Delimiters.
"(\\" +
strDelimiter +
"|\\r?\\n|\\r|^)" +
// Quoted fields.
'(?:"([^"]*(?:""[^"]*)*)"|' +
// Standard fields.
'([^"\\' +
strDelimiter +
"\\r\\n]*))",
"gi"
);
// Create an array to hold our data. Give the array
// a default empty first row.
// console.log("objPattern:" + objPattern);
var arrData = [[]];
// Create an array to hold our individual pattern
// matching groups.
// console.log("arrData:" + arrData);
var arrMatches = null;
// Keep looping over the regular expression matches
// until we can no longer find a match.
while ((arrMatches = objPattern.exec(strData))) {
// Get the delimiter that was found.
var strMatchedDelimiter = arrMatches[1];
// Check to see if the given delimiter has a length
// (is not the start of string) and if it matches
// field delimiter. If id does not, then we know
// that this delimiter is a row delimiter.
if (strMatchedDelimiter.length && strMatchedDelimiter != strDelimiter) {
// Since we have reached a new row of data,
// add an empty row to our data array.
arrData.push([]);
}
// Now that we have our delimiter out of the way,
// let's check to see which kind of value we
// captured (quoted or unquoted).
if (arrMatches[2]) {
// We found a quoted value. When we capture
// this value, unescape any double quotes.
var strMatchedValue = arrMatches[2].replace(new RegExp('""', "g"), '"');
} else {
// We found a non-quoted value.
var strMatchedValue = arrMatches[3];
}
// Now that we have our value string, let's add
// it to the data array.
arrData[arrData.length - 1].push(strMatchedValue);
}
// Return the parsed data.
return arrData;
}
I couldn't find anything on SO that matched my question. I'm using Sheetjs plugin to convert an excel sheet into json, and displaying it using jquery in the browser. I'm able to do the conversion and display, but I have a use-case where I need to validate each of the json rows with data returned from a jquery ajax 'GET' call.
I'm able to perform that validation as well. Once each excel json row is validated against the values from the ajax response, based on a set of rules, the excel json row is marked either a success row or an error row. For success rows, I perform no action. For error row, I need to add an additional key/value pair in the json element, denoting the error type, and the error description. Further, this error row, when displayed in the browser needs to have a css style with a color:red for red text, to indicate an error.
I haven't seen anything in Sheetjs documentation that might allow me to do this, but I'm pretty sure it can be done. In the code below, I have to modify the helper function called BindTable() in order to add the css style to set the text color to red IF it is an error row. I also have to somehow add a for each of the error rows in order to display the error type and error description.
In the below code, I need to be able to display the invalidRequests JSON object with the css style applied to display the text in red color. Or, if there is a way to directly manipulate the exceljson JSON object to somehow append the key/value pairs of MSG1/message to each of the error rows, that would be even better. I realize that due to the nature of this question, I can't create a jsfiddle, but any ideas/suggestion/comments would be extremely helpful, even if it doesn't provide the complete solution.
Expected format:
author1 JOHN DOE USA N.AMERICA
ERROR: THIS AUTHOR NAME ALREADY EXISTS IN THE SYSTEM!
This is the code that I currently have:
//Excel Reader
/*
 * Click/submit handler: validates the file chosen in #excelfile by name,
 * reads it with FileReader, parses it with XLSX/XLS, validates the rows
 * against the responses of authorlist.cfm and finally renders them with
 * BindTable into #exceltable.
 *
 * event - the DOM event; its default action is always prevented.
 *
 * NOTE(review): several long lines of this function are hard-wrapped in the
 * middle of string literals and line comments (for example the message
 * strings and the jQuery selectors below); those wraps must be joined before
 * the function can be parsed.
 */
function ExcelToTable(event) {
event.preventDefault();
// NOTE(review): the dots in (.xlsx|.xls) are unescaped, so each matches any character.
var regex = /^([a-zA-Z0-9\s_\\.\-:])+(.xlsx|.xls)$/;
/*Checks whether the file is a valid excel file*/
if (regex.test($("#excelfile").val().toLowerCase())) {
var xlsxflag = false; /*Flag for checking whether excel is .xls
format or .xlsx format*/
if ($("#excelfile").val().toLowerCase().indexOf(".xlsx") > 0) {
xlsxflag = true;
}
/*Checks whether the browser supports HTML5*/
if (typeof (FileReader) != "undefined") {
var reader = new FileReader();
reader.onload = function (e) {
var data = e.target.result;
//pre-process data
// Builds a binary string, one character per byte of the result.
// NOTE(review): in the .xls branch below the file is read with
// readAsBinaryString, so `data` is a string rather than an ArrayBuffer;
// confirm this conversion still yields the file bytes in that case.
var binary = "";
var bytes = new Uint8Array(data);
var length = bytes.byteLength;
for(var i=0;i<length;i++){
binary += String.fromCharCode(bytes[i]);
}
// /pre-process data
/*Converts the excel data in to object*/
if (xlsxflag) {
// var workbook = XLSX.read(data, { type: 'binary' });
var workbook = XLSX.read(binary, {type: 'binary'});
}
else {
var workbook = XLS.read(binary, { type: 'binary' });
}
/*Gets all the sheetnames of excel in to a variable*/
var sheet_name_list = workbook.SheetNames;
// console.log('Sheet name list : ' + sheet_name_list);
var cnt = 0; /*This is used for restricting the script to
consider only first sheet of excel*/
// sheet_name_list.forEach(function (y) { /*Iterate through
all sheets*/
/*Convert the cell value to Json*/
if (xlsxflag) {
// Assigned without a declaration in this branch; the `var exceljson` in the
// else branch below is hoisted to this function, so both refer to the same local.
exceljson =
XLSX.utils.sheet_to_json(workbook.Sheets['CUSTOM_EXCEL_TAB'],{defval:
"NULL"});
var emptyAuthorCells =[];
var invalidCountryCells = [];
// exceljson is iterated by index here: `key` is the position and `value`
// is the index as a string (unused).
Object.keys(exceljson).forEach(function(value, key) {
if(exceljson[key].AUTHOR == 'ADD'){
}
else if(exceljson[key].AUTHOR == 'NULL'){
emptyAuthorCells.push({'MARKET':
exceljson[key].MARKET, 'REGION':exceljson[key].REGION,
'PARTNER':exceljson[key].PARTNER, 'AUTHOR': exceljson[key].AUTHOR });
}
//check effective end date
// NOTE(review): the first operand is true for every non-empty DATE_ENDING,
// so every row with a date is pushed; confirm whether `== ''` was intended.
if((exceljson[key].DATE_ENDING != '') ||
(exceljson[key].DATE_ENDING <= getTodayDate())){
invalidCountryCells.push({
'MARKET': exceljson[key].MARKET,
'REGION':exceljson[key].REGION, 'PARTNER':exceljson[key].PARTNER, 'AUTHOR':
exceljson[key].AUTHOR
});
}
});
var emptyActionCellsMessage = "There were " +
emptyAuthorCells.length + " rows with Author=Null <br />";
var completedActionCellsMessage = " Success! There
were " + emptyAuthorCells.length + " rows with authro=Null <br />";
var invalidDateMsg = "There are missing or incorrect
date values.";
var validCompareDataMessage = "Success! All data has been successfully validated!";
var invalidCompareDataMessage = "Validation Failed!
Data does not match Rules.";
}
else {
// NOTE(review): `y` was the parameter of the forEach that is commented out
// above; nothing visible here defines it.
var exceljson =
XLS.utils.sheet_to_row_object_array(workbook.Sheets[y]);
}
var conflictRows = [];
var returnedRows = [];
var errorReturnedRows = [];
// NOTE(review): emptyAuthorCells is only assigned in the xlsx branch, and
// doAllValidations is only defined when it is empty, yet doAllValidations is
// called unconditionally further down.
if(emptyAuthorCells.length == 0){
var uniqueAuthor = $.unique(exceljson.map(function
(d){
return d.MARKET;
}));
// Issues one GET per value in uniqueAuthor and returns the jqXHR objects so
// the caller can wait for all of them. Response items that have a SUCCESS
// property go to errorReturnedRows; all others go to returnedRows.
var doAllValidations = function(){
var ajaxList = [];
var ajxIndex = 1;
$.each(uniqueAuthor, function (index, value){
var jqResponse =
$.ajax({
type: "get",
url: "authorlist.cfm?method=getlist&name=" +
value,
dataType: "json"
});
ajaxList.push(jqResponse);
jqResponse.then(
function( apiResponse ){
$.each (apiResponse, function (cc) {
if(apiResponse[cc].hasOwnProperty('SUCCESS')){
errorReturnedRows.push({
'success':
apiResponse[cc].SUCCESS,
'message':
apiResponse[cc].MESSAGE,
'country_code' : value
});
}
else{
returnedRows.push(apiResponse[cc]);
}
// }
// }
});
}
);
});
return ajaxList;
};
// /LOOP OVER country_code
}
var invalidRequests = [];
var validRequests = [];
$(function() {
var ajaxCalls = doAllValidations();
//begin apply
// Runs once every request returned by doAllValidations has completed.
$.when.apply($, ajaxCalls).done(function(){
//console.log(ajaxList);
$('#hidReturnedRows').val();
$('#hidReturnedRows').val(JSON.stringify(returnedRows));
if (exceljson.length > 0 && cnt == 0) {
if((emptyAuthorCells.length != 0) ||
(errorReturnedRows.length!=0) ) {
//data is invalid
console.log("data is invalid");
$('#displayErrors tr
td.previewSuccessClass').html("");
$('#displayErrors tr
td.previewErrorsClass').html(emptyActionCellsMessage);
$('#export-file').addClass('hidebtn');
}
else{
//outer loop
var found = false;
var book_found = false;
var response_validation_errors = [];
var message = "The author's zone is
incorrect";
var message2 = "This book already
exists";
$.each(exceljson, function(x, ej){
// console.log("inside outer
loop");
found = false;
$.each(returnedRows, function(y,
rr){
//compare inner row with outer
row to make sure they're the same
if(rr.AUTHOR_ID == ej.ID &&
rr.AUTHOR_NAME == ej.NAME)
{
if((rr.AUTHOR ==
ej.NATIVE_AUTHOR) && (rr.BOOK_QUALITY == ej.AUTHOR_ZONE)){
// console.log("found!");
found = true;
}
}
});
// Rows that matched a returned row are collected as invalid requests;
// rows without a match are collected as valid.
if(found){
invalidRequests.push({
"AUTHOR": ej.NAME,
"AUTHOR_ZONE":
ej.AUTHOR_ZONE,
"COUNTRY": ej.COUNTRY
});
}
else{
validRequests.push(ej);
}
});
// /outer loop
}
// NOTE(review): BindTable declares a third parameter (invalidreqs) that is
// not passed here.
BindTable(exceljson, '#exceltable');
cnt++;
}
// NOTE(review): the trailing `()` below calls the value returned by
// .done(...) as if it were a function.
})();
//end apply
});
};
if (xlsxflag) {/*If excel file is .xlsx extension than creates a
Array Buffer from excel*/
reader.readAsArrayBuffer($("#excelfile")[0].files[0]);
}
else {
reader.readAsBinaryString($("#excelfile")[0].files[0]);
}
}
else {
alert("Sorry! Your browser does not support HTML5!");
}
}
else {
alert("Please upload a valid Excel file!");
}
}
//Helper funcs
/**
 * Converts the JSON array to rows of an HTML table and flags invalid rows.
 *
 * A row is invalid when an entry of invalidreqs has the same AUTHOR, BOOKNAME
 * and COUNTRY. Invalid rows get MSG = "THIS ROW ALREADY EXISTS" set on the row
 * object, the CSS class 'response-errors' on their <tr>, and are followed by
 * an extra <tr> whose single cell spans all columns and shows the message
 * (the matching entry's MSG1 when it has one, otherwise the row's MSG).
 *
 * @param {Array<Object>} jsondata - Rows to render; invalid rows are mutated (MSG).
 * @param {string} tableid - jQuery selector of the target table.
 * @param {Array<Object>} [invalidreqs] - Invalid requests; treated as empty when omitted.
 */
function BindTable(jsondata, tableid, invalidreqs) {
    var columns = BindTableHeader(jsondata, tableid); /*Gets all the column headings of Excel*/
    var invalidList = invalidreqs || [];

    for (var i = 0; i < jsondata.length; i++) {
        var rowData = jsondata[i];
        var matched = invalidList.find(function (b) {
            return rowData.AUTHOR == b.AUTHOR && rowData.BOOKNAME == b.BOOKNAME &&
                rowData.COUNTRY == b.COUNTRY;
        });
        var has_error = matched !== undefined;
        if (has_error) {
            rowData.MSG = "THIS ROW ALREADY EXISTS";
        }

        var row$ = $('<tr/>');
        for (var colIndex = 0; colIndex < columns.length; colIndex++) {
            var cellValue = rowData[columns[colIndex]];
            // Always append the cell so columns stay aligned; calling the
            // jQuery setter with undefined acts as a getter and would return
            // a string instead of the cell.
            var cell$ = $('<td/>');
            if (cellValue !== undefined && cellValue !== null) {
                // Spreadsheet content is data, not markup: insert it as text.
                cell$.text(cellValue);
            }
            row$.append(cell$);
        }
        $(tableid).append(row$);

        if (has_error) {
            row$.addClass('response-errors'); //add class to make text red
            var error_row = $('<tr/>');
            var error_cell = $('<td/>');
            error_cell.attr('colspan', columns.length); //span the full width of the row
            error_cell.text(matched.MSG1 || rowData.MSG);
            error_row.append(error_cell);
            $(tableid).append(error_row);
        }
    }
}
/**
 * Collects every distinct property name found across the rows of jsondata (in
 * order of first appearance), appends a header row with one <th> per name to
 * the table selected by tableid, and returns the names.
 *
 * @param {Array<Object>} jsondata - Rows whose keys become the column headings.
 * @param {string} tableid - jQuery selector of the target table.
 * @returns {Array<string>} The distinct column names.
 */
function BindTableHeader(jsondata, tableid) {
    var names = [];
    var header$ = $('<tr/>');

    jsondata.forEach(function (record) {
        Object.keys(Object(record)).forEach(function (name) {
            // Only the first occurrence of a name creates a heading cell.
            if (names.indexOf(name) !== -1) {
                return;
            }
            names.push(name);
            header$.append($('<th/>').html(name));
        });
    });

    $(tableid).append(header$);
    return names;
}
Ok so what you want to do is:
1) Assign the row index to the invalidRequests object, on line 191 like this:
invalidRequests.push({
"AUTHOR": ej.NAME,
"AUTHOR_ZONE": ej.AUTHOR_ZONE,
"COUNTRY": ej.COUNTRY,
"index": x,
"MSG1": "Put the error message here"
});
Now it is very easy to determine which row has an error.
Since the invalidRequests is a private object of the ExcelTable function, you will need to
2) pass it on to the BindTable function like this:
BindTable(exceljson, '#exceltable', invalidRequests);
3) modify the BindTable function to check for invalidRequests and handle them:
/**
 * Renders jsondata as rows of the table selected by tableid and marks the
 * rows that have an invalid request attached.
 *
 * An invalid request belongs to the row whose position equals its `index`.
 * Such a row gets the CSS class 'error' and is followed by an extra row whose
 * single cell spans all columns and shows the request's MSG1.
 *
 * @param {Array<Object>} jsondata - Rows to render.
 * @param {string} tableid - jQuery selector of the target table.
 * @param {Array<{index: number, MSG1: string}>} [invalidreqs] - Invalid
 *   requests; treated as empty when omitted.
 */
function BindTable(jsondata, tableid, invalidreqs) {
    var columns = BindTableHeader(jsondata, tableid);

    // Index the invalid requests once instead of rescanning them for every
    // row. When several share an index the last one wins, as before.
    var requests = invalidreqs || [];
    var requestsByIndex = {};
    for (var u = 0; u < requests.length; u++) {
        requestsByIndex[requests[u].index] = requests[u];
    }

    for (var i = 0; i < jsondata.length; i++) {
        var has_error = Object.prototype.hasOwnProperty.call(requestsByIndex, i);
        var invalidreq = has_error ? requestsByIndex[i] : undefined;

        var row$ = $('<tr/>');
        for (var colIndex = 0; colIndex < columns.length; colIndex++) {
            var cellValue = jsondata[i][columns[colIndex]];
            // Always append the cell so columns stay aligned; calling the
            // jQuery setter with undefined acts as a getter and would return
            // a string instead of the cell.
            var cell$ = $('<td/>');
            if (cellValue !== undefined && cellValue !== null) {
                // Spreadsheet content is data, not markup: insert it as text.
                cell$.text(cellValue);
            }
            row$.append(cell$);
        }
        $(tableid).append(row$);

        if (has_error) {
            row$.addClass('error'); // add css class which will make the text red or whatever
            var error_row = $('<tr/>'); // create error row
            var error_cell = $('<td/>');
            error_cell.attr('colspan', columns.length); // span over all columns of table
            error_cell.text(invalidreq.MSG1);
            error_row.append(error_cell);
            $(tableid).append(error_row);
        }
    }
}
Please note it is not clear, nor specified in your code, in which column the error should appear. Try to implement that yourself by pushing that info into the invalidRequests object and reading it out on BindTable.
Please see my code below, I am interrogating active directory and retrieving back two fields, "name" and "cn". I want to concatenate these in an array and then assign to my drop down list. i.e. name + ' ' + cn. The code below publishes my results wrongly and is displaying all names and cn as individual results i.e. not concatenated.
Can someone advise me and put me in the right direction?
thanks,
George
// Searches one level below searchBase for group objects and pushes one entry
// per search result onto listItems, formatted as "<name> (<cn>)" with the
// "SCCM_" prefix removed from the name. ctx, ctls and listItems are expected
// to be declared by the surrounding script.
try
{
    // Get LDAP Context
    ctx = LdapServices.getLdapContext();
    //Specify the search scope
    ctls.setSearchScope(SearchControls.ONELEVEL_SCOPE);
    var searchFilter = "(&(objectClass=group))";
    //Specify the Base for the search
    var searchBase = "ou=Licensed Applications,ou=SCCM Apps,ou=Applications,ou=Groups,dc=XXX,dc=XX,dc=XX";
    //initialize counter to total the attribute values read
    var totalResults = 0;
    //Specify the attributes to return
    var returnedAtts=["name", "cn"];
    ctls.setReturningAttributes(returnedAtts);
    //Search for objects using the filter
    var answer = ctx.search(searchBase, searchFilter, ctls);
    //Loop through the search results
    while (answer.hasMoreElements())
    {
        var sr = answer.next();
        var attrs = sr.getAttributes();
        if (attrs != null)
        {
            try
            {
                // name and cn belong to the search result, not to a single
                // attribute: collect both across all attributes first and
                // push one combined item afterwards. Resetting them inside
                // the attribute loop produced one item per attribute.
                var name = "";
                var cn = "";
                for (var ae = attrs.getAll();ae.hasMore();)
                {
                    var attr = ae.next();
                    var pos = attr.toString().indexOf(":",0);
                    var attributeName = attr.toString().substring(0,pos);
                    for (var attrValues = attr.getAll();attrValues.hasMore();totalResults++)
                    {
                        // Always consume the value so the enumeration advances
                        // even for an attribute that is neither name nor cn.
                        var attrValue = attrValues.next();
                        if(attributeName == "name")
                        {
                            name = attrValue.replace('SCCM_','');
                        }
                        else if(attributeName == "cn")
                        {
                            cn = attrValue;
                        }
                    }
                }
                listItems.push(name + ' (' + cn + ')');
            }
            catch (e)
            {
                log("Problem listing items: " + e);
            }
        }
    }
}
catch (e)
{
    log("Problem searching directory: " + e);
}
finally
{
    // Close LDAP Context (it may never have been obtained if the lookup failed)
    if (typeof ctx !== "undefined" && ctx != null)
    {
        ctx.close();
    }
}
I don't know what javascript library you use to have JNDI like code into, but from the purely LDAP point of view :
An attribute can be multi-valued, so every attribute's values are returned in an array, even if it is single-valued (with the possible exception of the dn), for example:
{
"dn":"cn=user,dc=example,dc=com",
"name":["username"],
"cn":["commonname"]
}
If your library works like JNDI, a way to do it could be :
After the line : var attrs = sr.getAttributes();
if (attrs != null) {
try {
log ("name: " + attrs.get("name").get());
log ("cn: " + attrs.get("cn").get());
} catch (e) {
log ("Problem listing attributes from Global Catalog: " + e);
}
}
I am attempting to create a web scraper (in node.js) that will pull down information from a site, and write it to a file. I have it built to correctly work for one page, but when I try to use the function in a for loop, to iterate through multiple games, I get bad data in all of the games.
I understand that this is related to Javascript's asynchronous nature, and I have read about callback functions, but I'm not sure I understand how to apply it to my code. Any help would be GREATLY appreciated:
for(x = 4648; x < 4650; x++){ //iterate over a few gameIDs, used in URL for request
scrapeGame(x);
}
function scrapeGame(gameId){
//request from URL, scrape HTML to arrays as necessary
//write final array to file
}
Essentially, what I am looking to do, is within the for loop, tell it to WAIT to finish the scrapeGame(x) function before incrementing x and running it for the next game -- otherwise, the arrays start to overwrite each other and the data becomes a huge mess.
EDIT: I've now included the full code which I am attempting to run! I'm getting errors when looking in the files after they are written. For example, the first file is 8kb, second is ~16, 3rd is ~32, etc. It seems things aren't getting cleared before running the next game.
Idea of the program is to pull Jeopardy questions/answers from the archive site in order to eventually build a quiz app for myself.
//Iterate over arbitrary number of games, scrape each
// Starts a scrape for each game ID in the range and reports each outcome.
// `x` is declared here: the bare `x = 4648` created an implicit global, which
// is an error in strict mode and in ES modules.
for(var x = 4648; x < 4650; x++){
    scrapeGame(x, function(scrapeResult) {
        if(scrapeResult){
            console.log('Scrape Successful');
        } else {
            console.log('Scrape ERROR');
        }
    });
}
function scrapeGame(gameId, callback){
var request = require('request');
cheerio = require('cheerio');
fs = require('fs');
categories = [];
categorylist = [];
ids = [];
clues = [];
values = ['0','$200','$400','$600','$800','$1000','$400','$800','$1200','$1600','$2000'];
valuelist = [];
answers = [];
array = [];
file = [];
status = false;
var showGameURL = 'http://www.j-archive.com/showgame.php?game_id=' + gameId;
var showAnswerURL = 'http://www.j-archive.com/showgameresponses.php?game_id=' + gameId;
request(showGameURL, function(err, resp, body){
if(!err && resp.statusCode === 200){
var $ = cheerio.load(body);
//add a row to categories to avoid starting at 0
categories.push('Category List');
//pull all categories to use for later
$('td.category_name').each(function(){
var category = $(this).text();
categories.push(category);
});
//pull all clue IDs (coordinates), store to 1d array
//pull any id that has "stuck" in the string, to prevent duplicates
$("[id*='stuck']").each(function(){
var id = $(this).attr('id');
id = id.toString();
id = id.substring(0, id.length - 6);
ids.push(id);
//if single J, pick category 1-6
if (id.indexOf("_J_") !== -1){
var catid = id.charAt(7);
categorylist.push(categories[catid]);
var valId = id.charAt(9);
valuelist.push(values[valId]);
}
//if double J, pick category 7-12
else if (id.indexOf("_DJ_") !== -1){
var catid = parseInt(id.charAt(8)) + 6;
categorylist.push(categories[catid]);
var valId = parseInt(id.charAt(10)) + 5;
valuelist.push(values[valId]);
}
//if final J, pick category 13
else {
categorylist.push(categories[13]);
}
});
//pull all clue texts, store to 1d array
$('td.clue_text').each(function(){
var clue = $(this).text();
clues.push(clue);
});
//push pulled values to big array
array.push(ids);
array.push(categorylist);
array.push(valuelist);
array.push(clues);
//new request to different URL to pull responses
request(showAnswerURL, function(err, resp, body){
if(!err && resp.statusCode === 200){
var $ = cheerio.load(body);
$('.correct_response').each(function(){
var answer = $(this).text();
answers.push(answer);
});
//push answers to big array
array.push(answers);
//combine arrays into 1-d array to prep for writing to file
for(var i = 0; i < array[0].length; i++){
var print = array[0][i] + "|" + array[1][i] + "|" + array[2][i] + "|" + array[3][i] + "|" + array[4][i];
var stringPrint = print.toString();
file.push(stringPrint);
}
//update string, add newlines, etc.
var stringFile = JSON.stringify(file);
stringFile = stringFile.split('\\').join('');
stringFile = stringFile.split('","').join('\n');
//write to file, eventually will append to end of one big file
fs.writeFile('J_GAME_' + gameId +'.txt', stringFile, function(err) {
if(err) {
console.log(err);
} else {
console.log("Game #" + gameId + " has been scraped.");
status = true;
}
});
}
});
}
});
//clear arrays used
valuelist = [];
answers = [];
categories = [];
categorylist = [];
ids = [];
clues = [];
array = [];
file = [];
//feed callback status
callback(status);
}
// Iterate over a few gameIDs, used in URL for request.
// Scrapes the game IDs one after another: the next scrape is started from
// inside the callback of the previous one. A plain `for` loop cannot wait for
// an asynchronous callback, and `break` is a syntax error inside a callback
// function, so the iteration is expressed as a function that re-invokes itself.
(function scrapeNext(x, endExclusive) {
    if (x >= endExclusive) {
        // All requested games have been processed.
        return;
    }
    // Pass in the id and the callback that receives the outcome:
    // a boolean status and, on failure, the error.
    scrapeGame(x, function(scrapeResult, err) {
        if (scrapeResult) {
            // Scrape was true; move on to the next game ID.
            console.log('Scrape Successful');
            scrapeNext(x + 1, endExclusive);
        } else {
            // Scrape was false: report the error and stop by not scheduling
            // the next game. Call scrapeNext(x + 1, endExclusive) here as
            // well if you want to continue with the next game ID instead.
            console.log('Scrape ERROR :: ' + err);
        }
    });
})(4648, 4650);
// This function now accepts two arguments.
function scrapeGame(gameId, callback) {
// ************************************************
// ** Do Your Work Here **
// Request from URL, scrape HTML to arrays as necessary.
// Write final array to file.
// After file creation, execute the callback and pass bool
// status (true/false).
// ************************************************
var request = require('request'),
cheerio = require('cheerio'),
fs = require('fs'),
categories = [],
categorylist = [],
ids = [],
clues = [],
values = [
'0',
'$200',
'$400',
'$600',
'$800',
'$1000',
'$400',
'$800',
'$1200',
'$1600',
'$2000'
],
valuelist = [],
answers = [],
array = [],
file = [],
showGameURL = 'http://www.j-archive.com/showgame.php?game_id=' + gameId,
showAnswerURL = 'http://www.j-archive.com/showgameresponses.php?game_id=' + gameId;
request(showGameURL, function(err, resp, body) {
if (!err && resp.statusCode === 200) {
var $ = cheerio.load(body);
//add a row to categories to avoid starting at 0
categories.push('Category List');
//pull all categories to use for later
$('td.category_name').each(function() {
var category = $(this).text();
categories.push(category);
});
//pull all clue IDs (coordinates), store to 1d array
//pull any id that has "stuck" in the string, to prevent duplicates
$("[id*='stuck']").each(function() {
var id = $(this).attr('id');
id = id.toString();
id = id.substring(0, id.length - 6);
ids.push(id);
//if single J, pick category 1-6
if (id.indexOf("_J_") !== -1) {
var catid = id.charAt(7);
categorylist.push(categories[catid]);
var valId = id.charAt(9);
valuelist.push(values[valId]);
}
//if double J, pick category 7-12
else if (id.indexOf("_DJ_") !== -1) {
var catid = parseInt(id.charAt(8)) + 6;
categorylist.push(categories[catid]);
var valId = parseInt(id.charAt(10)) + 5;
valuelist.push(values[valId]);
}
//if final J, pick category 13
else {
categorylist.push(categories[13]);
}
});
//pull all clue texts, store to 1d array
$('td.clue_text').each(function() {
var clue = $(this).text();
clues.push(clue);
});
//push pulled values to big array
array.push(ids);
array.push(categorylist);
array.push(valuelist);
array.push(clues);
//new request to different URL to pull responses
request(showAnswerURL, function(err, resp, body) {
if (!err && resp.statusCode === 200) {
var $ = cheerio.load(body);
$('.correct_response').each(function() {
var answer = $(this).text();
answers.push(answer);
});
//push answers to big array
array.push(answers);
//combine arrays into 1-d array to prep for writing to file
for (var i = 0; i < array[0].length; i++) {
var print = array[0][i] + "|" + array[1][i] + "|" + array[2][i] + "|" + array[3][i] + "|" + array[4][i];
var stringPrint = print.toString();
file.push(stringPrint);
}
//update string, add newlines, etc.
var stringFile = JSON.stringify(file);
stringFile = stringFile.split('\\').join('');
stringFile = stringFile.split('","').join('\n');
//write to file, eventually will append to end of one big file
fs.writeFile('J_GAME_' + gameId + '.txt', stringFile, function(err) {
//clear arrays used
valuelist = [];
answers = [];
categories = [];
categorylist = [];
ids = [];
clues = [];
array = [];
file = [];
if (err) {
// ******************************************
// Callback false with error.
callback(false, err);
// ******************************************
} else {
console.log("Game #" + gameId + " has been scraped.");
// ******************************************
// Callback true with no error.
callback(true);
// ******************************************
}
});
}
});
}
});
}
My assumption is that you want them to be scraped one after one, not in parallel. So, for loop won't help. The following approach should do the trick:
// Scrapes the games one after another: each completion callback starts the
// next scrape. scrapeGame itself is re-invoked; its return value is not a
// function and cannot be called.
var x = 4648;
// Last game ID to scrape (inclusive); matches the `x < 4650` bound of the
// loop in the question.
var lastGameId = 4649;
scrapeGame(x, function cb(){
    if(x >= lastGameId){
        return;
    }
    x++;
    scrapeGame(x, cb);
});
function scrapeGame(gameId){
//request from URL, scrape HTML to arrays as necessary
//write final array to file
}
For nested async functions that you want executed serially, you should just forget about the for loop.
An example of correct request handling with http client:
/**
 * Example of request handling with the http client: collects the response
 * body chunk by chunk and invokes cb once the response has ended.
 *
 * @param {number} gameId - Game to scrape (used when building `options`).
 * @param {function(Error=)} cb - Called without arguments after the response
 *   has been fully received, or with the error if the request fails.
 */
function scrapeGame(gameId, cb){
    //your code and set options
    var req = http.request(options, function(response){
        var result = "";
        response.on('data', function (chunk) {
            result += chunk;
        });
        response.on('end',function(){
            //write data here;
            //do the callback
            cb();
        });
    });
    // Without an 'error' listener a failed request throws as an unhandled
    // 'error' event and cb would never run.
    req.on('error', function(err){
        cb(err);
    });
    // http.request only sends the request once end() has been called.
    req.end();
}
I solved the ROOT cause of the issue that I was seeing, though I do believe without the callback assistance from red above, I would have been just as lost.
Turns out the data was processing correctly, but the file write was scrambling. Turns out that there is a different method to call instead of writeFile or appendFile:
fs.appendFileSync();
Calling the Synchronous version processed the writes to the file IN THE ORDER they got appended to the file, instead of just going for it. This, in addition to the callback help above, solved the issue.
Thanks to everyone for the assistance!
I'm new to javaScript and am trying to load a CSV or TXT file into the browser.
When the file is selected an event handler displays the file name and details, once the user hits the load button the script should double check the file extension, load the file then carry out some further checks on the file.
My problem is that the file load function seems to always be called last meaning the other checks happen first.
The file is held here: http://bananamountain.net/project/20140703pm/file-loader2.html
Code pasted below:
</head>
<body>
<script>
// Feature-detect the File API pieces this page depends on and warn the user
// when any of them is missing; nothing needs doing when all are present.
if (!(window.File && window.FileReader && window.FileList && window.Blob)) {
  alert('The File APIs are not fully supported in this browser.');
}
</script>
<h3>File Load test</h3>
<p>Use only test-data-csv.csv just now</p>
<input type="file" id="file" name="file" required="required" accept=".csv, .txt" />
<button onclick="handleFileLoad()">Load button</button>
<output id="list"></output>
<script>
// Page-wide state shared by the load/check functions below.
var content;            // raw text of the file once it has been read
var fileName;           // name of the currently selected file
var splitString = ",";  // value separator; comma unless detection changes it
var rows = [];          // file text split into lines
var headerRow = [];     // column headings
var values = [];        // rows split into individual cell values
/**
 * Click handler for the load button: starts the file load, then runs the
 * content check and writes its verdict to the page.
 *
 * NOTE(review): loadFile() only starts the read and fills `rows` later from
 * its onload handler, so the check below may run before any rows exist.
 */
function handleFileLoad() {
  loadFile();

  var report = "<strong>Suitable file content: ";
  report += checkFileContent();
  report += "</strong><br />";
  document.write(report);
}
/**
 * Check that the selected file's extension is .csv or .txt (any letter case).
 *
 * Reads the global `fileName` and reports progress with document.write.
 * The extension is the text after the LAST dot, so "my.data.csv" is accepted
 * and "data.csv.zip" is rejected; a name without a dot (or no name at all)
 * is rejected instead of throwing.
 *
 * @returns {boolean} true for a .csv or .txt file, false otherwise.
 */
function checkFileType() {
  document.write("inside checkFileType<br/>");

  var name = typeof fileName === "string" ? fileName : "";
  var lastDot = name.lastIndexOf(".");
  var extension = lastDot === -1 ? "" : name.slice(lastDot + 1).toUpperCase();

  if (extension === "CSV" || extension === "TXT") {
    document.write('suitable file selected<br/>');
    return (true);
  }

  document.write('Invalid file format! \nPlease select a suitable .txt or .csv file<br/>');
  return (false);
} // end of checkFileType
/**
 * Read the file chosen in the #file input as text and split it into lines.
 *
 * The read is asynchronous: this function returns immediately, and the
 * globals `content` and `rows` are only filled in when the reader's load
 * event fires. Work that depends on the file contents should be passed in
 * as `onLoaded` rather than run straight after calling loadFile().
 *
 * @param {Function} [onLoaded] - optional; called with the `rows` array once
 *   the file has been read and split.
 */
function loadFile(onLoaded) {
  var file = document.getElementById("file").files[0];
  var reader = new FileReader();

  reader.onload = function (event) {
    content = event.target.result;
    // Split on runs of CR/LF characters. The previous pattern [\r\n|\n] was a
    // character class that also contained "|", so pipes inside a line were
    // treated as line breaks.
    rows = content.split(/[\r\n]+/);
    for (var i = 0; i < rows.length; i++) {
      document.write("row found at line " + i + " is " + rows[i] + ".<br/>");
    }
    if (typeof onLoaded === "function") {
      onLoaded(rows);
    }
  };

  reader.readAsText(file);
}
/**
 * Remove blank lines from the global `rows` and report what was found.
 *
 * A row counts as blank when it is "", undefined, null, ",," or "\t\t".
 * Progress is reported with document.write.
 *
 * @returns {boolean|string} false when no data rows remain, "deletions" when
 *   at least one blank row was removed, true when nothing had to be removed.
 */
function checkFileContent() {
  document.write("inside check file content<br/>");

  // checking content of rows
  for (var i = 0; i < rows.length; i++) {
    document.write("Row " + i + " is " + rows[i]);
  }

  var filteredArr = rows.filter(function (val) {
    return !(val === "" || typeof val == "undefined" || val === null || val === ",," || val === "\t\t");
  });

  // identifies empty file (e.g. all blank lines deleted)
  if (filteredArr.length === 0) {
    document.write("Empty file - no data found <br/>");
    rows = filteredArr;
    return false;
  }

  // Filtering can only shrink the list, so deletions happened exactly when the
  // filtered list is shorter. The previous test compared the other way round
  // and could never be true.
  if (filteredArr.length < rows.length) {
    rows = filteredArr;
    document.write("blank rows deleted - " + rows.length + " rows remaining. <br/>");
    return ("deletions");
  }

  document.write("No blank rows <br/>");
  return true;
} // end of check file content
/**
 * Build the global `headerRow` from the first row of the global `rows`.
 *
 * Column 0 is skipped. For every other column a non-numeric cell is taken as
 * the heading; a numeric cell gets the generated heading "Risk <column>".
 * When column 1 is non-numeric the first row is treated as a header line and
 * removed from `rows`, so that only data rows remain.
 *
 * @returns {number} how many headings were taken from the file (0 when
 *   `rows` is empty).
 */
function checkHeaderRow() {
  var replaceCount = 0;

  // Start from a clean list so headings from a previously loaded file do not
  // linger when the new file has fewer columns.
  headerRow.length = 0;

  if (rows.length === 0 || rows[0] == null) {
    return (replaceCount);
  }

  var checkArray = rows[0].split(splitString);
  for (var i = 1; i < checkArray.length; i++) {
    // start at array[1] as array[0] not likely to be a header value
    if (isNaN(checkArray[i])) {
      headerRow[i - 1] = checkArray[i];
      replaceCount++;
    } else {
      headerRow[i - 1] = "Risk " + i;
    }
  }

  // Actually remove the header line. Overwriting it with null left a null
  // entry at rows[0], which code that splits every row cannot handle.
  if (isNaN(checkArray[1])) {
    rows.shift();
  }
  return (replaceCount);
} // end of checkHeaderRow
/**
 * Decide whether values are separated by tabs or commas and store the result
 * in the global `splitString`.
 *
 * Tab and comma occurrences are counted with countCharacter(). The more
 * frequent character wins; on a tie the user is asked. Messages are written
 * with document.write.
 *
 * @returns {boolean} true when a separator was chosen, false when none could
 *   be determined (on an invalid answer `splitString` is reset to ",").
 */
function getSplitString() {
  var tabCount = countCharacter("\t");
  var commaCount = countCharacter(",");

  if (tabCount === 0 && commaCount === 0) {
    document.write("Cannot detect the value seperator,\n please ammend file to seperate values with tabs or commas");
    return false;
  }

  if (tabCount === commaCount) {
    var answer = prompt("Cannot detect the value seperator,\n please input \"\\t\" for tabs or \",\" for commas");
    // The prompt asks the user to type backslash + t, which arrives as two
    // characters; translate that to a real tab.
    if (answer === "\\t") {
      answer = "\t";
    }
    // Reject anything that is neither separator (null means "cancelled").
    // The previous test joined the comparisons with ||, which is always true.
    if (answer !== "\t" && answer !== ",") {
      document.write("please check file and try again<br/>");
      splitString = ',';
      return false;
    }
    splitString = answer;
    return true;
  }

  if (tabCount > commaCount) {
    splitString = "\t";
    if (commaCount !== 0) {
      // both characters occur, so the choice may not be right
      document.write("tab character selected as value seperator.<br/>");
    }
    return true;
  }

  splitString = ",";
  if (tabCount !== 0) {
    // both characters occur, so the choice may not be right
    document.write("comma character selected as value seperator.<br/>");
  }
  return true;
} // end of getSplitString
/**
 * Split every entry of the global `rows` on the global `splitString` and
 * append the resulting cell arrays to the global `values`.
 *
 * @returns {Array|boolean} the `values` array, or false when the first row is
 *   null or undefined (for example when `rows` is empty).
 */
function splitRows() {
  // Nothing usable in the first slot: bail out before touching `values`.
  if (rows[0] == null) {
    return false;
  }

  for (var rowIndex = 0; rowIndex < rows.length; rowIndex += 1) {
    var cells = rows[rowIndex].split(splitString);
    values.push(cells);
  }
  return values;
} // end of splitRows
/**
 * Unfinished: walks every cell of the global `values` and writes "in here"
 * for each cell that is NOT blank ("", undefined, null, ",," or "\t\t").
 *
 * NOTE(review): given the function name and the "remove" placeholder, the
 * test may have been meant to match blank cells instead - confirm before
 * building on this.
 */
function checkEmptyCells () {
  for (var rowIndex = 0; rowIndex < values.length; rowIndex++) {
    var cells = values[rowIndex];
    for (var colIndex = 0; colIndex < cells.length; colIndex++) {
      var cell = cells[colIndex];
      var isBlank = cell === "" ||
        typeof cell == "undefined" ||
        cell === null ||
        cell === ",," ||
        cell === "\t\t";
      if (!isBlank) {
        // remove line values[i][j]
        document.write("in here");
      }
    }
  }
} // NOT FINISHED - STOPPED HERE
/**
 * Count how often `character` occurs in the first five entries of the global
 * `rows` (or in all of them when there are fewer than five).
 *
 * @param {string} character - the text to count.
 * @returns {number} total number of occurrences in the inspected rows.
 */
function countCharacter (character) {
  var limit = Math.min(rows.length, 5);
  var total = 0;
  for (var line = 0; line < limit; line++) {
    // Splitting on the character yields one more piece than there are occurrences.
    var pieces = rows[line].split(character);
    total += pieces.length - 1;
  }
  return total;
} // End of countCharacter
/**
 * Change handler for the file input: shows name, size and last-modified date
 * of each selected file in the #list element and stores the escaped name of
 * the last file in the global `fileName`.
 *
 * @param {Event} evt - change event whose target exposes the selected files.
 */
function handleFileSelect(evt) {
  var selected = evt.target.files; // FileList object
  var parts = [];

  // Walk the list until the first missing entry.
  var index = 0;
  var entry = selected[index];
  while (entry) {
    var modified = 'n/a';
    if (entry.lastModifiedDate) {
      modified = entry.lastModifiedDate.toLocaleDateString();
    }
    parts.push(
      '<strong>', escape(entry.name), '</strong> ',
      ' - ', entry.size, ' bytes, last modified: ',
      modified,
      ''
    );
    fileName = escape(entry.name);

    index++;
    entry = selected[index];
  }

  var listElement = document.getElementById('list');
  listElement.innerHTML = '<div class="file-name">' + parts.join('') + '</div>';
}
// Run handleFileSelect every time the user picks a file in the #file input.
document.getElementById('file').addEventListener('change', handleFileSelect, false);
</script>
</body>
</html>
So first of all I think I asked the question wrong and it was for that reason that there were limited responses. I'm adding the answer so that if anyone has the same issue and comes across this that it can help them.
The problem was not in the html file loading but a file being loaded through javascript. The script carried out some checks on the file, loaded the file and then carried out further checks on the contents of the file.
This was all happening correctly; however, JavaScript reads the file asynchronously: it calls the functions in turn but moves on to the next function before the file load started by the current one has finished.
Imagine you go to a bar, order drinks, pay for your drinks and go to your table. JavaScript would do this but without the normal pauses of waiting to get served, the pouring of the drinks and getting change.
Essentially my code was going back to the table without drinks (or checking the contents of the file without it finishing loading).
To fix it I put a timeout in; this probably isn't the best approach, as the load time will depend on the size of the file. However, it works for just now and allows me to get on with other stuff.
A snippet of the working code is as follows:
{
/**
 * Click handler for the load button.
 *
 * Rejects files with an unsuitable extension, otherwise clears `values`,
 * starts the file load and, one second later, runs the content checks. The
 * page is only updated when those checks pass.
 *
 * NOTE(review): the fixed one-second delay is a stop-gap; a large file may
 * take longer to read than that.
 */
function handleFileLoad() {
  if (!checkFileType()) {
    // alert() shows plain text, so the stray "<br/>" markup was dropped.
    alert("Invalid file format! \nPlease select a suitable .txt or .csv file");
    return;
  }

  values = [];
  loadFile();

  // setTimeout needs a function to call later. The previous code passed the
  // result of calling fileContentChecks() straight away, so nothing was
  // delayed, and it then tested the timer id (always truthy) instead of the
  // outcome of the checks.
  setTimeout(function () {
    if (!fileContentChecks()) {
      return;
    }
    setData(); //PROBABLY PUT THESE IN A FUNCTION OR TWO
    setComboLists(); //SO THESE CAN BE CALLED LATER TO UPDATE PAGE
    UpdateAssetList();
    UpdateXAxisList();
    UpdateYAxisList();
    UpdateTable();
  }, 1000);
}
/**
 * Check that the selected file's extension is .csv or .txt (any letter case).
 *
 * Reads the global `fileName`. The extension is the text after the LAST dot,
 * so "my.data.csv" is accepted and "data.csv.zip" is rejected; a name without
 * a dot (or no name at all) is rejected instead of throwing.
 *
 * @returns {boolean} true for a .csv or .txt file, false otherwise.
 */
function checkFileType() { // CHECK FILE NAME EXTENSION
  var name = typeof fileName === "string" ? fileName : "";
  var lastDot = name.lastIndexOf(".");
  var extension = lastDot === -1 ? "" : name.slice(lastDot + 1).toUpperCase();
  return (extension === "CSV" || extension === "TXT");
} // end of checkFileType
/**
 * Read the file chosen in the #file input as text and split it into lines.
 *
 * The read is asynchronous: this function returns immediately, and the
 * globals `content` and `rows` are only filled in when the reader's load
 * event fires. Passing `onLoaded` is the reliable way to run code after the
 * contents are available; waiting a fixed time is only a guess.
 *
 * @param {Function} [onLoaded] - optional; called with the `rows` array once
 *   the file has been read and split.
 */
function loadFile(onLoaded) { // LOADS FILE AND SPLITS INTO ROWS
  var file = document.getElementById("file").files[0];
  var reader = new FileReader();

  reader.onload = function (event) {
    content = event.target.result;
    // Split on runs of CR/LF characters. The previous pattern [\r\n|\n] was a
    // character class that also contained "|", so pipes inside a line were
    // treated as line breaks.
    rows = content.split(/[\r\n]+/);
    if (typeof onLoaded === "function") {
      onLoaded(rows);
    }
  };

  reader.readAsText(file);
} // end of loadFile
function fileContentChecks() {
if (checkFileContent()) {
if (getSplitString()) {
checkHeaderRow();
} else {
alert("Seperator value not found"); // not sure if this is required?
return false;
}
} else {
alert("File contents not verified, please check file and try again.");
return false;
}