How to convert Excel data to JSON format in Node.js?

I am a beginner in Node.js. I was tasked to read an Excel file and convert it into JSON format so that it can be stored in a MongoDB database.
excel2.js file:
const XlsxStreamReader = require("xlsx-stream-reader");
var fs = require('fs');
const config = require('./excelpush.json');
const db = require('./dbmanager.js');

var finalList = [];
var singlePerson = {};

class ExcelReader {
    readFile() {
        var workBookReader = new XlsxStreamReader();
        workBookReader.on('error', function (error) {
            throw (error);
        })
        workBookReader.on('worksheet', function (workSheetReader) {
            if (workSheetReader.id > 1) {
                workSheetReader.skip();
                return;
            }
            var isFirstLine = true;
            var headerIndex = [];
            workSheetReader.on('row', function (row) {
                if (isFirstLine) {
                    headerIndex = row.values.slice(1);
                }
                else if (!isFirstLine) {
                    let rowValues = row.values.slice(1);
                    let valueIndex = 0;
                    headerIndex.forEach(currentval => {
                        singlePerson[currentval] = rowValues[valueIndex];
                        valueIndex++;
                    });
                    finalList.push(singlePerson);
                }
                isFirstLine = false;
            });
            workSheetReader.on('end', function () {
            });
            workSheetReader.process()
        });
        workBookReader.on('end', function () {
            //console.log(finalList);
            console.log('finished!');
        });
        fs.createReadStream(config.filePath).pipe(workBookReader);
    }
}

excelReader = new ExcelReader();
excelReader.readFile();
db.connectDb();
db.insertDb(finalList);
dbmanager.js file:
var mongoose = require('mongoose');
const config = require('./db.json');

function connectDb() {
    mongoose.connect(`mongodb://${config.dbConfig.host}:${config.dbConfig.port}/${config.dbConfig.dbName}`);
    mongoose.connection.once('open', function () {
        console.log('Connection has been made');
    }).on('error', function (error) {
        console.log('error is:' + error);
    })
}

function insertDb(list) {
    var myschema = new mongoose.Schema({}, { strict: false });
    var obj = mongoose.model('myschema', myschema, `${config.dbConfig.collectionName}`);
    var obj1 = new obj(list);
    obj1.save();
}

module.exports = {
    connectDb, insertDb
}
db.json file:
{
    "dbConfig": {
        "host": "localhost",
        "port": "27017",
        "dbName": "PRACTICE",
        "collectionName": "Excel"
    }
}
excelpush.json file:
{
    "filePath": "D:\\grv_tracx_updt (1).xlsx"
}
Here excel2.js takes the Excel file path from excelpush.json, reads the file, and stores the rows as an array of objects in the finalList variable.
The database connection and insertion code are in dbmanager.js.
I am not able to store the data: the code runs without errors, but nothing is stored in the MongoDB database.
Note: the Excel file is large.
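Judging from the code shown, the most likely reason nothing lands in MongoDB is ordering: db.insertDb(finalList) runs synchronously right after readFile() returns, before the stream has emitted a single row, so an empty list reaches Mongoose. A second problem is that every row writes into the same shared singlePerson object. Below is a minimal sketch of one way to restructure this, keeping the same dbmanager.js API and the xlsx-stream-reader events used above; the exact flow control is illustrative, not the only possible fix.

// excel2.js (sketch): build a fresh object per row and insert only
// after the workbook's 'end' event, when finalList is complete.
const XlsxStreamReader = require("xlsx-stream-reader");
const fs = require('fs');
const config = require('./excelpush.json');
const db = require('./dbmanager.js');

const finalList = [];
const workBookReader = new XlsxStreamReader();

workBookReader.on('worksheet', function (workSheetReader) {
    if (workSheetReader.id > 1) { workSheetReader.skip(); return; }
    let headerIndex = [];
    let isFirstLine = true;
    workSheetReader.on('row', function (row) {
        const rowValues = row.values.slice(1);
        if (isFirstLine) {
            headerIndex = rowValues;
        } else {
            const person = {};   // new object per row, not a shared one
            headerIndex.forEach((header, i) => { person[header] = rowValues[i]; });
            finalList.push(person);
        }
        isFirstLine = false;
    });
    workSheetReader.process();
});

workBookReader.on('end', function () {
    // The list is only complete here, so connect and insert now.
    db.connectDb();
    db.insertDb(finalList);
});

fs.createReadStream(config.filePath).pipe(workBookReader);

Separately, in insertDb, passing the whole array to new obj(list) builds a single document; Model.insertMany(list) on the model (obj here) is the usual way to insert an array of plain objects.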

Related

Updating a Firestore Document with Google Sheets Apps Script

I'm finding it hard to update the fields in a Firestore collection. The document IDs of the persons are auto-generated. The inline code below shows what I've been able to derive and make work from the tutorials I have followed. Except for the 'updateFirestoreDocument' function, everything else works without error. How do I rewrite the code to export the modified cells on Google Sheets to the right persons' fields in the Firestore collection? Thanks
function onOpen() {
    SpreadsheetApp.getUi().createMenu('🔥 Firebase')
        .addItem('⏪ Export to Firestore', 'main')
        .addItem('⏩ Import from Firestore', 'menuImport')
        .addToUi();
}

function main() {
    var email = "fireb...ccount.com";
    var key = "-----BEGI...rxEp...RIVATE KEY-----\n";
    var projectId = "co...t";
    var sheet = SpreadsheetApp.getActiveSheet();
    var sheetName = sheet.getName();
    var properties = getProperties(sheet);
    var records = getRecords(sheet);
    var firestore = FirestoreApp.getFirestore(email, key, projectId);
    updateFirestoreDocument(firestore, sheetName, documentId, properties, data);
    exportToFirestore(firestore, sheetName, properties, records);
}

function updateFirestoreDocument(firestore, collectionName, documentId, properties, data) {
    var documentRef = firestore.getDocument(collectionName, documentId);
    if (documentRef.exists()) {
        properties.forEach(function(prop) {
            documentRef.updateData(prop, data[prop]);
        });
    } else {
        firestore.createDocument(collectionName, documentId, data);
    }
}

function exportToFirestore(firestore, collectionName, properties, records) {
    records.map(function(record) {
        var data = {};
        properties.forEach(function(prop, i) { data[prop] = record[i]; });
        if (data[properties[0]] === undefined || data[properties[0]] === null) {
            return;
        }
        // var documentId = data[properties[1]]; // first column
        firestore.createDocument(collectionName, data);
        // firestore.createDocument(collectionName, documentId, data, { id: documentId });
    });
}

function getProperties(sheet) {
    return sheet.getRange(2, 1, 1, sheet.getLastColumn()).getValues()[0];
}

function getRecords(sheet) {
    var data = sheet.getDataRange().getValues();
    var dataToImport = [];
    for (var i = 2; i < data.length; i++) {
        dataToImport.push(data[i]);
    }
    return dataToImport;
}
I tried defining documentId. I was expecting the document to be renamed, but the document fields got modified wrongly instead.
function main() {
    const { email, key, projectId } = getSecrets_();
    const sheet = SpreadsheetApp.getActiveSheet();
    const firestore = FirestoreApp.getFirestore(email, key, projectId);
    exportToFirestore_(firestore, sheet.getName(), getProperties_(sheet), getRecords_(sheet));
}

function getSecrets_() {
    return {
        email: 'fireb..#..ccount.com',
        key: '-----BEGI...rxEp...RIVATE KEY-----\n',
        projectId: 'co...t',
    };
}

function exportToFirestore_(firestore, collectionName, properties, records) {
    records.map(record => {
        const data = {};
        properties.forEach((prop, i) => data[prop] = record[i]);
        return data;
    }).forEach(object => {
        firestore.createDocument(collectionName, object);
    });
}

function getProperties_(sheet) {
    return sheet.getRange(2, 1, 1, sheet.getLastColumn()).getValues()[0];
}

function getRecords_(sheet) {
    return sheet.getDataRange().getValues().slice(1);
}

Cannot read properties of undefined looping through XLSX file in Cypress test

I am trying to convert values from an XLSX file into a JSON file in a Cypress test.
I have copied code from this blog exactly, but when I hit the line below:
console.log(jsonData[index].data)
I get this error message:
Cannot read properties of undefined (reading '1')
Here is the code in my spec.js file:
describe('convert data to Json', () => {
    it('read data from xcel', () => {
        cy.parseXlsx('cypress/fixtures/excelData.xlsx').then((jsonData) => {
            const rowLength = Cypress.$(jsonData[0].data).length
            for (let index = 0; index < rowLength; index++) {
                var jsonData = jsonData[index].data
                console.log(jsonData[index].data)
                cy.writeFile("cypress/fixtures/xlsxData.json", { username: jsonData[0][0], password: jsonData[0][1] })
            }
        })
    })
})
And here is where my parseXlsx function is defined:
Plugins/index.js:
const xlsx = require('node-xlsx').default;
const fs = require('fs'); // for file
const path = require('path'); // for file path

module.exports = (on, config) => {
    on('task', {
        parseXlsx({ filePath }) {
            return new Promise((resolve, reject) => {
                try {
                    const jsonData = xlsx.parse(fs.readFileSync(filePath));
                    resolve(jsonData);
                } catch (e) {
                    reject(e);
                }
            });
        }
    });
}
My cypress/fixtures/excelData.xlsx file looks like this:
USERNAME PASSWORD
Username1 Password1
Username2 Password2
Can someone please tell me how to resolve this error?
I managed to find a solution:
describe('convert data to Json', () => {
    it('read data from xcel', () => {
        cy.parseXlsx('cypress/fixtures/excelData.xlsx').then((jsonData) => {
            const rowLength = Cypress.$(jsonData[0].data).length
            for (let i = 0; i < rowLength; i++) {
                cy.log('Username: ' + jsonData[0].data[i][0]);
                cy.log('Password: ' + jsonData[0].data[i][1]);
                cy.writeFile("cypress/fixtures/xlsxData.json", { usernameValue: jsonData[0].data[i][0], passwordValue: jsonData[0].data[i][1] })
            }
        })
    })
})
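One caveat about the loop above: cy.writeFile replaces xlsxData.json on every iteration, so the file ends up holding only the last row. If the goal is a JSON file containing every credential pair, a small variation (same parseXlsx task; the output shape here is just an example) is to collect the rows first and write once:

describe('convert data to Json', () => {
    it('read data from xcel', () => {
        cy.parseXlsx('cypress/fixtures/excelData.xlsx').then((jsonData) => {
            const rows = jsonData[0].data;   // first sheet, as an array of rows
            // Skip the header row and map each remaining row to an object.
            const credentials = rows.slice(1).map(([username, password]) => ({
                usernameValue: username,
                passwordValue: password,
            }));
            // A single write containing all rows instead of one write per row.
            cy.writeFile('cypress/fixtures/xlsxData.json', credentials);
        });
    });
});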

Export of a local file to DynamoDB with Node

I need to read an Excel spreadsheet, generate a unique field from it, and create an id to save to the database.
I can export data from an xlsx file to DynamoDB with this script, but I am unsuccessful when the file is too large. Can I limit the reading of the file to one part at a time, without modifying the file or splitting it into pieces?
var xlsx = require('xlsx');
var AWS = require("aws-sdk");

const controller = {};
var docClient = new AWS.DynamoDB.DocumentClient();
var table = "Table";

controller.export = (req, res) => {
    var wb = xlsx.readFileSync("data/sample.xlsx", { cellDates: true })
    var ws = wb.Sheets["FIT"]
    var data = xlsx.utils.sheet_to_json(ws)
    var NewData = data.map(function (record) {
        record.NEW_ID = record.CD_EMPRESA + "_" + record.LOC_INI + "_" + record.LOC_FIM;
        return record;
    });
    for (let index = 0; index < NewData.length; index++) {
        var params = {
            TableName: table,
            Item: {
                "NEW_ID": NewData[index].NEW_ID,
                "CD_EMPRESA": NewData[index].CD_EMPRESA,
                "LOC_INI": NewData[index].LOC_INI,
                "LOC_FIM": NewData[index].LOC_FIM,
                "ID_ATIVO": NewData[index].ID_ATIVO,
            }
        };
        docClient.put(params, function (err, data) {
            if (err) {
                console.log("Error", params, err);
            } else {
                console.log("Success", params);
            }
        });
    }
    res.status(200).json();
};

return controller;
}; // closes an enclosing wrapper (e.g. module.exports = ... => { ... }) not included in the snippet
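Since the problem only appears with large files, note that the loop above fires one unthrottled docClient.put per row, which is usually what gives out first on a big spreadsheet. A common approach (a sketch, not taken from the question) is to group the rows into BatchWriteItem requests of at most 25 items and send them one at a time; the sheet parsing and NEW_ID construction would stay as in the original.

// Sketch: write the mapped rows in batches of 25 (the BatchWriteItem limit),
// awaiting each batch so only one request is in flight at a time.
const AWS = require("aws-sdk");
const docClient = new AWS.DynamoDB.DocumentClient();

async function writeInBatches(items, tableName) {
    const BATCH_SIZE = 25; // hard limit per BatchWriteItem call
    for (let i = 0; i < items.length; i += BATCH_SIZE) {
        const batch = items.slice(i, i + BATCH_SIZE);
        const params = {
            RequestItems: {
                [tableName]: batch.map(item => ({ PutRequest: { Item: item } })),
            },
        };
        const result = await docClient.batchWrite(params).promise();
        // Items DynamoDB could not process are returned and should be retried.
        const unprocessed = (result.UnprocessedItems || {})[tableName] || [];
        if (unprocessed.length > 0) {
            console.log("Unprocessed items in batch", i / BATCH_SIZE, ":", unprocessed.length);
        }
    }
}

This does not by itself read the workbook in chunks (sheet_to_json still loads the whole sheet into memory), but it keeps the write side from overwhelming DynamoDB.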

Why does my code using insertMany() skip some of the records and insert the same records multiple times?

I have 9577 unique records in a CSV file.
This code inserts 9800 records: it skips some of the unique records and inserts duplicates of others. Any idea why it does not insert the 9577 unique records exactly once? Below I also include the remaining part of the code so you get the whole picture.
function bulkImportToMongo(arrayToImport, mongooseModel) {
    const Model = require(`../../../models/${mongooseModel}`);
    let batchCount = Math.ceil(arrayToImport.length / 100);
    console.log(arrayToImport.length);
    let ops = [];
    for (let i = 0; i < batchCount; i++) {
        // console.log(i);
        let batch = arrayToImport.slice(i, i + 100);
        console.log(batch.length);
        ops.push(Model.insertMany(batch));
    }
    return ops;
    return Promise.all(ops).then(results => {
        // results is an array of results for each batch
        console.log("results: ", results);
    });
}
I parse the CSV file like this:
const Promise = require("bluebird");
const csv = require("fast-csv");
const path = require("path");
const fs = Promise.promisifyAll(require("fs"));

const promiseCSV = Promise.method((filePath, options) => {
    return new Promise((resolve, reject) => {
        var records = [];
        csv
            .fromPath(filePath, options)
            .on("data", record => {
                records.push(record);
            })
            .on("end", () => {
                // console.log(records);
                resolve(records);
            });
    });
});
And here is the script that connects it all together:
const path = require("path");
const promiseCSV = require("./helpers/ImportCSVFiles");
const {
    connectToMongo,
    bulkImportToMongo
} = require("./helpers/mongoOperations");

const filePath = path.join(__dirname, "../../data/parts.csv");
const options = {
    delimiter: ";",
    noheader: true,
    headers: [
        "facility",
        "partNumber",
        "partName",
        "partDescription",
        "netWeight",
        "customsTariff"
    ]
};

connectToMongo("autoMDM");
promiseCSV(filePath, options).then(records => {
    bulkImportToMongo(records, "parts.js");
});
It looks like your issue is simply i++. Perhaps you meant i += 100?

for (let i = 0; i < batchCount; i += 100 /* NOT i++ */) {
    //...
}
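More precisely: the loop counts batches (batchCount is about 96 for 9577 records) while the slice window only moves by 1, so consecutive batches overlap by 99 records and anything beyond roughly index 194 is never sliced at all; that is where both the duplicates and the missing records come from. The slice offsets need to advance by the batch size. A sketch equivalent to the working bulkImportToMongo in the complete solution below:

// Sketch of the corrected batching: i indexes batches, the slice offsets
// advance by batchSize, so every record lands in exactly one batch.
function bulkImportToMongo(arrayToImport, mongooseModel) {
    const Model = require(`../../../models/${mongooseModel}`);
    const batchSize = 100;
    const batchCount = Math.ceil(arrayToImport.length / batchSize);
    const ops = [];
    for (let i = 0; i < batchCount; i++) {
        const batch = arrayToImport.slice(i * batchSize, (i + 1) * batchSize);
        ops.push(Model.insertMany(batch));
    }
    return Promise.all(ops); // resolves once every batch has been inserted
}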
I solved it.
I hope this helps others. :-)
I had two errors: one in the promiseCSV function (renamed to parseCSV), and second, bad logic in bulkImportToMongo.
Complete solution:
I parsed and imported 602,198 objects, and here is how long it took using node --max_old_space_size=8000 on a MacBook Pro with 8 GB of RAM.
console
➜ database git:(master) ✗ node --max_old_space_size=8000 partImport.js
Connected to db!
Time to parse file: : 5209.325ms
Disconnected from db!
Time to import parsed objects to db: : 153606.545ms
➜ database git:(master) ✗
parseCSV.js
const csv = require("fast-csv");

function promiseCSV(filePath, options) {
    return new Promise((resolve, reject) => {
        console.time("Time to parse file");
        var records = [];
        csv
            .fromPath(filePath, options)
            .on("data", record => {
                records.push(record);
            })
            .on("end", () => {
                console.timeEnd("Time to parse file");
                resolve(records);
            });
    });
}

module.exports = promiseCSV;
mongodb.js
const mongoose = require("mongoose");
mongoose.Promise = global.Promise;

function connectToMongo(databaseName) {
    mongoose.connect(`mongodb://localhost:27017/${databaseName}`, {
        keepAlive: true,
        reconnectTries: Number.MAX_VALUE,
        useMongoClient: true
    });
    console.log("Connected to db!");
}

function disconnectFromMongo() {
    mongoose.disconnect();
    console.log("Disconnected from db!");
}

function bulkImportToMongo(arrayToImport, mongooseModel) {
    const Model = require(`../../../models/${mongooseModel}`);
    const batchSize = 100;
    let batchCount = Math.ceil(arrayToImport.length / batchSize);
    let recordsLeft = arrayToImport.length;
    let ops = [];
    let counter = 0;
    for (let i = 0; i < batchCount; i++) {
        let batch = arrayToImport.slice(counter, counter + batchSize);
        counter += batchSize;
        ops.push(Model.insertMany(batch));
    }
    return Promise.all(ops);
}

module.exports.bulkImportToMongo = bulkImportToMongo;
module.exports.connectToMongo = connectToMongo;
module.exports.disconnectFromMongo = disconnectFromMongo;
partImport.js
const path = require("path");
const parseCSV = require("./helpers/parseCSV");
const {
    connectToMongo,
    disconnectFromMongo,
    bulkImportToMongo
} = require("./helpers/mongodb");

const filePath = path.join(__dirname, "../../data/parts.csv");
const options = {
    delimiter: ";",
    noheader: true,
    headers: [
        "facility",
        "partNumber",
        "partName",
        "partDescription",
        "netWeight",
        "customsTariff"
    ]
};

connectToMongo("autoMDM");
parseCSV(filePath, options)
    .then(records => {
        console.time("Time to import parsed objects to db");
        return bulkImportToMongo(records, "parts.js");
    })
    /* .then(result =>
        console.log("Total batches inserted: ", result, result.length)
    ) */
    .then(() => {
        disconnectFromMongo();
        console.timeEnd("Time to import parsed objects to db");
    })
    .catch(error => console.log(error));

Mongoose findOne in stream callback not executed

When I execute a findOne query against a collection that is different from that of the surrounding stream, the callback is hardly ever executed (it only fires for 1 or 2 documents). Here is my code:
schema.js:
'use strict';

var mongoose = require('mongoose'),
    Schema = mongoose.Schema;

var AutocompleteSchema = new Schema({
    nGram: String,
    locations: [{ type: Schema.Types.ObjectId, ref: 'Location' }]
});

module.exports = mongoose.model('Autocomplete', AutocompleteSchema);
ingest.js:
var Autocomplete = require('./schema');
var nGramAPI = require('ngram');
var _ = require('lodash');               // _.each is used below
var Location = require('./location');    // Location model (path assumed; adjust to your project)

var cache = [];

function storeNGram(input, location) {
    if (cache.indexOf(input) === -1) {
        cache.push(input);
        Autocomplete
            .findOne({ nGram: input })
            .populate('locations')
            .exec(function (err, nGram) {
                console.log(nGram);
                if (!nGram) {
                    var newAutocomplete = {
                        nGram: input,
                        locations: [location._id]
                    };
                    Autocomplete.create(newAutocomplete, function (err, created) {
                        cache.splice(cache.indexOf(input), 1);
                    });
                }
                else {
                    nGram.locations.push(location._id);
                    sortLocations(nGram);
                    location.save(function (err, saved) {
                        cache.splice(cache.indexOf(input), 1);
                    });
                }
            });
    }
    else {
        setTimeout(function () {
            storeNGram(input, location);
        }, 100);
    }
}

exports.ingest = function () {
    console.log("Building nGrams");
    var stream = Location.find().stream();
    stream.on('data', function (location) {
        var length = location.dispName.length > 20 ? 20 : location.dispName.length;
        for (var i = 1; i <= length; i++) {
            _.each(nGramAPI(i)(location.dispName), function (nGram) {
                storeNGram(nGram, location);
            });
        }
    });
}
When I execute the Autocomplete.findOne on its own outside of the stream it queries properly, but when inside it fails. Any suggestions?
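A pattern that often helps in this situation (a sketch, not taken from the original thread) is to pause the query stream while the asynchronous work for each document runs, so thousands of findOne/create calls are not launched concurrently before any of them completes:

// Sketch: process one Location at a time by pausing the stream while the
// asynchronous work for that document is in flight.
exports.ingest = function () {
    console.log("Building nGrams");
    var stream = Location.find().stream();

    stream.on('data', function (location) {
        stream.pause();                      // stop emitting 'data' events
        handleLocation(location, function () {
            stream.resume();                 // ask for the next document
        });
    });

    stream.on('error', function (err) {
        console.error(err);
    });
};

// handleLocation is a hypothetical helper that would run the nGram/findOne
// logic above for a single location and invoke its callback once all of its
// queries have finished.

Whether this resolves the silent findOne callbacks depends on what is actually swallowing them, but it rules out the flood of concurrent queries as the cause.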
