I am trying to write multiple csv files from a set of data that I have loaded using the sheet js library. My first attempt was like:
// Builds one single-sheet workbook per data subset and writes each as a CSV "part" file.
for (let i = 0; i < dataSetDivided.length; i++) {
let exportSet = dataSetDivided[i]
console.log(exportSet)
// One sheet per subset; finalHeaders fixes the column order.
let ws = XLSX.utils.json_to_sheet(exportSet, {header: finalHeaders})
let wb = XLSX.utils.book_new()
XLSX.utils.book_append_sheet(wb, ws, "SheetJS")
let todayDate = this.returnFormattedDate()
let originalFileName = this.state.fileName
// e.g. "import_<name>_<date>(part_1).csv" -- part numbers are 1-based.
let exportFileName = 'import_' + originalFileName + '_' + todayDate + '(part_' + (i + 1) + ').csv'
// NOTE(review): XLSX.writeFile is synchronous, so the loop is not racing the
// write; if only some files appear in a browser, multi-download throttling by
// the browser is a more likely cause -- TODO confirm environment.
XLSX.writeFile(wb, exportFileName)
}
With this code only some files are written I guess because the for loop doesn't wait for the file to be written before continuing.
So I am trying to write each file within a promise like below:
// Second attempt: wrap every file write in a Promise and wait for all of them.
Promise.all(
dataSetDivided.map((exportSet, i) => {
return new Promise((resolve, reject) => {
console.log(exportSet)
let ws = XLSX.utils.json_to_sheet(exportSet, {header: finalHeaders})
let wb = XLSX.utils.book_new()
XLSX.utils.book_append_sheet(wb, ws, "SheetJS")
let todayDate = this.returnFormattedDate()
let originalFileName = this.state.fileName
let exportFileName = 'import_' + originalFileName + '_' + todayDate + '(part_' + (i + 1) + ').csv'
// NOTE(review): XLSX.writeFile does not accept a callback (the callback-based
// API is XLSX.writeFileAsync). This third argument is silently ignored, so
// neither resolve() nor reject() ever runs, every promise stays pending
// forever, and that is why the .then/.catch below never log anything.
XLSX.writeFile(wb, exportFileName, (err) => {
if (err) {
console.log(err)
reject(err)
} else {
console.log('Created ' + exportFileName)
resolve()
}
})
})
})
)
.then(() => {
console.log('Created multiple files successfully')
})
.catch((err) => {
console.log('ERROR: ' + err)
})
But... this isn't working, again only some files are written and nothing is logged to the console. Can anyone give me any ideas how to make this work or a better way to achieve the goal of writing multiple files like this? There is a XLSX.writeFileAsync method but I can't find any examples of how it works and I'm not sure if that is what I need.
With thanks,
James
UPDATE:
I am now using setTimeout to delay the next writeFile call... this is working for my test cases but I am aware it isn't a good solution, would be much better to have a callback when the file is successfully written:
// Workaround: write the first remaining subset, then re-invoke itself after a
// fixed delay so the environment gets time between file writes.
writeFileToDisk(dataSetDivided, i) {
if (dataSetDivided.length > 0) {
let exportSet = dataSetDivided[0]
let ws = XLSX.utils.json_to_sheet(exportSet, {header: finalHeaders})
let wb = XLSX.utils.book_new()
XLSX.utils.book_append_sheet(wb, ws, "SheetJS")
let todayDate = this.returnFormattedDate()
let originalFileName = this.state.fileName
let exportFileName = 'import_' + originalFileName + '_' + todayDate + '(part_' + i + ').csv'
XLSX.writeFile(wb, exportFileName)
// NOTE(review): splice mutates the caller's array in place -- the input data
// set is consumed destructively by this method.
dataSetDivided.splice(0, 1)
i += 1
// Arbitrary 2.5 s delay: simulates, rather than detects, write completion.
setTimeout(() => {this.writeFileToDisk(dataSetDivided, i)}, 2500)
}
}
this.writeFileToDisk(dataSetDivided, 1)
Any suggestions how to get this working without simulating the file write time would be much appreciated.
I just tried this (first time) XLSX code and can confirm that it writes the expected workbooks and runs synchronously...
'use strict'
const XLSX = require('xlsx');

// Fixed column order shared by every generated sheet.
const finalHeaders = ['colA', 'colB', 'colC'];

// Three independent row sets -> three separate single-sheet workbooks.
const data = [
  [ { colA: 1, colB: 2, colC: 3 }, { colA: 4, colB: 5, colC: 6 }, { colA: 7, colB: 8, colC: 9 } ],
  [ { colA:11, colB:12, colC:13 }, { colA:14, colB:15, colC:16 }, { colA:17, colB:18, colC:19 } ],
  [ { colA:21, colB:22, colC:23 }, { colA:24, colB:25, colC:26 }, { colA:27, colB:28, colC:29 } ]
];

// Synchronous write of one workbook per row set.
for (const [index, rows] of data.entries()) {
  const sheet = XLSX.utils.json_to_sheet(rows, { header: finalHeaders });
  const workbook = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(workbook, sheet, "SheetJS");
  XLSX.writeFile(workbook, `workbook_${index}.xls`);
}
Running this yields workbook_0.xls, workbook_1.xls, and workbook_2.xls, each with a single sheet entitled "SheetJS". They all look good in excel, for example, workbook_0 has...
I think you should do the writing asynchronously, and would suggest the following adaptation of the above ...
// Adapt XLSX's callback-based writeFileAsync to a Promise so the writes can
// be awaited/aggregated with Promise.all.
function writeFileQ(workbook, filename) {
  return new Promise((resolve, reject) => {
    // the interface wasn't clearly documented, but this reasonable guess worked...
    XLSX.writeFileAsync(filename, workbook, (error, result) => {
      if (error) {
        reject(error);
      } else {
        resolve(result);
      }
    });
  });
}

// Build one workbook per row set and kick off every write in parallel.
const promises = data.map((rows, i) => {
  const sheet = XLSX.utils.json_to_sheet(rows, { header: finalHeaders });
  const workbook = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(workbook, sheet, "SheetJS");
  return writeFileQ(workbook, `workbook_${i}.xls`);
});

Promise.all(promises)
  .then((result) => console.log(result))
  .catch((error) => console.log(error));
Running this async code, I found that it produced the same expected results and did so asynchronously.
So your original loop looks right, and should work synchronously. The fact that you aren't getting expected results must be caused by something apart from timing (or maybe some timing issue induced by react?).
In any event, if you do want to use the async approach, which I highly recommend, I've shown how to do that (but I worry that might not fully solve the problem unless you sort out what's happening with your first attempt).
XLSX.writeFileAsync does have a callback with the following syntax.
// Signature: writeFileAsync(filename, workbook, callback) -- note the file
// name comes first, and the callback receives a possible error.
xlsx.writeFileAsync(workbookName, workbook, (err) => {
// It's a callback
});
But this will handle only writing one file asynchronously.
Your case is typical, if you want to do a series things in which each item is asynchronous, then you should not just use iterative methods like loops/map/forEach.
One best library I would suggest for this is 'async'.
The 'async.parallel' function takes an array of functions that execute asynchronously, and calls its callback after all of them have finished.
https://caolan.github.io/async/docs.html#parallel
If the concern is to use the library asynchronously for not blocking the server, you should know that this library implementation seems to be synchronous and you should check out the library's server demos README since it has several proposals to workaround this problem: https://github.com/SheetJS/sheetjs/tree/master/demos/server
Related
My problem is simple, but incredibly frustrating as I'm now on my second week of trying to figure this out and on the verge of giving up. I would like to retrieve my 'notesObject' variable outside my getAllNotes() function when after the transaction.oncomplete() listener executes.
// IIFE: creates/opens the 'Notes' IndexedDB database, inserts one record,
// then tries (unsuccessfully) to read all records back synchronously.
(function() {
// check for IndexedDB support
if (!window.indexedDB) {
console.log(`Your browser doesn't support IndexedDB`);
return;
}
// open the CRM database with the version 1
let request = indexedDB.open('Notes', 1);
// create the Contacts object store and indexes
// (runs only when the database is first created or its version is bumped)
request.onupgradeneeded = (event) => {
let db = event.target.result;
// create the Notes object store ('table')
let store = db.createObjectStore('Notes', {
autoIncrement: true
});
// create an index on the sections property.
let index = store.createIndex('Sections', 'sections', {
unique: true
});
}
// Inserts a single placeholder note into the store.
function insertData() {
let myDB = indexedDB.open('Notes');
myDB.onsuccess = (event) => {
// myDB.transaction('Notes', 'readwrite')
event.target.result.transaction('Notes', 'readwrite')
.objectStore('Notes')
.put({
sections: "New Note",
pages: "New page",
lastSelectedPage: ""
});
console.log("insert successful");
}
myDB.onerror = (event) => {
console.log('Error in NotesDB - insertData(): ' + event.target.errorCode);
}
// NOTE(review): 'complete' is an IDBTransaction event, not an open-request
// event -- this handler most likely never fires; verify against the spec.
myDB.oncomplete = (event) => {
myDB.close();
console.log('closed');
}
}
insertData()
// Collects {primaryKey, section} pairs for every note via a cursor.
function getAllNotes() {
let myDB = indexedDB.open('Notes');
let notesObject = [];
myDB.onsuccess = (event) => {
let dbObjectStore = event.target.result
.transaction("Notes", "readwrite").objectStore("Notes");
dbObjectStore.openCursor().onsuccess = (e) => {
let cursor = e.target.result;
if (cursor) {
let primaryKey = cursor.key;
let section = cursor.value.sections;
notesObject.push({
primaryKey,
section
})
cursor.continue();
}
}
dbObjectStore.transaction.onerror = (event) => {
console.log('Error in NotesDB - getAllData() tranaction: ' + event.target.errorCode);
}
dbObjectStore.transaction.oncomplete = (event) => {
// NOTE(review): this returns from the oncomplete *handler*, not from
// getAllNotes -- the value goes nowhere, and the console.log below this
// return statement is unreachable.
return notesObject;
console.log(notesObject)
}
}
}
// NOTE(review): getAllNotes itself has no return statement and the cursor
// work above is asynchronous, so `notes` here is always undefined.
let notes = getAllNotes()
console.log("Getting Notes sucessful: " + notes)
})()
I've tried setting global variables, but nothing seems to work. I am a complete noob and honestly, I'm completely lost on how to retrieve the notesObject variable outside my getAllNotes() function. The results I get are 'undefined'. Any help would be greatly appreciated.
This is effectively a duplicate of Indexeddb: return value after openrequest.onsuccess
The operations getAllNotes() kicks off are asynchronous (they will run in the background and take time to complete), whereas your final console.log() call is run synchronously, immediately after getAllNotes(). The operations haven't completed at the time that is run, so there's nothing to log.
If you search SO for "indexeddb asynchronous" you'll find plenty of questions and answers about this topic.
I can't seem to figure out where I am going wrong, I have tried placing the code in an async function and awaiting for the loop to finish but it somehow keeps committing the writebatch before it's done.
Please could someone shed some light on my code, I don't know where I might not be understanding the process.
What I managed to research was that async function run in the background and allows other code to run, so I need to wait for the async operation first if I need a job to be done after.
Please let me know what I'm missing, appreciate the help, here's my code:
// Entry point: runs the batch sync once.
getReady();
// Builds a Firestore WriteBatch from the fetched matches: inserts new match
// docs, updates existing ones whose lastUpdated is newer, then commits.
async function stage1() {
const batch = db.batch();
// NOTE(review): Array.prototype.forEach returns undefined, so this `await`
// is a no-op, and forEach does NOT wait for its async callback -- every
// iteration's awaits keep running after the loop "finishes". This is why
// batch.commit() below fires before the batch has been populated. Use
// `for...of` (sequential) or Promise.all(matches.map(...)) (parallel).
await response.data.matches.forEach(async match => {
var batchRef = db.collection('matches').doc(`${match.id}`);
var utcDate = `${match.utcDate}`; // ISO-8601 formatted date returned from server
var localDate = moment.utc(utcDate).toDate();
var unixTime = ((localDate.getTime()) / 1000);
const now = new Date();
const secondsSinceEpoch = Math.round(now.getTime() / 1000)
var howLong = timeDifference(unixTime, secondsSinceEpoch);
var checkMatches = db.collection('matches');
matchesSnapshot = await checkMatches.where('matchId', '==',
match.id).get();
if (matchesSnapshot.empty) {
// No existing doc for this match id: stage an insert.
batch.set(batchRef, {
competitionName: `${match.competition.name}`,
competitionId: `${match.competition.id}`,
matchStatus: `${match.status}`,
matchDate: `${match.utcDate}`,
matchDay: `${match.matchday}`,
unixDate: unixTime,
matchId: `${match.id}`,
lastUpdated: `${match.lastUpdated}`,
homeTeamScore: match.score.fullTime.homeTeam,
awayTeamScore: match.score.fullTime.awayTeam,
homeWinOdds: `${match.odds.homeWin}`,
drawOdds: `${match.odds.draw}`,
awayWinOdds: `${match.odds.awayWin}`,
matchResults: `${match.score.winner}`,
matchduration: `${match.score.duration}`,
fullTimeHomeTeam: `${match.score.fullTime.homeTeam}`,
fullTimeAwayTeam: `${match.score.fullTime.awayTeam}`,
halfTimeHomeTeam: `${match.score.halfTime.homeTeam}`,
halfTimeAwayTeam: `${match.score.halfTime.awayTeam}`,
extraTimeHomeTeam: `${match.score.extraTime.homeTeam}`,
extraTimeAwayTeam: `${match.score.extraTime.awayTeam}`,
penaltiesHomeTeam: `${match.score.penalties.homeTeam}`,
penaltiesAwayTeam: `${match.score.penalties.awayTeam}`,
homeTeamId: `${match.homeTeam.id}`,
awayTeamId: `${match.awayTeam.id}`,
homeTeamName: `${match.homeTeam.name}`,
awayTeamName: `${match.awayTeam.name}`,
category: 'Football'
});
} else if (!matchesSnapshot.empty) {
// NOTE(review): `${match}` stringifies the whole match object as the doc
// id ("[object Object]") -- presumably `${match.id}` was intended; verify.
var checkingMatches = db.collection('matches').doc(`${match}`);
var doc = await checkingMatches.get();
var oldTime = doc.data().lastUpdated;
var utcDate2 = `${match.lastUpdated}`; // ISO-8601 formatted date returned from server
var utcDate3 = oldTime; //
var localDate2 = moment.utc(utcDate2).toDate();
var localDate3 = moment.utc(utcDate3).toDate();
var unixTime2 = ((localDate2.getTime()) / 1000);
var unixTime3 = ((localDate3.getTime()) / 1000);
if (unixTime2 > unixTime3) {
// Server copy is newer than the stored doc: stage an update.
const reference = db.collection('matches').doc(`${match.id}`);
batch.update(reference, {
matchStatus: `${match.status}`,
matchDate: `${match.utcDate}`,
matchDay: `${match.matchday}`,
lastUpdated: `${match.lastUpdated}`,
homeTeamScore: match.score.fullTime.homeTeam,
awayTeamScore: match.score.fullTime.awayTeam,
homeWinOdds: `${match.odds.homeWin}`,
drawOdds: `${match.odds.draw}`,
awayWinOdds: `${match.odds.awayWin}`,
matchResults: `${match.score.winner}`,
matchduration: `${match.score.duration}`,
fullTimeHomeTeam: `${match.score.fullTime.homeTeam}`,
fullTimeAwayTeam: `${match.score.fullTime.awayTeam}`,
halfTimeHomeTeam: `${match.score.halfTime.homeTeam}`,
halfTimeAwayTeam: `${match.score.halfTime.awayTeam}`,
extraTimeHomeTeam: `${match.score.extraTime.homeTeam}`,
extraTimeAwayTeam: `${match.score.extraTime.awayTeam}`,
penaltiesHomeTeam: `${match.score.penalties.homeTeam}`,
penaltiesAwayTeam: `${match.score.penalties.awayTeam}`,
});
}
}
});
// NOTE(review): because of the forEach issue above, this commit runs before
// the async callbacks finish staging their writes.
return batch.commit().then(() => {
console.log("im done");
}).catch((err) => {
console.log('Mac! there was an error while doing the job: ', err);
});
}
async function getReady() {
await stage1();
}
What jumps out to my eye is your .forEach(async match => { on the 4th line. .forEach() is NOT asynchronous - it will NOT wait, it will continue through - which is likely why the WriteBatch is being closed before asynchronous operations try to write to it.
At a minimum you will want to use something like Promise.all(...whatever.map()) (and discard the result, if you wish) to make the entire thing asynchronous.
To be honest, I haven't even looked at anything after that - there may well be other issues.
I have a test folder with files
file
file (1)
file (2)
If the file exists I add a suffix to a new filename, to prevent overwriting the file. For example
if file exists new name should be file (1)
if file (1) exists new name should be file (2)
if file (2) exists new name should be file (3)
and so on.
The following function works fine, except the value is not returned so I can assign it later.
// Recursively searches for a free "name (N)" variant of `file`.
async function dest_exists_new_name(file) {
const access = fs.promises.access
try {
await access(file, fs.F_OK)
// file exists - generate new name
const info = path.parse(file)
const dir = info.dir
let name = info.name
const ext = info.ext
// generate suffix
let suffix = ' (1)'
const suffix_regex = / \([0-9]+\)$/
if (suffix_regex.test(name)) { // if suffix exists -> increment it
const num = name.split(' ').reverse()[0].replace(/[()]/g,'')
const next_num = parseInt(num) + 1
suffix = ' (' + next_num + ')'
name = name.replace(suffix_regex, '') // remove old suffix
}
// generate suffix end
const new_name = path.join(dir, name + suffix + ext)
// recurse until dest not exists
// NOTE(review): the recursive result is discarded here -- without
// `return await dest_exists_new_name(new_name)` this try branch resolves
// to undefined, which is exactly why callers see undefined whenever the
// original name already exists.
await dest_exists_new_name(new_name)
} catch {
// file not exist - return its name
// console.log works OK
console.log('new name ->', file)
// return doesn't work - returns undefined if the previous name exists, but works ok if the name doesn't exists
return file
}
}
await dest_exists_new_name('/path/file')
new name -> /path/file (3) // console.log - works OK
undefined // returns undefined, if file previously exists
The question is how can I correctly return the new file name value?
If there are any culprits in such a solution like
accidental file rewriting
infinite recursion
other issues
I will be grateful for the hints on how to improve the function.
Your function will return file, but being an async function, you need to await its return and you cannot do so outside of an async scope. Thus, if you just console.log() its "instantaneous" value, it will indeed return a pending promise, as the return value has not been resolved yet. You may retrieve the correct return value by including your function in an async scope, like this:
// Wrap the call in an async scope so the resolved value can be awaited.
let a = async () => {
console.log(await dest_exists_new_name('/path/file'))
}
a();
This will output:
new name -> /path/file
/path/file //here's your file
Now, by adding return await dest_exists_new_name(new_name) you should be able to achieve what you want and both console.log() and return the new, non-existent, file name. Here's a complete, reproducible example:
const fs = require('fs');
const path = require('path');
// Returns the first name in `file`'s directory that does not exist yet,
// following the "name (N)" copy convention: "file" -> "file (1)" -> "file (2)".
// Resolves with the free name; recursion propagates it via `return await`.
async function dest_exists_new_name(file) {
    const access = fs.promises.access
    try {
        // fs.constants.F_OK, not the deprecated fs.F_OK alias (DEP0176):
        // test for existence only, no read/write permission check.
        await access(file, fs.constants.F_OK)
        // File exists -- derive the next candidate name.
        const info = path.parse(file)
        const dir = info.dir
        let name = info.name
        const ext = info.ext
        let suffix = ' (1)'
        const suffix_regex = / \([0-9]+\)$/
        if (suffix_regex.test(name)) {
            // Name already carries " (N)": increment N.
            const num = name.split(' ').reverse()[0].replace(/[()]/g, '')
            const next_num = parseInt(num, 10) + 1
            suffix = ' (' + next_num + ')'
            name = name.replace(suffix_regex, '') // remove old suffix
        }
        const new_name = path.join(dir, name + suffix + ext)
        // Recurse until a free name is found, returning it up the chain.
        return await dest_exists_new_name(new_name)
    } catch {
        // access() rejected -> this name is free; use it.
        console.log('new name ->', file)
        return file
    }
}
//Here, make sure that the path to "file" is correct
// Async arrow wrapper so the promise's resolved value can be logged.
let a = async() => console.log(await dest_exists_new_name(path.join(__dirname, './file')));
a();
Output, having up to file (2) in the same folder:
new name -> /path/to/file (3)
/path/to/file (3)
Check you try catch and how you are receiving your variable.
// NOTE(review): this sketch is broken as written: `file1` is undefined
// (presumably a new candidate name derived from `file` was intended), the
// function unconditionally recurses on itself with no base case in the try
// branch, `res` is never returned, and the catch masks every error as
// "file not found".
async function dest_exists_new_name(file) {
try {
const res = await dest_exists_new_name(file1); // recursion result
} catch (err) {
return Promise.resolve("file not found");
}
}
// usage
let res = await dest_exists_new_name(fileArg);
First of all, you should use await, since it's async function:
// recurse until dest not exists
await dest_exists_new_name(new_name)
About recursion - IMHO, it's always better to use cycle (if it doesn't make code too complicated).
Mixing async-await & promises is not very good. Ideally you should use one style.
I prefer to use destructuring, lambda functions, and other modern features.
So, my variant for async-await, without recursion:
const fs = require('fs');
const path = require('path');
// better to create outside of func, since it doesn't depend on context
// Matches a trailing " (N)" copy suffix, e.g. "file (3)".
const suffix_regex = / \([0-9]+\)$/
// Suffix for the first renamed copy: "file" -> "file (1)".
const defaultSuffix = ' (1)'

// Existence check that resolves to a boolean instead of rejecting.
const access = async (...params) => new Promise((resolve) => fs.access(...params, (err) => (err) ? resolve(false) : resolve(true)))

// Builds the next candidate: "file" -> "file (1)", "file (N)" -> "file (N+1)".
const generate_new_suffix = ({ dir, name, ext }) => {
  if (suffix_regex.test(name)) { // if suffix exists -> increment it
    const num = name.split(' ').reverse()[0].replace(/[()]/g, '')
    // BUG FIX: keep the leading space -- "file (1)" must become "file (2)",
    // not "file(2)". Without it the generated name no longer matches
    // suffix_regex, so a later pass would append a second " (1)" suffix
    // instead of incrementing.
    const suffix = ` (${+num + 1})`;
    const cleanName = name.replace(suffix_regex, '') // remove old suffix
    return path.join(dir, cleanName + suffix + ext)
  }
  return path.join(dir, name + defaultSuffix + ext)
}

// Probes the disk until a free name is found and returns it (no recursion).
const dest_exists_new_name = async (file) => {
  let newFileName = file
  // fs.constants.F_OK (fs.F_OK is deprecated): existence check only.
  while (await access(newFileName, fs.constants.F_OK)) {
    console.log(newFileName);
    const info = path.parse(newFileName)
    newFileName = generate_new_suffix(info)
  }
  console.log('new name ->', newFileName)
  return newFileName
}
// Async IIFE: awaits the final free name and prints it.
(async () => {
console.log(await dest_exists_new_name(path.join(__dirname, 'file')))
})();
I've recently been studying some of my previous code and I'm not sure where the memory leak (if any) is coming from. I seem to be running out of memory quite a lot and I'm sure there is an easier way around this? I initially thought creating the object every second (checkProducts gets called every second) was the issue, however I am referencing the product in cache() so.
Thank you.
// Polls the product listing (caller invokes this about once per second),
// scrapes each <li> into a product record, caches it, then checks restocks.
const checkProducts = async () => {
console.log("Checking for new products");
const proxyF = rProxy()
console.log("PROXY " + proxyF.auth.username)
try {
// NOTE(review): the shorthand `{ proxy }` below references a variable named
// `proxy`, but the rotated proxy fetched above is stored in `proxyF` -- so
// this either uses some outer `proxy` binding or passes undefined.
// Probably `{ proxy: proxyF }` was intended; verify.
const response = await axios.get(
"https://www.sizeofficial.fr/campaign/New+In/?facet:new=latest&sort=latest", { proxy });
// A fresh cheerio document is built on every poll; it becomes garbage once
// this call returns, so retention (a leak) would have to come from cache().
const $ = cheerio.load(response.data);
$("li").each((i, elm) => {
const title =
$(elm)
.find("a")
.text() + "";
const price = $(elm)
.find(".pri")
.text();
const link = $(elm)
.find(".itemImage")
.attr("href");
const quickBuy = $(elm)
.find(".itemQuickView.quickView.btn.btn-default")
.attr("data-quickview-path");
const image = $(elm)
.find("source")
.attr("data-srcset");
if (title !== "" && price !== "") {
const product = {
title: title.replace(/(\r\n|\n|\r)/gm, "").replace(/\t/g, ""),
price: price,
link: "https://www.sizeofficial.fr" + link,
quickBuy: "https://www.sizeofficial.fr/" + quickBuy,
image: image
};
cache(product);
}
});
} catch (err) {
console.log(err);
}
// NOTE(review): .map used purely for side effects (result discarded) and the
// returned promises are not awaited -- restock checks run fire-and-forget.
restocks.map(restock => checkRestock(restock));
};
What's the best way to overwrite a line in a large (2MB+) text file using node.js?
My current method involves
copying the entire file into a buffer.
Splitting the buffer into an array by the newline character (\n).
Overwriting the line by using the buffer index.
Then overwriting the file with the buffer after join with \n.
First, you need to search where the line starts and where it ends. Next you need to use a function for replacing the line. I have the solution for the first part using one of my libraries: Node-BufferedReader.
// Third-party Node-BufferedReader: streams the file line by line while
// tracking byte offsets, so the target line's position and length can be
// computed for an in-place overwrite.
var lineToReplace = "your_line_to_replace";
var startLineOffset = 0;
var endLineOffset = 0;
new BufferedReader ("your_file", { encoding: "utf8" })
.on ("error", function (error){
console.log (error);
})
.on ("line", function (line, byteOffset){
// Previous line's end is this line's start.
startLineOffset = endLineOffset;
endLineOffset = byteOffset - 1; //byteOffset is the offset of the NEXT byte. -1 if it's the end of the file, if that's the case, endLineOffset = <the file size>
if (line === lineToReplace ){
console.log ("start: " + startLineOffset + ", end: " + endLineOffset +
", length: " + (endLineOffset - startLineOffset));
this.interrupt (); //interrupts the reading and finishes
}
})
.read ();
Maybe you can try the package replace-in-file
suppose we have a txt file as below
// file.txt
"line1"
"line2"
"line5"
"line6"
"line1"
"line2"
"line5"
"line6"
and we want to replace:
line1 -> line3
line2 -> line4
Then, we can do it like this:
const replace = require('replace-in-file');

// Replace every occurrence of line1 -> line3 and line2 -> line4 in file.txt;
// `from` patterns and `to` replacements are paired up by index.
const options = {
  files: "./file.txt",
  from: [/line1/g, /line2/g],
  to: ["line3", "line4"],
};

replace(options)
  .then((outcome) => {
    console.log("Replacement results: ", outcome);
  })
  .catch((failure) => {
    console.log(failure);
  });
the result as below:
// file.txt
"line3"
"line4"
"line5"
"line6"
"line3"
"line4"
"line5"
"line6"
More details please refer to its docs: https://www.npmjs.com/package/replace-in-file
This isn't a file-size-focused solution, but it overwrites a line in a file using node.js. It may help other people that search engines redirect to this post, like me.
import * as fs from 'fs'

// CLI: node update_line.js <file> <1-based line number> <replacement text>
const filename = process.argv[2]
const lineIndexToUpdate = parseInt(process.argv[3], 10) - 1 // radix 10; 0-based index
const textUpdate = process.argv[4]

// Pure helper: returns dataString with line `indexToUpdate` replaced by
// `newText` (takes the replacement as a parameter instead of closing over it).
function filterLine(indexToUpdate, dataString, newText) {
  return dataString
    .split('\n')
    .map((val, index) => {
      if (index === indexToUpdate)
        return newText
      else
        return val
    })
    .join('\n')
}

fs.readFile(filename, 'utf8', (err, data) => {
  if (err) throw err
  // fs.writeFile's callback receives only an error -- there is no data arg.
  fs.writeFile(filename, filterLine(lineIndexToUpdate, data, textUpdate), (err) => {
    if (err) throw err
    // Message fixed: the script updates (overwrites) a line, it removes nothing.
    console.log("Line updated")
  })
})
Script use exemple:
node update_line.js file 10 "te voglio benne"