Promise Resolving before Google Cloud Bucket Upload - javascript

I am writing some code that loops over a CSV and creates a JSON file based on it. Included in the JSON is an array named photos, which is meant to contain the URLs returned for the images that are uploaded to Google Cloud Storage within the function. However, getting the promise to wait for the uploads to finish has me stumped: since everything runs asynchronously, the promise and the JSON compilation finish before the bucket upload completes and returns the URL. How can I make the promise resolve only after the URLs have been retrieved and added to currentJSON.photos?
const csv=require('csvtojson')
const fs = require('fs');
const {Storage} = require('@google-cloud/storage');
var serviceAccount = require("./my-firebase-storage-spot.json");
const testFolder = './Images/';
var csvFilePath = './Inventory.csv';
var dirArr = ['./Images/Subdirectory-A','./Images/Subdirectory-B','./Images/Subdirectory-C'];
var allData = [];
csv()
  .fromFile(csvFilePath)
  .subscribe((json) => {
    return new Promise((resolve, reject) => {
      for (var i in dirArr) {
        if (json['Name'] == dirArr[i]) {
          var currentJSON = {
            "photos": [],
          };
          fs.readdir(testFolder + json['Name'], (err, files) => {
            files.forEach(file => {
              if (file.match(/.(jpg|jpeg|png|gif)$/i)) {
                var imgName = testFolder + json['Name'] + '/' + file;
                bucket.upload(imgName, function (err, file) {
                  if (err) throw new Error(err);
                  // returned uploaded img address is found at file.metadata.mediaLink
                  currentJSON.photos.push(file.metadata.mediaLink);
                });
              } else {
                // do nothing
              }
            });
          });
          allData.push(currentJSON);
        }
      }
      resolve();
    })
  }, onError, onComplete);

function onError() {
  // console.log(err)
}
function onComplete() {
  console.log('finito');
}
I've tried moving the resolve() around, and also tried placing the uploader section into the onComplete() function (which created new promise-based issues).

Indeed, your code is not awaiting the asynchronous invocation of the readdir callback function, nor of the bucket.upload callback function.
Asynchronous coding becomes easier when you use the promise versions of these functions.
bucket.upload returns a promise when you omit the callback, so that one is easy.
For readdir to return a promise, you need the fs Promises API: it gives you a promise-based readdir method, so you can use promises throughout your code.
So use fs = require('fs').promises instead of fs = require('fs').
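As a minimal sketch of those two promise forms (the key file and bucket name below are placeholders for your own setup; note that the promise form of bucket.upload resolves with an array whose first element is the uploaded File):
const fs = require('fs').promises;
const {Storage} = require('@google-cloud/storage');

// Placeholders: substitute your own key file and bucket name.
const storage = new Storage({ keyFilename: './my-firebase-storage-spot.json' });
const bucket = storage.bucket('my-bucket');

(async () => {
  const files = await fs.readdir('./Images/Subdirectory-A');                       // promise-based readdir
  const [uploaded] = await bucket.upload('./Images/Subdirectory-A/' + files[0]);   // resolves with [File, ...]
  console.log(uploaded.metadata.mediaLink);
})().catch(console.error);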
With that preparation, your code can be transformed into this:
const testFolder = './Images/';
var csvFilePath = './Inventory.csv';
var dirArr = ['./Images/Subdirectory-A','./Images/Subdirectory-B','./Images/Subdirectory-C'];
(async function () {
  let arr = await csv().fromFile(csvFilePath);
  arr = arr.filter(obj => dirArr.includes(obj.Name));
  let allData = await Promise.all(arr.map(async obj => {
    let files = await fs.readdir(testFolder + obj.Name);
    files = files.filter(file => file.match(/\.(jpg|jpeg|png|gif)$/i));
    let photos = await Promise.all(
      files.map(async file => {
        var imgName = testFolder + obj.Name + '/' + file;
        let [uploadedFile] = await bucket.upload(imgName); // the promise form resolves with an array: [File, ...]
        return uploadedFile.metadata.mediaLink;
      })
    );
    return {photos};
  }));
  console.log('finito', allData);
})().catch(err => { // <-- The above async function runs immediately and returns a promise
  console.log(err);
});
Some remarks:
There is a shortcoming in your regular expression. You intended to match a literal dot, but you did not escape it (fixed in above code).
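For illustration (hypothetical file names):
'photo.jpg'.match(/.(jpg|jpeg|png|gif)$/i);   // matches, as intended
'notajpg'.match(/.(jpg|jpeg|png|gif)$/i);     // also matches: the unescaped . matches the 'a'
'notajpg'.match(/\.(jpg|jpeg|png|gif)$/i);    // null: the escaped \. requires a literal dot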
allData will contain an array of { photos: [......] } objects, and I wonder why you would not want all photo elements to be part of one single array. However, I kept your logic, so the above will still produce them in these chunks. Possibly, you intended to have other properties (next to photos) as well, which would make it actually useful to have these separate objects.
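If you do want one flat array of photo URLs instead, you could merge the chunks afterwards, a sketch reusing the allData from above:
// Merge the per-directory chunks into one flat array of media links
const allPhotos = allData.flatMap(obj => obj.photos);
// equivalent without flatMap: [].concat(...allData.map(obj => obj.photos))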

The problem is that your code does not wait inside your forEach. I would highly recommend looking into streams and trying to do things in parallel as much as possible. There is one library which is very powerful and does that job for you: etl.
You can read rows from the CSV in parallel and process them in parallel rather than one by one.
I have tried to explain the lines in the code below. Hopefully it makes sense.
const etl = require("etl");
const fs = require("fs");

const csvFilePath = `${__dirname}/Inventory.csv`;
const testFolder = "./Images/";
const dirArr = [
  "./Images/Subdirectory-A",
  "./Images/Subdirectory-B",
  "./Images/Subdirectory-C"
];

fs.createReadStream(csvFilePath)
  .pipe(etl.csv()) // parse the csv file
  .pipe(etl.collect(10)) // this could be any value depending on how many you want to do in parallel.
  .pipe(etl.map(async items => {
    return Promise.all(items.map(async item => { // iterate through the collected batch of items
      const finalResult = await Promise.all(dirArr.filter(i => i === item.Name).map(async () => { // keep only the matching directory and iterate
        const files = await fs.promises.readdir(testFolder + item.Name); // read all files
        const filteredFiles = files.filter(file => file.match(/\.(jpg|jpeg|png|gif)$/i)); // keep only images
        const result = await Promise.all(filteredFiles.map(async file => { // map each file to an upload promise and wait for all of them
          const imgName = `${testFolder}${item.Name}/${file}`;
          const [uploadedFile] = await bucket.upload(imgName); // upload image; the promise resolves with [File, ...]
          return uploadedFile.metadata.mediaLink;
        }));
        return result; // this contains all the media links for the matching files
      }));
      // eslint-disable-next-line no-console
      console.log(finalResult); // arrays of media links for the files
      return finalResult;
    }));
  }))
  .promise()
  .then(() => console.log("finished"))
  .catch(err => console.error(err));

Here's a way to do it where we extract some of the functionality into some separate helper methods, and trim down some of the code. I had to infer some of your requirements, but this seems to match up pretty closely with how I understood the intent of your original code:
const csv=require('csvtojson')
const fs = require('fs');
const {Storage} = require('@google-cloud/storage');
var serviceAccount = require("./my-firebase-storage-spot.json");
const testFolder = './Images/';
var csvFilePath = './Inventory.csv';
var dirArr = ['./Images/Subdirectory-A','./Images/Subdirectory-B','./Images/Subdirectory-C'];
var allData = [];
// Using nodejs 'path' module ensures more reliable construction of file paths than string manipulation:
const path = require('path');
// Helper function to convert bucket.upload into a Promise
// From other responses, it looks like if you just omit the callback then it will be a Promise
const bucketUpload_p = fileName => new Promise((resolve, reject) => {
  bucket.upload(fileName, function (err, file) {
    if (err) return reject(err);
    resolve(file);
  });
});
// Helper function to convert readdir into a Promise
// Again, there are other APIs out there to do this, but this is a really simple solution too:
const readdir_p = dirName => new Promise((resolve, reject) => {
  fs.readdir(dirName, function (err, files) {
    if (err) return reject(err);
    resolve(files);
  });
});
// Here we're expecting the string that we found in the "Name" property of our JSON from "subscribe".
// It should match one of the strings in `dirArr`, but this function's job ISN'T to check for that,
// we just trust that the code already found the right one.
// Note: this needs to be an async function so we can use `await`; any error from readdir_p
// simply propagates as a rejected promise.
const getImageFilesFromJson_p = async jsonName => {
  const filePath = path.join(testFolder, jsonName);
  const files = await readdir_p(filePath);
  return files.filter(fileName => fileName.match(/\.(jpg|jpeg|png|gif)$/i));
};
csv()
  .fromFile(csvFilePath)
  .subscribe(async json => {
    // Here we appear to be validating that the "Name" prop from the received JSON matches one of the paths that
    // we're expecting...? If that's the case, this is a slightly more semantic way to do it.
    const nameFromJson = dirArr.find(dirName => json['Name'] === dirName);
    // If we don't find that it matches one of our expecteds, we'll reject the promise.
    if (!nameFromJson) {
      // We can do whatever we want though in this case, I think it's maybe not necessarily an error:
      // return Promise.resolve([]);
      return Promise.reject('Did not receive a matching value in the Name property from \'.subscribe\'');
    }
    // We can use `await` here since `getImageFilesFromJson_p` returns a Promise
    const imageFiles = await getImageFilesFromJson_p(nameFromJson);
    // We're getting just the filenames; map them to build the full path
    const fullPathArray = imageFiles.map(fileName => path.join(testFolder, nameFromJson, fileName));
    // Here we Promise.all, using `.map` to convert the array of strings into an array of Promises;
    // if they all resolve, we'll get the array of file objects returned from each invocation of `bucket.upload`
    return Promise.all(fullPathArray.map(filePath => bucketUpload_p(filePath)))
      .then(fileResults => {
        // So, now we've finished our two asynchronous functions; now that that's done let's do all our data
        // manipulation and resolve this promise
        // Here we just extract the metadata property we want
        const fileResultsMediaLinks = fileResults.map(file => file.metadata.mediaLink);
        // Before we return anything, we'll add it to the global array in the format from the original code
        allData.push({ photos: fileResultsMediaLinks });
        // Returning this array, which is the `mediaLink` value from the metadata of each of the uploaded files.
        return fileResultsMediaLinks;
      });
  }, onError, onComplete);


Related

how to access array in async context

I have this function:
const list = [];
(async () => {
  await fs.readdir(JSON_DIR, async (err, files) => {
    await files.forEach(async filename => {
      const readStream = fs.createReadStream(path.join("output/scheduled", filename));
      const parseStream = json.createParseStream();
      await parseStream.on('data', async (hostlist: HostInfo[]) => {
        hostlist.forEach(async host => {
          list.push(host);
        });
      });
      readStream.pipe(parseStream);
    })
  });
  // here list.length = 0
  console.log(list.length);
})();
The function reads from a directory of large JSON files; for each file it creates a stream that starts reading the JSON, and the streams can all be working at the same time.
At the end of the function I need the host values saved in the list, but when I check the list at the end, it is empty.
How can I save the content of host to a global variable, so it is accessible at the end?
I thought of checking when every file has finished reading by using an end event, but then, to access the list at the end, I would need yet another event that fires when all the other events have finished, and that looks complicated.
I have been using the big-json library:
https://www.npmjs.com/package/big-json
You could use a counter to determine when the streams have finished processing.
You can use readdirSync to read the directory synchronously.
const list: HostInfo[] = [];
(() => {
  const files = fs.readdirSync(JSON_DIR);
  let streamFinished = 0;
  let streamCount = files.length;
  files.forEach((filename) => {
    const readStream = fs.createReadStream(
      path.join('output/scheduled', filename)
    );
    const parseStream = json.createParseStream();
    parseStream.on('error', (err) => {
      // Handle errors
    });
    parseStream.on('data', (hostlist: HostInfo[]) => {
      list.push(...hostlist);
    });
    parseStream.on('end', () => {
      streamFinished++;
      if (streamFinished === streamCount) {
        // End of all streams...
      }
      console.log(list.length);
    });
    readStream.pipe(parseStream);
  });
})();
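Alternatively, instead of the manual counter you can wrap each stream in a promise and let Promise.all tell you when every file has been processed. A sketch under the same assumptions as above (fs, path, the big-json parse stream and the HostInfo type from the question):
const list: HostInfo[] = [];
const files = fs.readdirSync(JSON_DIR);

Promise.all(
  files.map((filename) => new Promise<void>((resolve, reject) => {
    const readStream = fs.createReadStream(path.join('output/scheduled', filename));
    const parseStream = json.createParseStream();
    parseStream.on('data', (hostlist: HostInfo[]) => list.push(...hostlist));
    parseStream.on('error', reject);
    parseStream.on('end', () => resolve()); // this file is done
    readStream.pipe(parseStream);
  }))
).then(() => {
  console.log(list.length); // all streams have ended here
});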

Read csv asynchronous - NodeJS

I'm looping over an array and reading from a CSV file so I can reduce its data; I've implemented something like the code below. I want to have a Map variable populated from the CSV file in memory and get data from it, but I want to populate it only once (or whenever I explicitly do so, but that's another matter).
when running I would like to print like this:
before
after
But I'm getting this:
before
before
before
after
after
after
Apparently the read from the CSV is not being awaited. Is there any way I can read from the CSV asynchronously?
This is my code:
let myMap = new Map();

const parseMap = async (stream) => {
  const map = new Map();
  return new Promise((resolve, reject) => {
    stream
      .on('data', elems => {
        const key = elems[0];
        const value = elems.slice(-1)[0];
        map.set(key, value);
      })
      .on('error', () => reject)
      .on('end', () => {
        console.log('promise');
        resolve(map);
      });
  });
}

const getMap = async (path_string) => {
  const stream = fs.createReadStream(path_string).pipe(parse({delimiter: DELIMITER}));
  return await parseMap(stream);
}

const translate = async (key) => {
  if (!myMap.size) {
    console.log('before');
    myMap = await getMap('my-csv.csv');
    console.log('after');
  }
  return myMap.get(key);
};

const guidRankings = await ['a', 'b', 'c'].reduce(async (accumulator, value, idx) => {
  const data = await translate(value);
  if (data) {
    (await accumulator).push({
      data,
      order: idx
    });
  }
  return accumulator;
}, Promise.resolve([]));
The reduce method can't handle an asynchronous callback.
You need to pass it a function that returns the actual value you care about, not one that returns a promise:
map your array values through translate to get an array of promises,
use await Promise.all to wait for them all to settle and get an array of the translated values,
then reduce that array (without doing anything asynchronous inside the reducer).
Assuming the three calls can be made at the same time, it is just a matter of using map to store the promises, Promise.all to wait for them to complete, and then a map to build your array.
const promises = ['a', 'b', 'c'].map(value => translate(value));
const results = await Promise.all(promises);
const guidRankings = results.map((data, order) => ({ data, order }));
If for some reason they have to be made one at a time, use a plain for...of loop with await inside it (or for await...of), as sketched below.
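A minimal sketch of that sequential variant, reusing the translate function from above (this has to run inside an async function):
const guidRankings = [];
let order = 0;
for (const value of ['a', 'b', 'c']) {
  const data = await translate(value); // each lookup waits for the previous one to finish
  if (data) {
    guidRankings.push({ data, order });
  }
  order += 1;
}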

Trying to return 2D-array with fast-csv in Nodejs but returning none

I am attempting to parse a CSV file using the fast-csv library and convert each value to a number or string in order to create a 2D array.
But I can't return the array from the ReadStream.
Can you give me advice on my code?
const fs = require("fs");
const csv = require("fast-csv");

async csvParse(){
  let array = [];
  options = {
    map(value){
      // convert value's column type to number or string
    }
  }
  fs.createReadStream(csvfile)
    .pipe(csv.parse(options))
    .on("error", error => console.log(error))
    .on("data", data => array.push(data))
    .on("end", function(){
      console.log(array);
      return array;
    })
}
input
name,date,score,id
John,2020-01-01,1000,10
expected-output
[["name", "date", "score", "id"], ["John", "2020-01-01", 1000, 10]]
actual-output
// no output
I expect the code to return the "expected-output" converted from the "input", but it returns nothing.
I understand this is because I am attempting to return a value from the anonymous callback passed to on(); however, I do not know the proper approach.
You can wrap processing of the file-stream and the csv-conversion in a promise and resolve the promise once the end-event is emitted:
const fs = require("fs");
const csv = require("fast-csv");

function csvParse() {
  const options = {
    // your options here
  }
  let array = [];
  return new Promise((resolve, reject) => {
    fs.createReadStream(csvfile)
      .pipe(csv.parse(options))
      .on("error", error => reject(error))
      .on("data", data => array.push(data))
      .on("end", () => {
        console.log(array);
        resolve(array);
      });
  });
}
Then call it like this:
(async () => {
  const result = await csvParse();
})();
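If you also want numbers instead of numeric strings (as in your expected output), one simple option is to convert values after parsing rather than relying on parser options. A sketch (the isNaN test is a naive heuristic; adjust it to your data):
// Hypothetical helper: convert cells that look numeric to Numbers, keep everything else as strings
const convertRow = row => row.map(value => (value !== "" && !isNaN(value) ? Number(value) : value));

// Inside csvParse, use it when collecting rows:
//   .on("data", data => array.push(convertRow(data)))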

Knex promises inside for loop

I'm new to Node/asynchronous coding and I realize this is a basic question that speaks to some fundamentals I'm missing, but for the life of me I just can't understand how this is supposed to work. I'm seeding a database using knex, by reading in data from a CSV and iterating through the rows in a for loop. Within that loop, I need to query another table in the database and return an associated value as part of the new row to be created.
I understand that knex returns a pending Promise, so it doesn't have access to the returned value yet. But I can't seem to get the structure right with async/await or Promise.all; I've tried it a bunch of ways based on other answers I've seen, but none seem to actually explain what's happening well enough that I can apply it successfully to my case. Any help hugely appreciated.
My seed file currently looks like this:
exports.seed = function(knex) {
  const fs = require('fs');

  function createImage(row, event_id) {
    return {
      name: row[1],
      event_id: event_id
    }
  };

  function get_event_id(loc) {
    knex('events')
      .where({location: loc})
      .first()
      .then(result => {console.log(result)}); // <-- this never executes
  };

  let images = [];
  const file = fs.readFileSync('./data.csv');
  const lines = file.toString().replace(/[\r]/g, '').split('\n');
  for (i = 1; i < lines.length; i++) {
    var row = lines[i].split(',');
    var event_id = (async function () { return await get_event_id(row[0]) })();
    images.push(createImage(row, event_id));
  };
  console.log(images);

  // Inserts seed entries
  // return knex('table_name').insert(images);
};
Output:
[ { name: '003.jpg', event_id: undefined } ]
My data.csv is structured like:
Location,Name
Amsterdam,003.jpg,
...
You can change your for loop into an Array.map and use Promise.all on the returned promises.
Also, make seed an async function so the returned promise resolves only after all the work is done:
exports.seed = async function (knex) {
  const fs = require('fs');

  function createImage(row, event_id) {
    return {
      name: row[1],
      event_id: event_id
    }
  };

  function get_event_id(loc) {
    return knex('events')
      .where({ location: loc })
      .first()
      .then(result => { console.log(result); return result }); // <-- executes now, because the query promise is returned
  };

  const file = fs.readFileSync('./data.csv');
  const lines = file.toString().replace(/[\r]/g, '').split('\n');
  let promises = lines.slice(1).map(async (line) => { // slice(1) skips the header row, like i=1 did before
    let row = line.split(',');
    let event_id = await get_event_id(row[0]);
    return createImage(row, event_id);
  });
  let images = await Promise.all(promises);
  console.log(images);

  // Inserts seed entries
  // return knex('table_name').insert(images);
};
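For completeness, the knex CLI (knex seed:run) is what normally invokes this seed and awaits the returned promise, so nothing changes in how you run it. If you want to invoke it manually, a sketch (the file paths and the "development" config key are placeholders for your own setup):
const knex = require('knex')(require('./knexfile').development);

require('./seeds/images')   // the seed file shown above (placeholder path)
  .seed(knex)
  .then(() => console.log('seeding finished'))
  .finally(() => knex.destroy()); // close the connection pool so the script can exit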

fs.readdir recursive search with depth=1

I have to write code which takes one parameter, a path to a directory, fetches the files from that directory, and then does the same for the directories inside it. The whole search should be wrapped in a promise.
But the depth of the recursive search is 1.
The final array should look like: [file1, file2, file3, [file1inDir1, file2inDir1, Dir1inDir1, file3inDir1, Dir2inDir1], file4, file5]
My code is:
const fs = require("fs");
const path = require("path");

function checkfile(files){
  let result = [];
  for(let i = 0; i < files.length; i++){
    let newpath = path.join(__dirname, files[i]);
    fs.stat(newpath, (err, stats) => {
      if(stats.isDirectory()){
        fs.readdir(newpath, (error, files) => { result.push(files) })
      }
      else{ result.push(files[i]) }
    })
  }
  return result;
}

let test = (filepath) => {
  return new Promise((resolve, reject) => {
    fs.readdir(filepath, (error, files) => {
      if (error) {
        reject("Error occured while reading directory");
      } else {
        resolve(checkfile(files));
      }
    });
  });
}

test(__dirname)
  .then(result => {
    console.log(result);
  })
  .catch(er => {
    console.log(er);
  });
When I run it I get the following output: []
How do I correct this?
test correctly returns a promise, but checkfile does not, so all the asynchronous operations happen after the still-empty result array has already been returned synchronously.
Fortunately, Node.js already provides promise-returning versions of these functions (the fs.promises API), and with them it becomes easy to write this code without callbacks:
async function checkfile(files){
  const result = [];
  for(let i = 0; i < files.length; i++){
    let newpath = path.join(__dirname, files[i]);
    const stats = await fs.promises.stat(newpath);
    if(stats.isDirectory()){
      const files = await fs.promises.readdir(newpath);
      result.push(files);
    } else result.push(files[i]);
  }
  return result;
}

async function test(filepath) {
  const files = await fs.promises.readdir(filepath);
  return checkfile(files);
}
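The promise-based test can then be consumed exactly as in your original snippet:
test(__dirname)
  .then(result => console.log(result))
  .catch(er => console.log(er));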
