Saving a ReadStream as a file in an async loop - javascript

What I need to do:
I'm trying to loop over multiple tracks fetched from the database
Get the audio file path/name of each track, get the read stream from AWS, then save it to a temporary directory on my server
Make some changes to the audio with FFMPEG
Re-upload the changed audio file to AWS
Delete the file from the temporary directory on my server
So first I get all the tracks, loop over them, and call the function that does the audio processing on each track, passing the track as a parameter:
exports.updateAllTracksWithNewTag = async (req, res) => {
  try {
    const allTracks = await Tracks.findAll()
    await Promise.all(
      allTracks.map(async (track) => {
        return await this.updateOne(track)
      })
    )
    return res.status(200).json({ message: 'All tracks are done' })
  } catch (error) {
    return ResponseErrorFormated.responseError(error, res)
  }
}
This is the function that does the audio processing on each track:
exports.updateOne = async (track) => {
  const filesToDeleteFromTemp = []
  try {
    const fileAudioURL = track.MP3_audio_url
    const originalFileReadstream = await AWSS3Utils.getReadStream(
      URLConfig.URL_ASSETS.PRIVATE.USERS_TRACKS_AUDIOS.path.slice(1) + fileAudioURL
    )
    const tempPathToSaveStreamGetted = 'assets/temp/audios/'
    // It seems it just loops over all the tracks up to this line and only then continues the process
    console.log('SAVING FILE for track.id=', track.id) /////////////////////
    await FilesUtils.saveReadStreamAsFile(originalFileReadstream, tempPathToSaveStreamGetted, fileAudioURL)
    console.log('FILE SAVED for track.id=', track.id) /////////////////////
    filesToDeleteFromTemp.push(tempPathToSaveStreamGetted + fileAudioURL)
    const fileInfosForTag = {
      path: tempPathToSaveStreamGetted + fileAudioURL,
      filename: fileAudioURL,
    }
    console.log('CREATING TAGGED for track.id=', track.id) /////////////////////
    const resultTaggedMP3 = await FilesFormater.createTaggedAudioFromMP3(fileInfosForTag)
    console.log('TAGGED CREATED for track.id=', track.id) /////////////////////
    const readStreamTaggedMP3 = resultTaggedMP3.readStream
    const finalFilePathTaggedMP3 = resultTaggedMP3.finalFilePath
    const finalFileNameTaggedMP3 = resultTaggedMP3.finalFileNameTaggedMP3
    const newFileKeyMP3 =
      URLConfig.URL_ASSETS.PUBLIC.USERS_TRACKS_TAGGED.path.slice(1) + finalFileNameTaggedMP3
    filesToDeleteFromTemp.push(finalFilePathTaggedMP3)
    await AWSS3Utils.uploadFileFromReadstream(readStreamTaggedMP3, newFileKeyMP3)
    await FilesUtils.unlinkFiles(filesToDeleteFromTemp)
  } catch (error) {
    await FilesUtils.unlinkFiles(filesToDeleteFromTemp)
    throw error
  }
}
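FilesUtils.saveReadStreamAsFile is not shown in the question, so its body is a guess. A minimal sketch of such a helper, assuming it simply pipes the read stream into directory + fileName and resolves once the write finishes (matching the call above):

const { createWriteStream } = require('fs')
const { pipeline } = require('stream/promises')

// Hypothetical implementation of FilesUtils.saveReadStreamAsFile (not from the question):
// pipe the S3 read stream into <directory><fileName> and resolve when the write completes.
exports.saveReadStreamAsFile = async (readStream, directory, fileName) => {
  await pipeline(readStream, createWriteStream(directory + fileName))
}

If the real helper never resolves its promise (for example, it listens to the stream events but never calls resolve), the await on it would hang in the way the logs below suggest.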
Expected result:
SAVING FILE for track.id=120
FILE SAVED for track.id=120
CREATING TAGGED for track.id=120
TAGGED CREATED for track.id=120
SAVING FILE for track.id=121
FILE SAVED for track.id=121
CREATING TAGGED for track.id=121
TAGGED CREATED for track.id=121
The actual result:
SAVING FILE for track.id=120
SAVING FILE for track.id=121
SAVING FILE for track.id=122
SAVING FILE for track.id=123
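For the log order itself: Promise.all(allTracks.map(...)) starts every updateOne call at once, so each track logs SAVING FILE before any of them reaches FILE SAVED; the await inside updateOne only suspends that one call, not the loop. If the intent is to process the tracks strictly one after another, a plain for...of loop awaits each track before starting the next (a minimal sketch of the same controller with sequential processing):

exports.updateAllTracksWithNewTag = async (req, res) => {
  try {
    const allTracks = await Tracks.findAll()
    // Each track now finishes its whole updateOne pipeline before the next one starts
    for (const track of allTracks) {
      await this.updateOne(track)
    }
    return res.status(200).json({ message: 'All tracks are done' })
  } catch (error) {
    return ResponseErrorFormated.responseError(error, res)
  }
}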

Related

Async function immediately returns undefined yet output variable returns a value immediately before the return statement

I am writing a function that downloads and converts a PDF into individual JPG files by page. I am using the imagemagick library to do the conversion. I am having trouble with my processPDF() function, as it immediately returns undefined. I put a console.log statement immediately before the function returns, and it logs the exact value I expect, yet that value doesn't seem to make it outside of the function for some reason.
import im from 'imagemagick'
import { promises as fs } from 'fs'
import path from 'path'
import _ from 'lodash'
import axios from 'axios'
import { v4 as uuid } from 'uuid'
async function processPDF(pdfPath) {
  let basename = path.basename(pdfPath, '.pdf')
  let outputPath = "./img/" + basename + ".jpg";
  console.log(`Converting ${pdfPath}`)
  // Take PDF file and generate individual JPG files
  await im.convert(["-density", 300, pdfPath, outputPath], async (err) => {
    if (err) {
      console.log(err)
      throw `Couldn't Process ${pdfPath}`
    }
    else {
      // Get every file in Temporary Image Directory
      let files = await fs.readdir(`./img/`)
      // Append directory into filenames
      files = files.map(file => {
        return "./img/" + file
      })
      // We only want the files that match the source pdf's name
      files = files.filter((file) => {
        return file.includes(basename)
      })
      console.log(`Getting ${basename} Buffer Data`)
      // For each file, read and return the buffer data along with the path
      let images = await Promise.all(files.map(async file => {
        const contents = await fs.readFile(file)
        return { path: file, buffer: contents }
      }))
      // Since we read the files asynchronously, reorder the files
      images = _.orderBy(images, (image) => {
        let regex = /\d*.jpg/
        let res = image.path.match(regex)[0]
        res = path.basename(res, '.jpg')
        return res
      })
      let output = { pdf: pdfPath, images }
      // Returns a value
      console.log(output)
      // Returns undefined???
      return output
    }
  })
}
export async function downloadAndProcessPDF(url) {
  // Fetch PDF from server
  let { data } = await axios.get(url, {
    responseType: 'arraybuffer',
    headers: {
      'Content-Type': 'application/json',
      'Accept': 'application/pdf'
    }
  }).catch(e => {
    console.log(e);
    throw `Can't retrieve ${url}`
  })
  // Generate a Unique ID for the pdf since this is called asynchronously, this will be called many times simultaneously
  let id = "./pdf/" + uuid() + ".pdf"
  await fs.writeFile(id, data);
  // tell processPDF to process the pdf in the ./pdf directory with the given filename
  let pdfData = await processPDF(id);
  // Returns undefined???
  console.log(pdfData)
  return pdfData
}
If I had to take a wild guess, I'd think that im.convert is the function giving me trouble. Throughout my source code I'm using promises to handle asynchronous tasks, yet im.convert() uses a callback function. I'm not super familiar with how promises and callbacks interact, so I think that's probably the issue.
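That guess matches the code shown: await im.convert(...) does not wait for the callback, because im.convert itself does not return a promise, and the return output sits inside the callback rather than inside processPDF, so processPDF resolves with undefined before the conversion finishes. A minimal sketch of one way around this, assuming im.convert keeps the (args, callback) shape used in the question, is to wrap the callback in a Promise and do the post-processing after awaiting it:

import im from 'imagemagick'

// Wrap the callback-style im.convert in a Promise so it can actually be awaited
function convertAsync(args) {
  return new Promise((resolve, reject) => {
    im.convert(args, (err, stdout) => {
      if (err) reject(err)
      else resolve(stdout)
    })
  })
}

// Inside processPDF, await the conversion first, then read ./img/ and
// return the output object from processPDF itself (not from a callback):
// await convertAsync(["-density", 300, pdfPath, outputPath])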

Event listener for when a file has finished streaming to AWS S3 upload api?

I am creating a file backup between Google Drive and AWS S3, where I create a readable stream promise by downloading the file using the Google Drive get API and piping the data to AWS S3.
As I have many files, each promise is added to a queue, and new promises only enter when one resolves.
I'm struggling to make the promise resolve only when the file has finished uploading to AWS S3, rather than when the file has finished downloading.
I thought using .on('finish', () => {resolve()}) should do this but it doesn't seem to be working.
Here is my code sample:
// download stream of NON gdocs files and pipe to destination
const getGFileContent = async (fileObj) => {
  let fileExt = fileObj.path.join('/').concat('/', fileObj.name)
  return drive.files.get({fileId: fileObj.id, mimeType: fileObj.mimeType, alt: 'media'}, {responseType: 'stream'})
    .then(res => {
      return new Promise((resolve, reject) => {
        res.data
          .pipe(uploadS3(fileExt))
          .on('end', () => {console.log(`Done downloading file: ${fileExt}`)})
          .on('finish', () => {resolve(console.log(`File Backup Complete: ${fileExt}`))})
          .on('error', err => {reject(console.error(`Error downloading file: ${err}`))})
      })
    })
}

// upload a file to AWS S3 by passing the file stream from getGFileContent into the 'body' parameter of the upload
const uploadS3 = (filePath) => {
  let pass = new stream.PassThrough()
  let params = {
    Bucket: awsBucketName, // bucket-name
    Key: filePath, // file will be saved as bucket-name/[uniquekey.csv]
    Body: pass // file data passed through stream
  }
  new aws.S3().upload(params).promise()
    .then(() => console.log(`Successfully uploaded to S3: ${filePath}`))
    .catch(err => console.log(`Error, unable to upload to S3: ${err}`))
  return pass
}
The first thing that comes to mind is to make the uploadS3 function async and await the upload before returning the PassThrough stream. But this wouldn't work: the function would then return a Promise, and .pipe() only accepts a stream object.
Instead of that, you could refactor your code so that getGFileContent returns a readable stream promise.
Then, make uploadS3 accept a readable stream as a parameter and return an S3 upload promise.
To wrap it up, add an async backupFile function, which will await both the GDrive stream and the upload promise before continuing. This will also keep the functions tidy and clean, each having its own single responsibility.
Example code:
const AWS = require('aws-sdk');
const fs = require('fs');

const s3 = new AWS.S3();
AWS.config.update({
  accessKeyId: '----',
  secretAccessKey: '----',
});

const backupFile = async (file) => {
  const fileStream = await getGFileStream(file);
  try {
    await uploadStreamToS3(fileStream);
    console.log(`S3 Backup of ${fileStream.path} completed`)
  } catch (err) {
    console.log(`error during file upload ${err}`);
  }
}

const getGFileStream = async (fileObj) => {
  // TODO: logic to find and get the file. Returns a readableStream promise
  const fileStream = fs.createReadStream('./largeFile.zip');
  console.log('File ${...} read from Google Drive');
  return fileStream;
}

const uploadStreamToS3 = (fileStream) => {
  const params = {Bucket: 'test-bucket', Key: 'key', Body: fileStream}
  console.log(`Starting to upload ${fileStream.path} to S3`);
  return s3.upload(params).promise();
}

backupFile({id: 'mockTestFile'});
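To connect this back to the Drive code in the question, getGFileStream could resolve to the response stream from the same drive.files.get call (a sketch, assuming the drive client and fileObj shape from the question; the S3 key would then have to be passed explicitly, since res.data has no path property):

const getGFileStream = async (fileObj) => {
  // Same Drive call as in the question; res.data is the readable stream to upload
  const res = await drive.files.get(
    { fileId: fileObj.id, mimeType: fileObj.mimeType, alt: 'media' },
    { responseType: 'stream' }
  )
  console.log(`File ${fileObj.name} read from Google Drive`)
  return res.data
}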

Problem reading a CSV file inside Firebase Functions

I am trying to read a CSV file inside a Firebase function so that I can send mail to all the records. I am planning to go with the following procedure:
upload the CSV
fire an onFinalize function
read the file and send emails
Below is the function:
import * as functions from "firebase-functions";
import * as mkdirp from "mkdirp-promise";
import * as os from "os";
import * as path from "path";
import csv = require('csvtojson');
const gcs = require('@google-cloud/storage')({ keyFilename: 'service-account-credentials.json' });
const csvDirectory = "csv";
export = functions.storage.object().onFinalize(async (object) => {
  const filePath = object.name;
  const contentType = object.contentType;
  const fileDir = path.dirname(filePath);
  if (fileDir.startsWith(csvDirectory) && contentType.startsWith("text/csv")) {
    const bucket = gcs.bucket(object.bucket);
    const file = bucket.file(filePath);
    const fileName = path.basename(filePath);
    const tempLocalFile = path.join(os.tmpdir(), filePath);
    const tempLocalDir = path.dirname(tempLocalFile);
    console.log("values", bucket, file, fileName, tempLocalDir, tempLocalFile);
    console.log("csv file uploadedeeeed");
    await mkdirp(tempLocalDir);
    await bucket.file(filePath).download({
      destination: tempLocalFile
    });
    console.log('The file has been downloaded to', tempLocalFile);
    csv()
      .fromFile(tempLocalFile)
      .then((jsonObj) => {
        console.log(jsonObj);
      })
  }
});
While running the code I only get the "csv file uploadedeeeed" message that I have written inside the console.log, and then I get a timeout after 1 minute. I also never get the "The file has been downloaded to" log. Can anybody look at the code and help me get out of this?
You are mixing the use of async/await with a call to the then() method. You should also use await for the fromFile() method.
The following should do the trick (untested):
export = functions.storage.object().onFinalize(async (object) => {
  const filePath = object.name;
  const contentType = object.contentType;
  const fileDir = path.dirname(filePath);
  try {
    if (fileDir.startsWith(csvDirectory) && contentType.startsWith("text/csv")) {
      //.....
      await mkdirp(tempLocalDir);
      await bucket.file(filePath).download({
        destination: tempLocalFile
      });
      console.log('The file has been downloaded to', tempLocalFile);
      const jsonObj = await csv().fromFile(tempLocalFile);
      console.log(jsonObj);
      return null;
    } else {
      //E.g. throw an error
    }
  } catch (error) {
    //.....
  }
});
Also note that, independently of the mixed use of async/await and then(), with the following line in your code
csv().fromFile(tempLocalFile).then(...)
you were not returning the Promise returned by the fromFile() method. This is a key point in Cloud Functions.
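If you prefer to keep the then() style instead of await, the key is to return the chain so Cloud Functions waits for it before terminating (a minimal sketch of the same idea):

// Returning the promise chain tells Cloud Functions the work is not done yet
return csv()
  .fromFile(tempLocalFile)
  .then((jsonObj) => {
    console.log(jsonObj);
    return null;
  });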
I would suggest you watch the official Video Series on Cloud Functions (https://firebase.google.com/docs/functions/video-series/) and in particular the videos on Promises titled "Learn JavaScript Promises".

How to zip files PDF from a Storage in NodeJS

I need to create a zip file with the PDFs I receive from AWS storage, and I am trying to do this with ADM-zip in NodeJS, but I can't read the final file.zip.
Here is the code:
var zip = new AdmZip();
// add file directly
var content = data.Body.buffer;
zip.addFile("test.pdf", content, "entry comment goes here");
// console.log(content)
// add local file
zip.addLocalFile(`./tmp/boletos/doc.pdf`);
// // get everything as a buffer
var willSendthis = zip.toBuffer();
console.log(willSendthis)
// // or write everything to disk
zip.writeZip("test.zip", `../../../tmp/boletos/${datastring}.zip`);
As it is, this only creates a .zip for each file.
I was also facing this issue. I looked through a lot of SO posts. This is how I was able to create a zip with multiple files from download URLs. Please keep in mind, I'm unsure whether this is best practice, or whether this is going to blow up memory.
Create a zip from a list of IDs of the resources requested by the client:
const zip = new AdmZip();

await Promise.all(sheetIds.map(async (sheetId) => {
  const downloadUrl = await this.downloadSds({ sheetId, userId, memberId });
  if (downloadUrl) {
    await new Promise((resolve) => https.get(downloadUrl, (res) => {
      const data = [];
      res.on('data', (chunk) => {
        data.push(chunk);
      }).on('end', () => {
        const buffer = Buffer.concat(data);
        resolve(zip.addFile(`${sheetId}.pdf`, buffer));
      });
    }));
  } else {
    console.log('could not download');
  }
}));

const zipFile = zip.toBuffer();
I then used downloadjs in my React.js client to download.
const result = await sds.bulkDownloadSds(payload);
if (result.status > 399) return store.rejectWithValue({ errorMessage: result?.message || 'Error', redirect: result.redirect });
const filename = 'test1.zip';
const document = await result.blob();
download(document, filename, 'zip');
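The piece between the two snippets, sending the zip buffer back so the client can call result.blob(), is not shown. A sketch of what that response might look like, assuming an Express handler (the route, handler name and buildZipForSheets helper are illustrative, not from the original):

// Hypothetical Express route returning the zip built above as a download
app.post('/sheets/bulk-download', async (req, res) => {
  const zipFile = await buildZipForSheets(req.body.sheetIds) // wraps the AdmZip logic above
  res.set({
    'Content-Type': 'application/zip',
    'Content-Disposition': 'attachment; filename="sheets.zip"',
  })
  res.send(zipFile) // zipFile is the Buffer from zip.toBuffer()
})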

GraphQL: Error when resolving promises during file upload

So I've been working with GraphQL uploads, and before stating my problem, here's an overview of the tech stack that I am using:
Backend: Mongoose, Express, Apollo, GraphQL
Frontend: VueJS, Apollo, GraphQL
I'm using Apollo Upload Client to send the uploaded files from the client to the server side. Since I am sending a list of files of the scalar type Upload from the client, I receive a list of promises that need to be resolved. When using Promise.all() I get the following error (which, weirdly, I wasn't getting before, and I don't know why). If I upload more than one file, the first file just gets lost somewhere and the second file uploads... but not every time; sometimes it doesn't happen. Maybe I am not resolving or handling the promises properly. Note that I also have to save the file name in MongoDB through Mongoose.
{ BadRequestError: Request disconnected during file upload stream parsing.
at IncomingMessage.request.once (F:\repos\pushbox\node_modules\graphql-upload\lib\processRequest.js:245:35)
at Object.onceWrapper (events.js:285:13)
at IncomingMessage.emit (events.js:197:13)
at resOnFinish (_http_server.js:583:7)
at ServerResponse.emit (events.js:202:15)
at onFinish (_http_outgoing.js:683:10)
at processTicksAndRejections (internal/process/next_tick.js:74:9)
message: 'Request disconnected during file upload stream parsing.',
expose: true,
statusCode: 499,
status: 499 }
I have an HTML file input tag that takes multiple files and the mutation I use is:
async uploadFiles() {
  // Check if input tag is empty
  if (this.files.length === 0) {
    this.uploadErrorAlert = true;
    return;
  }
  // Mutation
  this.isUploading = true;
  await this.$apollo.mutate({
    mutation: UPLOAD_FILES,
    variables: {
      files: this.files,
      id: this.selectedCard.id,
    },
  })
    .then(() => {
      // clear files from the input tag
      this.files = '';
      this.$refs.selectedFiles.value = '';
      this.isUploading = false;
    })
    .catch((err) => {
      console.error(err);
    });
},
And finally, the resolver on the server is:
/**
 * Uploads files sent on disk and saves
 * the file names in the DB
 *
 * @param {Object} attachments - List of files for a card
 *
 * @return {Boolean} - true if upload is
 * successful
 */
uploadFiles: async (_, attachments, { controllers }) => {
  Promise.all(attachments.files.map(async (file) => {
    const { createReadStream, filename } = await file;
    const stream = createReadStream();
    /**
     * We need unique names for every file being uploaded,
     * so we use the ID generated by MongoDB and concat it
     * to the filename sent by the user.
     *
     * Therefore we instantiate an attachment object to get an ID
     */
    const attachment = await controllers.attachment.add({ id: attachments.id, file: '' });
    const newFileName = `${attachment.id}_${filename}`;
    const path = `${process.env.UPLOAD_DIR}/${newFileName}`;
    await controllers.attachment.update({
      id: attachment.id,
      file: newFileName,
    });
    console.log(`reached for ${path}`);
    // Attempting to save file in server
    return new Promise((resolve, reject) => stream
      .pipe(createWriteStream(path))
      .on('finish', () => resolve())
      .on('error', (error) => {
        console.log('dude?');
        if (stream.truncated) {
          // Delete the truncated file
          unlinkSync(path);
        }
        reject(error);
      }));
  })).then(() => {
    pubsub.publish(ATTACHMENTS_ADDED, { attachmentsChanged: controllers.attachment.getAll() });
  }).catch((err) => {
    console.log(err);
  });
},
Any help would be appreciated!
Okay, so I don't know how I missed it, but this right here is the solution! The issue is on the GitHub issue forum of the module that I am using.
The problem is solved by using await before the Promise.all() call. So now the code inside the uploadFiles resolver looks like:
await Promise.all(attachments.files.map(async (file) => {
  const { createReadStream, filename } = await file;
  const stream = createReadStream();
  /**
   * We need unique names for every file being uploaded,
   * so we use the ID generated by MongoDB and concat it
   * to the filename sent by the user.
   *
   * Therefore we instantiate an attachment object to get an ID
   */
  const attachment = await controllers.attachment.add({ id: attachments.id, file: '' });
  const newFileName = `${attachment.id}_${filename}`;
  const path = `${process.env.UPLOAD_DIR}/${newFileName}`;
  await controllers.attachment.update({
    id: attachment.id,
    file: newFileName,
  });
  console.log(`reached for ${path}`);
  // Attempting to save file in server
  return new Promise((resolve, reject) => stream
    .pipe(createWriteStream(path))
    .on('finish', () => resolve())
    .on('error', (error) => {
      console.log('dude?');
      if (stream.truncated) {
        // Delete the truncated file
        unlinkSync(path);
      }
      reject(error);
    }));
})).then(() => {
  pubsub.publish(ATTACHMENTS_ADDED, { attachmentsChanged: controllers.attachment.getAll() });
}).catch((err) => {
  console.log(err);
});
