Promise chaining causing increased execution time? - javascript

I am creating a simple Node.js function that converts a PDF to an image, crops the image, and merges the pieces back together with ImageMagick.
This is the complete code I am using:
var os = require('os');
var fs = require('fs');
var path = require('path');
var gs = require('node-gs');
var sharp = require('sharp');
var areaMap = require('./areaMap');
const { performance } = require('perf_hooks');
var spawn = require('child_process').spawnSync;
var pExcep = 'someException';
var gsPath = 'Ghostscript/gs26';
var src = path.join(os.tmpdir(), '/');
var Files = {
  file1: path.join(src, 'out1.jpeg'),
  file2: path.join(src, 'out2.jpeg'),
  OutImg: path.join(src, 'out.jpeg')
}

var crop = function (s, sFile) {
  return new Promise((res, rej) => {
    s = areaMap[s];
    sharp(Files.OutImg).extract(s)
      .toFile(sFile)
      .then(() => res())
      .catch((err) => rej(err));
  });
};

var getBaseCard = function (s) {
  if (RegExp('^([0-9]{8})$').test(s)) { return 'SOMETHINGHERE' } else { return 'inception'; }
  //This can be done on client side.
}

var GetCardType = function (base, sInfo) {
  return new Promise((res, rej) => {
    if (base === 'SOEMTHINGHERE') {
      if (sInfo.includes('SOMETHINGHERE2')) {
        if (sInfo.includes(pExcep)) {
          res('PA_S_')
        } else {
          res('PA_S2')
        }
      } else {
        res('PA_ST')
      }
    } else {
      res('SA_')
    }
  })
}

var PdfToText = function (file, pass) {
  return new Promise((res, rej) => {
    gs()
      .batch().safer().nopause().res(2)
      .option('-dDEVICEWIDTHPOINTS=20')
      .option('-dDEVICEHEIGHTPOINTS=20')
      .option('-dFIXEDMEDIA')
      .option('-sPDFPassword=' + pass)
      .device('txtwrite').output('-').input(file).executablePath(gsPath)
      .exec((err, stdout, stderr) => {
        if (!err) {
          res(stdout);
        } else {
          console.log(stdout);
          console.log(err);
          console.log(stderr);
        }
      })
  });
}

var getBaseImage = function (file, pass, quality) {
  return new Promise((res, rej) => {
    gs()
      .batch().nopause().safer().res(300 * quality)
      .option('-dTextAlphaBits=4')
      .option('-dGraphicsAlphaBits=4')
      .option('-sPDFPassword=' + pass)
      .executablePath(gsPath).device('jpeg').output(Files.OutImg).input(file)
      .exec((err, stdout, stderr) => {
        if (!err) { res(); } else { rej(stdout) };
      })
  })
}

exports.processCard = function (file, password, quality) {
  return new Promise((resolve, reject) => {
    getBaseImage(file, password, quality) // Convert PDF to Image
      .then(() => {
        PdfToText(file, password) // Extract Text from pdf
          .then((res) => {
            GetCardType(getBaseCard(password), res) // finally get PDF Type
              .then((ct) => {
                // crop image here using Sharp
                Promise.all([
                  crop(ct + 'A_' + quality, Files.file1),
                  crop(ct + 'B_' + quality, Files.file2)])
                  .then(() => {
                    // Merge Above two image into one using ImageMagick convert
                    spawn('convert', [Files.file1, Files.file2, '+append', 'files/out1.jpg']);
                    fs.unlinkSync(Files.OutImg); // Unlink tmp folders
                    fs.unlinkSync(Files.file1);
                    fs.unlinkSync(Files.file2);
                    resolve(); // finally resolve
                  }).catch((err) => reject(err));
              }).catch((err) => reject(err))
          }).catch((err) => reject(err))
      }).catch((err) => reject(err))
  })
}
These are the problems I am facing:
1. ImageMagick isn't creating the output file.
2. fs.unlinkSync throws ENOENT: no such file or directory, unlink '/tmp/out1.jpeg' on average every second execution.
3. Using the above code increases execution time.
For example: getBaseImage should complete in 600ms, but it takes 1400ms with the above code.
About speed in general: the complete function (not just getBaseImage) should finish in 1100-1500ms(*) on average, but the time taken is ~2500ms.
*The 1100-1500ms time is achievable by using function chaining, but that is hard to read and maintain for me.
I am going to use this function in Firebase Functions.
How do I properly chain these functions?
EDIT
exports.processCard = function (file, password, quality) {
  return new Promise((resolve, reject) => {
    console.log(performance.now());
    getBaseImage(file, password, quality) //Convert PDF TO IMAGE
      .then(() => { return PdfToText(file, password) })
      .then((res) => { return GetCardType(getBaseCard(password), res) })
      .then((ct) => {
        return Promise.all([
          crop(ct + 'A_' + quality, Files.file1),
          crop(ct + 'B_' + quality, Files.file2)])
      })
      .then(() => {
        spawn('convert', [Files.file1, Files.file2, '+append', 'files/out1.jpg']);
        fs.unlinkSync(Files.OutImg); // Unlink tmp folders
        fs.unlinkSync(Files.file1);
        fs.unlinkSync(Files.file2);
        resolve();
      })
      .catch((err) => { console.log(err) });
  });
};
Using the above pattern didn't solve my issues here.

There's a good chance this weirdness is caused by using the file system. If I understand it correctly, the fs in Cloud Functions is in memory, so when you write to it, read from it, and remove from it, you're using more and less OS memory. That can get weird if a function is called repeatedly and reuses the loaded module.
One thing to try to keep the state clean for each invocation is to put everything (including the requires) inside the scope of the handler. That way you instantiate everything freshly on each invocation.
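A rough sketch of that idea, using the same modules as in the question (the function body is elided):
exports.processCard = function (file, password, quality) {
  // Load everything inside the handler so each invocation starts from a clean state.
  var os = require('os');
  var fs = require('fs');
  var path = require('path');
  var sharp = require('sharp');
  var gs = require('node-gs');
  var spawn = require('child_process').spawnSync;

  var src = path.join(os.tmpdir(), '/');
  var Files = {
    file1: path.join(src, 'out1.jpeg'),
    file2: path.join(src, 'out2.jpeg'),
    OutImg: path.join(src, 'out.jpeg')
  };

  // ...define crop, getBaseImage, PdfToText, GetCardType here and run the chain...
};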
Finally, you don't seem to be waiting for the spawned convert command to run, you'll need to wait for it to complete:
const convertProc = spawn('convert', [Files.file1, Files.file2, '+append', 'files/out1.jpg']);
convertProc.on('close', function() {
  fs.unlinkSync(Files.OutImg); // Unlink tmp folders
  fs.unlinkSync(Files.file1);
  fs.unlinkSync(Files.file2);
  resolve();
});
convertProc.on('error', function(error) {
  reject(error);
});
Then you wait for it to complete before you resolve.
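For illustration, a minimal sketch of wrapping convert in a Promise so it can be awaited from the flat chain; this assumes the asynchronous spawn, whereas the question imports spawnSync, which already blocks until convert returns:
const { spawn } = require('child_process');

function convertAsync(args) {
  return new Promise((resolve, reject) => {
    const proc = spawn('convert', args);
    proc.on('error', reject); // e.g. the convert binary is not on PATH
    proc.on('close', (code) => {
      if (code === 0) resolve();
      else reject(new Error('convert exited with code ' + code));
    });
  });
}

// Inside the chain from the EDIT:
// .then(() => convertAsync([Files.file1, Files.file2, '+append', 'files/out1.jpg']))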

Related

how do I ensure two asynchronous operations add to a file sequentially?

I have this code:
const fs = require("fs");
const saveFile = (fileName, data) => {
return new Promise((resolve) => {
fs.writeFile(fileName, data, (err) => {
resolve(true);
});
});
};
const readFile = (fileName) => {
return new Promise((resolve) => {
fs.readFile(fileName, "utf8", (err, data) => {
resolve(data);
});
});
};
const filename = "test.txt";
saveFile(filename, "first");
readFile(filename).then((contents) => {
saveFile(filename, contents + " second");
});
readFile(filename).then((contents) => {
saveFile(filename, contents + " third");
});
I'm hoping to obtain in 'test.txt'
first second third
but instead, I get
first thirdd
The idea is that every time I receive a certain POST request, I have to add more text to the file.
Does someone have any solution for this?
Thank you so much!
Edit:
The problem with using async/await or a chain of .then() is that I have to add more text to the file every time I receive a certain POST request, so I don't have control over what is written or when. The idea is that everything gets written and nothing is overwritten, even if two POST requests are received at the same time.
I'm going to share the solution with a linked list I came up with yesterday. But I still want to know if someone has a better solution.
const saveFile = (fileName, data) => {
return new Promise((resolve) => {
fs.writeFile(fileName, data, (err) => {
resolve(true);
});
});
};
const readFile = (fileName) => {
return new Promise((resolve) => {
fs.readFile(fileName, "utf8", (err, data) => {
resolve(data);
});
});
};
class LinkedCommands {
constructor(head = null) {
this.head = head;
}
getLast() {
let lastNode = this.head;
if (lastNode) {
while (lastNode.next) {
lastNode = lastNode.next;
}
}
return lastNode;
}
addCommand(command, description) {
let lastNode = this.getLast();
const newNode = new CommandNode(command, description);
if (lastNode) {
return (lastNode.next = newNode);
}
this.head = newNode;
this.startCommandChain();
}
startCommandChain() {
if (!this.head) return;
this.head
.command()
.then(() => {
this.pop();
this.startCommandChain();
})
.catch((e) => {
console.log("Error in linked command\n", e);
console.log("command description:", this.head.description);
throw e;
});
}
pop() {
if (!this.head) return;
this.head = this.head.next;
}
}
class CommandNode {
constructor(command, description = null) {
this.command = command;
this.description = description;
this.next = null;
}
}
const linkedCommands = new LinkedCommands();
const filename = "test.txt";
linkedCommands.addCommand(() => saveFile(filename, "first"));
linkedCommands.addCommand(() =>
readFile(filename).then((contents) =>
saveFile(filename, contents + " second")
)
);
linkedCommands.addCommand(() =>
readFile(filename).then((contents) => saveFile(filename, contents + " third"))
);
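For context, this is roughly how a request handler could feed the queue; the Express route and body field here are hypothetical:
const express = require("express");
const app = express();
app.use(express.json());

app.post("/append", (req, res) => {
  // Each request just enqueues its append; writes happen strictly in queue order.
  linkedCommands.addCommand(
    () => readFile(filename).then((contents) => saveFile(filename, contents + " " + req.body.text)),
    "append text from POST /append"
  );
  res.sendStatus(202);
});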
Because these are async functions, they notify you that the work is completed in the then function.
That means you want to use a then chain (or an async function), like so:
readFile(filename).then((contents) => {
return saveFile(filename, contents + " second");
}).then(() => {
return readFile(filename)
}).then((contents) => {
saveFile(filename, contents + " third");
});
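The same flow written with an async function, using the question's readFile and saveFile helpers, would be:
async function appendInOrder(filename) {
  const afterFirst = await readFile(filename);
  await saveFile(filename, afterFirst + " second");
  const afterSecond = await readFile(filename);
  await saveFile(filename, afterSecond + " third");
}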
You can use a FIFO queue of functions that return promises for this.
const { readFile, writeFile } = require("fs/promises");
let queue = [];
let lock = false;
async function flush() {
lock = true;
let promise;
do {
promise = queue.shift();
if (promise) await promise();
} while (promise);
lock = false;
}
function createAppendPromise(filename, segment) {
return async function append() {
const contents = await readFile(filename, "utf-8");
await writeFile(
filename,
[contents.toString("utf-8"), segment].filter((s) => s).join(" ")
);
};
}
async function sequentialWrite(filename, segment) {
queue.push(createAppendPromise(filename, segment));
if (!lock) await flush();
}
async function start() {
const filename = "test.txt";
// Create all three promises right away
await Promise.all(
["first", "second", "third"].map((segment) =>
sequentialWrite(filename, segment)
)
);
}
start();
So how does this work? Well, we use a FIFO queue of promise-returning functions. As requests come in, we create them and add them to the queue.
Every time we add to the queue we attempt to flush it. If there's a lock in place, we know we're already flushing, so we just leave.
The flushing mechanism grabs the first function in the queue, removes it from the queue, invokes it, and awaits the promise it returns. It keeps doing this until the queue is empty. Because all of this happens asynchronously, the queue can continue to be populated while flushing.
Please keep in mind that if this file is shared on a server somewhere and you have multiple processes reading from this file (such as with horizontal scaling) you will lose data. You should instead use some kind of distributed mutex somewhere. A popular way of doing this is using Redis and redlock.
Hope this helps!
Edit: by the way, if you want to prove that this indeed works, you can add a completely random setTimeout to the createAppendPromise function.
function createAppendPromise(filename, segment) {
const randomTime = () =>
new Promise((resolve) => setTimeout(resolve, Math.random() * 1000));
return async function append() {
await randomTime();
const contents = await readFile(filename, "utf-8");
await writeFile(
filename,
[contents.toString("utf-8"), segment].filter((s) => s).join(" ")
);
};
}
Chaining is fine, even if you don't know in advance when or how many promises will be created. Just keep the end of the chain handy, and chain to it whenever you create a promise...
// this context must persist across posts
let appendChain = Promise.resolve();
const filename = "test.txt";
// assuming op's readFile and saveFile work...
const appendToFile = (filename, data) => {
  return readFile(filename).then(contents => {
    return saveFile(filename, contents + data);
  });
};
function processNewPost(data) {
  return appendChain = appendChain.then(() => {
    return appendToFile(filename, data);
  });
}
Here's a demonstration. The async functions are pretend read, write and append. The <p> tag is the simulated contents of a file. Press the button to add new data to the pretend file.
The button is for you to simulate the external event that triggers the need to append. The append function has a 1s delay, so, if you want, you can get in several button clicks before all the appends on the promise chain have completed.
function pretendReadFile() {
return new Promise(resolve => {
const theFile = document.getElementById('the-file');
resolve(theFile.innerText);
})
}
function pretendWriteFile(data) {
return new Promise(resolve => {
const theFile = document.getElementById('the-file');
theFile.innerText = data;
resolve();
})
}
function pretendDelay(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
function appendFile(data) {
return pretendDelay(1000).then(() => {
return pretendReadFile()
}).then(result => {
return pretendWriteFile(result + data);
});
}
document.getElementById("my-button").addEventListener("click", () => click());
let chain = Promise.resolve();
let count = 0
function click() {
chain = chain.then(() => appendFile(` ${count++}`));
}
<button id="my-button">Click Fast and At Random Intervals</button>
<h3>The contents of the pretend file:</h3>
<p id="the-file">empty</p>

ffmpeg - on end function probably not working correctly?

I've got this code to loop through some .mp4 files and create a screenshot of each:
files.forEach(file => {
console.log(file);
if(!fs.existsSync('img/' + file.replace('.mp4', '.png'))) {
ffmpeg({ source: 'movies/' + file})
.takeScreenshots({ timemarks: [ '50' ], size: '150x100' }, 'img/', function(err, filenames) {
})
.on('end', function() {
fs.rename('img/tn.png', 'img/' + file.replace('.mp4', '.png'), function(err) {if (err) console.log('Error: ' + err) });
sleep(1000);
})
}
});
Now I have the problem that .on('end') sometimes doesn't seem to work. Error:
ENOENT: no such file or directory, rename
I think it's because the process of saving tn.png is slower than the renaming...
I wouldn't mix callbacks, sync calls, sleep and loops together. You can use the promise version of fs to convert all your callback-style code to promise style, and then run the work either sequentially or in parallel.
Also, I would wrap the screenshot code in a promise.
Here is the code:
const fs = require("fs").promises;
function takeScreenshot(file) {
  return new Promise((resolve, reject) => {
    ffmpeg({"source": `movies/${file}`})
      .takeScreenshots({"timemarks": ["50"], "size": "150x100"}, "img/", function (err, filenames) {
      })
      .on("end", function () {
        resolve();
      })
      .on("error", err => {
        reject(err);
      });
  });
}

// execute one by one
async function sequential(files) {
  for (const file of files) {
    const target = `img/${file.replace(".mp4", ".png")}`;
    // fs.stat rejects when the file is missing, so treat a rejection as "does not exist"
    const alreadyExists = await fs.stat(target).then(() => true, () => false);
    if (!alreadyExists) {
      await takeScreenshot(file);
      await fs.rename("img/tn.png", target);
    }
  }
}

// execute in parallel
async function parallel(files) {
  return Promise.all(files.map(async file => {
    const target = `img/${file.replace(".mp4", ".png")}`;
    const alreadyExists = await fs.stat(target).then(() => true, () => false);
    if (!alreadyExists) {
      await takeScreenshot(file);
      await fs.rename("img/tn.png", target);
    }
  }));
}
Hope this helps.

Run line at node script end?

I've written a script that recursively and asynchronously modifies js files in a directory. It's made up of a search(dirname) function which searches a directory for js files, and a modify(filename) function which does the modifying.
let totalFilesSearched = 0;
const search = (dir) => {
fs.readdir(dir, (err, list) => {
if (err) return;
list.forEach((filename) => {
const filepath = path.join(dir, filename);
if (filename.endsWith('.js')) {
modify(filepath);
} else if (fs.lstatSync(filepath).isDirectory()) {
search(filepath);
}
})
});
}
const modify = (filename) => {
fs.readFile(filename, 'utf8', (err, data) => {
if (err) console.log(err);
// ... my modification code ...
totalFilesSearched++;
});
}
search(args[0])
console.log(`Total files searched: ${totalFilesSearched}`);
I want to print out the totalFilesSearched at the end of my script but because my code is asynchronous, it just prints Total files searched: 0 right away.
Does anyone know how I'd wait until the script is about to end to print this out? I'm having trouble because both my search() and modify() functions are asynchronous.
Use Promises instead, and then call console.log when everything is resolved. Use promisify to turn the callbacks into promises:
const { promisify } = require('util');
const readFile = promisify(fs.readFile);
const readDir = promisify(fs.readdir);

const search = (dir) => (
  readDir(dir).then((list) => (
    Promise.all(list.map((filename) => {
      const filepath = path.join(dir, filename);
      if (filename.endsWith('.js')) {
        return modify(filepath); // return the promise
      } else if (fs.lstatSync(filepath).isDirectory()) {
        return search(filepath); // recursively return the promise
      }
    }))
  ))
  .catch(err => void 0)
);

const modify = (filename) => (
  readFile(filename, 'utf8')
    .then((data) => {
      // other code
      totalFilesSearched++;
    }).catch(err => console.log(err))
);

search(args[0])
  .then(() => {
    console.log(`Total files searched: ${totalFilesSearched}`);
  });
Self answer:
Just use process.on('exit', callback_function_to_execute_at_end).
It's built into Node; your callback will get executed right before the process exits.
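For example:
process.on('exit', () => {
  // 'exit' handlers must be synchronous; the event loop has already drained here
  console.log(`Total files searched: ${totalFilesSearched}`);
});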

Node.js file system: Promise once read all files

I am using Node.js file system to build an array of file paths. I would like to know when all files have been read, so I could work further with my array.
Sequence of events:
Go into a folder
Get a path of each file
Put each path into an array
Let me know once you're done
Code:
'use strict';
const fs = require('fs');
function readDirectory(path) {
return new Promise((resolve, reject) => {
const files = [];
fs.readdir(path, (err, contents) => {
if (err) {
reject(err);
}
contents.forEach((file) => {
const pathname = `${ path }/${ file }`;
getFilesFromPath(pathname).then(() => {
console.log('pathname', pathname);
files.push(pathname);
});
resolve(files);
});
});
});
}
function getFilesFromPath(path) {
return new Promise((resolve, reject) => {
const stat = fs.statSync(path);
if (stat.isFile()) {
fs.readFile(path, 'utf8', (err, data) => {
if (err) {
reject(err);
} else {
resolve(data);
}
});
} else if (stat.isDirectory()) {
readDirectory(path);
}
});
}
getFilesFromPath('./dist');
Would be great to glue with:
Promise.all(files).then(() => {
// do stuff
})
Your suggestion pretty much works - did you try it? Here's a typical way of doing it:
getFilesFromPath( path ).then( files => {
const filePromises = files.map( readFile );
return Promise.all( filePromises );
}).then( fileContentsArray => {
//do stuff - the array will contain the contents of each file
});
You'll have to write the "readFile()" function yourself, but looks like you got that covered.
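For reference, a minimal readFile along those lines (a sketch using util.promisify; fs.promises.readFile would also work):
const fs = require('fs');
const { promisify } = require('util');

// Reads a single file as UTF-8 and resolves with its contents
const readFile = (path) => promisify(fs.readFile)(path, 'utf8');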

s3.getObject().createReadStream() : How to catch the error?

I am trying to write a program to get a zip file from s3, unzip it, then upload it to S3.
But I found two exceptions that I can not catch.
1. StreamContentLengthMismatch: Stream content length mismatch. Received 980323883 of 5770104761 bytes. This occurs irregularly.
2. NoSuchKey: The specified key does not exist. This happens when I input the wrong key.
When these two exceptions occur, this program crashes.
I'd like to catch and handle these two exceptions correctly.
I want to prevent a crash.
const unzipUpload = () => {
return new Promise((resolve, reject) => {
let rStream = s3.getObject({Bucket: 'bucket', Key: 'hoge/hoge.zip'})
.createReadStream()
.pipe(unzip.Parse())
.on('entry', function (entry) {
if(entry.path.match(/__MACOSX/) == null){
// pause
if(currentFileCount - uploadedFileCount > 10) rStream.pause()
currentFileCount += 1
var fileName = entry.path;
let up = entry.pipe(uploadFromStream(s3,fileName))
up.on('uploaded', e => {
uploadedFileCount += 1
console.log(currentFileCount, uploadedFileCount)
//resume
if(currentFileCount - uploadedFileCount <= 10) rStream.resume()
if(uploadedFileCount === allFileCount) resolve()
entry.autodrain()
}).on('error', e => {
reject()
})
}
}).on('error', e => {
console.log("unzip error")
reject()
}).on('finish', e => {
allFileCount = currentFileCount
})
rStream.on('error', e=> {
console.log(e)
reject(e)
})
})
}
function uploadFromStream(s3,fileName) {
var pass = new stream.PassThrough();
var params = {Bucket: "bucket", Key: "hoge/unzip/" + fileName, Body: pass};
let request = s3.upload(params, function(err, data) {
if(err) pass.emit('error')
if(!err) pass.emit('uploaded')
})
request.on('httpUploadProgress', progress => {
console.log(progress)
})
return pass
}
This is the library I use when unzipping.
https://github.com/mhr3/unzip-stream
Help me!!
If you'd like to catch the NoSuchKey error thrown by createReadStream you have 2 options:
Check if key exists before reading it.
Catch error from stream
First:
s3.headObject({ Bucket: 'bucket', Key: key })
  .promise()
  .then(() => {
    // This will not throw error anymore
    s3.getObject().createReadStream();
  })
  .catch(error => {
    if (error.statusCode === 404) {
      // Catching NoSuchKey
    }
  });
The only case you won't catch the error is if the file is deleted in the split second between the headObject response coming back and createReadStream running.
Second:
s3.getObject().createReadStream().on('error', error => {
// Catching NoSuchKey & StreamContentLengthMismatch
});
This is a more generic approach and will catch all other errors, like network problems.
You need to listen for the emitted error earlier. Your error handler is only looking for errors during the unzip part.
A simplified version of your script.
s3.getObject(params)
.createReadStream()
.on('error', (e) => {
// handle aws s3 error from createReadStream
})
.pipe(unzip)
.on('data', (data) => {
// retrieve data
})
.on('end', () => {
// stream has ended
})
.on('error', (e) => {
// handle error from unzip
});
This way, you do not need to make an additional call to AWS to find out if it exists.
You can listen to events (like error, data, finish) in the stream you are receiving back. Read more on events
function getObjectStream (filePath) {
return s3.getObject({
Bucket: bucket,
Key: filePath
}).createReadStream()
}
let readStream = getObjectStream('/path/to/file.zip')
readStream.on('error', function (error) {
// Handle your error here.
})
Tested for "No Key" error.
it('should not be able to get stream of unavailable object', function (done) {
let filePath = 'file_not_available.zip'
let readStream = s3.getObjectStream(filePath)
readStream.on('error', function (error) {
expect(error instanceof Error).to.equal(true)
expect(error.message).to.equal('The specified key does not exist.')
done()
})
})
Tested for success.
it('should be able to get stream of available object', function (done) {
let filePath = 'test.zip'
let receivedBytes = 0
let readStream = s3.getObjectStream(filePath)
readStream.on('error', function (error) {
expect(error).to.equal(undefined)
})
readStream.on('data', function (data) {
receivedBytes += data.length
})
readStream.on('finish', function () {
expect(receivedBytes).to.equal(3774)
done()
})
})
To prevent a crash, you can asynchronously check the object's head metadata first; headObject does not return the whole object body, so it takes less time. Try this one!
const AWS = require('aws-sdk');

const s3 = new AWS.S3({
  accessKeyId: 'your client id',
  secretAccessKey: 'your secret key'
});

const params = {
  Bucket: 'your bucket name',
  Key: 'path to object'
};

const isObjectExists = async () => {
  try {
    // awaiting the returned promise lets this wait until headObject completes
    await s3.headObject(params).promise();
    return true;
  } catch (err) {
    return false; // headObject threw an error, e.g. the key does not exist
  }
};

const yourFunction = async () => {
  if (await isObjectExists()) {
    s3.getObject(params).createReadStream(); // works smoothly
  }
};
