Error handling when opening a PDF in Node - javascript

I am attempting to open a 3rd-party-generated PDF that I know will occasionally fail to parse. I have tried both pdf2json and pdfreader and hit the same issue with each, so I'm not sure whether the problem is how I am wrapping the libraries in a promise.
When I receive a PDF, I would like to open it to ensure that it is a valid PDF before passing it on for processing.
I am doing it like so:
/**
 * Hands `filename` to pdfreader's `parseBuffer` and relays every event the
 * parser handler receives to `callback`.
 *
 * @param {*} filename - Passed straight to `parseBuffer` (the caller in this
 *   snippet supplies a buffer, despite the parameter name).
 * @param {Function} callback - Invoked as `callback(err)` on error,
 *   `callback()` when the handler receives no item, and `callback(null, item)`
 *   for text, page and positioned items.
 */
function printRawItems(filename, callback) {
  const reader = new pdfReader.PdfReader();
  reader.parseBuffer(filename, (err, item) => {
    if (err) {
      callback(err);
      return;
    }
    if (!item) {
      callback();
      return;
    }
    if (item.text) {
      callback(null, item);
      return;
    }
    if (item.page) {
      console.log("page =", item.page);
      callback(null, item);
      return;
    }
    if (item.x) {
      const columns = [item.x, item.y, item.oc, item.A, Math.floor(item.w), item.text];
      console.log(columns.join("\t"));
      callback(null, item);
      return;
    }
    // Any other item is only logged; `callback` is not invoked for it.
    console.warn(item);
  });
}
/**
 * Wraps `printRawItems` in a promise.
 *
 * The promise settles on the FIRST callback invocation: it rejects with the
 * error if one is given, fulfills with the item if one is given, and rejects
 * with no reason otherwise. A promise settles only once, so any later callback
 * invocations have no effect on the result.
 *
 * @param {*} buffer - Forwarded to `printRawItems`.
 * @returns {Promise<*>} Promise for the first item reported.
 */
function isValidPdf(buffer) {
  const firstEvent = new Promise((resolve, reject) => {
    printRawItems(buffer, (err, item) => {
      if (err) {
        reject(err);
        return;
      }
      if (item) {
        resolve(item);
        return;
      }
      reject();
    });
  });
  // Rethrowing keeps the rejection reason unchanged for the caller.
  return firstEvent.catch((err) => {
    throw err;
  });
}
The buffer being passed in to the "isValidPdf" is from an http request.
Now from what I can tell the callback I'm passing into the parseBuffer appears to get run twice. Once when the file is opened (and so item is "file"), and a second when it is parsed. After the first pass the promise in "isValidPdf" is resolved and the callback being passed in is never called, so it isn't rejected. The second run of the parseBuffer callback displays errors, which throws the exception, but by that time the promise is resolved and bad things happen.
Am I misunderstanding how the callbacks work, or are these libraries doing something wrong, and I should open a support ticket?

You're not misunderstanding how callbacks work. Just using them in the wrong way. I had a quick look at pdf2json and it seems you first create the parser, then do .parseBuffer() and wait for events to fire, e.g.:
/**
 * Parses `buffer` with a pdf2json `PDFParser` and reports the outcome through
 * a Node-style callback.
 *
 * @param {*} buffer - Passed to `parser.parseBuffer`.
 * @param {Function} cb - Called as `cb(errData.parserError)` on the
 *   `pdfParser_dataError` event and `cb(null, pdfData)` on `pdfParser_dataReady`.
 */
function printRawItems(buffer, cb) {
  const onError = (errData) => {
    cb(errData.parserError);
  };
  const onReady = (pdfData) => {
    cb(null, pdfData);
  };

  const parser = new PDFParser();
  parser.on('pdfParser_dataError', onError);
  parser.on('pdfParser_dataReady', onReady);
  parser.parseBuffer(buffer);
}

Related

Am I chaining Promises correctly or committing a sin?

I have not worked with Javascript in a long time, so now promises are a new concept to me. I have some operations requiring more than one asynchronous call but which I want to treat as a transaction where steps do not execute if the step before failed. Currently I chain promises by nesting and I want to return a promise to the caller.
After reading the chaining section of Mozilla's Using Promises guide, I'm not sure if what I'm doing is correct or equivalent to the "callback pyramid of doom".
Is there a cleaner way to do this (besides chaining with a guard check in each then)? Am I right in my belief that in Mozilla's example it will execute each chained then even when there is an error?
// Question code: resolves `key` from `cache` or chrome.storage.sync, then performs a
// second chrome.storage.sync.get and settles the outer promise with that result.
// NOTE(review): `keyBasedOnPreviousData` is not defined anywhere in this snippet.
myfunction(key) => {
return new Promise((outerResolve, outerReject) => {
// The value returned from this executor is ignored; only outerResolve/outerReject settle the outer promise.
return new Promise((resolve, reject) => {
let item = cache.get(key);
if (item) {
resolve(item);
} else {
//we didnt have the row cached, load it from store
chrome.storage.sync.get(key, function (result) {
chrome.runtime.lastError
? reject({ error: chrome.runtime.lastError.message })
: resolve(result);
});
}
// NOTE(review): this `.then` only has a fulfillment handler, so the `reject(...)` path above
// never reaches `outerReject`; the `.error` check below only ever sees fulfilled values.
}).then((resolve) => {
//Now the inner most item is resolved, we are working in the 'outer' shell
if (resolve.error) {
outerReject(resolve);
} else {
//No error, continue
new Promise((resolve, reject) => {
chrome.storage.sync.get(keyBasedOnPreviousData, function (result) {
chrome.runtime.lastError
? reject({ error: chrome.runtime.lastError.message })
: resolve(result);
});
// Same pattern as above: a rejection here is not forwarded to the outer promise.
}).then((resolve) => {
//finally return the result to the caller
if (resolve.error) {
outerReject(resolve);
} else {
outerResolve(resolve);
}
});
}
});
});
}
Subsequent then statements are not executed (until a catch) when an exception is thrown. Also, .then returns a Promise, so you don't need to create an additional, outer Promise.
Try this example:
// Demonstrates that a `then` handler placed after a throwing handler is skipped:
// the rejection travels down the chain until something handles it.
const failingStep = () => {
  throw new Error('Something failed');
};
const skippedStep = () => {
  console.log('then after the error');
  return 'result';
};

const p = new Promise((resolve) => {
  console.log('first promise, resolves');
  resolve();
})
  .then(failingStep)
  .then(skippedStep);

p.then(
  (res) => console.log('success: ' + res),
  (err) => console.log('error: ' + err),
);
You will not see "then after the error" in the console, because that happens after an exception is thrown. But if you comment the throw statement, you will get the result you expect in the Promise.
I am not sure I understand your example entirely, but I think it could be simplified like this:
/**
 * Loads the entry for `key` (from `cache` when present, otherwise from
 * chrome.storage.sync) and then loads a second entry that depends on it.
 *
 * Failures are reported with `reject`: a `throw` inside the storage callback
 * runs outside the promise executor and would not reject the promise.
 *
 * @param {string} key - Key of the first entry.
 * @returns {Promise<*>} Result of the second chrome.storage.sync lookup.
 */
function myfunction(key) {
  return new Promise((resolve, reject) => {
    const item = cache.get(key);
    if (item) {
      resolve(item);
      return;
    }
    // We didn't have the row cached, load it from the store.
    chrome.storage.sync.get(key, (result) => {
      if (chrome.runtime.lastError) {
        reject(new Error(chrome.runtime.lastError.message));
      } else {
        resolve(result);
      }
    });
  }).then((previousData) => {
    // keyBasedOnPreviousData is calculated based on previousData.
    // Returning the promise makes the chain wait for the second lookup.
    return new Promise((resolve, reject) => {
      chrome.storage.sync.get(keyBasedOnPreviousData, (result) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message));
        } else {
          resolve(result);
        }
      });
    });
  });
}
It's a bit of a mess. This is my attempt at rewriting. A good thing to try to avoid is new Promise().
/**
 * Promise wrapper around the callback-based `chrome.storage.sync.get`.
 *
 * @param {*} key - Passed straight to `chrome.storage.sync.get`.
 * @returns {Promise<*>} Fulfills with the callback's result; rejects with an
 *   `Error` built from `chrome.runtime.lastError.message` when that is set.
 */
function chromeStorageGet(key) {
  return new Promise((resolve, reject) => {
    chrome.storage.sync.get(key, (result) => {
      const { lastError } = chrome.runtime;
      if (lastError) {
        reject(new Error(lastError.message));
      } else {
        resolve(result);
      }
    });
  });
}
/**
 * Resolves the entry for `key` (cache first, chrome.storage.sync otherwise)
 * and then fetches the dependent entry.
 *
 * @param {*} key - Key of the first entry.
 * @returns {Promise<*>} Result of `chromeStorageGet(keyBasedOnPreviousData)`.
 */
function myfunction(key) {
  // Look the key up once: a second lookup could return a different value
  // (for example after an eviction) than the one that was just tested.
  const cached = cache.get(key);
  const item = cached ? Promise.resolve(cached) : chromeStorageGet(key);

  return item.then((cacheResult) => {
    // keyBasedOnPreviousData is expected to be derived from cacheResult.
    return chromeStorageGet(keyBasedOnPreviousData);
  });
}
Why avoid new Promise()?
The reason for this is that you want to do every step with then(). If any error happened in any of the promises, every promise in the chain will fail and any subsequent then() will not get executed until there is a catch() handler.
Lots of promise-based code requires no error handlers, because promise-based functions always return promises, and exceptions should flow all the way back to the caller until there is something useful to be done with error handling.
Note that the exceptions to these 2 rules are in my chromeStorageGet function. A few notes here:
new Promise can be a quick and easy way to convert callback code to promise code.
It's usually a good idea to just create a little conversion layer for this callback-based code. If you need chrome.storage.sync in other places, maybe create a little utility that promisifies all its functions.
If there is only 1 'flow', you can just use a series of then() to complete the process, but sometimes you need to conditionally do other things. Just splitting up these complicated operations in a number of different functions can really help here.
But this:
// Already-settled promise chosen by `condition`: fulfilled when truthy, rejected otherwise (no value or reason given).
const result = condition ? Promise.resolve() : Promise.reject();
Is almost always preferred to:
// Long-hand form: builds a new promise and settles it by hand depending on `condition`.
const result = new Promise((res, rej) => {
  if (condition) {
    res();
  } else {
    rej();
  }
});

File Loop Function

I am creating a function in node.js that loops through the files of a directory. It is supposed to add the file name to the returnData variable, then return the returnData. However, it keeps returning nothing. I've put a few console.log statements in the function to help me debug, but I can't figure out why it won't work.
/**
 * Question code: starts an asynchronous directory listing and returns the
 * accumulator string.
 *
 * The `fs.readdir` callback appends to `returnData` when it runs; the
 * function itself returns `returnData` as soon as the listing has been
 * requested.
 *
 * @param {string} directory - Directory passed to `fs.readdir`.
 * @returns {string} Value of `returnData` at the time of the return.
 */
function loopMusic(directory) {
  let returnData = "";

  fs.readdir(directory, (err, files) => {
    if (err) {
      console.log(err);
    }
    for (const file of files) {
      returnData += file;
      console.log(returnData);
    }
  });

  console.log(returnData);
  return returnData;
}
The first console.log statement is able to print the files, but the one right before the return just prints a new line.
You can make the function return a promise:
/**
 * Lists `directory` and fulfills with all file names concatenated.
 *
 * @param {string} directory - Directory passed to `fs.readdir`.
 * @returns {Promise<string>} Concatenated file names; rejects with the
 *   `fs.readdir` error.
 */
function loopMusic(directory) {
  return new Promise((resolve, reject) => {
    fs.readdir(directory, (err, files) => {
      if (err) {
        reject(err);
        return;
      }
      // Declared per call so that results never leak between invocations.
      let returnData = "";
      files.forEach((file) => {
        returnData += file;
        console.log(returnData);
      });
      resolve(returnData);
    });
  });
}
You would use in that way:
// Log the concatenated names on success; report the failure otherwise.
loopMusic('...')
  .then((data) => console.log(data))
  .catch((err) => console.error(err));
fs.readdir is asynchronous, meaning it does not return with the result when you call it. Instead the result is provided to the callback, which is called when the command finishes processing. It "calls-back" to the function you provided when it's done (hence the name).
If you wanted to do this synchronously you can do the following:
/**
 * Synchronous variant: lists `directory` with `fs.readdirSync` and returns
 * the file names concatenated into one string.
 *
 * @param {string} directory - Directory passed to `fs.readdirSync`.
 * @returns {string} All file names joined without a separator.
 */
function loopMusic(directory) {
  const files = fs.readdirSync(directory);

  let returnData = "";
  for (const file of files) {
    returnData += file;
    console.log(returnData);
  }

  console.log(files);
  return returnData;
}
That would return a string of mushed together file paths, as in your question.
However, blocking isn't usually a good idea and you should use the asynchronous version. I like to return a Promise in these situations. Here's an example that returns a promise that fulfills with that string. This technically isn't necessary, since the callback could just be used directly... but let's just pretend.
/**
 * Promise-returning directory listing.
 *
 * @param {string} directory - Directory passed to `fs.readdir`.
 * @returns {Promise<string>} Fulfills with the file names concatenated into
 *   one string; rejects with the `fs.readdir` error.
 */
function loopMusic(directory) {
  return new Promise((resolve, reject) => {
    fs.readdir(directory, (err, files) => {
      if (err) {
        reject(err);
        return;
      }
      const joined = files.reduce((names, file) => names + file, "");
      resolve(joined);
    });
  });
}
Usage:
// The second argument to then() is the rejection handler.
const musicPromise = loopMusic(dir);
musicPromise.then(
  (musicStr) => console.log(musicStr),
  (err) => console.log(err),
);
The asynchronous nature of this makes it a bit hard to follow since things don't happen in order, but when using Promises the then() is used to handle what happens on success (or failure) when it does complete later on.
Finally, if you're using ES2017+ (supported by recent versions of Node), you can use the async/await pattern. This builds on my promise example above:
/**
 * async/await wrapper around `loopMusic`.
 *
 * @param {string} directory - Forwarded to `loopMusic`.
 * @returns {Promise<*>} The value `loopMusic` fulfills with, or `null` (after
 *   logging the error) when it rejects.
 */
async function loopMusicAsync(directory) {
  let names;
  try {
    names = await loopMusic(directory); // promise fulfilled
  } catch (error) {
    console.log(error); // promise rejected
    return null;
  }
  return names;
}

Why does the promise resolve first?

Based on a code snippet found here on stackoverflow, I want to read all files in a directory and then proceed.
I've added a promise, but this somehow doesn't work.
My directory contains 2 files and the console log output is:
promise resolved
inside filenames
inside filenames
inside readFiles
inside readFiles
/**
 * Question code: lists `dirname` and reads every entry, handing each file's
 * name and content to `onFileContent`.
 *
 * The returned promise is never settled: nothing in the executor calls
 * resolve or reject. Errors from `fs.readdir` / `fs.readFile` are ignored and
 * `onError` is unused.
 *
 * @param {string} dirname - Directory path; file names are appended to it
 *   directly, without adding a separator.
 * @param {Function} onFileContent - Called as `onFileContent(filename, content)`.
 * @param {Function} [onError] - Accepted but never called.
 * @returns {Promise<never>} A promise that stays pending.
 */
function readFiles(dirname, onFileContent, onError) {
  return new Promise(() => {
    fs.readdir(dirname, (listErr, filenames) => {
      for (const filename of filenames) {
        console.log('inside filenames');
        fs.readFile(dirname + filename, 'utf-8', (readErr, content) => {
          onFileContent(filename, content);
        });
      }
    });
  });
}
// Question code: collects the name of every file reported by readFiles.
var data = [];
readFiles('datadir/', function(filename, content) {
console.log('inside readFiles');
data.push(filename);
}).then(
// NOTE(review): what is handed to then() here is a console.log call, not a function,
// so it is not deferred until the promise settles.
console.log('promise resolved');
//proceed handling the data-array
);
The promise does not "resolve first". The call to console.log executes before your first file is read.
You're never calling resolve on your promise, so a handler passed to then would never be called. However, you're not passing a handler at all: you're passing the result of console.log to then, and console.log returns undefined.
You can test this by correcting the problem:
// Collect each file name as it is reported, then continue once the promise fulfills.
readFiles('datadir/', (filename, content) => {
  console.log('inside readFiles');
  data.push(filename);
}).then(() => {
  // NOTE: then() is given a function here, not the result of calling console.log.
  console.log('promise resolved');
  //proceed handling the data-array
});
And you'll notice the message is never written to the console.
So that's what's wrong; now for how to fix it. It takes some thinking to wrap your head around fully async, promise-based code in Node.
I'm assuming that you want to wait for all the files to have the contents read before resolving your promise. This is a little tricky as you have 2 async calls (reading the list of files, and then individually reading their contents). It may well be easier to wrap the reading of the file into its own promise. Something like this:
/**
 * Promise wrapper around `fs.readFile` that keeps the path with the content.
 *
 * @param {string} filePath - File to read as UTF-8 text.
 * @returns {Promise<{path: string, content: string}>} Rejects with the
 *   `fs.readFile` error.
 */
function readFile(filePath) {
  return new Promise((resolve, reject) => {
    fs.readFile(filePath, "utf-8", (err, content) => {
      if (err) {
        reject(err);
      } else {
        resolve({ path: filePath, content });
      }
    });
  });
}
Do the same for readdir so as to make that also chainable:
/**
 * Promise wrapper around `fs.readdir`.
 *
 * @param {string} dir - Directory to list. It is prepended to every file name
 *   as-is, so it should end with a path separator (e.g. 'datadir/').
 * @returns {Promise<string[]>} Entries prefixed with `dir`; rejects with the
 *   `fs.readdir` error.
 */
function readDirectory(dir) {
  return new Promise((resolve, reject) => {
    // Must list `dir`, the parameter of this function.
    fs.readdir(dir, (err, filenames) => {
      if (err) {
        reject(err);
      } else {
        resolve(filenames.map((fn) => dir + fn));
      }
    });
  });
}
The reason to do this is that you can then chain on a Promise.all to wait for all the file contents too.
/**
 * Reads every file in `dirname`.
 *
 * @param {string} dirname - Directory forwarded to `readDirectory`.
 * @returns {Promise<Array>} Fulfills with one `readFile` result per entry once
 *   all reads have finished; rejects as soon as any step fails.
 */
function readFileContents(dirname) {
  return readDirectory(dirname).then((files) =>
    Promise.all(files.map((file) => readFile(file))),
  );
}
Usage:
// Print each file's path and content length; report any read failure
// instead of leaving the rejection unhandled.
readFileContents('datadir/')
  .then((files) => {
    for (const file of files) {
      console.log(file.path, file.content.length);
    }
  })
  .catch((err) => {
    console.error('Failed to read file contents:', err);
  });

Conditional async function

I have my function getting an email from Gmail. I want to run this function n times or until an email is found.
What is a proper way to do it? I tried: http://caolan.github.io/async/docs.html#retry but without success.
I was following this article how to read emails: https://developers.google.com/gmail/api/quickstart/nodejs
Assuming you have a routine called gmail, which returns a promise which succeeds (fulfills) if an email is found, and otherwise fails (rejects), then:
/**
 * Calls `gmail()` and retries after each rejection, up to `n` more times.
 *
 * @param {number} n - Number of retries allowed after the first attempt.
 * @returns {Promise<*>} Fulfills with the first successful `gmail()` result;
 *   rejects with the string "Too many tries!" once the retries are used up.
 */
function get(n) {
  return gmail().catch(() => {
    // `!(n > 0)` also stops on negative or non-numeric counts, which would
    // otherwise keep retrying for as long as gmail() fails.
    if (!(n > 0)) throw "Too many tries!";
    return get(n - 1);
  });
}
Usage:
// Print the mail body on success, or a fallback message when every attempt failed.
get(5).then(
  (mail) => {
    console.log(mail.body);
  },
  () => {
    console.log("No mail!");
  },
);
If for some reason you do not like the recursive style:
/**
 * Iterative retry: chains `catch(gmail)` onto an initially rejected promise,
 * so `gmail` only runs while the chain is still rejected.
 *
 * @param {number} n - Number of retries after the first attempt.
 * @returns {Promise<*>} Fulfills with the first successful `gmail` result;
 *   otherwise rejects with the reason of the last failed attempt.
 */
function get(n) {
  let chain = Promise.reject();
  let remaining = n;
  for (;;) {
    chain = chain.catch(gmail);
    // Stop once the counter, read before decrementing, is falsy.
    if (!(remaining--)) break;
  }
  return chain;
}
If gmail is callback style, then
/**
 * Callback-style retry: calls `gmail` and tries again after each failure,
 * up to `n` more times.
 *
 * @param {number} n - Number of retries allowed after the first attempt.
 * @param {Function} cb - Called as `cb(null, data)` on success or
 *   `cb("Too many tries!")` once the retries are used up.
 */
function get(n, cb) {
  gmail((err, data) => {
    if (!err) {
      cb(null, data);
      return;
    }
    // Retry while attempts remain; only give up once they are exhausted.
    if (n > 0) {
      get(n - 1, cb);
    } else {
      cb("Too many tries!");
    }
  });
}
Or better yet, promisify gmail, either using a library or
/**
 * Converts a Node-style callback function into one that returns a promise.
 *
 * The result is a function, so `promisify(gmail)` can stand in wherever a
 * promise-returning `gmail` is called.
 *
 * @param {Function} fn - Function whose last argument is a callback invoked
 *   as `callback(err, data)`.
 * @returns {Function} Wrapper that forwards its arguments to `fn` and returns
 *   a promise fulfilled with `data` or rejected with `err`.
 */
function promisify(fn) {
  return (...args) =>
    new Promise((resolve, reject) => {
      fn(...args, (err, data) => {
        if (err) {
          reject(err);
        } else {
          resolve(data);
        }
      });
    });
}
and then replace gmail in the first solution with promisify(gmail).

Promise code are read twice

I use the following code to read json file and return a promise
I've two questions
// Question code (fragment of a larger function): reads and parses every file matching folder/*.json.
// A glob failure is rethrown with a prefixed message.
return globAsync("folder/*.json").catch(function (err) {
throw new Error("Error read: " + err);
// presumably Bluebird's Promise#map: the callback runs once per matched file — confirm
}).map(function (file) {
return fs.readFileAsync(file, 'utf8')
.then(function (res) {
console.log("test");
return JSON.parse(res);
},
// Rejection handler for the file read.
function (err) {
throw new Error("Error :" + err);
// This last handler returns nothing, so the parsed JSON is not passed on from here.
}).then(function () {
console.log("test2");
});
});
I use the console log and I see that the console is printed twice
test
test
test2
test2
why its happening and how to avoid it ?
At the place where I've put console.log("test2"); I need to emit an event
signalling that the JSON parse has finished, while still returning the parsed JSON object to the caller. When I add the last then, it doesn't work (the returned object is undefined). Any idea how to do that correctly?
UPDATE I try like following which it doesn't work...
// Question code, reported by the asker as not working; kept as posted.
return globAsync("folder/*.json").catch(function (err) {
throw new Error("Error read: " + err);
}).map(function (file) {
return fs.readFileAsync(file, 'utf8')
.then(function (res) {
console.log("test");
// NOTE(review): the parsed value is not returned from this handler.
JSON.parse(res); //data parse
// NOTE(review): this .catch is attached to the function expression above, not to the promise.
}.catch(function (err) {
throw new Error("Error :" + err);
}
).then(function (data) {
obj.emit('ready');
return data;
}))
});
}
UPDATE2 I was able to solve it by simply add new return JSON.parse(res);
Now how should I solve the first issue which method called twice
Like #jaromandaX said, you probably got two *.json files. Try to print out the file name instead and it should become more obvious. In that case, .map is expected to be called twice, once for each file. Otherwise you aren't gonna be able to read and parse two files together.
If you want to get it to converge to a single point after all file reads and parses are complete, then you need to chain another .then after .map. eg.
// Sketch: the `...` line stands for the per-file read/parse body.
return globAsync("folder/*.json")
.map(function(file) {
...
})
// Chained after .map, so this handler is the single convergence point that follows all the per-file work.
.then(function() {
obj.emit('ready');
});
EDIT To answer your question in comment. There are a few things you should keep in mind.
Throwing Error inside the promise chain will get caught by the promise and send it into the rejection flow. You may still throw an error if you are interested in getting custom error type or printing stack trace in a desirable way. But most people prefer return Promise.reject(error).
Any rejection in .map will send the promise chain into rejection flow.
Inside the rejection chain, if you want to continue down the rejection flow. You need to return Promise.reject(error), otherwise if you don't return a reject object, you can bring it back into resolve flow.
If you want to handle each error individually, you can do something like this:
return globAsync("folder/*.json")
.catch(function(error) {
// TODO: Handle error
return Promise.reject(error);
})
.map(function(file) {
return fs.readFileAsync(file, 'utf8')
.catch(function(error) {
// TODO: Handle error
return Promise.reject(error);
})
.then(function(res) {
return JSON.parse(res);
});
})
.then(function() {
obj.emit('ready');
});
If you want to handle once for glob and once for file read, then you have to get a bit more creative.
return globAsync("folder/*.json")
.catch(function(error) {
// TODO: Handle error
return Promise.reject(error);
})
.then(function(files) {
return Promise.resolve(files)
.map(function(file) {
return fs.readFileAsync(file, 'utf8');
})
.catch(function(error) {
// TODO: Handle error once for any read error
return Promise.reject(error);
})
.map(function(res) {
// Judging by your original code, you are not handling
// parser error, so I wrote this code to behave equivalent
// to your original. Otherwise chain parse immediate after
// readFileAsync.
return JSON.parse(res);
});
})
.then(function() {
obj.emit('ready');
});

Categories

Resources