s3.getObject().createReadStream() : How to catch the error? - javascript

I am trying to write a program that gets a zip file from S3, unzips it, and uploads the contents back to S3.
But I have found two exceptions that I cannot catch:
1. StreamContentLengthMismatch: Stream content length mismatch. Received 980323883 of 5770104761 bytes. This occurs irregularly.
2. NoSuchKey: The specified key does not exist. This happens when I input the wrong key.
When either of these exceptions occurs, the program crashes.
I'd like to catch and handle both exceptions correctly and prevent the crash.
// Assumed setup, not shown in the question: an initialized AWS.S3 client
// named s3, the requires below, and the shared counters.
const stream = require('stream')
const unzip = require('unzip-stream')

let currentFileCount = 0
let uploadedFileCount = 0
let allFileCount = 0

const unzipUpload = () => {
  return new Promise((resolve, reject) => {
    // Note: pipe() returns its destination, so rStream here is the
    // unzip.Parse() stream, not the S3 read stream.
    let rStream = s3.getObject({Bucket: 'bucket', Key: 'hoge/hoge.zip'})
      .createReadStream()
      .pipe(unzip.Parse())
      .on('entry', function (entry) {
        if (entry.path.match(/__MACOSX/) == null) {
          // pause while more than 10 uploads are in flight
          if (currentFileCount - uploadedFileCount > 10) rStream.pause()
          currentFileCount += 1
          var fileName = entry.path;
          let up = entry.pipe(uploadFromStream(s3, fileName))
          up.on('uploaded', e => {
            uploadedFileCount += 1
            console.log(currentFileCount, uploadedFileCount)
            // resume once the backlog has drained
            if (currentFileCount - uploadedFileCount <= 10) rStream.resume()
            if (uploadedFileCount === allFileCount) resolve()
            entry.autodrain()
          }).on('error', e => {
            reject()
          })
        }
      }).on('error', e => {
        console.log("unzip error")
        reject()
      }).on('finish', e => {
        allFileCount = currentFileCount
      })
    rStream.on('error', e => {
      console.log(e)
      reject(e)
    })
  })
}

function uploadFromStream(s3, fileName) {
  var pass = new stream.PassThrough();
  var params = {Bucket: "bucket", Key: "hoge/unzip/" + fileName, Body: pass};
  let request = s3.upload(params, function(err, data) {
    if (err) pass.emit('error')
    if (!err) pass.emit('uploaded')
  })
  request.on('httpUploadProgress', progress => {
    console.log(progress)
  })
  return pass
}
This is the library I use for unzipping:
https://github.com/mhr3/unzip-stream
Help me!!

If you'd like to catch the NoSuchKey error thrown by createReadStream, you have 2 options:
1. Check that the key exists before reading it.
2. Catch the error from the stream.
First:
s3.headObject(params)
  .promise()
  .then(() => {
    // This will not throw NoSuchKey anymore
    s3.getObject(params).createReadStream();
  })
  .catch(error => {
    if (error.statusCode === 404) {
      // Catching NoSuchKey
    }
  });
The only case this won't catch is if the file is deleted in the split second between the headObject response and the createReadStream call.
Second:
s3.getObject().createReadStream().on('error', error => {
  // Catching NoSuchKey & StreamContentLengthMismatch
});
This is a more generic approach and will catch all other errors, like network problems.
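If the caller needs to await the download, one option is to fold the stream's events into a promise. A minimal sketch (the local destination path is just an illustration):
const fs = require('fs');

const downloadObject = (params) =>
  new Promise((resolve, reject) => {
    s3.getObject(params)
      .createReadStream()
      .on('error', reject) // NoSuchKey, StreamContentLengthMismatch, network errors
      .pipe(fs.createWriteStream('/tmp/hoge.zip'))
      .on('error', reject) // write-side failures
      .on('finish', resolve);
  });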

You need to listen for the emitted error earlier. Your error handler is only looking for errors during the unzip part.
A simplified version of your script.
s3.getObject(params)
  .createReadStream()
  .on('error', (e) => {
    // handle aws s3 error from createReadStream
  })
  .pipe(unzip)
  .on('data', (data) => {
    // retrieve data
  })
  .on('end', () => {
    // stream has ended
  })
  .on('error', (e) => {
    // handle error from unzip
  });
This way, you do not need to make an additional call to AWS to find out if the key exists.

You can listen to events (like error, data, finish) on the stream you receive back. Read more on stream events in the Node.js documentation.
function getObjectStream (filePath) {
  return s3.getObject({
    Bucket: bucket,
    Key: filePath
  }).createReadStream()
}
let readStream = getObjectStream('/path/to/file.zip')
readStream.on('error', function (error) {
  // Handle your error here.
})
Tested for "No Key" error.
it('should not be able to get stream of unavailable object', function (done) {
  let filePath = 'file_not_available.zip'
  let readStream = s3.getObjectStream(filePath)
  readStream.on('error', function (error) {
    expect(error instanceof Error).to.equal(true)
    expect(error.message).to.equal('The specified key does not exist.')
    done()
  })
})
Tested for success.
it('should be able to get stream of available object', function (done) {
  let filePath = 'test.zip'
  let receivedBytes = 0
  let readStream = s3.getObjectStream(filePath)
  readStream.on('error', function (error) {
    expect(error).to.equal(undefined)
  })
  readStream.on('data', function (data) {
    receivedBytes += data.length
  })
  readStream.on('finish', function () {
    expect(receivedBytes).to.equal(3774)
    done()
  })
})

To prevent a crash, request the object's head metadata first: headObject does not return the object body, so the call is fast. Only stream the object if that call succeeds. Try this one!
const AWS = require('aws-sdk');

const s3bucket = new AWS.S3({
  accessKeyId: 'client id',
  secretAccessKey: 'secret key'
});

const params = {
  Bucket: 'your bucket name',
  Key: 'path to object'
};

const isObjectExists = async () => {
  try {
    // Adding .promise() lets you await the call until it completes.
    await s3bucket.headObject(params).promise();
    return true;
  } catch (err) {
    return false; // headObject threw an error, e.g. the key does not exist.
  }
};

const yourFunction = async () => {
  if (await isObjectExists()) {
    s3bucket.getObject(params).createReadStream(); // works smoothly
  }
};

Related

How to stream x-ndjson content using Express and parse the streamed data?

I have a TS library using Node v19.1.0. The library has a function that observes streamed server events.
The server provides a /events route streaming 'application/x-ndjson' content, which might be an event/ping/... (sending a ping every x seconds is important to keep the connection alive).
My observe function parses the streamed data and inspects it. If it is a valid event it will pass it to a callback function. The caller also receives an abort function to abort the streaming on demand.
Whenever I run tests locally or via CI I get the following error
Warning: Test "observes events." generated asynchronous activity after the test ended. This activity created the error "AbortError: The operation was aborted." and would have caused the test to fail, but instead triggered an unhandledRejection event.
I tried to minimize the example code using plain JavaScript
const assert = require('assert/strict');
const express = require('express');
const { it } = require('node:test');

it('observes events.', async () => {
  const expectedEvent = { type: 'event', payload: { metadata: { type: 'entity-created', commandId: 'commandId' } } };
  const api = express();
  const server = api
    .use(express.json())
    .post('/events', (request, response) => {
      response.writeHead(200, {
        'content-type': 'application/x-ndjson',
      });
      const line = JSON.stringify(expectedEvent) + '\n';
      response.write(line);
    })
    .listen(3000);
  let stopObserving = () => {
    throw new Error('should never happen');
  };
  const actualEventPayload = await new Promise(async resolve => {
    stopObserving = await observeEvents(async newEvent => {
      resolve(newEvent);
    });
  });
  stopObserving();
  server.closeAllConnections();
  server.close();
  assert.deepEqual(actualEventPayload, expectedEvent.payload);
});
const observeEvents = async function (onReceivedFn) {
  const abortController = new AbortController();
  const response = await fetch('http://localhost:3000/events', {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    signal: abortController.signal,
  });
  if (!response.ok) {
    throw new Error('error handling goes here - request failed');
  }
  Promise.resolve().then(async () => {
    if (!response.body) {
      throw new Error('error handling goes here - missing response body');
    }
    for await (const item of parseStream(response.body, abortController)) {
      switch (item.type) {
        case 'event': {
          await onReceivedFn(item.payload);
          break;
        }
        case 'ping':
          // Intentionally left blank
          break;
        case 'error':
          throw new Error('error handling goes here - stream failed');
        default:
          throw new Error('error handling goes here - should never happen');
      }
    }
  });
  return () => { abortController.abort(); };
};
const parseLine = function () {
  return new TransformStream({
    transform(chunk, controller) {
      try {
        const data = JSON.parse(chunk);
        // ... check if this is a valid line ...
        controller.enqueue(data);
      } catch (error) {
        controller.error(error);
      }
    },
  });
};
const splitLines = function () {
  let buffer = '';
  return new TransformStream({
    transform(chunk, controller) {
      buffer += chunk;
      const lines = buffer.split('\n');
      for (let i = 0; i < lines.length - 1; i++) {
        controller.enqueue(lines[i]);
      }
      buffer = lines.at(-1) ?? '';
    },
    flush(controller) {
      if (buffer.length > 0) {
        controller.enqueue(buffer);
      }
    },
  });
};
const parseStream = async function* (stream, abortController) {
  let streamReader;
  try {
    const pipedStream = stream
      .pipeThrough(new TextDecoderStream())
      .pipeThrough(splitLines())
      .pipeThrough(parseLine());
    streamReader = pipedStream.getReader();
    while (true) {
      const item = await streamReader.read();
      if (item.done) {
        break;
      }
      yield item.value;
    }
  } finally {
    await streamReader?.cancel();
    abortController.abort();
  }
};
Unfortunately, when running node --test, the test does not finish. I have to cancel it manually.
The test breaks with these lines
const actualEventPayload = await new Promise(async resolve => {
  stopObserving = await observeEvents(async newEvent => {
    resolve(newEvent);
  });
});
and I think that's because the Promise never resolves. I thought the stream parsing might have a bug but if you remove all the stream parsing stuff and replace
Promise.resolve().then(async () => {
  /* ... */
});
with
Promise.resolve().then(async () => {
  await onReceivedFn({ metadata: { type: 'entity-created', commandId: 'commandId' } });
});
it doesn't work either. Does someone know what's wrong or missing?
The problem here has nothing to do with your promise not resolving since you never even get to that point.
The problem here is that observeEvents is not yet initialized when the test is being run and thus throws a ReferenceError: Cannot access 'observeEvents' before initialization error.
To see that for yourself you can add a simple const it = (name, fn) => fn(); stub to the top of the file and run it without the --test.
There are multiple ways to fix this and the simplest one is to move the test function to the bottom of the file.
If you don't want to do that you can also define the observeEvents function like this: async function observeEvents(onReceivedFn) {...}. This way it will be available immediately.
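As a quick illustration of why the order matters (a minimal sketch):
// const bindings sit in the temporal dead zone until their declaration runs,
// while function declarations are hoisted together with their bodies.
bar(); // works
function bar() {}

foo(); // ReferenceError: Cannot access 'foo' before initialization
const foo = () => {};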

Jest mock a function which instantiates a class and its constructor has an error callback, callback not invoked in error case

I need to satisfy SonarQube for the error case in the code below.
When I run the test code though, it will not fail.
index.js
const mod = require('mod')

function get_modclass(data) {
  let modclass = new mod.ModClass(data, function(err) {
    if (err) {
      console.log('ERROR: ' + err);
      throw err
    }
  })
  return modclass
}

module.exports = { get_modclass }
index.spec.js
describe('Test get_modclass', () => {
  jest.resetModules()
  jest.clearAllMocks()
  test('Should fail get_modclass', () => {
    const idx = require('./index.js');
    const mod = require('mod')
    jest.mock('mod', () => ({
      ModClass: jest.fn(() => ({
        constructor: (_data, cb) => cb('err'),
      }))
    }))
    try {
      let mc = idx.get_modclass("abc")
    } catch(e) {
      expect(e.message).toEqual('some message')
    }
  })
})
Rather than cb('err') I have tried sending cb(new Error('err message')) to the constructor, but it still will not fail. When I debug, it goes into the idx.get_modclass function, but from there it goes straight to return modclass instead of into the if of the callback.
Help appreciated.
Thanks!
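Worth noting: jest.mock with a module factory only takes effect when it sits at the top level of the test file, where babel-jest hoists it above the requires; called inside the test body, it runs after require('./index.js') has already loaded the real module. A sketch of the test with the mock in place first (the expected message mirrors the cb(new Error('err message')) attempt from the question):
// index.spec.js -- a sketch, assuming index.js exports get_modclass
jest.mock('mod', () => ({
  ModClass: jest.fn().mockImplementation((_data, cb) => {
    cb(new Error('err message')) // drive the error branch of the callback
  }),
}))

const idx = require('./index.js')

test('Should fail get_modclass', () => {
  // the callback rethrows synchronously, so toThrow can observe it
  expect(() => idx.get_modclass('abc')).toThrow('err message')
})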

s3.getObject not working from dev environment

This is my code, which works fine if I run it from my local machine using a local AWS account, but it doesn't work from my dev environment. The s3.getObject API doesn't get executed, and the code prints the next log, skipping the getObject call:
const unzipFromS3 = (key) => {
  return new Promise(async (resolve, reject) => {
    log.info("inside unzipfroms3");
    var zlib = require('zlib');
    // let fileName = _.replace(key, 'Root/', '');
    let options = {
      'Bucket': config.bucketName,
      'Key': "Root/" + key,
    }
    log.info("Key:", options);
    await s3.getObject(options)
      .on('error', error => { log.error(error) })
      .promise()
      .then((res) => {
        // Note: the SDK returns the payload on res.Body (capital B)
        yauzl.fromBuffer(res.Body, { lazyEntries: true }, function (err, zipfile) {
          log.info("Inside Yauzl")
          if (err) throw err;
          zipfile.readEntry();
          zipfile.on("entry", function (entry) {
            if (/\/$/.test(entry.fileName)) {
              zipfile.readEntry();
            } else {
              zipfile.openReadStream(entry, function (err, readStream) {
                if (err) throw err;
                // readStream.pipe(fs.createWriteStream(`result/${entry.fileName}`));
                readStream.pipe(uploadFromStream(s3));
                function uploadFromStream(s3) {
                  log.info("Inside uploadFromStream")
                  var pass = new Stream.PassThrough();
                  let options = {
                    'Bucket': config.bucketName,
                    'Key': entry.fileName,
                  }
                  var params = { ...options, Body: pass };
                  s3.upload(params, function (err, data) {
                    log.error(err, data);
                  });
                  return pass;
                }
                readStream.on("end", function () {
                  zipfile.readEntry();
                });
              });
            }
          });
        });
      });
  });
};
In order to use await, i.e. the promise-based version of s3.getObject(), you must add the promise() method to your method call, as explained in the Using JavaScript Promises chapter of the AWS SDK developer guide. There is also a Using async/await chapter that you can look into.
In your case, the code can be modified to something like:
await s3.getObject(options).promise()
  .then((res) => {
    yauzl.fromBuffer(/* more code */);
  });
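For completeness, a sketch of the same flow written entirely with async/await (the question's s3, config and yauzl are assumed to be in scope; note the payload is on res.Body, capital B):
const unzipFromS3 = async (key) => {
  const options = { Bucket: config.bucketName, Key: 'Root/' + key };
  const res = await s3.getObject(options).promise();
  yauzl.fromBuffer(res.Body, { lazyEntries: true }, (err, zipfile) => {
    if (err) throw err;
    zipfile.readEntry();
    // ... handle entries as in the question ...
  });
};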

Promise chaining causing increased execution time?

I am creating a simple Node.js function that converts a PDF to an image, crops the image, then merges the pieces back together with ImageMagick.
This is the complete code I am using:
var os = require('os');
var fs = require('fs');
var path = require('path');
var gs = require('node-gs');
var sharp = require('sharp');
var areaMap = require('./areaMap');
const { performance } = require('perf_hooks');
var spawn = require('child_process').spawnSync;
var pExcep = 'someException';
var gsPath = 'Ghostscript/gs26';
var src = path.join(os.tmpdir(), '/');
var Files = {
  file1: path.join(src, 'out1.jpeg'),
  file2: path.join(src, 'out2.jpeg'),
  OutImg: path.join(src, 'out.jpeg')
}
var crop = function (s, sFile) {
  return new Promise((res, rej) => {
    s = areaMap[s];
    sharp(Files.OutImg).extract(s)
      .toFile(sFile)
      .then(() => res())
      .catch((err) => rej(err));
  });
};
var getBaseCard = function (s) {
  if (RegExp('^([0-9]{8})$').test(s)) { return 'SOMETHINGHERE' } else { return 'inception'; }
  // This can be done on the client side.
}
var GetCardType = function (base, sInfo) {
  return new Promise((res, rej) => {
    if (base === 'SOMETHINGHERE') {
      if (sInfo.includes('SOMETHINGHERE2')) {
        if (sInfo.includes(pExcep)) {
          res('PA_S_')
        } else {
          res('PA_S2')
        }
      } else {
        res('PA_ST')
      }
    } else {
      res('SA_')
    }
  })
}
}
var PdfToText = function (file, pass) {
  return new Promise((res, rej) => {
    gs()
      .batch().safer().nopause().res(2)
      .option('-dDEVICEWIDTHPOINTS=20').option('-dDEVICEHEIGHTPOINTS=20').option('-dFIXEDMEDIA')
      .option('-sPDFPassword=' + pass)
      .device('txtwrite').output('-').input(file).executablePath(gsPath)
      .exec((err, stdout, stderr) => {
        if (!err) {
          res(stdout);
        } else {
          console.log(stdout);
          console.log(err);
          console.log(stderr);
        }
      })
  });
}
var getBaseImage = function (file, pass, quality) {
  return new Promise((res, rej) => {
    gs()
      .batch().nopause().safer().res(300 * quality)
      .option('-dTextAlphaBits=4').option('-dGraphicsAlphaBits=4')
      .option('-sPDFPassword=' + pass)
      .executablePath(gsPath).device('jpeg').output(Files.OutImg).input(file)
      .exec((err, stdout, stderr) => {
        if (!err) { res(); } else { rej(stdout) };
      })
  })
}
exports.processCard = function (file, password, quality) {
  return new Promise((resolve, reject) => {
    getBaseImage(file, password, quality) // Convert PDF to image
      .then(() => {
        PdfToText(file, password) // Extract text from the PDF
          .then((res) => {
            GetCardType(getBaseCard(password), res) // Finally get the PDF type
              .then((ct) => {
                // Crop the image here using sharp
                Promise.all([
                  crop(ct + 'A_' + quality, Files.file1),
                  crop(ct + 'B_' + quality, Files.file2)])
                  .then(() => {
                    // Merge the two images into one using ImageMagick convert
                    spawn('convert', [Files.file1, Files.file2, '+append', 'files/out1.jpg']);
                    fs.unlinkSync(Files.OutImg); // Unlink tmp files
                    fs.unlinkSync(Files.file1);
                    fs.unlinkSync(Files.file2);
                    resolve(); // Finally resolve
                  }).catch((err) => reject(err));
              }).catch((err) => reject(err))
          }).catch((err) => reject(err))
      }).catch((err) => reject(err))
  })
}
And these are the problems I am facing:
1. ImageMagick isn't creating the output file.
2. fs.unlinkSync throws ENOENT: no such file or directory, unlink '/tmp/out1.jpeg' on average every second execution.
3. Using the above code increases execution time. For example, getBaseImage should complete in 600ms but takes 1400ms with the above code.
About speed in general: the complete function (not just getBaseImage) should finish in 1100-1500ms(*) on average, but the time taken is ~2500ms.
*The 1100-1500ms time is achievable by using function chaining, but that is hard to read and maintain for me.
I am going to use this function in Firebase Functions.
How do I properly chain these functions?
EDIT
exports.processCard = function (file, password, quality) {
  return new Promise((resolve, reject) => {
    console.log(performance.now());
    getBaseImage(file, password, quality) // Convert PDF to image
      .then(() => { return PdfToText(file, password) })
      .then((res) => { return GetCardType(getBaseCard(password), res) })
      .then((ct) => {
        return Promise.all([
          crop(ct + 'A_' + quality, Files.file1),
          crop(ct + 'B_' + quality, Files.file2)])
      })
      .then(() => {
        spawn('convert', [Files.file1, Files.file2, '+append', 'files/out1.jpg']);
        fs.unlinkSync(Files.OutImg); // Unlink tmp files
        fs.unlinkSync(Files.file1);
        fs.unlinkSync(Files.file2);
        resolve();
      })
      .catch((err) => { console.log(err) });
  })
}
Using the above pattern didn't solve my issues either.
There's a good chance this weirdness is caused by using the file system. If I understand it correctly, the fs in Cloud Functions is in-memory, so when you write to it, read from it, and remove from it, you're using more and less OS memory. That can get weird if a function is called repeatedly and reuses the loaded module.
One thing to try, to keep the state clean for each invocation, is to put everything (including the requires) inside the scope of the handler. That way you instantiate everything freshly on each invocation.
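For instance, a sketch of that structure (module names taken from the question):
exports.processCard = function (file, password, quality) {
  // requires moved inside the handler so each invocation starts clean
  const fs = require('fs');
  const sharp = require('sharp');
  const gs = require('node-gs');
  /* ... the rest of the work, as in the question ... */
};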
Finally, you don't seem to be waiting for the spawned convert command to finish. Since your code uses spawnSync, switch to the asynchronous spawn and wait for it to complete:
const { spawn } = require('child_process');
const convertProc = spawn('convert', [Files.file1, Files.file2, '+append', 'files/out1.jpg']);
convertProc.on('close', function (code) {
  if (code !== 0) return reject(new Error('convert exited with code ' + code));
  fs.unlinkSync(Files.OutImg); // Unlink tmp files
  fs.unlinkSync(Files.file1);
  fs.unlinkSync(Files.file2);
  resolve();
});
convertProc.on('error', function (error) {
  reject(error);
});
Then you wait for it to complete before you resolve.
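Putting it together, a sketch of processCard using async/await (the question's helpers getBaseImage, PdfToText, GetCardType, crop and Files are assumed; the exit-code check is an added assumption about how failures should surface):
const { spawn } = require('child_process');

exports.processCard = async function (file, password, quality) {
  await getBaseImage(file, password, quality);   // PDF -> image
  const text = await PdfToText(file, password);  // extract text
  const ct = await GetCardType(getBaseCard(password), text);
  await Promise.all([
    crop(ct + 'A_' + quality, Files.file1),
    crop(ct + 'B_' + quality, Files.file2),
  ]);
  // Wait for ImageMagick to finish before unlinking the temp files
  await new Promise((resolve, reject) => {
    const proc = spawn('convert', [Files.file1, Files.file2, '+append', 'files/out1.jpg']);
    proc.on('close', code => code === 0 ? resolve() : reject(new Error('convert exited with code ' + code)));
    proc.on('error', reject);
  });
  [Files.OutImg, Files.file1, Files.file2].forEach(f => fs.unlinkSync(f));
};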

In Node, how do I request JSON from multiple URLs using promises?

Please forgive the fairly case-specific question, though I think the general end goal could be of use to other people.
Goal: Populate a MongoDB with data requested from multiple JSON API URLs.
Short question: So far I've had some success with request-promise, which uses Bluebird:
var rp = require('request-promise');
var options = {
  uri: 'http://www.bbc.co.uk/programmes/b006qsq5.json',
  headers: {
    'User-Agent': 'Request-Promise'
  },
  json: true
};
rp(options)
  .then(function (body) {
    // Mongoose allows us to query the db for an existing PID and upsert
    var query = {pid: body.programme.pid},
      update = {
        name: body.programme.title,
        pid: body.programme.pid,
        desc: body.programme.short_synopsis
      },
      options = { upsert: true, new: true };
    // Find the document
    Programme.findOneAndUpdate(query, update, options, function(err, result) {
      if (err) return res.send(500, { error: err });
      return res.send("successfully saved");
    });
  })
  .catch(function (err) {
    return res.send(err);
  })
But how do I loop over an array of URLs, without the program failing if any of the promises are rejected?
Something like this for example, using Bluebird, fails if any of the URLs errors.
const urls = ['http://google.be', 'http://google.uk']
Promise.map(urls, rp)
  .map((htmlOnePage, index) => {
    return htmlOnePage;
  })
  .then(console.log)
  .catch((e) => console.log('We encountered an error' + e));
As I want to write to the DB with successful requests, and ignore those that might not be responding right then, I need something that skips over rejected promises, which .all does not do.
Long question:
I've been reading up about promises all day and it's making my head hurt! But I've found some good resources, such as https://pouchdb.com/2015/05/18/we-have-a-problem-with-promises.html, which mentions the use of a Promise factory. Would this work for my case? I initially thought I should make each request, process the result and add it to the DB, then move on to the next request; but having seen .all I thought I should do all the requests, save the results in an array and loop over that with my DB saving function.
Should I even be using Promises for this? Maybe I should just make use of something like async.js and run my requests in series.
Thanks very much for any help or ideas.
But how do I loop over an array of URLs, without the program failing if any of the promises are rejected?
If you return a value from .catch other than a rejected promise, you will return a resolved promise.
So, your .then for each individual request could return an object like
{
  success: true,
  result: whateverTheResultIs
}
and your catch returns
{
  success: false,
  error: whateverTheErrorIs
}
Really you don't NEED the success property, it's a convenience though
So the code would be - assuming process(url) returns a Promise
Promise.map(urls, url =>
  process(url)
    .then(result => ({result, success: true}))
    .catch(error => ({error, success: false}))
)
  .then(results => {
    let succeeded = results.filter(result => result.success).map(result => result.result);
    let failed = results.filter(result => !result.success).map(result => result.error);
  });
Or, in ES5
Promise.map(urls, function (url) {
  return process(url).then(function (result) {
    return { result: result, success: true };
  }).catch(function (error) {
    return { error: error, success: false };
  });
}).then(function (results) {
  var succeeded = results.filter(function (result) {
    return result.success;
  }).map(function (result) {
    return result.result;
  });
  var failed = results.filter(function (result) {
    return !result.success;
  }).map(function (result) {
    return result.error;
  });
});
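As an aside, on Node 12.9+ the built-in Promise.allSettled implements this same reflect pattern, so the wrappers become unnecessary:
// Same idea without Bluebird; each result reports its own status.
Promise.allSettled(urls.map(url => process(url)))
  .then(results => {
    const succeeded = results.filter(r => r.status === 'fulfilled').map(r => r.value);
    const failed = results.filter(r => r.status === 'rejected').map(r => r.reason);
  });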
I don't know if this fits your case, but I think you can use a counter to check when all the promises have returned, regardless of whether each one was resolved or rejected:
var heroes = [
  'Superman',
  'Batman',
  'Spiderman',
  'Capitan America',
  'Ironman',
];
function getHero(hero) {
  return new Promise((resolve, reject) => {
    setTimeout(() => {
      return Math.round(Math.random()) ? resolve(hero + ' lives') : reject(hero + ' dead');
    }, Math.random() * 3000)
  })
}
function checkHeroes() {
  var checked = heroes.length;
  heroes.forEach((hero) => {
    getHero(hero)
      .then((res) => {
        checked--;
        console.log(res);
        if (!checked) done();
      })
      .catch((err) => {
        checked--;
        console.log(err);
        if (!checked) done();
      });
  })
}
function done() {
  console.log('All heroes checked');
}
checkHeroes();
I think your issue is less about the Bluebird API than structuring your promise chain.
const reducePropsToRequests = (sources) => Promise.resolve(Object
  .keys(sources)
  .reduce((acc, key) => {
    acc[key] = request(sources[key]);
    return acc;
  }, {}));
const hashToCollection = (hash) => Promise.resolve(Object
  .keys(hash)
  .reduce((acc, k) => {
    return [...acc, {source: k, data: hash[k]}];
  }, []));
const fetchFromSources = (sources) => Promise.props(sources);
const findSeveralAndUpdate = (results) => Promise
  .each(results, obj => {
    // you have access to the original {a: 'site.com'} here, so use that
    // 'a' prop to your advantage by abstracting your db config somewhere
    // outside your service
    return Programme.findOneAndUpdate(someConfig[obj.source], obj.data);
  });
const requestFromSeveralAndUpdate = (sources) => reducePropsToRequests(sources)
  .then(fetchFromSources)
  .then(hashToCollection)
  .then(findSeveralAndUpdate)
  .catch(/* some err handler */);
requestFromSeveralAndUpdate({ a: 'site.com', b: 'site.net' });
I'd just use request and wrap it in my own promise that only ever resolves. Pseudo example below:
var request = require('request')
var urls = ['http://sample1.com/json', 'http://sample2.com/json']
var processUrl = (url) => {
  return new Promise((resolve) => {
    var myRequest = {
      uri: url,
      method: 'GET',
      headers: {/* ... */}
    };
    // request's callback signature is (err, response, body);
    // resolve in both branches so one bad URL can't fail the whole batch
    request(myRequest, (err, res, body) => {
      resolve(err ? err : body);
    })
  })
}
