Nodejs and Express, use res.send() from a worker thread - javascript

In Nodejs, using Express as a server, I offload a heavy computation onto a worker thread.
From the main application, I call the worker thread like this:
// file: main.js
const { Worker } = require("worker_threads");
function runService(fileName, workerData) {
return new Promise((resolve, reject) => {
const worker = new Worker(fileName, { workerData });
worker.on("message", resolve);
worker.on("error", reject);
worker.on("exit", code => {
if (code !== 0)
reject(new Error(`Worker stopped with exit code ${code}`));
});
});
}
router.get("/some_url", async function(req, res) {
const result = await runService(
"./path/to/worker.js",
{ query: req.query, user: req.user } // using { req } causes an error
);
});
The worker looks like this:
// file: worker.js
const { workerData, parentPort } = require('worker_threads');
const { query, user } = workerData;
async function run() {
const result = await generateLotsOfData(query, user);
parentPort.postMessage(result);
// What I would like to do here (doesn't work): res.send(result);
}
The worker generates a huge amount of data and "postMessage" causes a server error.
Is there a way to send this data from the worker thread directly to the client, using res.send() or something alike?
(instead of using postMessage and then sending from the main thread)?

It seems not possible to send directly from the worker-tread to the client.
In the end, I used child-process (instead of worker-treads), and send the result back to the main tread, and then to the client.
// file: main.js
var child_process = require('child_process');
// Run a worker thread
function runService(fileName, workerData) {
return new Promise((resolve, reject) => {
const worker = child_process.fork(fileName);
worker.on('message', function(message) {
if (message.ready) {
// worker is ready to receive data
worker.send(workerData);
} else {
// worker finished it's calculations, send the data to the client
resolve(message);
}
});
worker.on("error", function(x) {
console.log('error', x);
resolve({status: 500});
});
worker.on("close", function(y) {
console.log('close', y);
});
worker.on("exit", function(code) {
if (code == 0) {
console.log('report ran successfully');
} else {
console.log('report FAILED');
resolve({status: 500});
}
});
});
}
And in the worker:
process.on('message', run);
process.send({ready: true});
async function run(workerData) {
try {
const result = doSomeCalculations();
process.send({
data: JSON.stringify(result)
}, null, {}, function() {process.exit(0)});
} catch(err) {
console.error(err);
process.exit(1); // exit the process with a failure
}
}
// Make sure that this child_process doesn't accidentally stay in memory.
// Kill after 10 minutes. (not sure this is necessary, just to be sure)
setTimeout(function(){
console.log('Timeout in child-process');
process.exit(1);
}, 600000);
This actually works quite well.

This sample of code works :
create worker
create event handler
sendMessage --> wrapperWorkerThreadBigComputing
sendBack message on our main thread
emit server response
Api routes :
app.get('/bigComputingWithWorker', (req, res) => {
const worker = new Worker(`${__dirname}/src/wrapperWorkerThreadBigComputing.js`);
res.set('Content-Type', 'text/html');
worker.once("message", count => {
res.status(200).send(`The final count :${count}`);
});
worker.once("error", err => {
console.error(err);
res.status(400).send(`error worker thread`);
});
worker.postMessage({coucou : 'john'});
});
wrapperWorkerThreadBigComputing
const { parentPort } = require('node:worker_threads');
console.log("* worker created");
parentPort.once('message', (message) => {
let big = bigComputing();
parentPort.postMessage(big);
})
function bigComputing() {
let i = 0;
console.log("* start big computing")
for (i = 0; i < 300000000; i++) {
}
return i;
}

Related

How to stream x-ndjson content using Express and parse the streamed data?

I have a TS library using Node v19.1.0. The library has a function that observes streamed server events.
The server provides a /events route streaming 'application/x-ndjson' content which might be an event/ping/... ( sending a ping every x seconds is important to keep the connection alive )
My observe function parses the streamed data and inspects it. If it is a valid event it will pass it to a callback function. The caller also receives an abort function to abort the streaming on demand.
Whenever I run tests locally or via CI I get the following error
Warning: Test "observes events." generated asynchronous activity after the test ended. This activity created the error "AbortError: The operation was aborted." and would have caused the test to fail, but instead triggered an unhandledRejection event.
I tried to minimize the example code using plain JavaScript
const assert = require('assert/strict');
const express = require('express');
const { it } = require('node:test');
it('observes events.', async () => {
const expectedEvent = { type: 'event', payload: { metadata: { type: 'entity-created', commandId: 'commandId' } } };
const api = express();
const server = api
.use(express.json())
.post('/events', (request, response) => {
response.writeHead(200, {
'content-type': 'application/x-ndjson',
});
const line = JSON.stringify(expectedEvent) + '\n';
response.write(line);
})
.listen(3000);
let stopObserving = () => {
throw new Error('should never happen');
};
const actualEventPayload = await new Promise(async resolve => {
stopObserving = await observeEvents(async newEvent => {
resolve(newEvent);
});
});
stopObserving();
server.closeAllConnections();
server.close();
assert.deepEqual(actualEventPayload, expectedEvent.payload);
});
const observeEvents = async function (onReceivedFn) {
const abortController = new AbortController();
const response = await fetch('http://localhost:3000/events', {
method: 'POST',
headers: { 'content-type': 'application/json' },
signal: abortController.signal,
});
if (!response.ok) {
throw new Error('error handling goes here - request failed');
}
Promise.resolve().then(async () => {
if (!response.body) {
throw new Error('error handling goes here - missing response body');
}
for await (const item of parseStream(response.body, abortController)) {
switch (item.type) {
case 'event': {
await onReceivedFn(item.payload);
break;
}
case 'ping':
// Intentionally left blank
break;
case 'error':
throw new Error('error handling goes here - stream failed');
default:
throw new Error('error handling goes here - should never happen');
}
}
});
return () => { abortController.abort(); };
};
const parseLine = function () {
return new TransformStream({
transform(chunk, controller) {
try {
const data = JSON.parse(chunk);
// ... check if this is a valid line...
controller.enqueue(data);
} catch (error) {
controller.error(error);
}
},
});
};
const splitLines = function () {
let buffer = '';
return new TransformStream({
transform(chunk, controller) {
buffer += chunk;
const lines = buffer.split('\n');
for (let i = 0; i < lines.length - 1; i++) {
controller.enqueue(lines[i]);
}
buffer = lines.at(-1) ?? '';
},
flush(controller) {
if (buffer.length > 0) {
controller.enqueue(buffer);
}
},
});
};
const parseStream = async function* (stream, abortController) {
let streamReader;
try {
const pipedStream = stream
.pipeThrough(new TextDecoderStream())
.pipeThrough(splitLines())
.pipeThrough(parseLine());
streamReader = pipedStream.getReader();
while (true) {
const item = await streamReader.read();
if (item.done) {
break;
}
yield item.value;
}
} finally {
await streamReader?.cancel();
abortController.abort();
}
};
Unfortunately, when running node --test, the test does not finish. I have to cancel it manually.
The test breaks with these lines
const actualEventPayload = await new Promise(async resolve => {
stopObserving = await observeEvents(async newEvent => {
resolve(newEvent);
});
});
and I think that's because the Promise never resolves. I thought the stream parsing might have a bug but if you remove all the stream parsing stuff and replace
Promise.resolve().then(async () => {
/* ... */
});
with
Promise.resolve().then(async () => {
await onReceivedFn({ metadata: { type: 'entity-created', commandId: 'commandId' }});
});
it doesn't work neither. Does someone know what's wrong or missing?
The problem here has nothing to do with your promise not resolving since you never even get to that point.
The problem here is that observeEvents is not yet initialized when the test is being run and thus throws a ReferenceError: Cannot access 'observeEvents' before initialization error.
To see that for yourself you can add a simple const it = (name, fn) => fn(); stub to the top of the file and run it without the --test.
There are multiple ways to fix this and the simplest one is to move the test function to the bottom of the file.
If you don't want to do that you can also define the observeEvents function like this: async function observeEvents(onReceivedFn) {...}. This way it will be available immediately.

How to download a file through FTP with a firebase function?

QUESTION:
Unfortunately, my function finishes execution within seconds instead of executing in full. This is apparently due to the fact that listeners are declared to stream the data: they are not promises I can await to my knowledge.
How may I have my firebase function execute in full ?
CODE:
exports.fifteenMinutesData = functions
.runWith(runtimeOpts)
.pubsub
.schedule('*/15 * * * *')
.timeZone('Etc/UTC')
.onRun((context) => {
return (async() => {
try {
const Client = require('ftp');
const c = new Client();
c.connect({
host: "...",
user: "..."
});
c.on('ready', async function () {
c.get('text.txt', async function (err, stream) {
if (err)
throw err;
var content = '';
stream.on('data', function (chunk) {
content += chunk.toString();
});
stream.on('end', function () {
(async () => {
try {
let data = content;
//etc....
}
catch(err) {
console.log("ERR: "+err);
}
})()
})
})
})
}
catch(err) {
console.log("ERR: "+err)
}
})()
});
You will need to promisify the result so the module is aware the value is asynchronous. Currently, your callback is not informing the module of anything so the execution exits immediately, you will want a format like
exports.fifteenMinutesData = functions
.runWith(runtimeOpts)
.pubsub
.schedule('*/15 * * * *')
.timeZone('Etc/UTC')
.onRun((context) => new Promise((resolve, reject) =>
{
});
Where you call resolve(data); for the success path and reject(err); for all error execution paths.

How to use promise in JS and Node JS

I have connected my sql server database to my nodejs application like this :
DAO.js
const sql = require('mssql')
class DAO {
constructor() {
this.sqlConfig = {user: 'connexionMartin', password: 'InfoMartin', server: '192.168.102.232\\SQLEXPRESS', database: 'PROFACE'}
}
async connect() {
try {
console.log("Connecting database.....");
let pool = await sql.connect(this.sqlConfig);
if (pool)
console.log("Database connected");
} catch (err) {
console.log(err);
}
}
async getDataLastHour() {
try {
let result = await sql.query('SELECT * FROM PROFACE.dbo.SuiviProduction WHERE Time_Stamp >= DATEADD(DAY,DATEDIFF(DAY,0,GETDATE()),0) AND DATEPART(HOUR,Time_Stamp) = DATEPART(HOUR,GETDATE())-1');
console.dir(result);
} catch (err) {
console.log(err);
}
}
}
app.js
const Server = require('./server/Server');
const DAO = require('./server/DAO');
const express = require('express');
const server = new Server();
const dao = new DAO();
server.start();
dao.connect();
Now I want to request my database using dao.getDataLastHour() in app.js, but the function is executed before application is connected to database. I have tried to fix this problem by using promise, like this :
const promise = dao.connect();
promise.then(dao.getDataLastHour());
But it doesn't seem to work.
Perhaps I don't use Promise correctly.
To use a then in you function,it need to return any result and turn it in a promise, or not use await, might it will work!
async connect() {
console.log("Connecting database.....");
sql.connect(this.sqlConfig).then(pool => {
if (pool)
console.log("Database connected");
}).catch (err{
console.log(err);
});
}
sorry for the bad identification!
Your method : dao.connect() does not return a promise.
So the first thing would be to change that method to return a promise which you can then listen to then decide whether to run a query or not. :
...
connect() {
return new Promise((resolve, reject) => {
try {
console.log("Connecting database.....");
let pool = sql.connect(this.sqlConfig);
if (pool)
console.log("Database connected");
resolve("Success");
} catch (err) {
console.log(err);
reject(err);
}
});
}
...
And then call your connect method like this:
dao.connect().then(
success => { ... }, // you can continue querying
error => { ... } // something went wrong
);
Also try to read a bit about how to use promises here : https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
Differences between promise and async: What is the difference between JavaScript promises and async await?
You can try something like this:
connect() {
return new Promise((resolve, reject) => {
let pool = sql.connect(this.sqlConfig, function(err) {
if(!err) {
resolve();
}
else {
reject();
}
});
});
}
dao.connect().then(<<call function here>>)

I called then() on a TypeScript promise but it is still pending. Why is this? How can I get it to resolve?

Here is the index.ts script I am running (based on something I found on reddit):
const path = require("path");
const sql = require("mssql");
const config = require(path.resolve("./config.json"));
let db1;
const connect = () => {
return new Promise((resolve, reject) => {
db1 = new sql.ConnectionPool(config.db, err => {
if (err) {
console.error("Connection failed.", err);
reject(err);
} else {
console.log("Database pool #1 connected.");
resolve();
}
});
});
};
const selectProjects = async (name) => {
const query = `
select * from [Time].ProjectData where [Name] like concat('%', concat(#name, '%'))`;
const request = new sql.Request(db1);
const result = await request
.input("name", name)
.query(query);
return result.recordset;
};
module.exports = {
connect,
selectProjects
};
connect().then(function() {
console.log(selectProjects('General'));
}).catch(function(err) {
console.log(err);
});
When I run the script using node index (after compiling it of course), I get this in the console:
Database pool #1 connected.
Promise { <pending> }
And then the script hangs.
Apparently the await keyword creates an implicit promise; I had to change the last function call to:
connect().then(function() {
selectProjects('General').then(function(data) {
console.log(data);
});
}).catch(function(err) {
console.log(err);
});

s3.getObject().createReadStream() : How to catch the error?

I am trying to write a program to get a zip file from s3, unzip it, then upload it to S3.
But I found two exceptions that I can not catch.
1. StreamContentLengthMismatch: Stream content length mismatch. Received 980323883 of 5770104761 bytes. This occurs irregularly.
2. NoSuchKey: The specified key does not exist. This happens when I input the wrong key.
When these two exceptions occur, this program crashes.
I'd like to catch and handle these two exceptions correctly.
I want to prevent a crash.
const unzipUpload = () => {
return new Promise((resolve, reject) => {
let rStream = s3.getObject({Bucket: 'bucket', Key: 'hoge/hoge.zip'})
.createReadStream()
.pipe(unzip.Parse())
.on('entry', function (entry) {
if(entry.path.match(/__MACOSX/) == null){
// pause
if(currentFileCount - uploadedFileCount > 10) rStream.pause()
currentFileCount += 1
var fileName = entry.path;
let up = entry.pipe(uploadFromStream(s3,fileName))
up.on('uploaded', e => {
uploadedFileCount += 1
console.log(currentFileCount, uploadedFileCount)
//resume
if(currentFileCount - uploadedFileCount <= 10) rStream.resume()
if(uploadedFileCount === allFileCount) resolve()
entry.autodrain()
}).on('error', e => {
reject()
})
}
}).on('error', e => {
console.log("unzip error")
reject()
}).on('finish', e => {
allFileCount = currentFileCount
})
rStream.on('error', e=> {
console.log(e)
reject(e)
})
})
}
function uploadFromStream(s3,fileName) {
var pass = new stream.PassThrough();
var params = {Bucket: "bucket", Key: "hoge/unzip/" + fileName, Body: pass};
let request = s3.upload(params, function(err, data) {
if(err) pass.emit('error')
if(!err) pass.emit('uploaded')
})
request.on('httpUploadProgress', progress => {
console.log(progress)
})
return pass
}
This is the library I use when unzipping.
https://github.com/mhr3/unzip-stream
Help me!!
If you'd like to catch the NoSuchKey error thrown by createReadStream you have 2 options:
Check if key exists before reading it.
Catch error from stream
First:
s3.getObjectMetadata(key)
.promise()
.then(() => {
// This will not throw error anymore
s3.getObject().createReadStream();
})
.catch(error => {
if (error.statusCode === 404) {
// Catching NoSuchKey
}
});
The only case when you won't catch error if file was deleted in a split second, between parsing response from getObjectMetadata and running createReadStream
Second:
s3.getObject().createReadStream().on('error', error => {
// Catching NoSuchKey & StreamContentLengthMismatch
});
This is a more generic approach and will catch all other errors, like network problems.
You need to listen for the emitted error earlier. Your error handler is only looking for errors during the unzip part.
A simplified version of your script.
s3.getObject(params)
.createReadStream()
.on('error', (e) => {
// handle aws s3 error from createReadStream
})
.pipe(unzip)
.on('data', (data) => {
// retrieve data
})
.on('end', () => {
// stream has ended
})
.on('error', (e) => {
// handle error from unzip
});
This way, you do not need to make an additional call to AWS to find out if out if it exists.
You can listen to events (like error, data, finish) in the stream you are receiving back. Read more on events
function getObjectStream (filePath) {
return s3.getObject({
Bucket: bucket,
Key: filePath
}).createReadStream()
}
let readStream = getObjectStream('/path/to/file.zip')
readStream.on('error', function (error) {
// Handle your error here.
})
Tested for "No Key" error.
it('should not be able to get stream of unavailable object', function (done) {
let filePath = 'file_not_available.zip'
let readStream = s3.getObjectStream(filePath)
readStream.on('error', function (error) {
expect(error instanceof Error).to.equal(true)
expect(error.message).to.equal('The specified key does not exist.')
done()
})
})
Tested for success.
it('should be able to get stream of available object', function (done) {
let filePath = 'test.zip'
let receivedBytes = 0
let readStream = s3.getObjectStream(filePath)
readStream.on('error', function (error) {
expect(error).to.equal(undefined)
})
readStream.on('data', function (data) {
receivedBytes += data.length
})
readStream.on('finish', function () {
expect(receivedBytes).to.equal(3774)
done()
})
})
To prevent a crash, you need to asynchronously listen to the object's head metadata, where it does not return the whole object, which will take less time. Try this one!
isObjectErrorExists = async functions () => {
try {
const s3bucket = {
secret key: '',
client id: ''
}
const params = {
Bucket: 'your bucket name',
Key: 'path to object'
};
await s3bucket.headObject(params).promise(); // adding promise will let you add await to listen to process untill it completes.
return true;
} catch (err) {
return false; // headObject threw error.
}
throw new Error(err.message);
}
}
public yourFunction = async() => {
if (await this.isObjectErrorExists()) {
s3Bucket.getObject().createReadStream(); // works smoothly
}
}

Categories

Resources