I have a lambda function written in javascript (node) that takes a large payload, breaks it down, hits multiple APIs, builds the response, and returns.
The gist of the code looks like so:
'use strict';
let API_KEY = '';
exports.createObjects = (event, context, callback) => {
let promises = createObject(event);
Promise.all(promises).then(results => {
let res = [];
results.forEach((r) => {
res.push(r.obj);
});
callback(null, {"message" : res});
})
.catch((e) =>{
console.log('Error Creating obj ', e);
});
};
const createObject = (req) => {
let objects = req.body.obj;
let promiseReturn = [];
if(req.headers.Authorization === undefined)
return promiseReturn;
API_KEY = req.headers.Authorization;
objects.forEach((s) => {
let newPromise = create(s);
promiseReturn.push(newPromise);
});
return promiseReturn;
};
const create = (object) => {
return new Promise((resolve, reject) => {
let chain = createObject(s);
chain
.then((res)=>{
return updateObject(s);
})
.then((res)=>{
return createAttribute1(object.x, object.y);
})
.then((res)=>{
return createAttribute2(object.z, object.q);
})
.then((res)=>{
return createAttribute3(object.s, object.t);
})
.then((res)=>{
return createAttribute4(object.v, object.c);
})
.then((finalResponse)=>{
resolve(finalResponse);
})
.catch((e)=>{
return rollBack(object);
})
});
};
I'm having problems dealing with concurrent request. For example if I hit the lambda function with a 10 mb payload it processes just fine, but If 3 users hit the lambda function with 10mb payloads at the same time i see the error "Endpoint request timed out".
What is the best way to "parallelize" this and move it out of the web server with lambda? I'm not sure how to approach this... I have increased the response time from 3 seconds to the maximum of 30 but this only buys a little time and doesn't fix the problem.
Related
I have to use the code below several times through out my project. I have therefore decided to abstract it into a utils file. Therefore when ever I need it all I have to do is call consumer("topic"). How ever for some reason it doesn't allow me to re-use the function and it just keeps the info from the previous call.
var async = require('async')
var kafka = require('kafka-node'),
Consumer = kafka.Consumer,
client = new kafka.KafkaClient({ kafkaHost: 'kafka:9092' }),
offset = new kafka.Offset(client)
let messages = []
let latestOffset
let consumerInstance
let consumer = async (topic) => {
messages = []
latestOffset = 0
consumerInstance = ""
offset.fetch([{ topic: topic, partition: 0, time: -1 }], (errd, data) => {
if (data) {
latestOffset = data[topic][0][0];
}
});
consumerInstance = new Consumer(client, [{ topic: topic, partition: 0, fromOffset: latestOffset }], { autoCommit: true });
let KafkaConsumer = new Promise(function (resolve, reject) {
consumerInstance.on('message', async (message) => {
consumerInstance.pause()
await q.push(message, function (err) {
if (err) { reject(err); return }
else {
consumerInstance.resume()
}
})
if (message.offset == (message.highWaterOffset - 1)) {
resolve()
}
})
consumerInstance.on('error', async (err) => {
reject(err)
})
})
await KafkaConsumer
await q.drain()
return messages
}
var q = async.queue(async (message, cb) => {
await processTestCase(message, cb)
}, 1)
q.drain(async function () {
// Pause the consumer
consumerInstance.close(true, function (err, message) {
});
});
processTestCase = async (message, cb) => {
messages.push(JSON.parse(message.value))
cb()
}
module.exports = { consumer }
The main issue is that unless I use a let in front of the consumerInstance I cant seem to reuse the function, how ever when I do that, I cant close the consumer in the q.drain
Any clues or pointers are very welcomed. TIA!
You may be seeing repeat messages because your messages array is in the same scope as your export. That means if you call consumer('topic') concurrently at any point, those calls would be sharing the same reference to messages. That means setting messages = [] from one consumer call would affect the other messages of the other consumer calls as well. You do not want that.
Here is my refactor. I swapped out async for a Promise library I created. I tried to do everything you wanted. One thing I am unsure about is message.highWaterOffset. There is more work to be done if you run across a memory leak there.
var { pipe, get } = require('rubico')
var kafka = require('kafka-node'),
Consumer = kafka.Consumer
// safely accesses properties with get
const safeParseTopic = (topic, data) => get([topic, 0, 0])(data)
// changed this to return a Promise
const fetchLatestOffset = client => topic => new Promise((resolve, reject) => {
new kafka.Offset(client).fetch(
[{ topic: topic, partition: 0, time: -1 }],
(err, data) => err ? reject(err) : resolve(safeParseTopic(topic, data)),
)
})
// it's recommend to create new client for different consumers
// https://www.npmjs.com/package/kafka-node#consumer
const makeConsumerInstance = client => ({ topic, offset }) => new Consumer(
client,
[{ topic, offset, partition: 0 }],
{ autoCommit: true },
)
// this is the function version of KafkaConsumer from your example
// consume(consumerInstance) == KafkaConsumer
const consume = consumerInstance => new Promise((resolve, reject) => {
const messages = []
consumerInstance.on('message', message => {
messages.push(message)
// you don't need to pause and resume
// you are gauranteed one message at a time in this block
if (message.offset == (message.highWaterOffset - 1)) {
resolve(messages)
// you have to handle cleanup of consumerInstance after the resolve
// If there's a memory leak, I would look here
}
})
// handles a termination signal from the producer
consumerInstance.on('end', () => resolve(messages))
consumerInstance.on('error', reject)
})
// topic -> messages
// pipe chains async functions together
const consumer = topic => {
const client = new kafka.KafkaClient({ kafkaHost: 'kafka:9092' })
return pipe([
fetchLatestOffset(client), // topic -> latestOffset
latestOffset => ({
topic: topic,
offset: latestOffset,
}), // latestOffset -> ({ topic, offset })
makeConsumerInstance(client), // ({ topic, offset }) -> consumerInstance
consume, // consumerInstance -> messages
])(topic)
}
module.exports = { consumer }
I have a 2 identical Firebase functions that batch write data to Firestore. One is wrapped in a scheduled/onRun trigger, and the other is a HTTP onRequest trigger.
Both functions work fine and throw no errors.
They have the same amount of memory and timeout as well.
When invoking the http trigger, the function runs through and completes in about 30 seconds.
When invoking the scheduled onRun trigger, the function takes 5+ minutes to complete.
Is there something different about the runtimes that is not documented or something?
Edit: It works now - I made processMentions await totalMentions and return null.
processMentions does not have to return a promise, only a value because the actual scheduledPull/onRun function is returning the processMentions async function, which resolves the promise by returning a value.
Cheers for the help #dougstevenson
Triggers:
/**
* Get manual mentions
*/
exports.get = functions.https.onRequest((req, res) => {
const topic = 'topic'
const query = 'queryString'
processMentions(res, query, topic)
})
/**
* Get schedule mentions
*/
exports.scheduledPull = functions.pubsub.schedule('every day 1:00').onRun((context) => {
const topic = 'topic'
const query = 'queryString'
return processMentions('sched', query, topic)
})
Logic:
const functions = require('firebase-functions')
const admin = require('firebase-admin')
admin.initializeApp()
const db = admin.firestore()
const axios = require('axios')
const moment = require('moment')
// Globals
const auth = 'token'
const url = 'https://apiurl.com/'
async function totalMentions(nextPage, start, end, query) {
try {
let config = {
headers: {
Authorization: auth,
Accept: 'text/html',
}
}
const response = await axios.get(url, config)
const total = response.data.results.total
const loops = Math.ceil(total / 500)
return loops
} catch (error) {
console.log('error 1', error)
}
}
async function allMentions(nextPage, start, end, query) {
try {
let config = {
headers: {
Authorization: auth,
Accept: 'text/html',
},
}
const response = await axios.get(url, config)
return response
} catch (error) {
console.log('error 2', error)
}
}
async function saveData(response, end, topic) {
try {
let data = await response.data.results.clips
let batch = db.batch()
data.forEach((c) => {
delete c.localTime
let reff = db.collection(collection).doc(date).collection(collection).doc(c.id.toString())
batch.set(reff, c)
})
let batches = await batch.commit()
return batches
} catch (error) {
console.log('error3 ', error)
}
}
async function processMentions(res, query, topic) {
try {
totalMentions(1, start, end, query)
.then(async (loops) => {
let endbatch = 0
for (let i = 1; i <= loops; i++) {
await allMentions(i, start, end, query)
.then(async (response) => {
await saveData(response, end, topic)
return ++endbatch
})
.catch((err) => {
console.log('error 4 ' + err)
})
if (endbatch === loops) {
if (res !== 'sched') {
console.log('http trigger finished')
return res.status(200).end()
} else {
return console.log('schedule finished')
}
}
}
})
.catch((err) => {
console.log('error5 ' + err)
})
} catch (error) {
console.log('error6 ' + error)
}
}
For the pubsub trigger to work correctly, processMentions needs to return a promise that resovles when all of the async work is complete. Right now, it's returning nothing, which (since it's declared async) translates into a promise that's resolved immediately with no value. Calling then/catch on a promise isn't doing what you expect - you need to return a promise chain from your async work.
I'm not sure why you have it declared async, without also using await inside of it to manage the promises much more easily.
I have to do a functionality to test if 3 APIs are running.
Thus, the user will click on the Test APIs button and it will return the status of each API (status: 200, 500, 404 etc). If an API return an error, I should show the error stack.
Screen example:
API Status Detail
url1.com 200 -
url2.com 200 -
url3.com 500 internal server error
My question is, how can I call the 3 requests in parallel and return the async result, I mean how can I update the screen of API request status without having to wait for the result of all requests
I was basing on that How do I call three requests in order?, but it returns the result synchronously.
*******EDIT*****
Thats my current code
app.get('/testDependencies', function (req, res, next) {
let objTestsResul = {}
var urls = ['url1', 'url2', 'url3'];
let index = 0
while(urls.length > 0) {
let url = urls.shift();
objTestsResult[index++] = testURL(url)
}
res.send(objTestsResult)
});
This function is the same for each URL:
function testURL(URL){
fetch(URL, {
method: 'GET'
})
.then(res => {
res.json()
})
.then(json => {
console.log(json)
return json
})
.catch(error => {
return error
})
}
Promises (mdn) seem to be what you're looking for. They're essentially a more readable version of callbacks, which allow you to execute code when something else occurs rather than having to wait for that trigger to occur before resuming execution.
let endpoint1 = () => new Promise(resolve => setTimeout(() => resolve('200'), 1000));
let endpoint2 = () => new Promise(resolve => setTimeout(() => resolve('201'), 2000));
let endpoint3 = () => new Promise(resolve => setTimeout(() => resolve('500'), 1500));
document.getElementById('test').addEventListener('click', () => {
document.getElementById('status').textContent = 'test running...';
Promise.all([
endpoint1().then(a => document.getElementById('result1').textContent = a),
endpoint2().then(a => document.getElementById('result2').textContent = a),
endpoint3().then(a => document.getElementById('result3').textContent = a),
]).then(() => document.getElementById('status').textContent = 'test complete');
});
<button id="test">test</button>
<div>status: <span id="status">not running</span></div>
<div>endpoint 1: <span id="result1"></span></div>
<div>endpoint 2: <span id="result2"></span></div>
<div>endpoint 3: <span id="result3"></span></div>
This is actually pretty straightforward if you can use Bluebird:
const { Promise } = require('bluebird');
app.get('/testDependencies', function (req, res, next) {
Promise.map(['url1', 'url2', 'url3'], url => testURL(url)).then(results => {
res.send(results);
});
});
You'll just need to ensure your promise function actually returns a promise:
function testURL(URL) {
let start_time = new Date().getTime();
return fetch(URL, {
method: 'GET'
}).then(res => {
res.json()
}).then(json => {
console.log(json)
return json
}).catch(error => {
return error
})
}
Promises can't be dependency chained unless you explicitly return them from the function that's involved in chaining.
If you're able to use async and await, I'd also recommend doing that as well as that can vastly simplify otherwise complex code.
Express can't send multiple responses. You will have to finish all calls or use WebSockets to stream data.
function testURL(URL) {
return new Promise((resolve, reject) => {
if (URL === 'url2') {
reject(new Error('Internal Server Error'));
return;
}
resolve({ status: 200 });
});
}
const main = async () => {
const urls = ['url1', 'url2', 'url3'];
// return resolved and rejected Promises because if one fails in Promise.all
// the function will throw and we won't have any access to any resolved Promises.
const results = await Promise.all(urls
.map(url => testURL(url).then(response => response).catch(error => error)));
// every error have a stack property, Set the status to whatever you want
// based on the error and store the stack and the message
const objTestsResul = results.reduce((result, cur, i) => {
result[urls[i]] = cur.stack
? { status: 500, message: cur.message, stack: cur.stack }
: cur;
return result;
}, {});
console.log(objTestsResul);
};
main();
I Have a array of objects which i need to clone with different values.
Those values i'll get from each promise finally after preparing main modified array of object i'll have to save this. So i many need this as one single promise.
I not sure how to do it.
Here is the example we need to clone oldUser data. Say old user has credit score = 100; but for new user default credit will be created randomly by system.
For each user in the array of users few details has to get updated using async call.
This is the requirement
function getUserCreditScore(user){
var url = '/someurl';
return $http.get(url).then(function(res){
user.creditScore = (res.data) ? res.data : 0;
});
}
function getUserRecomandations(user){
var url = '/someurl';
return $http.get(url).then(function(res){
user.recommendation = (res.data) ? res.data : 'basic recommendation';
});
}
function getUserHelpInfo(user){
var url = '/someurl';
return $http.get(url).then(function(res){
user.helpInfo = (res.data) ? res.data : 'Help Info';
});
}
function clone(){
var newUsers = angular.copy(oldUsers);
for (var i=0; i<newUsers.length; i++){
newUsers[i].id = undefined;
getUserCreditScore(newUsers[i]);
getUserRecommendation(newUsers[i]);
getUserHelpInfo(newUsers[i]);
}
var promises = _.map(newUsers, user => user.save());
$q.all(promises).then(function (data) {
console.log(data);
}
}
You'll need to Promise.all on an array of Promises that are returned by getScreditScore
something like
function getCreditScore(){
var url = '/someurl';
return $http.get(url).then(res => (res && res.data) ? res.data : res);
}
function clone(){
var newUsers = angular.copy(oldUsers);
Promise.all(
newUsers.map(newUser => {
newUser.id = undefined;
return getCreditScore()
.then(result => newUser.creditScore = result);
})
).then(results => // results will be an array of values returned by the get in getCreditScore(newUser)
Promise.all(newUsers.map(user => user.save()))
).then(data =>
console.log(data); // this will be the result of all the user.save
);
}
Note: the newUser.creditScore is set in the .then in the newUsers.map callback - (minimal change to my original answer)
Alternatively, passing user to getCreditScore
function getCreditScore(user){
var url = '/someurl';
return $http.get(url)
.then(res => (res && res.data) ? res.data : res)
.then(score => user.creditScore = score);
}
function clone(){
var newUsers = angular.copy(oldUsers);
Promise.all(
newUsers.map(newUser => {
newUser.id = undefined;
return getCreditScore(newUser);
})
).then(results => // results will be an array of values returned by the get in getCreditScore(newUser)
Promise.all(newUsers.map(user => user.save()))
).then(data =>
console.log(data); // this will be the result of all the user.save
);
}
Personally, I'd write the code
function getCreditScore(){
var url = '/someurl';
return $http.get(url).then(res => (res && res.data) ? res.data : res);
}
function clone(){
var newUsers = angular.copy(oldUsers);
Promise.all(
newUsers.map(newUser => {
newUser.id = undefined;
return getCreditScore()
.then(result => newUser.creditScore = result)
.then(() => newUser.save())
.then(() => newUser);
})
).then(data =>
console.log(data); // this will be the newUsers Array
);
}
This assumes, though, that you don't need to wait for all the $http.get before running the user.save() - in fact this may be a little (very little) more performant as the newUser.save and $http.get will run in tandem
Ok, I know your meaning, you want your every element of your array do something that is async.
So you can use map and Promise.all. Here is my code:
const asyncFunction = (item, cb) => {
setTimeout(() => {
console.log(`done with ${item}`);
cb();
}, 1000);
}
let requests = [1, 2, 3].map((item) => {
return new Promise((resolve) =>{
asyncFunction(item, resolve);
});
});
Promise.all(requests).then(() => console.log('done'));
Please forgive the fairly case-specific question, though I think the general end goal could be of use to other people.
Goal: Populate a MongoDB with data requested from multiple JSON API URLs.
Short question: So far I've had some success with request-promise, which uses Bluebird:
var rp = require('request-promise');
var options = {
uri: 'http://www.bbc.co.uk/programmes/b006qsq5.json',
headers: {
'User-Agent': 'Request-Promise'
},
json: true
};
rp(options)
.then(function (body) {
// Mongoose allows us query db for existing PID and upsert
var query = {pid: body.programme.pid},
update = {
name: body.programme.title,
pid: body.programme.pid,
desc: body.programme.short_synopsis
},
options = { upsert: true, new: true };
// Find the document
Programme.findOneAndUpdate(query, update, options, function(err, result) {
if (err) return res.send(500, { error: err });
return res.send("succesfully saved");
});
})
.catch(function (err) {
return res.send(err);
})
But how do I loop over an array of URLs, without the program failing if any of the promises are rejected?
Something like this for example, using Bluebird, fails if any of the URLs errors.
const urls = ['http://google.be', 'http://google.uk']
Promise.map(urls, rp)
.map((htmlOnePage, index) => {
return htmlOnePage;
})
.then(console.log)
.catch((e) => console.log('We encountered an error' + e));
As I want to write to the DB with successful requests, and ignore those that might not be responding right then, I need something that skips over rejected promises, which .all does not do.
Long question:
I've been reading up about promises all day and it's making my head hurt! But I've found some good resources, such as https://pouchdb.com/2015/05/18/we-have-a-problem-with-promises.html, which mentions the use of a Promise factory. Would this work for my case? I initially thought I should make each request, process the result and add it to the DB, then move on to the next request; but having seen .all I thought I should do all the requests, save the results in an array and loop over that with my DB saving function.
Should I even be using Promises for this? Maybe I should just make use of something like async.js and run my requests in series.
Thanks very much for any help or ideas.
But how do I loop over an array of URLs, without the program failing if any of the promises are rejected?
if you return a value from .catch other than a rejected promise, you will return a resolved promise
So, your .then for each individual request could return an object like
{
success: true,
result: whateverTheResultIs
}
and your catch returns
{
success: false,
error: whateverTheErrorIs
}
Really you don't NEED the success property, it's a convenience though
So the code would be - assuming process(url) returns a Promise
Promise.map(urls, url =>
process(url)
.then(result => ({result, success:true}))
.catch(error => ({error, success:false}))
)
.then(results => {
let succeeded = results.filter(result => result.success).map(result => result.result);
let failed = results.filter(result => !result.success).map(result => result.error);
});
Or, in ES5
Promise.map(urls, function (url) {
return process(url).then(function (result) {
return { result: result, success: true };
}).catch(function (error) {
return { error: error, success: false };
});
}).then(function (results) {
var succeeded = results.filter(function (result) {
return result.success;
}).map(function (result) {
return result.result;
});
var failed = results.filter(function (result) {
return !result.success;
}).map(function (result) {
return result.error;
});
});
I don't know if this fit your case, but I think You can use a counter to check when all promises has returned, regardless of the fact that each one has been resolved or rejected
var heroes = [
'Superman',
'Batman',
'Spiderman',
'Capitan America',
'Ironman',
];
function getHero(hero) {
return new Promise((resolve, reject) => {
setTimeout(() => {
return Math.round(Math.random()) ? resolve(hero + ' lives') : reject(hero + ' dead');
}, Math.random() * 3000)
})
}
function checkHeroes() {
var checked = heroes.length;
heroes.forEach((hero) => {
getHero(hero)
.then((res) => {
checked --;
console.log(res);
if (!checked) done();
})
.catch((err) => {
checked --;
console.log(err);
if (!checked) done();
});
})
}
function done() {
console.log('All heroes checked');
}
checkHeroes();
I think your issue is less about the bluebird api than structuring your promise chain.
const reducePropsToRequests = (props) => Promise.resolve(Object
.keys(props)
.reduce((acc, key) => {
acc[key] = request(sources[key]);
return acc;
}, {}));
const hashToCollection = (hash) => Promise.resolve(Object
.keys(hash)
.reduce((acc, k) => {
return [...acc, {source: k, data: hash[k]}];
}, []));
const fetchFromSources = (sources) => Promise.props(sources);
const findSeveralAndUpdate = (results) => Promise
.each(results.map(obj => {
// you have access to original {a: 'site.com'}
// here, so use that 'a' prop to your advantage by abstracting out
// your db config somewhere outside your service
return Programme.findOneAndUpdate(someConfig[obj.source], obj.data);
}))
const requestFromSeveralAndUpdate = (sources) => reducePropsToRequests(sources)
.then(fetchFromSources)
.then(hashToCollection)
.then(findSeveralAndUpdate)
.catch(/* some err handler */);
requestFromSeveralAndUpdate({ a: 'site.com', b: 'site.net' });
I'd just use request and write my own promise with try catch inside that only resolves. Pseudo example below
var request = require('request')
var urls = ['http://sample1.com/json', 'http://sample2.com/json']
var processUrl = (url) => {
return new Promise((resolve,reject)=> {
var result;
try {
var myRequest = {
uri: url,
method: 'GET',
header: {...}
};
request(option, (res,body,err)=> {
if(err) {
result = err;
return;
}
result = body;
})
}
catch(e) {
result = e;
}
finally {
resolve(result)
}
})
}