I'm pulling information from an API that has the following response format:
{
  items: [{}, {}, {}],
  nextPage: {
    startIndex: 11
  }
}
so I wrote a program that checks whether there's a nextPage property and, if there is, makes a subsequent request to the API with offset = startIndex. Here's my code:
Serp.prototype.search = function (query, start, serps) {
  let deferred = this.q.defer();
  let url = '';
  if (start === 0) {
    url = `${GCS_BASE}/?key=${this.key}&cx=${this.cx}&q=${query}`;
  } else {
    url = `${GCS_BASE}/?key=${this.key}&cx=${this.cx}&q=${query}&start=${start}`;
  }
  this.https.get(url, (res) => {
    let rawData = '';
    res.on('data', (chunk) => {
      rawData += chunk;
    });
    res.on('end', () => {
      let contactInfo = [];
      let result = JSON.parse(rawData);
      let totalResults = result.searchInformation.totalResults;
      // if total results are zero, return nothing.
      if (totalResults === 0) {
        serps.push(contactInfo);
        deferred.resolve(serps);
      // there's just one page of results.
      } else if (totalResults <= 10) {
        contactInfo = this._extractContactInfo(result.items, query.toLowerCase());
        serps.push(contactInfo);
        deferred.resolve(serps);
      // if there are more than 10, then page through the response.
      } else if ((totalResults > 10) && (result.queries.hasOwnProperty('nextPage'))) {
        // recursively and asynchronously pull 100 results.
        if (result.queries.nextPage[0].startIndex < 91) {
          contactInfo = this._extractContactInfo(result.items, query.toLowerCase());
          serps.push(contactInfo);
          this.search(query, result.queries.nextPage[0].startIndex, serps)
            .then(() => {
              deferred.resolve();
            });
        } else {
          contactInfo = this._extractContactInfo(result.items, query.toLowerCase());
          serps.push(contactInfo);
          let res = this.flatten(serps);
          deferred.resolve(res);
        }
      }
    });
  });
  return deferred.promise;
};
That part of the code works just fine; the problem arises when I try to call that search function this way:
let promises = keywords.map((keyword) => {
  return Serps.search(keyword, startIndex, serps);
});

q.allSettled(promises)
  .then((results) => {
    console.log(results); // [ { state: 'fulfilled', value: undefined } ]
  });
My problem is that the promises are being fulfilled, but the value is undefined.
So what am I doing wrong, and how can I fix it?
I solved the problem by no longer resolving with an empty value here:
this.search(query, result.queries.nextPage[0].startIndex, serps)
  .then(() => {
    deferred.resolve(serps);
  });
I still need to flatten the results, so maybe there's a smarter solution, but so far this works perfectly.
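For completeness, here is a rough sketch of that "smarter solution" using native promises and async/await instead of Q deferreds, flattening once at the end. It reuses the names from the question (GCS_BASE, this.key, this.cx, _extractContactInfo); the httpsGetJson helper is a hypothetical stand-in for the https.get boilerplate:

const https = require('https');

// Hypothetical helper: GET a URL and resolve with the parsed JSON body.
function httpsGetJson(url) {
  return new Promise((resolve, reject) => {
    https.get(url, (res) => {
      let rawData = '';
      res.on('data', (chunk) => rawData += chunk);
      res.on('end', () => {
        try { resolve(JSON.parse(rawData)); } catch (e) { reject(e); }
      });
    }).on('error', reject);
  });
}

// Sketch: follow nextPage until exhausted (or startIndex >= 91), then flatten.
Serp.prototype.search = async function (query, start = 0) {
  const pages = [];
  let startIndex = start;
  for (;;) {
    const url = `${GCS_BASE}/?key=${this.key}&cx=${this.cx}&q=${query}` +
      (startIndex ? `&start=${startIndex}` : '');
    const result = await httpsGetJson(url);
    if (Number(result.searchInformation.totalResults) === 0) break;
    pages.push(this._extractContactInfo(result.items, query.toLowerCase()));
    const next = result.queries.nextPage;
    if (!next || next[0].startIndex >= 91) break;
    startIndex = next[0].startIndex;
  }
  // flatten one level, so callers get a single array of contacts
  return pages.reduce((flat, page) => flat.concat(page), []);
};

A caller then gets the flattened array directly, e.g. Serps.search(keyword).then(contacts => ...), and allSettled values are no longer undefined.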
Related
I'm struggling to understand how I can return data from multiple promises to build up an array of data.
Is there any way I can return the data outside of the promise to push to the data variable?
I have the following:
db_sh.find({
  selector: {sh: req.params.sh_id},
  fields: ['_id', 'sh_id', 'time'],
  sort: ['_id']
}).then(function (result) {
  let data = {};
  console.log('Found: ' + result.docs.length);
  if (result.docs.length > 0) {
    for (var i = 0; i < result.docs.length; i++) {
      let student = result.docs[i];
      Promise
        .all([getMealBooking(student._id), getStudentData(student._id)])
        .then(function (response) {
          var meal_booking_data = response[0];
          var student_data = response[1];
          console.log(meal_booking_data);
          console.log(student_data);
        })
        .catch(function (err) {
          return res.send(false);
        });
      data[student.time] = [
        meal_booking_data,
        student_data
      ];
    }
  }
  /** Sort data, oldest first */
  data = Object.keys(data).sort().reduce((a, c) => (a[c] = data[c], a), {});
  console.log(data);
  res.send(data);
});
I have two promise-returning functions (getMealBooking() and getStudentData()), and I am using Promise.all() to get the results of both. I have tried to return the data, but I cannot get the results to build up the data object.
Any help building up a list of all my data would be great.
You need two Promise.alls - one to iterate over each student, and a nested one to fetch the getMealBooking and getStudentData for each student.
Put everything into an async function (that catches and sends false if needed) to make the control flow easier to understand.
const { docs } = await db_sh.find({
  selector: { sh: req.params.sh_id },
  fields: ['_id', 'sh_id', 'time'],
  sort: ['_id']
});
if (docs.length === 0) {
  // no data; stop here
  res.send({});
  return;
}
const data = {};
await Promise.all(
  docs.map(student => (
    Promise.all([getMealBooking(student._id), getStudentData(student._id)])
      .then(([mealBookingData, studentData]) => {
        data[student.time] = [mealBookingData, studentData];
      })
  ))
);
const sortedData = Object.keys(data).sort().reduce((a, c) => (a[c] = data[c], a), {});
res.send(sortedData);
Another Promise.all() is needed for the loop that contains the Promise.all() you've already figured out. It's better to factor a little so you can see what's happening.
function getStudentMealAndData(student) {
  return Promise
    .all([getMealBooking(student._id), getStudentData(student._id)])
    .then(function (response) {
      var meal_booking_data = response[0];
      var student_data = response[1];
      console.log(meal_booking_data);
      console.log(student_data);
      return { student, meal_booking_data, student_data };
    })
    .catch(function (err) {
      return res.send(false);
    });
}
This simplifies the then block a bit...
}).then(function (result) {
  console.log('Found: ' + result.docs.length);
  let promises = [];
  for (var i = 0; i < result.docs.length; i++) {
    let student = result.docs[i];
    promises.push(getStudentMealAndData(student));
  }
  return Promise.all(promises);
}).then(results => {
  // results is an array of [{ student, meal_booking_data, student_data }, ...]
  let data = results.reduce((acc, s) => {
    acc[s.student.time] = [s.meal_booking_data, s.student_data];
    return acc;
  }, {});
  data = Object.keys(data).sort().reduce((a, c) => (a[c] = data[c], a), {});
  console.log(data);
  res.send(data);
});
let arr = [];
const datas = await Promise.all([
  getMealBooking(),
  getStudentData()
]);
arr.push(datas[0]); // adds getMealBooking() results
arr.push(datas[1]); // adds getStudentData() results
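A slightly tidier variant of the same idea destructures the resolved array directly:

// Same result via destructuring assignment.
const [mealBookingData, studentData] = await Promise.all([
  getMealBooking(),
  getStudentData()
]);
const arr = [mealBookingData, studentData];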
I have an array of chunked data that I need to upload one chunk at a time. The current implementation encapsulates the logic in a Promise.all(), since I need to return the result of the promise.
The problem with this approach is that all the uploads run concurrently, resulting in a Timeout error because the server can't process all the requests at the same time. How can I modify this method so that the upload is done one chunk at a time?
My code:
var chunks = _.chunk(variableRecords, 30);
return Promise.all(
  chunks.map(chunk => this.portalService.updateDataForChart(variableId, chunk))
).then((updateRes: boolean[]) => {
  if (updateRes.every(updateStatus => updateStatus)) {
    return this.executeRequest<HealthDataSource, boolean>({
      path: `/variable/user/datasources/${dataSource.identifier}`,
      method: 'PUT',
      body: {
        libelle: dataSource.datasource.libelle,
        type: dataSource.datasource.type,
        lastSyncDate: Math.max(maxDate, dataSource.datasource.lastSyncDate)
      },
      headers: this.getHeaders()
    });
  } else {
    return false;
  }
});
You need them in SEQUENCE; for...of is the way to go:
async function chunksSequence(chunks) {
  for (const chunk of chunks) {
    await // your other code here
  }
}
If you need to return something
async function chunksSequence(chunks) {
  let results = [];
  for (const chunk of chunks) {
    let result = await // your other code here
    results.push(result);
  }
  return results;
}
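Applied to the question's code, that might look like the following sketch (portalService, variableId, and variableRecords are the names from the question, and the method is assumed to live on the same class as the original code):

// Sketch: upload one 30-record chunk at a time instead of all at once.
async uploadChunksInSequence(variableRecords, variableId) {
  const chunks = _.chunk(variableRecords, 30);
  const results = [];
  for (const chunk of chunks) {
    // each request starts only after the previous one has resolved
    results.push(await this.portalService.updateDataForChart(variableId, chunk));
  }
  return results.every(updateStatus => updateStatus);
}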
And, per a comment asking for the result to be returned in a promise:
async function chunksSequence(chunks) {
  // note: an async function already returns a promise, so the explicit
  // wrapper below is only needed if you want to construct one yourself
  return new Promise(async (resolve, reject) => {
    let results = [];
    for (const chunk of chunks) {
      let result = await // your other code here
      results.push(result);
    }
    resolve(results);
  });
}
You can do this with the help of Array.reduce()
const chunks = _.chunk(variableRecords, 30);
// each upload must be started inside the chain; if the promises were
// created up front they would all run in parallel again
return chunks.reduce((promiseChain, currentChunk) => {
  return promiseChain.then(chainResults =>
    this.portalService.updateDataForChart(variableId, currentChunk)
      .then(currentResult =>
        [...chainResults, currentResult]
      )
  );
}, Promise.resolve([])).then(arrayOfResults => {
  // Do something with all results
});
Source: https://decembersoft.com/posts/promises-in-serial-with-array-reduce/
If you don't or can't use await, you could use something like this:
function runSequenceItem(chunks, index) {
  // processChunk is a hypothetical stand-in for your per-chunk upload call,
  // which must return a promise
  return processChunk(chunks[index])
    .then(res => {
      index++;
      if (index < chunks.length) {
        return runSequenceItem(chunks, index);
      } else {
        // this is not needed actually
        return 'done';
      }
    });
}

function runInSequence(chunks) {
  return runSequenceItem(chunks, 0);
}
If you also need the results, you can thread an array through the recursion and return it at the end:
function runSequenceItem(chunks, index, results) {
  // processChunk is again a hypothetical stand-in for the per-chunk upload
  return processChunk(chunks[index])
    .then(res => {
      results.push(res);
      index++;
      if (index < chunks.length) {
        return runSequenceItem(chunks, index, results);
      } else {
        return results;
      }
    });
}

function runInSequence(chunks) {
  return runSequenceItem(chunks, 0, []);
}
and then retrieve them at the end; note that runInSequence returns a promise, so the results arrive in a then:
runInSequence(chunks).then(results => {
  // use results here
});
I am using an external API as my data source. That API returns its data in a paginated way,
so I can get data for page 1, 2, 3, etc. The problem is that I need all the data at once; the API doesn't support this, so I have to write it myself.
I have written the code for the actual call:
function getTransactionPart(start) {
  return new Promise(function (resolve, reject) {
    const options = {
      url: 'myurl?limit=40&start=' + start,
      json: true
    };
    let result = { total_transactions: 0, transactions: [] };
    request(options, function (error, response, body) {
      if (error) {
        return reject(result);
      }
      body.data.forEach(function (transaction) {
        result.transactions.push({
          timestamp: transaction.timestamp,
          amount: transaction.amount,
          confirmed: transaction.confirmed
        });
      });
      result.total_transactions = body.total;
      return resolve(result);
    });
  });
}
The above code returns the expected results for the limit I gave. I also get a number back (result.total_transactions); when this is more than 40, I need to make another call with 40 as the start, and so on.
The code where I need to combine it:
function getTransactions(start) {
  return new Promise(function (resolve, reject) {
    getTransactionPart(start).then(result => {
      if (result.total_transactions > 40) {
        // next call
      } else {
        // return all?
      }
      console.log(result);
    }).catch(error => {
      console.error(error);
      return r.json({
      });
    });
  });
}
So I make the first call with getTransactionPart(0); after that, the method itself needs to combine the results from all the sub-calls and return the whole result. How can I do this with recursion and promises?
This is easier if you use an async function and await the request:
async function getTransactions(start) {
  const result = [];
  for (let pos = start; ; pos += 40) {
    const { total_transactions, transactions } = await getTransactionPart(pos);
    result.push(...transactions);
    if (pos + 40 >= total_transactions) break; // the last page has been fetched
  }
  return result;
}
For sure you could also do this recursively, but do you really need that?
async function getTransactions(start) {
  const { total_transactions, transactions } = await getTransactionPart(start);
  if (start + 40 >= total_transactions)
    return transactions;
  return transactions.concat(await getTransactions(start + 40));
}
I am using Sequelize and struggling because the method is forever in a pending state.
The following is a simplified version of what I am trying to do. Basically, an API makes use of the methods below by calling BatchProcessor, which is supposed to process the provided JSON.
I basically want BatchProcessor to get the theme cost and gate from the FinalTheme method, but the promise is forever pending.
export default {
  async FinalTheme(id) {
    return db.Themes.findOne({
      where: {
        ID: id
      },
      attributes: ["ThemeCost", "ThemeGate"],
      limit: 1
    })
      .then(data => {
        if (data == null) {
          return -1;
        }
        return {
          cost: data["ThemeCost"],
          gate: data["ThemeGate"]
        };
      })
      .catch(err => {
        return false;
      });
  },

  async BatchProcessor(record, index_number) {
    const SQL = "SELECT * FROM themes";
    return db.sequelize
      .query(SQL, {
        type: db.sequelize.QueryTypes.SELECT
      })
      .then(themes => {
        // do we have data here?
        const totalThemes = themes.length;
        let lastAmount = record["Amount"];
        for (
          let counter = 0;
          counter < totalThemes - 1;
          counter++
        ) {
          const CustomerFinalTheme = this.FinalTheme(record["CustomerID"]); // FOREVER PENDING
        }
      })
      .catch(err => {
        console.log(JSON.stringify(err));
      });
  },
};
What am I doing wrong, exactly?
this.FinalTheme(...) returns a promise, not the value; you have to do:
this.FinalTheme(record["CustomerId"]) // where is the record assigned?
  .then(data => {
    const CustomerFinalTheme = data;
  });
Also, there is no need to use async when declaring these functions, since they already return promises; i.e. the following is fine:
FinalTheme(id) {
  return db.Themes.findOne({
    [...]
  });
}
You are running a loop inside the then block of BatchProcessor; you can await inside a for loop instead:
async BatchProcessor(record, index_number) {
  const SQL = "SELECT * FROM themes";
  const themes = await db.sequelize.query(SQL, { type: db.sequelize.QueryTypes.SELECT });
  const totalThemes = themes.length;
  let lastAmount = record["Amount"];
  for (let counter = 0; counter < totalThemes - 1; counter++) {
    const CustomerFinalTheme = await this.FinalTheme(record["CustomerID"]);
  }
  return 'ALL DONE';
}
I have a function that runs periodically and updates the item.price of some documents in my Prices collection. The Prices collection has 100k+ items. The function looks like this:
// Just a helper function for multiple GET requests with request.
let _request = (urls, cb) => {
  let results = {}, i = urls.length, c = 0;
  const handler = (err, response, body) => {
    let url = response.request.uri.href;
    results[url] = { err, response, body };
    if (++c === urls.length) {
      cb(results);
    }
  };
  while (i--) {
    request(urls[i], handler);
  }
};
// function to update the prices in our Prices collection.
const update = (cb) => {
  Price.remove({}, (err, remove) => {
    if (err) {
      return logger.error(`Error removing items...`);
    }
    logger.info(`Removed all items... Beginning to update.`);
    _request(urls, (responses) => {
      let url, response, id;
      for (url in responses) {
        id = url.split('/')[5].split('?')[0];
        response = responses[url];
        if (response.err) {
          logger.error(`Error in request to ${url}: ${response.err}`);
          return;
        }
        if (response.body) {
          logger.info(`Request to ${url} successful.`);
          let jsonResult = {};
          try {
            jsonResult = JSON.parse(response.body);
          } catch (e) {
            logger.error(`Could not parse.`);
          }
          logger.info(`Response body for ${id} is ${Object.keys(jsonResult).length}.`);
          let allItemsArray = Object.keys(jsonResult).map((key, index) => {
            return {
              itemid: id,
              hash_name: key,
              price: jsonResult[key]
            };
          });
          Price.insertMany(allItemsArray).then(docs => {
            logger.info(`Saved docs for ${id}`);
          }, (e) => {
            logger.error(`Error saving docs.`);
          });
        }
      }
      if (cb && typeof cb == 'function') {
        cb();
      }
    });
  });
};
As you can see, to avoid iterating through 100k+ documents and updating each one separately, I delete them all at the beginning, call the API that gives me these items with prices, and use insertMany() to insert all of them into my Prices collection.
This updating process will happen every 30 minutes.
But I just now realised: what if a user wants to check the prices while my Prices collection is empty because it's in the middle of updating itself?
The Question
So do I have to iterate through all of them in order to avoid deleting anything? (Remember, there are MANY documents to update every 30 minutes.) Or is there another solution?
(Picture of the collection omitted.) The Prices collection holds 100k docs shaped like the objects built above; I just want to update the price property.
Update:
I have re-written my update function a bit and now it looks like this:
const update = (cb = null) => {
  Price.remove({}, (err, remove) => {
    if (err) {
      return logger.error(`Error removing items...`);
    }
    logger.info(`Removed all items... Beginning to update.`);
    _request(urls, (responses) => {
      let url, response, gameid;
      for (url in responses) {
        gameid = url.split('/')[5].split('?')[0];
        response = responses[url];
        if (response.err) {
          logger.error(`Error in request to ${url}: ${response.err}`);
          return;
        }
        if (response.body) {
          logger.info(`Request to ${url} successful.`);
          let jsonResult = {};
          try {
            jsonResult = JSON.parse(response.body);
          } catch (e) {
            logger.error(`Could not parse.`);
          }
          logger.info(`Response body for ${gameid} is ${Object.keys(jsonResult).length}.`);
          let allItemsArray = Object.keys(jsonResult).map((key, index) => {
            return {
              game_id: gameid,
              market_hash_name: key,
              price: jsonResult[key]
            };
          });
          let bulk = Price.collection.initializeUnorderedBulkOp();
          allItemsArray.forEach(item => {
            bulk.find({ market_hash_name: item.market_hash_name })
              .upsert().updateOne(item);
          });
          bulk.execute((err, bulkers) => {
            if (err) {
              return logger.error(`Error bulking: ${err}`);
            }
            logger.info(`Updated Items for ${gameid}`);
          });
          // Price.insertMany(allItemsArray).then(docs => {
          //   logger.info(`Saved docs for ${gameid}`)
          // }, (e) => {
          //   logger.error(`Error saving docs.`);
          // });
        }
      }
      if (cb && typeof cb == 'function') {
        cb();
      }
    });
  });
};
Notice the bulk variable now (thanks @Rahul), but now the collection takes ages to update. My processor is burning up, and it literally takes 3+ minutes to update 60k+ documents. Honestly, the previous method, even though it deletes all the documents and then reinserts them, was about 10x faster.
Anyone?
From my experience (updating millions of mongo docs on an hourly basis), here's a realistic approach to very large bulk updates:
- do all your API calls separately and write the results as BSON into a file
- invoke mongoimport and import that file into a new, empty collection prices_new (JavaScript, let alone high-level OO wrappers, is just too slow for that)
- rename prices_new -> prices with dropTarget=true (this is atomic, hence no downtime)
Schematically, it would look like this in JS
let fname = '/tmp/data.bson';
let apiUrls = [...];

async function doRequest(url) {
  // perform a request and return an array of records
}

let responses = await Promise.all(apiUrls.map(doRequest));

// if the data is too big to fit in memory, use streams instead of this:
let data = flatMap(responses, BSON.serialize).join('\n');
await fs.writeFile(fname, data);

await child_process.exec(`mongoimport --collection prices_new --drop ${fname}`);
await db.prices_new.renameCollection('prices', true);
There's no need to clear the database and do a fresh insert. You can use the bulkWrite() method for this or use the updateMany() method to do the updates.
You can refactor the existing code to
const update = (cb) => {
  _request(urls, responses => {
    let bulkUpdateOps = [], gameid;
    Object.keys(responses).forEach(url => {
      let response = responses[url];
      gameid = url.split('/')[5].split('?')[0];
      if (response.err) {
        logger.error(`Error in request to ${url}: ${response.err}`);
        return;
      }
      if (response.body) {
        logger.info(`Request to ${url} successful.`);
        let jsonResult = {};
        try {
          jsonResult = JSON.parse(response.body);
        } catch (e) {
          logger.error(`Could not parse.`);
        }
        Object.keys(jsonResult).forEach(key => {
          bulkUpdateOps.push({
            "updateOne": {
              "filter": { market_hash_name: key },
              "update": { "$set": {
                game_id: gameid,
                price: jsonResult[key]
              } },
              "upsert": true
            }
          });
        });
      }
      if (bulkUpdateOps.length === 1000) {
        Price.bulkWrite(bulkUpdateOps).then(result => {
          logger.info(`Updated Items`);
        }).catch(e => logger.error(`Error bulking: ${e}`));
        bulkUpdateOps = [];
      }
    });
    if (bulkUpdateOps.length > 0) {
      Price.bulkWrite(bulkUpdateOps).then(result => {
        logger.info(`Updated Items`);
      }).catch(e => logger.error(`Error bulking: ${e}`));
    }
    if (cb && typeof cb == 'function') {
      cb();
    }
  });
};
I have not tested this, but you can try it; it might be helpful. I am using the bluebird library for concurrency.
const Bluebird = require('bluebird'); // assumed imports: bluebird, as mentioned above,
const request = require('request');   // and the same request lib as the question

let _request = (url) => {
  return new Promise((resolve, reject) => {
    request(url, (err, response, body) => {
      if (err) {
        return reject(err);
      }
      resolve(body);
    });
  });
};
const formatResponse = async (response) => {
  // do stuff
  return {
    query: {}, // e.g. { itemid: id }
    body: {}
  };
};
const bulkUpsert = (allItemsArray) => {
  let bulk = Price.collection.initializeUnorderedBulkOp();
  return new Promise((resolve, reject) => {
    allItemsArray.forEach(item => {
      bulk.find(item.query).upsert().updateOne(item.body);
    });
    bulk.execute((err, bulkers) => {
      if (err) {
        return reject(err);
      }
      return resolve(bulkers);
    });
  });
};
const getAndUpdateData = async (urls) => {
  const allItemsArray = await Promise.all(urls.map(async (url) => {
    const requestData = await _request(url); // requests within one chunk run in parallel here
    const formattedData = await formatResponse(requestData); // returns { query: {}, body: {} }
    return formattedData;
  }));
  return bulkUpsert(allItemsArray);
};
function update() {
  // split urls as per your need: 100/1000 per chunk
  var i, j, chunkTasks = [],
    chunk = 100;
  for (i = 0, j = urls.length; i < j; i += chunk) {
    const slice = urls.slice(i, i + chunk);
    // push a function (not an already-started promise) so concurrency can be controlled
    chunkTasks.push(() => getAndUpdateData(slice));
  }
  Bluebird.map(chunkTasks, function (task) {
    return task();
  }, {
    concurrency: 1 // depends on concurrent requests; 1 = one chunk of 100 requests fetched and written to the db at a time
  }).then(function () {
    console.log("done");
  }).catch(function () {
    console.log("error");
  });
}