MongoDB Find queries slow while updating/inserting schema - javascript

Once a day I run a big loop that updates existing documents in the database (and also inserts new ones).
The loop runs on a separate server (to keep the main server from slowing down), but the main problem is that all find queries on the database are very slow while the loop is executing (the loop slows them down significantly).
This is a serious issue for my website (the loop must run once a day), and I've been searching online for a solution without success.
Is there any way to keep the find queries fast while inserting/updating the database?
// Assumed to be in scope (not shown in the snippet): Company, Product,
// productFields, normalizeValue, base64encode, errors, and the async library.
uploadProductsManually = async (name, products, map, valuesMap) => {
return new Promise(async function (resolve, reject) {
const company = await Company.findOne({ name }).exec();
if (!company) return reject(new errors.NotFound("Company not found"));
const rows = products;
const parsedRows = [];
const findCorrectKey = (key) => {
const correctKey = key.trim();
if (productFields[correctKey]) return productFields[correctKey];
const category = map.find((item) => {
return item.options.some((option) => {
return option.trim().toLowerCase() === correctKey.toLowerCase();
});
});
const categoryName = category && category.name;
return productFields[categoryName];
};
const hashProductValues = (product) => {
let valueToHash;
if (product.productId) {
valueToHash = product.productId;
} else if (product.certificateId) {
valueToHash = product.certificateId;
} else {
valueToHash = JSON.stringify(
product.size + product.color
);
}
return base64encode(valueToHash);
};
rows.forEach(function (row, i) {
var newProduct = {};
for (var key in row) {
var val = row[key];
if (val) {
let normalizedKey = findCorrectKey(key);
if (normalizedKey) {
// only assign once the value has been normalized; skipping unknown keys
// avoids writing to newProduct[undefined]
let normalizedValue = normalizeValue(normalizedKey, val, valuesMap);
newProduct[normalizedKey] = normalizedValue;
}
}
}
newProduct.channels = [];
if (newProduct.productId) {
parsedRows.push(newProduct);
}
});
fetchProducts();
function fetchProducts() {
Product.find({ company: company._id }).exec(function (err, products) {
if (err) console.log(err);
var map = {};
if (products) {
products.forEach(function (product) {
const productIdentifier = hashProductValues(product);
map[productIdentifier] = product;
if (product.productStatus == "manual") {
// product.isAvailable = false;
// product.save();
} else {
product.status = "deleted";
product.save();
}
});
}
mergeData(map);
});
}
async function mergeData(map) {
let created = 0;
let updated = 0;
let manual = 0;
const errors = []; // rows that failed to save (the pushes below are commented out)
async.each(
parsedRows,
function (row, callback) {
const productIdentifier = hashProductValues(row);
let product = map[productIdentifier];
if (product) {
map[productIdentifier] = undefined;
Product.findByIdAndUpdate(product._id, { $set: row }, function (
err,
updatedProd
) {
if (err) {
// errors.push(productIdentifier);
console.log("err is:", err);
}
updated++;
callback();
});
} else {
row = new Product(row);
row.save(function (err) {
if (err) {
// errors.push(productIdentifier);
console.log(err);
}
created++;
callback();
});
}
},
(err) => {
if (err) return reject(err);
Company.findByIdAndUpdate(
company._id,
{ lastUpdate: new Date() },
function (err, comp) {
if (err) console.log(err);
}
);
console.log(
`Created: ${created}\nUpdated: ${updated}\nManual: ${manual}`
);
resolve({
created,
updated,
manual,
errors,
});
}
);
}
});
};
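For what it's worth, a common way to reduce the load such a loop puts on the database is to batch the writes instead of issuing one findByIdAndUpdate/save round trip per product. A minimal sketch, assuming the names from the snippet above (Product, parsedRows, hashProductValues, map); the 500-op chunk size is arbitrary:
// Hedged sketch: the merge step rewritten with batched bulkWrite calls.
async function mergeDataBulk(map) {
  const ops = [];
  for (const row of parsedRows) {
    const identifier = hashProductValues(row);
    const existing = map[identifier];
    if (existing) {
      map[identifier] = undefined;
      ops.push({ updateOne: { filter: { _id: existing._id }, update: { $set: row } } });
    } else {
      ops.push({ insertOne: { document: row } });
    }
  }
  // Write in modest, unordered chunks so the server can interleave the
  // writes with concurrent find queries instead of being saturated at once.
  for (let i = 0; i < ops.length; i += 500) {
    await Product.bulkWrite(ops.slice(i, i + 500), { ordered: false });
  }
}
Batching alone may not fix the slow reads if the working set is large; pausing briefly between chunks, or pointing the reads at a replica-set secondary, are other levers to try.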

Related

How do I await for subscribe to subscribe

I used the code below until I found out that the getConnectedUser() function takes longer than verifyUser(), so this.userUID is undefined:
this.layoutService.getConnectedUser().subscribe(
(data) => {
this.userSaml = data;
this.layoutService.connectedUser.matricule = this.userSaml.matricule;
this.layoutService.connectedUser.profil = this.userSaml.profil;
this.layoutService.connectedUser.uid = this.userSaml.uid;
this.layoutService.connectedUser.username = this.userSaml.username;
this.layoutService.connectedUser.city = this.userSaml.city;
console.log("dashboard this.layoutService.connectedUser", this.layoutService.connectedUser);
},
(err) => {
throw err;
}
);
this.userUID = this.layoutService.connectedUser.uid;
console.log("this.userUID", this.userUID);
this.adminService.verifyUser(this.userUID).subscribe(
(data) => {
this.userStatus = data[0].status;
this.userProfile = data[0].profil;
console.log("userProfile" + JSON.stringify(data[0].profil));
this.userExists = true;
},
(err) => {
this.userExists = false;
}
);
So, to make sure the getConnectedUser subscription completed before calling the second one, I changed my code and added the .add() method, like this:
this.layoutService.getConnectedUser().subscribe(
(data) => {
this.userExistsRefog = true;
this.userSaml = data;
this.layoutService.connectedUser.matricule = this.userSaml.matricule;
this.layoutService.connectedUser.profil = this.userSaml.profil;
this.layoutService.connectedUser.uid = this.userSaml.uid;
this.layoutService.connectedUser.username = this.userSaml.username;
this.layoutService.connectedUser.city = this.userSaml.city;
console.log("home connectedUser", this.layoutService.connectedUser);
},
(err) => {
this.userExistsRefog = false;
throw err;
}
).add(() => {
this.userUID = this.layoutService.connectedUser.uid;
console.log("this.userUID", this.userUID);
this.adminService.verifyUser(this.userUID).subscribe(
(data) => {
this.userStatus = data[0].status;
this.userProfile = data[0].profil;
console.log("userProfile" + JSON.stringify(data[0].profil));
this.userExists = true;
},
(err) => {
this.userExists = false;
}
);
});
I want to learn how to use async/await for this example. What is the best approach to adopt for similar functionality? Thanks.
Mainly, you have two options:
1. Write the verifyUser() call inside the subscription of the getConnectedUser() method. That way you will never get a null value.
2. Use promises instead of observable subscriptions, then use async/await to defer execution of the method:
async userTasks() {
const usersDetails = await this.layoutService.getConnectedUser().toPromise();
this.layoutService.connectedUser.uid = usersDetails.uid;
this.userUID = usersDetails.uid; // set userUID before it is used below
this.adminService.verifyUser(this.userUID).subscribe(
(data) => {
this.userStatus = data[0].status;
this.userProfile = data[0].profil;
console.log("userProfile" + JSON.stringify(data[0].profil));
this.userExists = true;
},
(err) => {
this.userExists = false;
}
);
}
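A side note on toPromise(): it is deprecated in RxJS 7+ and slated for removal, with firstValueFrom as the usual replacement. A minimal sketch of the same flow under that assumption, with the second call awaited as well:
import { firstValueFrom } from "rxjs";

async userTasks() {
  // resolves with the first emission of getConnectedUser()
  const usersDetails = await firstValueFrom(this.layoutService.getConnectedUser());
  this.layoutService.connectedUser.uid = usersDetails.uid;
  this.userUID = usersDetails.uid;
  try {
    const data = await firstValueFrom(this.adminService.verifyUser(this.userUID));
    this.userStatus = data[0].status;
    this.userProfile = data[0].profil;
    this.userExists = true;
  } catch (err) {
    this.userExists = false;
  }
}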

AWS GameLift ThrottlingException: Rate exceeded when attempting to retrieve multiple PlayerSessions via JS SDK

I'm trying to retrieve ALL the data on all the Fleets in our GameLift, to be displayed on a React-powered CMS site.
Currently our page retrieves the Fleets (via listFleets), the Fleet Attributes (via describeFleetAttributes), and Game Sessions (via describeGameSessions).
Here's the code for all of that:
requestGameLiftData = async () => {
const gamelift = new AWS.GameLift();
try {
const { FleetIds } = await new Promise((resolve, reject) => { // Get Fleet IDs
gamelift.listFleets({}, function(err, data) {
if (err) { reject("Fleet id error"); }
else { resolve(data); }
});
});
const { FleetAttributes } = await new Promise((resolve, reject) => { // Get Fleet Attributes by IDs
gamelift.describeFleetAttributes(
{ FleetIds: FleetIds },
function(err, data) {
if (err) { reject("Fleet attributes error"); }
else { resolve(data); }
}
);
});
await new Promise((resolve, reject) => { // Save Fleet Attributes to state
this.setState(
{ fleetList: [...FleetAttributes] },
() => { resolve(); }
);
});
const instancePromiseArr = [];
const gameSessionPromiseArr = [];
const playerSessionPromiseArr = [];
const { fleetList } = this.state;
for (let fleet of fleetList) {
instancePromiseArr.push(this.getFleetInstances(fleet, gamelift));
gameSessionPromiseArr.push(this.getFleetGameSessions(fleet, gamelift));
}
let instanceData; // Get all Instances of every Fleet
try { instanceData = await Promise.all(instancePromiseArr); }
catch (err) { throw new Error("Fleet instances error"); }
let gameSessionData; // Get all Game Sessions of every Fleet
try { gameSessionData = await Promise.all(gameSessionPromiseArr); }
catch (err) { throw new Error("Fleet game session error"); }
fleetList.forEach((fleet, index) => { // Nesting game sessions and instances inside their respective fleets
fleet["Instances"] = instanceData[index].Instances;
fleet["GameSessions"] = gameSessionData[index].GameSessions;
});
await new Promise((resolve, reject) => {
this.setState(
{ fleetList: [...fleetList] },
() => { resolve(); }
);
});
this.setState({isFetched: true});
} catch (error) { this.setState({isFetched: true}); }
};
getFleetInstances = (fleet, gamelift) => {
return new Promise((resolve, reject) => {
gamelift.describeInstances(
{ FleetId: fleet.FleetId },
function(err, data) {
if (err) { reject("Fleet instances error"); }
else { resolve(data); }
}
);
});
};
getFleetGameSessions = (fleet, gamelift) => {
return new Promise((resolve, reject) => {
gamelift.describeGameSessions(
{ FleetId: fleet.FleetId },
function(err, data) {
if (err) { reject("Fleet game sessions error"); }
else { resolve(data); }
}
);
});
};
Now I have to get the Player Sessions. To that end, I added the following:
let playerSessionData; // Before the "Nesting"
try { playerSessionData = await Promise.all(playerSessionPromiseArr); }
catch (err) { throw new Error("Player session error"); }
getPlayersInSession = (gameSession, gamelift) => {
return new Promise((resolve, reject) => { // Function to get Player sessions, outside of requestGameLiftData
gamelift.describePlayerSessions(
{ GameSessionId: gameSession.GameSessionId },
function(err, data) {
if (err) { reject("Fleet player sessions error"); }
else { resolve(data); }
}
);
});
};
And then modified the Get all Game Sessions portion to the following:
try {
gameSessionData = await Promise.all(gameSessionPromiseArr);
for (const gameSessionItem of gameSessionData) {
for (const data of gameSessionItem.GameSessions) {
playerSessionPromiseArr.push(this.getPlayersInSession(data, gamelift));
}
}
} catch (err) { throw new Error("Fleet game session error"); }
And nested it in:
fleetList.forEach((fleet, index) => { // Nesting game sessions and instances inside their respective fleets
fleet["Instances"] = instanceData[index].Instances;
for (const gameSession of gameSessionData[index].GameSessions) {
gameSession['PlayerSessions'] = [];
for (const playerSessions of playerSessionData) {
if (playerSessions.PlayerSessions.length > 0) {
for (const playerSessionItem of playerSessions.PlayerSessions) {
if (playerSessionItem.GameSessionId === gameSession.GameSessionId) {
gameSession['PlayerSessions'].push(playerSessionItem);
}
}
}
}
}
fleet["GameSessions"] = gameSessionData[index].GameSessions;
});
This works... sometimes. Most of the time, I get a ThrottlingException: Rate exceeded and a 400 Bad Request. This doesn't happen in other regions with significantly fewer fleets, so I thought it was related to the sheer number of requests made at once (as of this writing, 8 for the fleets, 8 for the GameSessions, and no less than 28 for the PlayerSessions). So I tried adding a delay:
for (const gameSessionItem of gameSessionData) {
let delay = 0;
for (const data of gameSessionItem.GameSessions) {
delay += 50;
playerSessionPromiseArr.push(this.getPlayersInSession(data, gamelift, delay));
}
}
getPlayersInSession = (gameSession, gamelift, delay) => {
return new Promise(resolve => setTimeout(resolve, delay)).then(() => {
return new Promise((resolve, reject) => {
gamelift.describePlayerSessions(
{ GameSessionId: gameSession.GameSessionId},
function(err, data) {
if (err) { reject("Fleet player sessions error"); }
else { resolve(data); }
}
);
});
});
};
Which didn't work, of course. Is there anything I'm missing? Or is there another approach to this, to get all the data in one sitting without making too many requests?
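For what it's worth, two mitigations are commonly combined for this kind of ThrottlingException (a sketch under assumptions, not verified against this codebase): let the SDK client retry throttled calls with exponential backoff via its maxRetries and retryDelayOptions settings, and cap how many describePlayerSessions calls are in flight at once instead of pushing them all into Promise.all. The batch size of 5 is arbitrary:
// AWS SDK v2 clients accept retry settings; throttled calls are then
// retried automatically with exponential backoff.
const gamelift = new AWS.GameLift({
  maxRetries: 8,
  retryDelayOptions: { base: 300 } // base backoff delay in ms
});

// Run the player-session lookups a few at a time rather than all at once.
getAllPlayerSessions = async (gameSessions, gamelift, limit = 5) => {
  const results = [];
  for (let i = 0; i < gameSessions.length; i += limit) {
    const batch = gameSessions.slice(i, i + limit);
    // getPlayersInSession is the promise wrapper defined in the question
    const batchResults = await Promise.all(
      batch.map(gs => this.getPlayersInSession(gs, gamelift))
    );
    results.push(...batchResults);
  }
  return results;
};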

mysql node.js query error (undefined)? how to fix it

The problem is that when the second query is executed, it returns undefined, i.e. for some reason it does not see the result of the second query. It should work like this: we run the first query, and if it returns fewer than two rows, we run the second query. What could be the error, and how do I fix it?
let arr = [name1, name2 /* ... */];
let ipObject = { Objects: [] };
arr.forEach(function(elem, index) {
connection.query("select 1 from i.goa where object_name = ?", elem, (err, rows) => {
// console.log (rows.length);
if (rows.length < 2) {
// console.log (elem);
connection.query(
"SELECT ip_adress FROM i.gs where server_kod=(SELECT server_kod FROM i.gol where object_kod =(SELECT object_kod FROM i.goa where object_name=?))",
elem,
(err, rows2) => {
console.log(elem);
console.log(rows2);
if (undefined !== rows2 && rows2.length > 0) {
// if(rows2.length>0 ){
ipObject.Objects.push({ objectName: elem, serverIp: rows2[0].ip_adress });
}
i++;
if (i > count) {
cb(JSON.stringify(ipObject));
console.log(JSON.stringify(ipObject));
// fs.writeFileSync('e.json',JSON.stringify(ipObject),'utf8');
}
},
);
} else if (rows.length >= 2) {
ipObject.Objects.push({ objectName: elem, serverIp: "ошибка" });
cb(JSON.stringify(ipObject));
}
});
});
You're probably bumping into asynchronicity issues here.
Refactoring things to use async/await and Promise.all(), maybe this is closer to what you want:
function queryP(connection, query, params) {
return new Promise((resolve, reject) => {
connection.query(query, params, (err, result) => {
if (err) {
return reject(err);
}
resolve(result);
});
});
}
async function queryForName(connection, objectName) {
const rows = await queryP(connection, "select 1 from i.goa where object_name = ?", objectName);
if (rows.length >= 2) {
return { objectName, serverIp: "ошибка" };
}
const rows2 = await queryP(connection, "SELECT ip_adress FROM i.gs where server_kod=(SELECT server_kod FROM i.gol where object_kod =(SELECT object_kod FROM i.goa where object_name=?))", objectName);
if (rows2.length > 0) {
return { objectName, serverIp: rows2[0].ip_adress };
}
return { objectName, serverIp: "???" };
}
async function queryForNames(connection, names) {
return {
Objects: await Promise.all(names.map((name) => queryForName(connection, name))),
};
}
// could as well be `const resultObject = await queryForNames(...)` if you're calling this from an async function.
queryForNames(connection, [name1, name2]).then((resultObject) => {
console.log(resultObject);
});
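As an aside, the hand-rolled queryP wrapper can be replaced with Node's util.promisify, since connection.query follows the standard error-first callback convention. A sketch (queryP2 is a hypothetical name; mysql2's built-in promise API would be another option):
const util = require("util");

// promisify resolves with the first success argument (the rows), which is
// all queryForName needs; bind keeps the connection as `this`.
const queryP2 = util.promisify(connection.query).bind(connection);

// usage inside an async function:
// const rows = await queryP2("select 1 from i.goa where object_name = ?", someName);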

AWS Lambda function handler not inserting to Athena

I'm using a snippet example for Amazon Athena just to test inserting some data. I can't tell why it isn't working, and CloudWatch Logs does not show any output when the statement execution completes. Even when I change it to a simple select statement, I can't see any output. I know the query, database, and table are fine, because when I test it using the Athena query editor it executes without a problem.
// Assumed to be defined elsewhere (not shown in the snippet): AWS ("aws-sdk"),
// Queue (e.g. require("async").queue), _ ("lodash"), and the constants
// ATHENA_OUTPUT_LOCATION, ATHENA_DB, RESULT_SIZE, and POLL_INTERVAL.
module.exports.dlr = async event => {
let awsFileCreds = {
accessKeyId: "XXX",
secretAccessKey: "XXX"
};
let creds = new AWS.Credentials(awsFileCreds);
AWS.config.credentials = creds;
let client = new AWS.Athena({ region: "eu-west-1" });
let q = Queue((id, cb) => {
startPolling(id)
.then(data => {
return cb(null, data);
})
.catch(err => {
console.log("Failed to poll query: ", err);
return cb(err);
});
}, 5);
const sql = "INSERT INTO delivery_receipts (status, eventid, mcc, mnc, msgcount, msisdn, received, userreference) VALUES ('TestDLR', 345345, 4353, '5345435', 234, '345754', 234, '8833')"
makeQuery(sql)
.then(data => {
console.log("Row Count: ", data.length);
console.log("DATA: ", data);
})
.catch(e => {
console.log("ERROR: ", e);
});
function makeQuery(sql) {
return new Promise((resolve, reject) => {
let params = {
QueryString: sql,
ResultConfiguration: { OutputLocation: ATHENA_OUTPUT_LOCATION },
QueryExecutionContext: { Database: ATHENA_DB }
};
client.startQueryExecution(params, (err, results) => {
if (err) return reject(err);
q.push(results.QueryExecutionId, (err, qid) => {
if (err) return reject(err);
return buildResults(qid)
.then(data => {
return resolve(data);
})
.catch(err => {
return reject(err);
});
});
});
});
}
function buildResults(query_id, max, page) {
let max_num_results = max ? max : RESULT_SIZE;
let page_token = page ? page : undefined;
return new Promise((resolve, reject) => {
let params = {
QueryExecutionId: query_id,
MaxResults: max_num_results,
NextToken: page_token
};
let dataBlob = [];
go(params);
function go(param) {
getResults(param)
.then(res => {
dataBlob = _.concat(dataBlob, res.list);
if (res.next) {
param.NextToken = res.next;
return go(param);
} else return resolve(dataBlob);
})
.catch(err => {
return reject(err);
});
}
function getResults(params) { // take the params explicitly instead of relying on the closure
return new Promise((resolve, reject) => {
client.getQueryResults(params, (err, data) => {
if (err) return reject(err);
var list = [];
let header = buildHeader(
data.ResultSet.ResultSetMetadata.ColumnInfo
);
let top_row = _.map(_.head(data.ResultSet.Rows).Data, n => {
return n.VarCharValue;
});
let resultSet =
_.difference(header, top_row).length > 0
? data.ResultSet.Rows
: _.drop(data.ResultSet.Rows);
resultSet.forEach(item => {
list.push(
_.zipObject(
header,
_.map(item.Data, n => {
return n.VarCharValue;
})
)
);
});
return resolve({
next: "NextToken" in data ? data.NextToken : undefined,
list: list
});
});
});
}
});
}
function startPolling(id) {
return new Promise((resolve, reject) => {
function poll(id) {
client.getQueryExecution({ QueryExecutionId: id }, (err, data) => {
if (err) return reject(err);
if (data.QueryExecution.Status.State === "SUCCEEDED")
return resolve(id);
else if (
["FAILED", "CANCELLED"].includes(data.QueryExecution.Status.State)
)
return reject(
new Error(`Query ${data.QueryExecution.Status.State}`)
);
else {
setTimeout(poll, POLL_INTERVAL, id);
}
});
}
poll(id);
});
}
function buildHeader(columns) {
return _.map(columns, i => {
return i.Name;
});
}
return { message: 'Go Serverless v1.0! Your function executed successfully!', event };
};
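One plausible explanation for the missing logs (an assumption from reading the snippet, not something verified against the deployed function): the handler is async but never awaits makeQuery, so the Lambda runtime can freeze the execution environment as soon as the handler returns, before startQueryExecution's callbacks and their console.log calls ever run. A minimal fix sketch:
// await the query chain before returning so the runtime doesn't freeze
// the environment while the Athena calls are still pending
try {
  const data = await makeQuery(sql);
  console.log("Row Count: ", data.length);
  console.log("DATA: ", data);
} catch (e) {
  console.log("ERROR: ", e);
}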
Figured it out. Using AWS Lambda events with Athena is easy with the athena-express package. You can specify your configuration and query the Athena database as you normally would, with significantly less code than what's provided in the Amazon Athena Node.js example.
This is the code I used to achieve a result:
"use strict";
const AthenaExpress = require("athena-express"),
aws = require("aws-sdk");
const athenaExpressConfig = {
aws,
db: "messaging",
getStats: true
};
const athenaExpress = new AthenaExpress(athenaExpressConfig);
exports.handler = async event => {
const sqlQuery = "SELECT * FROM delivery_receipts LIMIT 3";
try {
let results = await athenaExpress.query(sqlQuery);
return results;
} catch (error) {
return error;
}
};
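One note on the config above, based on my reading of the athena-express README (treat the key name as an assumption): you can point the wrapper at an explicit S3 staging location instead of letting it create a default results bucket:
const athenaExpressConfig = {
  aws,
  s3: "s3://my-athena-results-bucket", // hypothetical bucket name
  db: "messaging",
  getStats: true
};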

Updating many(100k+) documents in the most efficient way MongoDB

I have a function that runs periodically, that updates the item.price of some Documents in my Prices Collection. The Price Collection has 100k+ items. The function looks like this:
//Just a helper function for multiple GET requests with request.
let _request = (urls, cb) => {
let results = {}, i = urls.length, c = 0;
const handler = (err, response, body) => {
let url = response.request.uri.href;
results[url] = { err, response, body };
if (++c === urls.length) {
cb(results);
}
};
while (i--) {
request(urls[i], handler);
}
};
// function to update the prices in our Prices collection.
const update = (cb) => {
Price.remove({}, (err, remove) => {
if (err) {
return logger.error(`Error removing items...`);
}
logger.info(`Removed all items... Beginning to update.`);
_request(urls, (responses) => {
let url, response, gameid;
for (url in responses) {
gameid = url.split('/')[5].split('?')[0];
response = responses[url];
if (response.err) {
logger.error(`Error in request to ${url}: ${response.err}`);
return;
}
if (response.body) {
logger.info(`Request to ${url} successful.`)
let jsonResult = {};
try {
jsonResult = JSON.parse(response.body);
} catch (e) {
logger.error(`Could not parse.`);
}
logger.info(`Response body for ${gameid} is ${Object.keys(jsonResult).length}.`);
let allItemsArray = Object.keys(jsonResult).map((key, index) => {
return {
itemid: gameid,
hash_name: key,
price: jsonResult[key]
}
});
Price.insertMany(allItemsArray).then(docs => {
logger.info(`Saved docs for ${gameid}`)
}, (e) => {
logger.error(`Error saving docs.`);
});
}
}
if (cb && typeof cb == 'function') {
cb();
}
})
});
}
As you can see, to avoid iterating through 100k+ documents and updating each one separately, I delete them all at the beginning, call the API that gives me these items with prices, and use insertMany to insert all of them into my Prices collection.
This updating process will happen every 30 minutes.
But I just now realised: what if a user wants to check the prices while my Prices collection is empty because it's in the middle of updating itself?
The Question
So do I have to iterate through all of them in order to not delete it? (Remember, there are MANY documents to be updated every 30 mins.) Or is there another solution?
Here's a picture of how my Prices Collection looks (there are 100k docs like these, I just want to update the price property):
Update:
I have re-written my update function a bit and now it looks like this:
const update = (cb = null) => {
Price.remove({}, (err, remove) => {
if (err) {
return logger.error(`Error removing items...`);
}
logger.info(`Removed all items... Beginning to update.`);
_request(urls, (responses) => {
let url, response, gameid;
for (url in responses) {
gameid = url.split('/')[5].split('?')[0];
response = responses[url];
if (response.err) {
logger.error(`Error in request to ${url}: ${response.err}`);
return;
}
if (response.body) {
logger.info(`Request to ${url} successful.`)
let jsonResult = {};
try {
jsonResult = JSON.parse(response.body);
} catch (e) {
logger.error(`Could not parse.`);
}
logger.info(`Response body for ${gameid} is ${Object.keys(jsonResult).length}.`);
let allItemsArray = Object.keys(jsonResult).map((key, index) => {
return {
game_id: gameid,
market_hash_name: key,
price: jsonResult[key]
}
});
let bulk = Price.collection.initializeUnorderedBulkOp();
allItemsArray.forEach(item => {
bulk.find({market_hash_name: item.market_hash_name})
.upsert().updateOne(item);
});
bulk.execute((err, bulkers) => {
if (err) {
return logger.error(`Error bulking: ${err}`);
}
logger.info(`Updated Items for ${gameid}`)
});
// Price.insertMany(allItemsArray).then(docs => {
// logger.info(`Saved docs for ${gameid}`)
// }, (e) => {
// logger.error(`Error saving docs.`);
// });
}
}
if (cb && typeof cb == 'function') {
cb();
}
})
});
}
Notice the bulk variable now (thanks #Rahul), but now the collection takes ages to update. My processor is burning up and it literally takes 3+ minutes to update 60k+ documents. I honestly feel like the previous method, even though it deletes all the documents and then reinserts them, was about 10x faster.
Anyone?
From my experience (updating millions of mongo docs on an hourly basis), here's a realistic approach to very large bulk updates:
- do all your API calls separately and write the results as newline-delimited JSON into a file
- invoke mongoimport to load that file into a new, empty collection prices_new (JavaScript inserts, let alone high-level OO wrappers, are just too slow for that volume)
- rename prices_new -> prices with dropTarget=true (the rename is atomic, hence no downtime)
Schematically, it would look like this in JS:
let fname = '/tmp/data.json';
let apiUrls = [...];
async function doRequest(url) {
// perform a request and return an array of records
}
let responses = await Promise.all(apiUrls.map(doRequest));
// if the data is too big to fit in memory, use streams instead of this:
let data = responses.flat().map(rec => JSON.stringify(rec)).join('\n');
await fs.promises.writeFile(fname, data);
await util.promisify(child_process.exec)(`mongoimport --collection prices_new --drop ${fname}`);
await db.prices_new.renameCollection('prices', true);
There's no need to clear the database and do a fresh insert. You can use the bulkWrite() method for this or use the updateMany() method to do the updates.
You can refactor the existing code to
const update = (cb) => {
_request(urls, responses => {
let bulkUpdateOps = [], gameid;
Object.keys(responses).forEach(url => {
let response = responses[url];
gameid = url.split('/')[5].split('?')[0];
if (response.err) {
logger.error(`Error in request to ${url}: ${response.err}`);
return;
}
if (response.body) {
logger.info(`Request to ${url} successful.`)
let jsonResult = {};
try {
jsonResult = JSON.parse(response.body);
} catch (e) {
logger.error(`Could not parse.`);
}
Object.keys(jsonResult).forEach(key => {
bulkUpdateOps.push({
"updateOne": {
"filter": { market_hash_name: key },
"update": { "$set": {
game_id: gameid,
price: jsonResult[key]
} },
"upsert": true
}
});
if (bulkUpdateOps.length === 1000) {
Price.bulkWrite(bulkUpdateOps).then(result => {
logger.info(`Updated Items`)
}).catch(e => logger.error(`Error bulking: ${e}`));
bulkUpdateOps = []; // reset the batch and continue
}
});
}
});
if (bulkUpdateOps.length > 0) {
Price.bulkWrite(bulkUpdateOps).then(result => {
logger.info(`Updated Items`)
}).catch(e => logger.error(`Error bulking: ${e}`));
}
if (cb && typeof cb == 'function') {
cb();
}
});
}
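One caveat that applies to all the upsert-based variants (an educated guess about the 3+ minute runtime mentioned above, not something profiled): unless market_hash_name is indexed, every upsert filter is a full collection scan, so 60k upserts mean 60k scans. Creating the index once should make the bulk filters cheap:
// one-time setup; createIndex is effectively a no-op if the index already exists
Price.collection.createIndex({ market_hash_name: 1 }, (err, result) => {
  if (err) return logger.error(`Error creating index: ${err}`);
  logger.info(`Index ready: ${result}`);
});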
I have not tested anything, but you can try this; it might be helpful. I am using the Bluebird library for concurrency.
let _request = (url) => {
return new Promise((resolve, reject) => {
request(url, (err, response, body) => {
if (err) {
return reject(err);
}
resolve(body);
});
});
};
const formatResponse = async (response) => {
// do stuff
return {
query: {}, // itemid: id,
body: {}
};
}
const bulkUpsert = (allItemsArray) => {
let bulk = Price.collection.initializeUnorderedBulkOp();
return new Promise((resolve, reject) => {
allItemsArray.forEach(item => {
bulk.find(item.query).upsert().updateOne(item.body);
});
bulk.execute((err, bulkers) => {
if (err) {
return reject(err);
}
return resolve(bulkers);
});
});
}
const getAndUpdateData = async (urls) => {
const allItemsArray = await Promise.all(urls.map(async (url) => {
const requestData = await _request(url); // requests within a chunk run in parallel
const formattedData = await formatResponse(requestData); // returns {query: {}, body: {}}
return formattedData;
}));
return bulkUpsert(allItemsArray);
};
function update() {
// split urls into chunks as per your need, 100/1000
var i, j, chunkUrls = [],
chunk = 100;
for (i = 0, j = urls.length; i < j; i += chunk) {
chunkUrls.push(urls.slice(i, i + chunk));
}
Bluebird.map(chunkUrls, function (chunkOfUrls) {
return getAndUpdateData(chunkOfUrls);
}, {
concurrency: 1 // 1 = one chunk of 100 requests fetched and bulk-upserted at a time
}).then(function () {
console.log("done");
}).catch(function () {
console.log("error");
});
}
