I need to execute an unknown number of HTTP requests in a Node.js program, and they need to happen sequentially: only when one request gets its response should the next request be executed. How can I implement that in JS?
I tried it synchronously with the request package:
function HttpHandler(url) {
    request(url, function (error, response, body) {
        ...
    })
}
HttpHandler("address-1")
HttpHandler("address-2")
...
HttpHandler("address-100")
And asynchronously with request-promise:
async function HttpHandler(url) {
    const res = await request(url)
    ...
}
HttpHandler("address-1")
HttpHandler("address-2")
...
HttpHandler("address-100")
Neither of them works, and as I said, I can have an unknown number of HTTP requests over the program's lifetime; it depends on the end user.
Any ideas on how to handle that?
Use the got() library rather than the request() library, because request() has been deprecated and does not support promises. Then you can use async/await and a for loop to sequence your calls one after another.
const got = require('got');

let urls = [...]; // some array of urls

async function processUrls(list) {
    for (let url of list) {
        await got(url);
    }
}

processUrls(urls).then(() => {
    console.log("all done");
}).catch(err => {
    console.log(err);
});
You are claiming some sort of dynamic list of URLs, but won't show how that works so you'll have to figure out that part of the logic yourself. I'd be happy to show how to solve that part, but you haven't given us any idea how that should work.
If you want a queue that you can regularly add items to, you can do something like this:
class sequencedQueue {
    // fn is a function to call on each item in the queue;
    // if it's asynchronous, it should return a promise
    constructor(fn) {
        this.queue = [];
        this.processing = false;
        this.fn = fn;
    }
    add(...items) {
        this.queue.push(...items);
        return this.run();
    }
    async run() {
        // if not already processing, start processing;
        // because of await, this is not a blocking while loop
        while (!this.processing && this.queue.length) {
            try {
                this.processing = true;
                await this.fn(this.queue.shift());
            } catch (e) {
                // need to decide what to do upon error;
                // this is currently coded to just log the error and
                // keep processing. To end processing, throw an error here.
                console.log(e);
            } finally {
                this.processing = false;
            }
        }
    }
}
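For example, the queue could drive got() directly, so each URL added is fetched only after the previous one finishes (a minimal sketch; the URLs are placeholders and addMoreWork just stands in for however your program receives new work from the end user):

const queue = new sequencedQueue(got);

// initial batch
queue.add("address-1", "address-2");

// later, whenever the end user supplies another URL
function addMoreWork(url) {
    queue.add(url);
}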
Let's say this is my code (just a sample I wrote up to show the idea):
var extract = require("./postextract.js");
var rescore = require("./standardaddress.js");

RunFunc();

function RunFunc() {
    extract.Start();
    console.log("Extraction complete");
    rescore.Start();
    console.log("Scoring complete");
}
And I want rescore.Start() not to run until extract.Start() has completely finished. Both scripts contain a spiderweb of functions, so putting a callback directly into the Start() function does not appear viable because the final function won't return to it, and I am having a lot of trouble understanding how to use Promises. What are some ways I can make this work?
These are the functions that extract.Start() begins and ends with. OpenWriter() is reached through multiple other functions and streams, with the actual fileWrite.write() being in another script attached to this one (although that isn't needed to detect the end of the run). Currently, fileWrite.on('finish') is where I want the script to be considered done.
module.exports = {
    Start: function CodeFileRead() {
        //this.country = countryIn;
        //Read stream of the address components
        fs.createReadStream("Reference\\" + postValid.country + " ADDRESS REF DATA.csv")
            //Change separator based on file
            .pipe(csv({escape: null, headers: false, separator: delim}))
            //Indicate start of reading
            .on('resume', (data) => console.log("Reading complete postal code file..."))
            //Processes lines of data into storage array for comparison
            .on('data', (data) => {
                postValid.addProper[data[1]] = JSON.stringify(Object.values(data)).replace(/"/g, '').split(',').join('*');
            })
            //End of reading file
            .on('end', () => {
                postValid.complete = true;
                console.log("Done reading");
                //Launch main script, delayed to here in order to not read ahead of this stream
                ThisFunc();
            });
    },
    extractDone
}

function OpenWriter() {
    //File stream for writing the processed chunks into a new file
    fileWrite = fs.createWriteStream("Processed\\" + fileName.split('.')[0] + "_processed." + fileName.split('.')[1]);
    fileWrite.on('open', () => console.log("File write is open"));
    fileWrite.on('finish', () => {
        console.log("File write is closed");
    });
}
EDIT: I do not want to simply append the next script onto the end of the previous one and forgo the master file, because I don't know how long the chain will be and it's supposed to be designed to take additional scripts past our development period. I cannot just use a package as it stands, because approval time in the company takes up to two weeks and I need this more immediately.
DOUBLE EDIT: This is all my code; every script and function is written by me, so I can make the scripts being called do whatever is needed.
You can just wrap your function in a Promise and return it.
module.exports = {
    Start: function CodeFileRead() {
        return new Promise((resolve, reject) => {
            fs.createReadStream(
                'Reference\\' + postValid.country + ' ADDRESS REF DATA.csv'
            )
            // .......some code...
            .on('end', () => {
                postValid.complete = true;
                console.log('Done reading');
                resolve('success');
            });
        });
    }
};
And run RunFunc like this:
async function RunFunc() {
    await extract.Start();
    console.log("Extraction complete");
    await rescore.Start();
    console.log("Scoring complete");
}

// call it (or wrap the whole thing in an async IIFE)
RunFunc().then(() => {
    console.log("All Complete");
})
Note: you can/should also handle errors by calling reject("some error") when an error occurs.
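For instance, reject can be wired to the read stream's 'error' event so a failure surfaces in RunFunc's catch/.catch (a sketch based on the Start function above; 'error' is the standard Node stream event):

return new Promise((resolve, reject) => {
    fs.createReadStream('Reference\\' + postValid.country + ' ADDRESS REF DATA.csv')
        .on('error', (err) => reject(err)) // surface stream failures
        // .......some code...
        .on('end', () => {
            postValid.complete = true;
            resolve('success');
        });
});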
EDIT after learning about ThisFunc():
Making a new event emitter will probably be the easiest solution:
eventEmitter.js
const EventEmitter = require('events').EventEmitter
module.exports = new EventEmitter()

const eventEmitter = require('./eventEmitter');

module.exports = {
    Start: function CodeFileRead() {
        return new Promise((resolve, reject) => {
            //after all of your code
            eventEmitter.once('WORK_DONE', () => {
                resolve("Done");
            })
        });
    }
};
function OpenWriter() {
    ...
    fileWrite.on('finish', () => {
        console.log("File write is closed");
        eventEmitter.emit("WORK_DONE");
    });
}
And run RunFunc as before.
There's no generic way to determine when everything a function call does has finished.
It might accept a callback. It might return a promise. It might not provide any kind of method to determine when it is done. It might have side effects that you could monitor by polling.
You need to read the documentation and/or source code for that particular function.
Use async/await (promises). This assumes each Start() returns a promise, for example via the Promise wrapper shown above. Example:
var extract = require("./postextract.js");
var rescore = require("./standardaddress.js");

RunFunc();

async function extract_start() {
    try {
        await extract.Start()
    }
    catch(e) {
        console.log(e)
    }
}

async function rescore_start() {
    try {
        await rescore.Start()
    }
    catch(e) {
        console.log(e)
    }
}

async function RunFunc() {
    await extract_start();
    console.log("Extraction complete");
    await rescore_start();
    console.log("Scoring complete");
}
I want to have two concurrent loops in a modern nodeJS tool.
One loop scans for interesting things and pushes them onto an array.
The second loop shifts things off the array and processes them.
The idea is that the two loops don't block each other. The second loop will be able to catch up on the processing backlog whenever the first loop is doing a lot of scanning but not finding much. Only when there is no backlog and no fresh data coming in will the processing loop have to actually wait.
I've implemented the scanning loop in several ways. It's the easy part. (In this case, I'm recursively scanning the fs looking for files of a certain type.)
For the processing loop, I have it working by polling but feel I should be able to get it working with pure async/await.
But I can't get my head around it. Conceptually the scanning loop should fulfill a promise to alert the second loop that there's something in the array, or it should be a generator yielding each new value instead of using an array.
But I can't see how to fulfill a promise over and over, or how to wait on array activity directly, or how to do it as a generator, without causing blocking between the two loops.
I must be overthinking it! What am I missing?
Code working via polling, with commented out bits where an async/await implementation might belong:
"use strict";
const { basename, join } = require('path')
const { promisify } = require('util')
const fs = require('fs')
const readdir = promisify(fs.readdir)
const lstat = promisify(fs.lstat)
async function* scanpaths(paths) {
    for (const path of paths) {
        yield* scanonepath(path)
    }
}

async function* scanonepath(path) {
    try {
        const s = await lstat(path)
        if (s.isDirectory()) {
            for (const entry of await readdir(path)) {
                yield* scanonepath(join(path, entry))
            }
        } else if (/\.[mM][pP]3$/.test(path)) {
            yield { pathname: path, basename: basename(path), stat: s }
        }
    } catch (e) {
        // special file, deleted file, etc
    }
}

async function* checkqueue(buf) {
    if (buf.length) {
        yield buf.shift()
    } else {
        // TODO await something to arrive in the buf - HOW?
    }
}
async function processmp3(fullname, name, stat) {
    try {
        console.log(fullname)
        // TODO tricky processing goes here
    } catch (e) {
        console.log(name, e)
    }
}
(async () => {
    let int = null
    let globaldone = false
    let globalprocessing = []
    let buf = []

    async function poll() {
        if (buf.length) {
            let clone = Array.from(buf)
            buf.length = 0 // NOT buf = [] as that doesn't change other refs to buf
            for (let e of clone) {
                globalprocessing.push(processmp3(e.pathname, e.basename, e.stat))
            }
        }
        if (globaldone) {
            await Promise.all(globalprocessing)
            console.warn("*** finished processing")
        } else {
            setTimeout(poll, 125)
        }
    }

    // start polling for scanned files ready to process
    console.log("** start polling")
    setTimeout(poll, 0)

    //console.log("** start scanning")
    //await enqueue(buf)
    globaldone = true
    console.warn("*** finished scanning")
})()
// TODO how?
async function enqueue(buf) {
    // start scanning by iterating over our generator which does the recursive directory stuff
    for await (const file of scanpaths(process.argv.slice(2))) {
        buf.push(file)
        // TODO resolve a promise to notify dequeue? or also yield this file
    }
}
Have the scanner loop call a function whenever it finds something and pushes it, to tell the processing queue that something was added to the array. If that function is called while the processing queue is actively doing something, do nothing; otherwise, if the processing queue is idle, that tells the processing queue to shift an item from the scanning array and process it.
Of course, when the processing queue is finished processing an item, have it check to see if there are additional items to be processed, and if there are, process the next one immediately. This ensures that the processing queue will only be idle when there are no more items to process, and it'll start running again as soon as a new item gets pushed.
For example, with your code, you could do something like this instead:
let processing = false;

async function processNext() {
    // nothing to do, or already busy with an item
    if (processing || buf.length === 0) {
        return;
    }
    processing = true;
    const e = buf.shift();
    const mp3 = await processmp3(e.pathname, e.basename, e.stat);
    // do something with parsed mp3?
    processing = false;
    processNext();
}
async function enqueue(buf) {
    for await (const file of scanpaths(process.argv.slice(2))) {
        buf.push(file);
        processNext();
    }
}
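To kick things off you would call enqueue once; items are then processed as each one is pushed, and the returned promise tells you when scanning (not necessarily processing) is finished (a small usage sketch, not part of the original code):

enqueue(buf).then(() => {
    console.log("*** finished scanning; remaining items will drain via processNext");
});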
Actually, I'm not sure the title of my question is correct; if you have a better idea, leave a comment and I'll rename it.
I am trying to rewrite my old function which makes HTTP requests and inserts many objects into MongoDB via Mongoose. I already have a working version of it, but I face a problem while using it: when I try to insertMany 20 arrays from 20+ requests, with ~50,000 elements per request, it causes a huge memory leak, even with MongoDB optimization.
Logic of my code:
function main() {
    server.find({locale: "en_GB"}).exec(function (err, server) {
        for (let i = 0; i < server.length; i++) { // for example 20 servers
            rp({url: server[i].slug}).then(response => {
                auctions.count({
                    server: server[i].name,
                    lastModified: {$gte: response.data.files[0].lastModified}
                }).then(function (docs) {
                    if (docs < 0) {
                        // We don't insert data if they are already up-to-date
                    }
                    else {
                        // I needed response.data.files[0].url and server[i].name from prev. block
                        // And here is my problem:
                        requests & insertMany and then => loop main()
                    }
                })
            }).catch(function (error) {
                console.log(error);
            })
        }
    })
}
main()
Actually I have already tried many different things to fix it. First of all, I tried adding a setTimeout after the else block like this:
setTimeout(function () {
    //request every server with interval, instead of all at once
}, 1000 * (i + 1));
but I created another problem for myself, because I needed to call my main() function recursively right after. So I can't use if (i === server.length - 1) to call the garbage collector or to restart main(), because not all servers skip the count validation.
Or let's see another example of mine:
I changed the for (let i = 0; i < server.length; i++) on the third line to a .map and moved it down next to the else block, but setTimeout doesn't work with the .map version; as you may already understand, the script loses its correct order and I can't introduce a delay with it.
Actually, I already understand how to fix it in principle: just build an array via let array_new = [] and array_new.push(response.data.files[0].url) with async/await. But I'm not a big expert in it, so I have already wasted a couple of hours. The only problem for now is that I don't know how to return values from the else block.
For now I'm trying to form the array inside the else block:
function main() {
--added let array_new = [];
[v1]array_new.url += response.data.files[0].url;
[v2]array_new.push(response.data.files[0].url);
return array_new
and then consume array_new via .then, but none of these works so far. So maybe someone can give me a tip or point me to an already answered Stack Overflow question that could be useful in my situation.
Since you are essentially dealing with promises, you can refactor your function logic to use async/await as follows:
async function main() {
    try {
        const servers = await server.find({locale: "en_GB"}).exec()
        // run the per-server work in parallel and wait for all of it
        const results = await Promise.all(servers.map(async ({ name, slug }) => {
            const response = await rp({ url: slug })
            const { lastModified, url } = response.data.files[0]
            const count = await auctions.count({
                server: name,
                lastModified: { $gte: lastModified }
            })
            let result = {}
            if (count > 0) result = { name, url }
            return result
        }))
        // drop the servers that were already up to date
        const data = results.filter(d => Object.keys(d).length > 0)
        await Model.insertMany(data)
    } catch (err) {
        console.error(err)
    }
}
Your problem is with logic obscured by your promises. Your main function recursively calls itself N times, where N is the number of servers. This builds up exponentially, eating memory in both the Node process and MongoDB, which has to handle all the requests.
Instead of jumping into async/await, start by using the promises and waiting for the batch of N queries to complete before starting another batch. You can use Promise.all for this.
function main() {
    server.find({locale: "en_GB"}).exec(function (err, server) {
        // need to keep track of each promise for each server
        let promises = []
        for (let i = 0; i < server.length; i++) {
            let promise = rp({
                url: server[i].slug
            }).then(function (response) {
                // instead of nesting promises, return the promise so it is handled by
                // the next then in the chain.
                return auctions.count({
                    server: server[i].name,
                    lastModified: {
                        $gte: response.data.files[0].lastModified
                    }
                });
            }).then(function (docs) {
                if (docs > 0) {
                    // do whatever you need to here regarding making requests and
                    // inserting into DB, but don't call main() here.
                    return requestAndInsert();
                }
            }).catch(function (error) {
                console.log(error);
            })
            // add the above promise to our list.
            promises.push(promise)
        }
        // register a new promise to run once all of the above promises generated
        // by the loop have been completed
        Promise.all(promises).then(function () {
            // now you can call main again, optionally in a setTimeout so it waits a
            // few seconds before fetching more data.
            setTimeout(main, 5000);
        })
    })
}
main()
I am trying to write a function that:
1. Takes an array of URLs
2. Gets files from the URLs in parallel (order is irrelevant)
3. Processes each file
4. Returns an object with the processed files
Furthermore, I don't need errors in #2 or #3 to affect the rest of the execution in my application in any way; the app could continue even if all the requests or processing failed.
I know how to fire all the requests in a loop, then once I have all the data, fire the callback to process the files, by using this insertCollection pattern.
However, this is not efficient, as I shouldn't need to wait for ALL files to download before attempting to process them - I would like to process them as each download finishes.
So far I have this code:
const request = require('request');

const urlArray = [urlA, urlB, urlC];
const results = {};
let count = 0;
let processedResult;

const makeRequests = function (urls, callback) {
    for (let url of urls) {
        request(url, function (error, response, body) {
            if (error) {
                callback(error);
                return;
            }
            processedResult = callback(null, body)
            if (processedResult) {
                console.log(processedResult); // prints correctly!
                return processedResult;
            }
        })
    }
};

const processResult = function (error, file) {
    if (error) {
        console.log(error);
        results.errors.push(error);
    }
    const processedFile = file + `<!-- Hello, Dolly! ${count}-->`;
    results.processedFiles.push(processedFile);
    if (++count === urlArray.length) {
        return results;
    }
};

const finalResult = makeRequests(urlArray, processResult);
console.log(finalResult); // undefined;
In the last call to processResult I manage to send a return, and makeRequests captures it, but I'm failing to "rein it in" in finalResult after that.
My questions are:
1. Why is this not working? I can print a well-formed processedResult on the last iteration of makeRequests, but somehow I cannot return it back to the caller (finalResult).
2. How can this be solved, ideally "by hand", without promises or the help of libraries like async?
The makeRequests function returns undefined to finalResult because makeRequests itself is synchronous: it fires off the requests and immediately runs to the end of the function body. Since it has no return statement of its own, it returns undefined by default; the return inside the request callback only returns from that callback, long after finalResult has already been assigned.
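One way to make it work "by hand", without promises, is to give makeRequests a completion callback and only invoke it once every request has settled, using a counter (a sketch under that assumption; done, remaining, and processOne are new names, not from the original code):

const request = require('request');

function makeRequests(urls, processOne, done) {
    const results = { processedFiles: [], errors: [] };
    let remaining = urls.length;
    for (const url of urls) {
        request(url, (error, response, body) => {
            if (error) {
                results.errors.push(error);
            } else {
                // process each file as soon as its download finishes
                results.processedFiles.push(processOne(body));
            }
            if (--remaining === 0) {
                done(results); // every request has now succeeded or failed
            }
        });
    }
}

makeRequests(urlArray, body => body + '<!-- Hello, Dolly! -->', finalResult => {
    console.log(finalResult); // defined here, inside the callback
});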
I have some asynchronous code running inside a JavaScript forEach loop. I want to wait until the code inside the asynchronous process has finished running before proceeding after the loop.
Example below:
ids is an array of strings. db is a node module I created to work with MongoDB
var appIdsNotFound = "";
var count = 0;
ids.forEach(function (id) {
    output[count] = {};
    //console.log(id);
    db.findApp(id, function (error, result) {
        if (error) {
            fatalError = true;
            console.log(error);
        } else {
            if (result) {
                output[count] = result;
                //console.log(output[count]);
                count++;
            } else {
                appNotFound = true;
                appIdsNotFound += id + ", ";
                console.log(appIdsNotFound);
            }
        }
    });
});
//more code that we want to wait before executing
Is there a way to wait before executing the rest of the code that is outside the loop, and if so, how would I go about doing that?
1. Assuming db is some module to access your DB, try to look for a synchronous version. This assumes you are OK with synchronous, since you're attempting to write it that way, waiting for everything before proceeding.
2. If your db library uses promises, you can use it in conjunction with Promise.all. Fire a request for each item, collect all their promises in an array, and feed them to Promise.all. The promise from Promise.all will resolve when all the promises resolve.
const promises = ids.map(id => db.promiseReturningFindApp(id));
const allRequests = Promise.all(promises).then(responses => {
// responses is an array of all results
});
3. If you don't have a promise-returning version of your API, wrap db.findApp in a promise and then do suggestion #2.
function promiseReturningFindApp(id) {
    return new Promise((resolve, reject) => {
        db.findApp(id, (error, result) => {
            if (error) reject(error);
            else resolve(result);
        });
    });
}
Options 2 and 3 are asynchronous, and as such, you technically don't "wait". Therefore, code that needs to execute after can only reside in a callback.
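For example, the "//more code that we want to wait before executing" from the question would move into that callback (a sketch, assuming the promise-returning wrapper shown above):

Promise.all(ids.map(id => promiseReturningFindApp(id)))
    .then(results => {
        // ...more code that we want to wait before executing...
    })
    .catch(err => console.log(err));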
You could turn every item into a function and use async:
var async = require('async');

var output = [], appsNotFound = [];

var appRequests = ids.map((id) => (cb) => {
    db.findApp(id, (error, result) => {
        if (error) {
            appsNotFound.push(id);
            return cb();
        }
        output.push(result);
        return cb();
    })
})

async.parallel(appRequests, () => {
    console.log('N# of apps found:', output.length);
    console.log("Ids not found:", appsNotFound.join(','));
    console.log("N# of apps not found:", appsNotFound.length);
})
If the DB can't handle that many parallel requests, use async.series instead.
You can do something similar with promises if you prefer, but this way requires fewer lines of code.