I am trying to run the Node.js Lighthouse function serially (one at a time) over an array of URLs. My problem is that whenever I loop through the array, Lighthouse runs against all the URLs at once, which I imagine is problematic if you have a very large array of URLs.
The code:
function launchChromeAndRunLighthouse(url, opts, config = null) {
  return chromeLauncher.launch({chromeFlags: opts.chromeFlags}).then(chrome => {
    opts.port = chrome.port;
    return lighthouse(url, opts, config).then(results => {
      return chrome.kill().then(() => results.lhr);
    });
  });
}

for (let url of urls) {
  launchChromeAndRunLighthouse(url, opts).then(results => {
    // Use results!
  });
}
Please help! And thank you for your time!
Your answer is correct, but it can be improved. Since you have access to async and await, you should fully utilize them to make your code cleaner:
async function launchChromeAndRunLighthouse(url, opts, config = null) {
  const chrome = await chromeLauncher.launch({chromeFlags: opts.chromeFlags});
  opts.port = chrome.port;
  const { lhr } = await lighthouse(url, opts, config);
  await chrome.kill();
  return lhr;
}

async function launchAudit(urls) {
  for (const url of urls) {
    const results = await launchChromeAndRunLighthouse(url, opts);
    // Use results!
  }
}

launchAudit(urls);
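If you want the audits collected rather than handled inline, a small variation should work (auditAll and the returned array are illustrative, not part of the original answer):

async function auditAll(urls) {
  const reports = [];
  for (const url of urls) {
    // Each audit finishes (and its Chrome instance is killed) before the next starts
    reports.push(await launchChromeAndRunLighthouse(url, opts));
  }
  return reports;
}

// auditAll(urls).then(reports => console.log(reports.length));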
I believe I figured it out. What I did is below. Please continue to send feedback if you think this is wrong.
function launchChromeAndRunLighthouse(url, opts, config = null) {
  return chromeLauncher.launch({chromeFlags: opts.chromeFlags}).then(chrome => {
    opts.port = chrome.port;
    return lighthouse(url, opts, config).then(results => {
      return chrome.kill().then(() => results.lhr);
    });
  });
}

async function launchAudit(urls) {
  for (let url of urls) {
    await launchChromeAndRunLighthouse(url, opts).then(results => {
      // Use results!
    });
  }
}

launchAudit(urls);
A variation on Patric Roberts' answer (which should be the accepted answer).
I was wondering if it was necessary to kill Chrome on every iteration.
const lighthouse = require('lighthouse');
const chromeLauncher = require('chrome-launcher');

function launchChromeAndRunLighthouse(sites, opts, config = null) {
  return chromeLauncher.launch({chromeFlags: opts.chromeFlags}).then(chrome => {
    opts.port = chrome.port;
    const siteResults = [];
    return new Promise((resolve, reject) => {
      // Batch async functions.
      // C/O https://stackoverflow.com/questions/43082934/how-to-execute-promises-sequentially-passing-the-parameters-from-an-array
      const runBatch = async (iterable, action) => {
        for (const x of iterable) {
          await action(x);
        }
      };
      // Func to run lighthouse
      const doLightHouse = (site) => new Promise((resolve, reject) => {
        lighthouse(site, opts, config).then(results => {
          siteResults.push(results.lhr);
          resolve();
        });
      });
      // Go go go
      runBatch(sites, doLightHouse).then(d => {
        chrome.kill().then((result) => {
          resolve(siteResults);
        });
      });
    });
  });
}

const opts = {
  chromeFlags: ['--show-paint-rects'],
  onlyCategories: ['performance']
};

const sites = ['https://www.example.com', 'https://www.test.com'];

launchChromeAndRunLighthouse(sites, opts).then(results => {
  // Use results!
  console.log(results);
});
Just to execute your code as a test, we'll use async/await and an IIFE.
Then we'll create a function that pushes all our requests into an array of unresolved promises, so we can use them with Promise.all().
You need to rewrite your code into something like this:
(async () => {
  const promisesToExecute = [];

  const launchChromeAndRunLighthouse = async (url, opts, config = null) => {
    const chrome = await chromeLauncher.launch({chromeFlags: opts.chromeFlags});
    opts.port = chrome.port;
    promisesToExecute.push(lighthouse(url, opts, config));
  };

  const results = await Promise.all(promisesToExecute);

  for (const result of results) {
    const resolvedResult = await result.kill();
    // Here you can access your results.lhr
    console.log(resolvedResult.lhr);
  }
})();
Please note, this code wasn't tested, so there might be problems with kill() on result. But, the main goal is to answer your question and explain how to execute promises.
Also, if you don't want to execute all the promises at the same time, you could use a Promise.waterfall-style flow with some npm package.
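For reference, the waterfall idea can also be sketched without any package, chaining the promises with Array.prototype.reduce (runSequentially is an illustrative name, not an npm API):

const runSequentially = (urls, fn) =>
  urls.reduce(
    // Each step waits for the accumulated chain, runs fn, and appends its result
    (chain, url) => chain.then(acc => fn(url).then(result => [...acc, result])),
    Promise.resolve([])
  );

// runSequentially(urls, url => launchChromeAndRunLighthouse(url, opts))
//   .then(allResults => console.log(allResults));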
Related
I have a function that reads files in a directory asynchronously (readdir) and filters for CSV files. I also have an async function that calls readdir, filters the result for CSV files, and then iterates through them with fast-csv. Logging the list and its length to the console within the .on('end') handler, I can see that they produce the desired results. However, my async call only resolves the first iteration.
const fs = require(`fs`);
const path = require(`path`);
const csv = require(`fast-csv`);

var ofsActivities = [];
const currDir = path.join(__dirname + `/../Downloads/`);

const readdir = async dirname => {
  return new Promise((resolve, reject) => {
    fs.readdir(dirname, (error, filenames) => {
      error ? reject(error) : resolve(filenames);
    });
  });
};

const filtercsvFiles = (filename) => {
  return filename.split(`.`)[1] == `csv`;
};

const ofsDataObjectArray = async () => {
  return readdir(currDir).then(async filenames => {
    return await new Promise((resolve, reject) => {
      filenames = filenames.filter(filtercsvFiles);
      for (let i = 0; i < filenames.length; i++) {
        let currFilePath = currDir + filenames[i];
        console.log(`Reading File: ${filenames[i]}`);
        csv
          .parseFile(currFilePath)
          .on(`data`, (data) => {
            // Doing stuff
          })
          .on(`error`, error => reject(error))
          .on(`end`, () => resolve(ofsActivities)); // Inserting a console.log(ofsActivities.length) here logs the correct and expected length on the last iteration
      }
    });
  });
};

(async () => {
  let list = await ofsDataObjectArray(); // This seems to only resolve the first iteration within the promise
  console.log(list.length);
})();
You need to call resolve() only when the LAST csv.parseFile() is done. You're calling it when the FIRST one is done, thus the promise doesn't wait for all the others to complete. I'd suggest you promisify csv.parseFile() by itself and then await that inside the loop or accumulate all the promises from csv.parseFile() and use Promise.all() with all of them.
Here's using await on each csv.parseFile():
const ofsDataObjectArray = async () => {
  return readdir(currDir).then(async filenames => {
    filenames = filenames.filter(filtercsvFiles);
    for (let i = 0; i < filenames.length; i++) {
      let currFilePath = currDir + filenames[i];
      console.log(`Reading File: ${filenames[i]}`);
      await new Promise((resolve, reject) => {
        csv.parseFile(currFilePath)
          .on(`data`, (data) => {
            // Doing stuff
          })
          .on(`error`, reject)
          .on(`end`, () => resolve(ofsActivities));
      });
    }
    return ofsActivities;
  });
};
Or, here's running them in parallel with Promise.all():
const ofsDataObjectArray = async () => {
  return readdir(currDir).then(filenames => {
    filenames = filenames.filter(filtercsvFiles);
    return Promise.all(filenames.map(file => {
      let currFilePath = currDir + file;
      console.log(`Reading File: ${file}`);
      return new Promise((resolve, reject) => {
        csv.parseFile(currFilePath)
          .on(`data`, (data) => {
            // Doing stuff
          })
          .on(`error`, error => reject(error))
          .on(`end`, () => resolve(ofsActivities));
      });
    }));
  });
};
P.S. It's unclear from your question what final result you're trying to accumulate (you have left that out), so you will have to add that yourself, either in the "Doing stuff" handler or by modifying what you pass to resolve().
I couldn't get my for loop to run synchronously. The order of the registration records it returns for each domain changes; I guess whichever server answers fastest gets its records in first. Where am I going wrong?
const rrtypes = ["A", "MX", "CNAME", "NS", "TXT"];
var resData = [];

export const getAllRecords = async (req, res) => {
  const { domain } = req.params;
  for await (const rrtype of rrtypes) {
    dns.resolve(domain, rrtype, (err, records) => {
      resData.push(rrtype + " " + records);
    });
  }
  res.send(resData);
  resData = [];
};
Notice that you can use the dns.promises.resolve function: https://nodejs.org/api/dns.html#dns_dnspromises_resolve_hostname_rrtype
So you would change your code to:
const rrtypes = ["A", "MX", "CNAME", "NS", "TXT"];

export const getAllRecords = async (req, res) => {
  const { domain } = req.params;
  // Notice that resData should be local to this function
  let resData = [];
  for (const rrtype of rrtypes) {
    try {
      const records = await dns.promises.resolve(domain, rrtype);
      resData.push(rrtype + " " + records);
      // IMO this would be better: resData.push({ type: rrtype, value: records });
    } catch (err) {
      // Log the error here
    }
  }
  res.send(resData);
};
The problem lies in your use of await. await should be applied to promises returned by async functions; at the moment you are using for await merely to iterate over a plain array, while the dns.resolve callbacks themselves are never awaited at all.
Thanks to a comment by slebetman I was able to see that dns.resolve does not return a promise but uses callbacks to manage the async calls. As he also suggested, we can fix this by wrapping those callbacks in a promise.
const rrtypes = ["A", "MX", "CNAME", "NS", "TXT"];
var resData = [];

const dnsResolvePromise = (domain, rrtype) => {
  return new Promise((resolve, reject) => {
    dns.resolve(domain, rrtype, (err, records) => {
      if (err) return reject(err);
      resolve(rrtype + " " + records);
    });
  });
};

export const getAllRecords = async (req, res) => {
  const { domain } = req.params;
  for (const rrtype of rrtypes) {
    try {
      const records = await dnsResolvePromise(domain, rrtype);
      resData.push(records);
    } catch (e) {
      // Handle error
    }
  }
  res.send(resData);
  resData = [];
};
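As an aside, since dns.resolve follows Node's error-first callback convention, the built-in util.promisify can produce an equivalent wrapper (a sketch; the string concatenation from the answer above would then happen at the call site):

const util = require('util');
const dns = require('dns');

// Resolves with the records array, rejects with the error
const resolveAsync = util.promisify(dns.resolve);

// const records = await resolveAsync(domain, rrtype);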
The problem is with uplines.push.
I always get an empty uplines array, so the last part of the code doesn't run; the promises resolve later and I do get the correct data. May I know how to go about doing it the correct way?
const getAllUplines = async () => {
  uplines = [];
  const findUser = async (userFid) => {
    const userDoc = await firestore.collection("users").doc(userFid).get();
    if (userDoc.exists) {
      const user = { ...userDoc.data(), id: userDoc.id };
      console.log(user);
      uplines.push(user);
      if (user.immediateUplineFid) {
        findUser(user.immediateUplineFid); // Self looping
      }
    } else {
      console.log("No User Found");
      return null;
    }
  };
  sale.rens.forEach(async (ren) => {
    findUser(ren.userFid);
  });
  console.log(uplines);
  return uplines;
};
let uplines = await getAllUplines();
console.log(uplines);
uplines = uplines.filter(
  (v, i) => uplines.findIndex((index) => index === v) === i
); // Remove duplicates

uplines.forEach((user) => {
  if (user.chatId) {
    sendTelegramMessage(user.chatId, saleToDisplay, currentUser.displayName);
    console.log("Telegram Message Sent to " + user.displayName);
  } else {
    console.log(user.displayName + " has no chatId");
  }
});
There are a few things you have missed while implementing the async calls; they are explained in the inline comments in the code snippet.
In short, on the line sale.rens.forEach you are passing an async function as the argument, which makes no difference to forEach: it executes the callback without waiting for it to complete.
Therefore in my answer I am using Promise.all to wait for all the async function calls to complete before returning the result.
// This is wrapped in an immediately executed async function because await at the root is not supported here
(async () => {
  const mockGetData = () => new Promise(resolve => setTimeout(resolve, 1000));
  const sale = {
    rens: [
      { userFid: 1 },
      { userFid: 2 },
      { userFid: 3 }
    ]
  };

  const getAllUplines = async () => {
    const uplines = [];
    const findUser = async (userFid) => {
      // Simulating an async function call
      const userDoc = await mockGetData();
      console.log("User data received");
      uplines.push(`User ${userFid}`);
    };
    const promises = [];
    sale.rens.forEach(ren => { // This callback does not have to be declared async
      // findUser is an async function, so it returns a promise; keep track of all the returned promises for later
      promises.push(findUser(ren.userFid));
    });
    await Promise.all(promises);
    return uplines;
  };

  let uplines = await getAllUplines();
  console.log(uplines);
})();
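Equivalently, map can build the promise array in one expression instead of forEach plus push; inside getAllUplines the loop could be replaced with:

// Each findUser call starts immediately; Promise.all waits for all of them
await Promise.all(sale.rens.map(ren => findUser(ren.userFid)));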
In order to get the results of getAllUplines() properly, you need to add await to all async functions called in getAllUplines().
const getAllUplines = async () => {
  uplines = [];
  const findUser = async (userFid) => {
    const userDoc = await firestore.collection("users").doc(userFid).get();
    if (userDoc.exists) {
      const user = { ...userDoc.data(), id: userDoc.id };
      console.log(user);
      uplines.push(user);
      if (user.immediateUplineFid) {
        await findUser(user.immediateUplineFid); // Self looping
      }
    } else {
      console.log("No User Found");
      return null;
    }
  };
  sale.rens.forEach(async (ren) => {
    await findUser(ren.userFid);
  });
  console.log(uplines);
  return uplines;
};
I know that an old-school for loop works in the traditional way: it waits for each await to finish before moving on.
But in my use case, I need to read a file from local/S3 and process it line by line, and for each line I need to call an external API.
Generally I use await inside the loop because everything runs inside a Lambda, and I don't want to exhaust its memory by running the calls in parallel.
Here I am reading the file using stream.on(), and in order to use await inside the handler I need to mark it async, like so:
stream.on('data', async () => {
  while ((data = stream.read()) !== null) {
    console.log('line');
    const requests = getRequests(); // Sync code, no problems
    for (let i = 0; i < requests.length; i++) {
      const result = await apiCall(requests[i]);
      console.log('result from api');
      const finalResult = await anotherapiCall(result.data);
    }
  }
});
This is working, but the order in which the lines are processed is not guaranteed. I need everything to run strictly in order. Any help?
Complete Code
async function processSOIFileLocal(options, params) {
  console.log('Process SOI file');
  const readStream = byline.createStream(fs.createReadStream(key));
  readStream.setEncoding('utf8');
  const pattern = /^UHL\s|^UTL\s/;
  const regExp = new RegExp(pattern);
  readStream.on('readable', async () => {
    let line;
    while (null !== (line = readStream.read())) {
      if (!regExp.test(line.toString())) {
        totalRecordsCount++;
        dataObject = soiParser(line);
        const { id } = dataObject;
        const XMLRequests = createLoSTRequestXML(
          options,
          { mapping: event.mapping, row: dataObject }
        );
        console.log('Read line');
        console.log(id);
        try {
          for (let i = 0; i < XMLRequests.length; i++) {
            totalRequestsCount++;
            console.log('Sending request');
            const response = await sendLoSTRequest(
              options,
              { data: XMLRequests[i], url: LOST_URL }
            );
            console.log('got response');
            const responseObj = await xml2js.parseStringPromise(response.data);
            if (Object.keys(responseObj).indexOf('errors') !== -1) {
              fs.writeFileSync(`${ERR_DIR}/${generateKey()}-${id}.xml`, response.data);
              failedRequestsCount++;
            } else {
              successRequestsCount++;
              console.log('Response from the Lost Server');
              console.log(response.data);
            }
          }
        } catch (err) {
          console.log(err);
        }
      }
    }
  })
  .on('end', () => {
    console.log('file processed');
    console.log(`
    ************************************************
    Total Records Processed: ${totalRecordsCount}
    Total Requests Sent: ${totalRequestsCount}
    Success Requests: ${successRequestsCount}
    Failed Requests: ${failedRequestsCount}
    ************************************************
    `);
  });
}
async function sendLoSTRequest(options, params) {
  const { axios } = options;
  const { url, data } = params;
  if (url) {
    return axios.post(url, data);
  // eslint-disable-next-line no-else-return
  } else {
    console.log('URL is not found');
    return null;
  }
}
Code needs to flow like so:
read a line in a sync way
process the line and transform the line into an array of two members
for every member call API and do stuff
once line is complete, look for another line, all done in order
UPDATE: I got a workaround, but it fires the stream's end handler without waiting for the queued requests to finish:
async function processSOIFileLocal(options, params) {
  console.log('Process SOI file');
  const { ERR_DIR, fs, xml2js, LOST_URL, byline, event } = options;
  const { key } = params;
  const responseObject = {};
  let totalRecordsCount = 0;
  let totalRequestsCount = 0;
  let failedRequestsCount = 0;
  let successRequestsCount = 0;
  let dataObject = {};
  const queue = (() => {
    let q = Promise.resolve();
    return fn => (q = q.then(fn));
  })();
  const readStream = byline.createStream(fs.createReadStream(key));
  readStream.setEncoding('utf8');
  const pattern = /^UHL\s|^UTL\s/;
  const regExp = new RegExp(pattern);
  readStream.on('readable', () => {
    let line;
    while (null !== (line = readStream.read())) {
      if (!regExp.test(line.toString())) {
        totalRecordsCount++;
        dataObject = soiParser(line);
        const { id } = dataObject;
        const XMLRequests = createLoSTRequestXML(
          options,
          { mapping: event.mapping, row: dataObject }
        );
        // eslint-disable-next-line no-loop-func
        queue(async () => {
          try {
            for (let i = 0; i < XMLRequests.length; i++) {
              console.log('Sending request');
              console.log(id);
              totalRequestsCount++;
              const response = await sendLoSTRequest(
                options,
                { data: XMLRequests[i], url: LOST_URL }
              );
              console.log('got response');
              const responseObj = await xml2js.parseStringPromise(response.data);
              if (Object.keys(responseObj).indexOf('errors') !== -1) {
                // console.log('Response has the error:');
                // await handleError(options, { err: responseObj, id });
                failedRequestsCount++;
                fs.writeFileSync(`${ERR_DIR}/${generateKey()}-${id}.xml`, response.data);
              } else {
                console.log('Response from the Lost Server');
                console.log(response.data);
                successRequestsCount++;
              }
            }
          } catch (err) {
            console.log(err);
          }
        });
      }
    }
  })
  .on('end', () => {
    console.log('file processed');
    console.log(`
    ************************************************
    Total Records Processed: ${totalRecordsCount}
    Total Requests Sent: ${totalRequestsCount}
    Success Requests: ${successRequestsCount}
    Failed Requests: ${failedRequestsCount}
    ************************************************
    `);
    Object.assign(responseObject, {
      failedRequestsCount,
      successRequestsCount,
      totalRecordsCount,
      totalRequestsCount
    });
  });
}
Thank You
The sample code at the top of your question could be rewritten like this:
const queue = (() => {
  let q = Promise.resolve();
  return (fn) => (q = q.then(fn));
})();

stream.on('data', async () => {
  while ((data = stream.read()) !== null) {
    console.log('line');
    const requests = getRequests(); // Sync code, no problems
    queue(async () => {
      for (let i = 0; i < requests.length; i++) {
        const result = await apiCall(requests[i]);
        console.log('result from api');
        const finalResult = await anotherapiCall(result.data);
      }
    });
  }
});
Hopefully that will be useful for the complete code
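One note on wiring that queue into the complete code: because queue(fn) returns the updated chain, the end handler can queue the summary itself, so it only runs after every previously queued request has finished (a sketch, assuming the same queue closure as above):

readStream.on('end', () => {
  // Queueing the summary makes it the last task in the chain,
  // so it runs only after all previously queued work settles
  queue(() => {
    console.log('file processed');
    // ...print the counters here...
  });
});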
If anyone wants a solution for processing the file synchronously, i.e. reading line by line and executing an async call per line, it's recommended to use the built-in stream Transform: you create a transform function and invoke its callback when the work finishes. Hopefully that helps anyone who faces this issue.
Through2 is a small npm library that can also be used for the same purpose.
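A minimal sketch of that Transform idea (processLine is a hypothetical per-line async helper, not from the code above):

const { Transform } = require('stream');

const lineProcessor = new Transform({
  objectMode: true,
  transform(line, _encoding, callback) {
    // The stream will not deliver the next line until callback() is invoked,
    // so lines are processed strictly in order
    processLine(line.toString())
      .then(() => callback())
      .catch(callback);
  }
});

// byline.createStream(fs.createReadStream(key)).pipe(lineProcessor);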
I have an API to call every 2.5 seconds. Initially, the data inside the response object is null because the database is still updating it through a transaction, but on the subsequent 3rd or 4th try I get the data. I am writing a reusable function for this, however I get undefined. My goal is to keep calling the API until I get the value at my path and then close the connection. Please advise.
P.S.: The API URL below doesn't have any delay, but my private API does.
const getData = (url, path) => {
  const interval = setInterval(async () => {
    const result = await axios.get(url);
    if (_.has(result.data, path) && result.data[path]) {
      return result[path];
    }
  }, 2500);
  return clearInterval(interval);
};
getData('https://api.oceandrivers.com/static/resources.json', 'swaggerVersion')
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.15/lodash.js"></script>
JS Fiddle URL
Your function returns clearInterval(interval), which is always undefined.
If you want to return a Promise which will resolve when the data is available, you could do something like this:
const getData = (url, path) => {
  return new Promise((resolve, reject) => {
    const interval = setInterval(async () => {
      const result = await axios.get(url);
      if (_.has(result.data, path) && result.data[path]) {
        clearInterval(interval); // Clear the interval
        resolve(result.data[path]); // Resolve with the data
      }
    }, 2500);
  });
};

getData('https://api.oceandrivers.com/static/resources.json', 'swaggerVersion')
  .then(data => {
    console.log(data); // Your data is available here
  });

// OR

(async () => {
  const version = await getData('https://api.oceandrivers.com/static/resources.json', 'swaggerVersion');
  console.log(version);
})();
It's because JavaScript is asynchronous, as the comment above already mentioned. You can use either callbacks or promises in JavaScript. Here is the code:
const getData = (url, path, cb) => {
  const interval = setInterval(async () => {
    const result = await axios.get(url);
    if (_.has(result.data, path) && result.data[path]) {
      clearInterval(interval); // We found it, remove the interval
      cb(result.data[path]);
    }
  }, 2500);
};

getData(
  "https://api.oceandrivers.com/static/resources.json",
  "swaggerVersion",
  data => {
    console.log("test", data);
  }
);
Here is the fiddle: https://jsfiddle.net/7pc4hq6t/3/
You could create an asynchronous delay:
const delay = milliseconds => new Promise(resolve => setTimeout(resolve, milliseconds));
Then use it like this:
const getDataAsync = async (url, path) => {
  while (true) {
    const result = await axios.get(url);
    if (_.has(result.data, path) && result.data[path]) {
      return result.data[path];
    }
    await delay(2500);
  }
};

const data = await getDataAsync('https://api.oceandrivers.com/static/resources.json', 'swaggerVersion');
This avoids the multiple layers of nested callbacks, and produces much more readable code.
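Note that the final await on getDataAsync needs an async context: either an ES module with top-level await, or a wrapper such as:

(async () => {
  const data = await getDataAsync('https://api.oceandrivers.com/static/resources.json', 'swaggerVersion');
  console.log(data);
})();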