Node - wait for map to finish before continuing - javascript

I have this file in my node app that supposed to go fetch me some data about every league champion from their official website using cheerio and its going all great but when I add all the data to my array to then return it as json data the write function runs before the map finishes so I just creating a json file with an empty array in it:
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
const champions = fs.readFileSync('champions.json');
const championsObj = JSON.parse(champions);
let champsList = [];
championsObj.map(champ => {
request(champ.href, (err, res, html) => {
if (!err && res.statusCode == 200) {
const $ = cheerio.load(html);
const champName = $('.style__Title-sc-14gxj1e-3 span').text();
let skins = [];
const skinsList = $('.style__CarouselItemText-sc-1tlyqoa-16').each(
(i, el) => {
const skinName = $(el).text();
skins.push = skinName;
}
);
const champion = {
champName,
skins
};
console.log(champion);
champsList.push = champion;
}
});
});
const jsonContent = JSON.stringify(champsList);
fs.writeFile('champions2.json', jsonContent, 'utf8', function(err) {
if (err) {
console.log(err);
}
});
I'm not a node expert but I tried using Promise but it didn't work but I'm not sure maybe I used it wrong.
UPDATE #1: using axios
championsObj.map(async champ => {
const html = await axios.get(champ.href);
const $ = await cheerio.load(html);
const champName = $('.style__Title-sc-14gxj1e-3 span').text();
let skins = [];
const skinsList = $('.style__CarouselItemText-sc-1tlyqoa-16').each(
(i, el) => {
const skinName = $(el).text();
skins.push = skinName;
}
);
const champion = {
champName,
skins
};
console.log(champion);
champsList.push = champion;
});

you can use await Promise.all(<array>.map(async () => {...}). it does not require any additional dependencies. however you have no guarantees about the order of asynchronous iterations (starting all the iterations in the right order, but no guarantees about iterations' endings).

Your problem here is that Array#map doesn't wait for asynchronous functions such as the request calls to finish before moving on. I recommend p-map with got. To ensure perfect execution order, I also recommend reading and writing the file asynchronously.
const got = require('got');
const pMap = require('p-map');
const cheerio = require('cheerio');
const fs = require('fs').promises;
(async () => {
const champions = JSON.parse(await fs.readFile('champions.json', 'utf8'));
let champsList = await pMap(champions, async champ => {
const {
body
} = await got(champ.href)
const $ = cheerio.load(body);
const champName = $('.style__Title-sc-14gxj1e-3 span').text();
let skins = [];
$('.style__CarouselItemText-sc-1tlyqoa-16').each(
(_, el) => {
const skinName = $(el).text();
skins.push(skinName);
}
);
const champion = {
champName,
skins
};
console.log(champion);
return champion;
})
await fs.writeFile('champions2.json', JSON.stringify(champsList));
})();

Related

How to run a function which call axios for every 30 seconds

I'm creating a web scraper using node, cheerio and calling the website using axios(async/await). I want the function to run every 30 seconds. I tried using setTimeout and setInterval but did not get the expected result. Instead got heap out of memory error. I want to run the mvcAppointmentSearch function in the while loop for every 30 seconds. Following is the code. also attaching the codepen link for better readability.
Code pen link
const express = require('express');
const request = require('request-promise');
const cheerio = require('cheerio');
const axios = require('axios');
const cssSelect = require('css-select');
const open = require('open');
// const mvcUrl = 'https://telegov.njportal.com/njmvc/AppointmentWizard/17/';
const mvcUrl = 'https://telegov.njportal.com/njmvc/AppointmentWizard/14/';
const mvcLocation = ['Edison', 'Rahway', 'SouthPlainfield'];
// const mvcLocationNumber = ['240', '252', '239'];
const mvcLocationNumber = ['163'];
const requiredMonths = ['September', 'October'];
const callUrl = async (url, locationNumberIndex) => {
try {
const response = await axios.get(url);
//console.log('call url', response.data);
getData(response.data, locationNumberIndex);
} catch (err) {
console.log(err);
}
};
const mvcAppointmentSearch = () => {
for (let i = 0; i < mvcLocationNumber.length; i++) {
const currentUrl = mvcUrl + mvcLocationNumber[i];
console.log(mvcLocationNumber[i]);
callUrl(currentUrl, i);
}
};
const getData = (html, locationNumberIndex) => {
let data = [];
let $ = cheerio.load(html);
console.log('datais ', $);
$.prototype.exists = function (selector) {
return this.find(selector).length > 0;
};
const checkerLength = $('div').exists('.alert-danger');
console.log(checkerLength);
if (checkerLength) {
console.log(
`No appointment available in ${mvcLocation[locationNumberIndex]}`
);
} else {
const dateString = $('.control-label').text();
const availableMonth = dateString.trim().split(' ')[7];
const exactDateAvailability = dateString.slice(24, -1);
console.log(availableMonth);
if (requiredMonths.includes(availableMonth)) {
console.log('Hurray there is an appointment available');
const message = `Appointment available for the location ${mvcLocation[locationNumberIndex]} on ${exactDateAvailability}`;
open(`${mvcUrl + mvcLocationNumber[locationNumberIndex]}`);
console.log(message);
} else {
console.log('required Month is not available still searching');
}
}
};
while (true) {
try {
// mvcAppointmentSearch();
// want to run the following function for every 30 seconds.
mvcAppointmentSearch();
} catch (err) {
console.log(`Error has Occured ${err}`);
}
}

How to return REST API response after utility execution is finished in expressJs

I have written one POST endpoint in expressJS with node.when I a make call to API It runs a utility with setInterval() and I want to send the API response after utility executes clearInterval().
How I can I wait and send response after utility execution is finished?
Please see the code below
REST API code:
const router= express.Router();
const multer= require('multer');
const {readCSVFile}= require('../util/index');
var storage = multer.diskStorage({
destination: (req, file, cb) => {
cb(null, 'uploads');
},
filename: (req, file, cb) => {
cb(null, file.fieldname + '-' + Date.now()+'.xlsx');
}
});
var upload = multer({storage: storage});
router.post('/fileUpload', upload.single('filename'), async (req, res) => {
readCSVFile();
res.status(201).json({id:1});
});
router.get('/',(req,res)=>{
res.sendFile(__dirname+'/index.html');
});
module.exports=router;
Utilty Code
const config = require('config')
const excelToJson = require('convert-excel-to-json')
const HttpsProxyAgent = require('https-proxy-agent')
const AWS = require('aws-sdk')
const json2xls = require('json2xls')
const fs = require('fs')
const awsConfig = {
httpOptions: {
agent: new HttpsProxyAgent(
config.get('aws.proxy')
),
}
}
AWS.config.credentials = new AWS.SharedIniFileCredentials({
profile: config.get('aws.profile'),
})
AWS.config.update(awsConfig)
let uuidv4 = require('uuid/v4')
let csv = [];
const lexRunTime = new AWS.LexRuntime({
region: config.get('aws.region'),
})
let refreshId
const readCSVFile = () => {
const csvSheet = excelToJson({
sourceFile: './Test.xlsx',
})
csvSheet.Sheet1.forEach(element => {
csv.push((element.A.slice(0, element.A.length)))
})
runTask()
refreshId = setInterval(runTask, 1000)
}
let botParams = {
botAlias: config.get('bot.alias'),
botName: config.get('bot.name'),
sessionAttributes: {},
}
const missedUtterancesArray = []
const matchedUtterancesArray = []
let start = 0
let end = 50
let count = 50
const runTask = () => {
let itemsProcessed = 0
console.log('executing...')
const arrayChunks = csv.slice(start, end)
arrayChunks.forEach((element) => {
botParams.inputText = element
botParams.userId = `${uuidv4()}`
lexRunTime.postText(botParams, function (err, data) {
itemsProcessed++
if (err) console.log(err, err.stack)
else {
if (data.intentName === null) {
missedUtterancesArray.push({
Utterance: element,
})
}
else{
matchedUtterancesArray.push({
Utterance: element,
})
}
}
if (itemsProcessed === arrayChunks.length) {
start = csv.indexOf(csv[end])
end = start + count
}
if (start === -1) {
let xls = json2xls(missedUtterancesArray)
fs.writeFileSync('./MissedUtterances.xlsx', xls, 'binary')
let matchedXls = json2xls(matchedUtterancesArray)
fs.writeFileSync('./MatchedUtterances.xlsx', matchedXls, 'binary')
console.log('File saved successfully!! ')
console.log('Total Matched utterances count: ',csv.length-missedUtterancesArray.length)
console.log('Total Missed utterances count: ',missedUtterancesArray.length)
console.log('Total Utterances count: ',csv.length)
clearInterval(refreshId)
}
})
})
}
I would have needed few more information to answer this but pardon my try if this does not work -
the setInterval method in the readCSVFile the reason. Being an asynchronous function, this will not stop the code progression.
lexRunTime.postText also looks like asynchronous. I think you'd be better off with using promises while responding to the client.

Making a web-crawler to have loop

I tried to make my web-crawler to have a loop to crawl the webpage from 1 to around 500. But the result does not include any directed one but to return an only void array.
This code is based on cheerio, jQuery, and axios. JavaScript.
const axios = require("axios");
const cheerio = require("cheerio");
const log = console.log;
const getHtml = async() => {
var i=0
while (i<493){
try {
return await axios.get("https://playentry.org/ds#!/qna?sort=created&rows=20&page="+i);
} catch (error) {
console.error(error);
}
}
};
getHtml()
.then(html => {
let ulList = [];
const $ = cheerio.load(html.data);
const $bodyList = $("div.discussContentWrapper div.discussListWrapper table.discussList").children("tr.discussRow");
$bodyList.each(function(i, elem){
ulList[i] = {
title:$(this).find('td.discussTitle div.discussTitleWrapper'),
writer:$(this).find('td.discussTitle td.discussViewCount'),
viewcount:$(this).find('td.discussTitle td.discussViewCount'),
likecount:$(this).find('td.discussTitle div.discussLikeCount'),
date:$(this).find('td.discussTitle td.discussDate'),
};
});
const data = ulList.filter(n => n.title);
return data;
})
.then(res => log(res));
The output is '''[]''' or '''[ [] ]''' with no real outputs.
Thanks for your help in advance.

How to execute those 2 code snippets asynchronously/parallell

I am beginner at javascript so please bear with me. I wonder how to put async() event in the right way.
I have 2 code snippets that I want to execute asynchronously and not synchronously. The code snippets use a library that do HTTP requests so that is out of my control.
So I like the 2 code snippets to execute in parallell somehow. What I have are those 2 code snippets and I also think I understand that I only want to declare the first 2 lines once as those lines takes time:
'use strict';
const ccxt = require ('ccxt');
The 2 code snippets are the below
Code snippet1:
'use strict';
const ccxt = require ('ccxt');
(async () => {
try{
const exchange = new ccxt.one({ enableRateLimit: true })
const tickers = await exchange.fetchTickers()
const obj = { tickers }
const fs = require('fs');
fs.writeFile("/myproject/file1.txt", JSON.stringify(obj), function(err) { });
}catch{}
}) ()
Code snippet2:
'use strict';
const ccxt = require ('ccxt');
(async () => {
try{
const exchange = new ccxt.two({ enableRateLimit: true })
const tickers = await exchange.fetchTickers()
const obj = { tickers }
const fs = require('fs');
fs.writeFile("/myproject/file2.txt", JSON.stringify(obj), function(err) { });
}catch{}
}) ()
I tried this code and it actually did it in parallell. It executed very fast.
If you have any idéas of code to add to make it even more efficient I would be very happy to hear how to do that. (For example open up more ports or any other bottlenecks?)
'use strict';
const ccxt = require ('ccxt');
(async () => {
try{
const exchange = new ccxt.one({ enableRateLimit: true })
const tickers = await exchange.fetchTickers()
const obj = { tickers }
const fs = require('fs');
fs.writeFile("/myproject/file1.txt", JSON.stringify(obj), function(err) { });
}catch{}
}) ();
(async () => {
try{
const exchange = new ccxt.two({ enableRateLimit: true })
const tickers = await exchange.fetchTickers()
const obj = { tickers }
const fs = require('fs');
fs.writeFile("/myproject/file2.txt", JSON.stringify(obj), function(err) { });
}catch{}
}) ();
Use Promise.all:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/all
'use strict';
const ccxt = require ('ccxt')
const fs = require('fs')
async function work (exchangeId) {
try {
const exchange = new ccxt[exchangeId] ({ enableRateLimit: true })
const tickers = await exchange.fetchTickers ()
const obj = { tickers }
const filename = exchangeId + '.txt'
fs.writeFileSync (filename, JSON.stringify (obj))
console.log ('saved', filename)
} catch {
}
}
(async () => {
const exchangeIds = [ 'bittrex', 'bitfinex' ]
await Promise.all (exchangeIds.map (exchangeId => work (exchangeId)))
}) ()
It's not clear to me what you want to happen but your code will not catch all errors as written. I know you seemed to be ignoring all errors but just in case, ...
If you're going to use async/await then you should go all in. That means you should use fs.promises.readFile not fs.readFile. Either that or you should wrap fs.readFile in a promise either manually or using util.promisify.
So the code becomes
'use strict';
const ccxt = require ('ccxt');
const thing1 = (async () => {
try{
const exchange = new ccxt.one({ enableRateLimit: true })
const tickers = await exchange.fetchTickers()
const obj = { tickers }
const fs = require('fs');
await fs.promises.writeFile("/myproject/file1.txt", JSON.stringify(obj));
} catch {
// catch error here
}
}) ();
const thing2 = (async () => {
try{
const exchange = new ccxt.two({ enableRateLimit: true })
const tickers = await exchange.fetchTickers()
const obj = { tickers }
const fs = require('fs');
await fs.promises.writeFile("/myproject/file2.txt", JSON.stringify(obj));
} catch {
// catch error here
}
}) ();
If wanted to do both to wait for both things then you could use Promise.all by passing in an array that contain each of the promises returned by both async functions.
'use strict';
const ccxt = require ('ccxt');
const thing1 = (async () => {
try{
const exchange = new ccxt.one({ enableRateLimit: true })
const tickers = await exchange.fetchTickers()
const obj = { tickers }
const fs = require('fs');
await fs.promises.writeFile("/myproject/file1.txt", JSON.stringify(obj));
} catch {
// catch error here
}
}) ();
const thing2 = (async () => {
try{
const exchange = new ccxt.two({ enableRateLimit: true })
const tickers = await exchange.fetchTickers()
const obj = { tickers }
const fs = require('fs');
await fs.promises.writeFile("/myproject/file2.txt", JSON.stringify(obj));
} catch {
// catch error here
}
}) ();
(async() => {
await Promise.all([thing1, thing2]);
// do something after thing1 and thing2
}) ();
And of course at given the 2 functions are the same except for the filename then
'use strict';
const ccxt = require ('ccxt');
async function fetchTickersAndWrite({method, filename}) {
try{
const exchange = new ccxt[method]({ enableRateLimit: true })
const tickers = await exchange.fetchTickers()
const obj = { tickers }
const fs = require('fs');
await fs.promises.writeFile(filename, JSON.stringify(obj));
} catch {
// catch error here
}
}
(async() => {
await Promise.all([
{ method: 'one', filename: `/myproject/file1.txt` },
{ method: 'two', filename: `/myproject/file2.txt` },
].map(fetchTickersAndWrite));
// do something
}) ();

Node.js Call a method after another method is completed

I would like to call my "app.get('/news/news-desc', (req, res)" method after "app.get('/news/api/:newsName', function(req, res)" is completed.
Here is my code:
let articleUrlArray = [];
app.get('/news/api/:newsName', function(req, res) {
const API_KEY = 'example';
let data = '';
const techCrunchURL = `https://newsapi.org/v2/top-headlines?sources=techcrunch&apiKey=${API_KEY}`
switch(req.params.newsName) {
case 'tech-crunch':
request(techCrunchURL, function(err, response, html) {
let formattedData = JSON.parse(response.body);
for(let i = 0; i < formattedData.articles.length; i++) {
articleUrlArray.push(formattedData.articles[i].url);
}
data = response.body;
res.setHeader('Content-Type', 'application/json');
res.send(data);
});
break;
default:
data = 'Please type in correct news source';
break;
}
})
const checkBody = res => (err, response, html) => {
const $ = cheerio.load(html);
const articleContent = $('.article-content').children('p')
const bodyOne = articleContent.eq(0).text()
const bodyTwo = articleContent.eq(1).text()
const isExtensive = bodyOne.split(' ').length > 50
res(isExtensive ? { bodyOne } : { bodyOne, bodyTwo })
}
const getArticle = article => new Promise(res => request(article, checkBody(res)))
app.get('/news/news-desc', (req, res) => {
Promise.all(articleUrlArray.map(getArticle)).then(data => res.send(JSON.stringify(data)))
})
As you can see, the first method calls the "newsapi.org" and gets 10 articles. Then it would only extract the urls of those articles and push them into articleUrlArray.
After the urls have been pushed into the articleUrlArray, it would look like this:
let articleUrlArray = [ 'https://techcrunch.com/2018/05/19/shared-housing-startups-are-taking-off/',
'https://techcrunch.com/2018/05/19/shared-housing-startups-are-taking-off/',
'https://techcrunch.com/2018/05/19/my-data-request-lists-guides-to-get-data-about-you/',
'https://techcrunch.com/2018/05/19/siempos-new-app-will-break-your-smartphone-addiction/',
'https://techcrunch.com/2018/05/19/la-belle-vie-wants-to-compete-with-amazon-prime-now-in-paris/',
'https://techcrunch.com/2018/05/19/apple-started-paying-15-billion-european-tax-fine/',
'https://techcrunch.com/2018/05/19/original-content-dear-white-people/',
'https://techcrunch.com/2018/05/19/meet-the-judges-for-the-tc-startup-battlefield-europe-at-vivatech/',
'https://techcrunch.com/2018/05/18/nasas-newest-planet-hunting-satellite-takes-a-stellar-first-test-image/',
'https://techcrunch.com/video-article/turning-your-toys-into-robots-with-circuit-cubes/',
'https://techcrunch.com/2018/05/18/does-googles-duplex-violate-two-party-consent-laws/' ];
It would just be filled up with urls.
Then the second method, would use the filled up articleUrlArray to do its own thing.
However, currently for my code, the second method runs first before the articleUrlArray has been filled up.
I would like to run the second method after the first method completes and the articleUrlArray has been filled up with urls.
Could you please help me with this?
let articleUrlArray = [];
const addArticleUrl = url => articleUrlArray.push(url)
const checkBody = res => (err, response, html) => {
const $ = cheerio.load(html);
const articleContent = $('.article-content').children('p')
const bodyOne = articleContent.eq(0).text()
const bodyTwo = articleContent.eq(1).text()
const isExtensive = bodyOne.split(' ').length > 50
res(isExtensive ? { bodyOne } : { bodyOne, bodyTwo })
}
const getArticle = article => new Promise(res => request(article, checkBody(res)))
const newsDescMiddleware = app.get('/news/news-desc', (req, res) => {
Promise.all(articleUrlArray.map(getArticle)).then(data => res.send(JSON.stringify(data)))
})
const techCrunch = res => url => request(url, (err, response, html) => {
let formattedData = JSON.parse(response.body);
formattedData.articles.forEach(article => addArticleUrl(article.url))
res(response.body)
})
const getNewsByName = (newsName, url) => new Promise((res, reject) => ({
'tech-crunch': techCrunch(res)(url)
}[newsName])) || reject()
const getNewsByNameMiddleware = (req, res) => {
const API_KEY = 'example';
const techCrunchURL = `https://newsapi.org/v2/top-headlines?sources=techcrunch&apiKey=${API_KEY}`
getNewsByName(req.params.newsName, url)
.then(body => {
res.setHeader('Content-Type', 'application/json');
res.send(body)
})
.catch(() => res.send('Please type in correct news source'))
}
app.get('/news/api/:newsName', getNewsByNameMiddleware, newsDescMiddleware)
Here, I made you some middlewares.
I am assuming that you don't need the response of the previous middleware.
I like to split the code by its responsibilities and write it functionally.
You can separate the core logic of the first route to a function and re-use it in both places, if you please. however you still need to provide newsName parameter to GET '/news/news-desc' endpoint.
Example for your code.
let articleUrlArray = [];
function getNewsNames(newsName, callback) {
const API_KEY = 'example';
let data = '';
const techCrunchURL = `https://newsapi.org/v2/top-headlines?sources=techcrunch&apiKey=${API_KEY}`
switch (newsName) {
case 'tech-crunch':
request(techCrunchURL, function (err, response, html) {
let formattedData = JSON.parse(response.body);
for (let i = 0; i < formattedData.articles.length; i++) {
articleUrlArray.push(formattedData.articles[i].url);
}
data = response.body;
callback(null, data);
});
break;
default:
data = 'Please type in correct news source';
callback('Error', data);
break;
}
}
app.get('/news/api/:newsName', function (req, res) {
getNewsNames(req,params.newsName, (err, data) => {
if (!err) {
res.setHeader('Content-Type', 'application/json');
}
return res.send(data);
})
})
const checkBody = res => (err, response, html) => {
const $ = cheerio.load(html);
const articleContent = $('.article-content').children('p')
const bodyOne = articleContent.eq(0).text()
const bodyTwo = articleContent.eq(1).text()
const isExtensive = bodyOne.split(' ').length > 50
res(isExtensive ? { bodyOne } : { bodyOne, bodyTwo })
}
const getArticle = article => new Promise(res => request(article, checkBody(res)))
app.get('/news/news-desc/:newsName', (req, res) => {
getNewsNames(req.params.newsName, (err, data) => {
// by now, the articleUrlArray array will be filled
Promise.all(articleUrlArray.map(getArticle)).then(data => res.send(JSON.stringify(data)))
})
})

Categories

Resources