Requests with request-promise pipe result (Large data) - javascript

I try to download some data from an external API. I would like to pipe the response of every request. The array including the request URLs looks like this :
[ 'https://scihub.copernicus.eu/dhus/odata/v1/Products(\'d98b8730-846f-46d0-a816-5ae4db9f56a7\')/$value',
'https://scihub.copernicus.eu/dhus/odata/v1/Products(\'6edaeb16-3077-45d1-b3f0-fa2d5549f64a\')/$value',
'https://scihub.copernicus.eu/dhus/odata/v1/Products(\'333db2aa-c695-4753-8bd1-e64308af26e1\')/$value',
'https://scihub.copernicus.eu/dhus/odata/v1/Products(\'052cf771-6c4e-4a3a-bc15-51c95a3f37c4\')/$value' ]
I read that request-promise does not support to pipe the request response but I have not found an alternative, that's why the function with which I am trying to get the results from looks as follows:
var fs = require('fs');
var rp = require('request-promise');
function downloadSentinel(promObj){
return new Promise((resolve,reject) => {
try {
var promises = promObj.requestURLS.map(url => rp(url,{auth:auth}).then(body => body.pipe(fs.createWriteStream('./test.zip'))
.on('finish', () => {
resolve(promObj);
})), {concurrency:2});
Promise.all(promises).then(results => {
console.log(results)
});
} catch (error) {
reject(error);
}
})
}
Additionally, it is only possible to download two Products at the same time, I tried to achieve it with the Blubird parameter concurrency, but it doesn't seem to work properly.
How could I solve my problem?
UPDATE
If I try it with this code:
var promises = promObj.requestURLS.map(url => rp(url,{auth:auth}).then(
data => new Promise((resolve,reject) => {
data.pipe(fs.createWriteStream('./data/' + promObj.Name[0] + ".zip"))
.on('finish', () => {
console.log('Finally finished');
resolve(promObj);
})
.on('error', () => {
reject(promObj);
})})),{concurrency:2});
Promise.all(promises).then(results => {
console.log(results)
});
I get the error UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 2): ReferenceError: data.pipe is not a function
What have I missed?
Additionally, I recognized that my data is 800MB large. Is this too large to pipe it without an error?
I get this error now:
In buffer.js:556
if (encoding === undefined) return buf.utf8Slice(start, end);
Error: "toString()" failed

Related

How to send bulk get requests using nodejs?

I wrote a web crawler with nodejs to send get requests to about 300 urls.
Here is the main loop:
for (let i = 1; i <= 300; i++) {
let page= `https://xxxxxxxxx/forum-103-${i}.html`
await getPage(page,(arr)=>{
console.log(`page ${i}`)
})
}
Here is the function getPage(url,callback):
export default async function getPage(url, callback) {
await https.get(url, (res) => {
let html = ""
res.on("data", data => {
html += data
})
res.on("end", () => {
const $ = cheerio.load(html)
let obj = {}
let arr = []
obj = $("#threadlisttableid tbody")
for (let i in obj) {
if (obj[i].attribs?.id?.substr(0, 6) === 'normal') {
arr.push(`https://xxxxxxx/${obj[i].attribs.id.substr(6).split("_").join("-")}-1-1.html`)
}
}
callback(arr)
console.log("success!")
})
})
.on('error', (e) => {
console.log(`Got error: ${e.message}`);
})
}
I use cheerio to analyze HTML and put all information i need to variable nameed 'arr'.
The program will report an error after running normally for a period of time,like that:
...
success!
page 121
success!
page 113
success!
page 115
success!
Got error: connect ETIMEDOUT 172.67.139.206:443
Got error: connect ETIMEDOUT 172.67.139.206:443
Got error: connect ETIMEDOUT 172.67.139.206:443
Got error: connect ETIMEDOUT 172.67.139.206:443
Got error: connect ETIMEDOUT 172.67.139.206:443
Got error: connect ETIMEDOUT 172.67.139.206:443
I have two questions:
1.What is the reason for the error? Is it because I am sending too many get requests? How can I limit the request frequency?
2.As you can see, The order in which the pages are accessed is chaotic,how to control them?
I have tried using other modules to send get request (such as Axios) but it didn't work.
The http requests are fired simultaneously because the loop is not waiting for the previous request due to wrong use of await. Proper control of loop will limit the request frequency.
for (let i = 1; i <= 300; i++) {
let page= `https://xxxxxxxxx/forum-103-${i}.html`
var arr = await getPage(page);
// use arr in the way you want
console.log(`page ${i}`);
}
export default async function getPage(url) {
// Declare a new promise, wait for the promise to resolve and return its value.
return await new Promise((reso, rej) => {
https.get(url, (res) => {
let html = ""
res.on("data", data => {
html += data
})
res.on("end", () => {
const $ = cheerio.load(html)
let obj = {}
let arr = []
obj = $("#threadlisttableid tbody")
for (let i in obj) {
if (obj[i].attribs?.id?.substr(0, 6) === 'normal') {
arr.push(`https://xxxxxxx/${obj[i].attribs.id.substr(6).split("_").join("-")}-1-1.html`)
}
}
reso(arr) // Resolve with arr
console.log("success!")
})
})
.on('error', (e) => {
console.log(`Got error: ${e.message}`);
throw e;
})
})
}
As you can see, The order in which the pages are accessed is chaotic,how to control them?
await is meaningless unless you put a promise on the right hand side. http.get does not deal in promises.
You could wrap it in a promise but it would be easier to use an API which supports then natively such as node-fetch, axios, or Node.js's native fetch. (That all have APIs that are, IMO, easier to use than http.get in general nor just with regards to flow control).
What is the reason for the error?
It isn't clear.
Is it because I am sending too many get requests?
That is a likely hypothesis.
How can I limit the request frequency?
Once you have your for loop working with promises so the requests are sent in serial instead of parallel, you can insert a sleep between each request.

Wait for response from request before returning

I am trying to create a function with a GET request that returns a portion of the data from the GET request. However, it keeps returning before the data is retrieved, so I keep getting "undefined". How can I set this up so it actually waits for the data to be set before returning?
let getInfo = async () => {
const request = net.request({
url: URL
})
return new Promise((resolve, reject) => { // Promise being here DOES work
request.on('response', (response) => {
response.on('data', (chunk) => {
//return new Promise((resolve, reject) => { //Promise being here does NOT work
let body = JSON.parse(chunk)
let info = body.data
if (info){
resolve(info);
}
reject();
//})
});
});
request.write('')
request.end()
}).then(data => {
console.log("From then: "+data)
return data
})
}
getInfo().then(data => {
console.log("From outside: "+data)
})
Edit: This is the updated version that still does not work. I am trying to use the native electron method and I don't see why this doesn't work. The "From then:" part displays the info correctly. But when run "From outside:" it prints undefined. Does the issue have anything to do with the response.on being nested inside the request.on?
Solution: As #NidhinDavid showed in his answer, the issue was that the promise was inside the 'response' listener. Moving the 'GET' request from start to finish inside the Promise fixed it to giving the correct output. I have updated my code to reflect that for future individuals.
let getInfo = () => {
let info;
const request = net.request({
url: URL
})
return new Promise((resolve, reject) => {
request.on('response', (response) => {
response.on('data', (chunk) => {
request.write('')
request.end()
let body = JSON.parse(chunk)
info = body.data
if (info) {
resolve(info)
} else {
reject('Something went wrong');
}
});
});
})
}
getInfo()
.then(data => {
// this will be your info object
console.log(data)
})
.catch(err => {
// this will log 'Something went wrong' in case of any error
console.log(err)
})
You need to return inside your, on type event handler. Read more about asynchronous code and synchronous code here
I couldn't find the net module and the one which is included with Nodejs do not have request method. So to get the similar concept of event emiters and promise I am using http module and doing a http request to fetch json and parse it
'use strict'
var https = require('https');
const getInfo = async () => {
// create a new promise chain
// remember it is a chain, if one return is omitted
// then the chain is broken
return new Promise((resolve, reject) => {
var options = {
host: 'support.oneskyapp.com',
path: '/hc/en-us/article_attachments/202761727/example_2.json'
};
// start the request
https.request(options, function (response) {
var str = '';
// data arrives in chunks
// chunks needs to be stitched together before parsing
response.on('data', function (chunk) {
str += chunk;
});
// response body obtained
// resolve (aka return) the result
// or parse it, or do whatever you want with it
response.on('end', function () {
resolve(str)
});
// errors are another event
// listen for errors and reject when they are encountered
response.on('error', function (err) {
reject(err)
})
}).end()
})
}
//*********************************************
// using async await
//*********************************************
// if this is the entry point into app
// then top-level async approach required
(async ()=>{
try{
let data = await getInfo()
console.log("From ASYNC AWAIT ")
console.log(JSON.stringify(JSON.parse(data)))
}
catch (err) {
console.log("operation failed, error: ", err)
}
})();
//************************************************
// using promise chains
//************************************************
getInfo()
.then((data)=>{
console.log("FROM PROMISE CHAIN ")
console.log(JSON.stringify(JSON.parse(data)))
})
.catch((err)=>{
console.log("operation failed, error: ", err)
})
Tyr this, it might works for you,
let info;
const getInfo = async (_url)=>{
const response = await fetch(_url);
const data = await response.json();
info = data;
} ;
const url = "some url";
getInfo(url);
console.log(info);
Async function always returns a promise, so either consume that promise or internally await the data and assign it to some variable.
Check for the valid data required in info by logging it to the console.

How to resolve asynchronous process before moving onto another process?

I have the following method in which I read from a csv and collect the data in an array of strings. After I do that I want to use that array. I'm new to handling asynchronous calls and I think what's happening in the code below is that reading from the .csv file is asynchronous so the array is empty once I start looping through it. How do I complete the .csv reading so that all the array is filled completely and only until it is, move on to the next task of looping through the array?
static async readAndPopulateGMDevicesToMigrate() {
const bookTitles = [];
await fs.createReadStream('./BookTitles.csv')
.pipe(csv())
.on('data', (data) => bookTitles.push(data.Titles))
.on('error', (error) => loggingService.getDefaultLogger().error(error))
.on('end', () => loggingService.getDefaultLogger().info("Book Titles:" + booksTitles));
console.log(bookTitles);
const booksToAdd = [];
bookTitles.forEach(bookTitle => booksToAdd.push(new Object({
Title: bookTitle}))),
})))
console.log(readDevices);
}
Any help on this would be much appreciated!
A couple of pointers, hoping this can help with getting the approach clear.
createReadStream returns a ReadableStream, not an async/promise to await on.
The returned ReadableStream provides means to react to events, as shown when pipelining the handlers by using on.
Now, if you wrap your code in a Promise where either resolve or reject base on the end or error event respectively.
static async readAndPopulateGMDevicesToMigrate() {
const bookTitles = [];
const logger = loggingService.getDefaultLogger();
/* WRAP in promise to wait */
await new Promise((resolve, reject) => {
fs.createReadStream('./BookTitles.csv')
.pipe(csv())
.on('data', ({ Titles }) => {
logger.info(`Adding ${Titles} Titles`);
bookTitles.push(Titles);
})
.on('error', (error) => {
logger.error(error);
/* REJECT on error, maybe reject with the partial result ? */
reject(error);
})
.on('end', () => {
logger.info("Book Titles:" + booksTitles);
const booksToAdd = bookTitles.map(bookTitle => ({Title: bookTitle}));
/* RESOLVE when the stream was read to the end */
resolve(booksToAdd);
});
})
console.log(bookTitles);
console.log(readDevices);
}

How would initiate requests to an array of links, and then after they have all resolved, render results using Express?

I am trying to use axios and express to get an array of links from a page, harvest data at each link, and display results to the user. The process I'd like to implement is:
Run axios.get(targetPage), harvest the links on the target page, save to an array
Run run axios.all to get a response from link
Harvest data from each response
Display to user
Below is my app.get function:
app.get('/search', function (req, res) {
var context = {};
context.resources = [];
var promises = [];
var year = req.query.year;
var targetPage = endpoint + year;
axios.get(targetPage).then(resp => {
var $ = cheerio.load(resp.data);
var pages = []
$('queryStr').each(function (i, ele) { pages.push(endpoint + $(ele).attr("href")) });
context.links = pages;
pages.forEach( link => promises.push( axios.get(link) ));
}).then(axios.all(promises)).then( responses => {
responses.forEach( resp => {
var resource = {};
resource.link = resp.url;
var $ = cheerio.load(resp.data)
resource.title = $('query').text()
context.resources.push(resource);
})
}).then( () => {
res.render('search', context);
})
})
I have verified that the urls in the pages[] array are valid. I have tried calling res.render after the first axios.get call, and it successfully rendered my response. I have separately tested the logic in the forEach and verified that it works for each url. I'm getting an error message at the then block immediately following axios.all, which states that responses returned by axios.all is undefined. the Here is the error message:
UnhandledPromiseRejectionWarning: TypeError: Cannot read property 'forEach' of undefined
Thanks for reading.
[TLDR]
This is because the function of your first then clause doesn't have a return statement.
This means there is no successful data passed to the next then clause and so on.
then chaining requires return for each then clause.
Example
There's many problems in my original post, but the main issue was failure to return a Promise in each step of my .then chain. Below is working code, significantly refactored. I'm sure there's better ways of passing along errors and loading in parallel, so please leave answers and comments with any improvements. In addition to the resource linked to by Nazim Kerimbekov and Khoa, I recommend this post for learning how to chain promises.
app.get('/pageToRender', function (req, res) {
var context = {};
var page1 = req.query.year
getInitial(page1)
.then( pages => Promise.all( pages.map( page => { return getResource(page) })))
.then(resources => {
context.resources = resources;
res.render('pageToRender', context);
})
.catch( errors => { console.log(errors) })
})
function getInitial (page) {
return new Promise( (resolve,reject) => {
axios.get(page).then( resp => {
var pages = []
var $ = cheerio.load(resp.data)
$('.mw-content-ltr li a').each(function (i, ele) { pages.push(endpoint + $(ele).attr("href")) });
console.log(pages);
resolve(pages);
}).catch( error => reject(error))
})
}
function getResource(page) {
return new Promise ( (resolve, reject) => {
axios.get(page)
.then( response => {
var resource = {};
resource.link = page;
var $ = cheerio.load(response.data)
resource.title = $('.Title').text()
console.log("resolving resource", resource);
resolve(resource);
}).catch (error => { error })
})
}

Fetching multiple files using Promises and Fetch API javascript

I am updating my javascript skills with Promises, already have in place a library with XHR and callbacks to load and inject multiple files at once and only proceed if ALL of them succeeded.
I am trying to use Promise.all() and Fetch API to get a similar functionality but can't make it work: console.log('All the promises are resolved', values); always triggers no matter if some of the fetch promises failed.
I want to be able to execute the code below, and only proceed with nowInitialize function if all the files were able to be fetched, or throw error using catch() with the reason of the first file that failed
xRequire(['index.html', 'style.cds'])
.then(nowInitialize)
.catch(reason => 'One or more files failed to load' + reason)
style.cds will obviously fail
//TODO Handle file types appropriately
//TODO: Inject css, javascript files
function xRequire(files) {
let urls = [];
let promisesList = [];
let handleAllPromises;
//Populates urls with each file required
for(let i=0; i < files.length ; i++) {
urls.push(files[i]);
}
//Fetch each file in urls
urls.forEach( (url, i) => { // (1)
promisesList.push(
fetch(url)
.then(handleResponse)
.then(data => console.log(data))
.catch(error => console.log(error))
);
});
handleAllPromises = Promise.all(promisesList);
handleAllPromises.then(function(values) {
console.log('All the promises are resolved', values);
});
handleAllPromises.catch(function(reason) {
console.log('One of the promises failed with the following reason', reason);
});
}
function handleResponse(response) {
let contentType = response.headers.get('content-type');
console.log('Requested Info: ' + contentType);
if (contentType.includes('application/json')) {
return handleJSONResponse(response);
} else if (contentType.includes('text/html')) {
return handleTextResponse(response);
} else if (contentType.includes('text/css')) {
return handleTextResponse(response);
} else if (contentType.includes('application/javascript')) {
return handleTextResponse(response);
} else {
throw new Error(`Sorry, content-type ${contentType} not supported`);
}
}
function handleJSONResponse(response) {
return response.json()
.then(json => {
if (response.ok) {
return json;
} else {
return Promise.reject(Object.assign({}, json, {
status: response.status,
statusText: response.statusText
}));
}
});
}
function handleTextResponse(response) {
return response.text()
.then(text => {
if (response.ok) {
return text;
} else {
return Promise.reject({
status: response.status,
statusText: response.statusText,
err: text
});
}
});
}
Can you just rewrite it as async-await code? Here is a rough idea of the typical flow:
const [data1, data2, data3] = await Promise.all([
fetch(url1),
fetch(url2),
fetch(url3),
]);
In other words, Promise.all() returns the promise to all the data that is returned from your multiple fetch() functions.
Then, if you put this into a try-catch, you can handle the rejection as well:
try {
const [data1, data2, data3] = await Promise.all([
fetch(url1),
fetch(url2),
fetch(url3),
]);
// Now you can process the data:
[data1, data2, data3].map(handleResponse);
} catch (error) {
console.log('Error downloading one or more files:', error);
}
If you want to loop with async-await, you can do that:
const promises = [];
for (const url of [url1, url2, url3, url4]) {
promises.push(fetch(url));
}
const [data1, data2, data3, data4] = await Promise.all(promises);
There are two problems. First, you need to return the Promise.all call from xRequire in order to consume it in your xRequire(..).then:
return Promise.all(promisesList);
Also, when you use .catch, if a Promise is initially rejected, it will go into the catch block, do whatever code is there, and then the Promise chain will resolve (not reject) to whatever the catch block returns. If you want to percolate errors up the Promise chain, put your catch at the point in the chain at which you want to detect errors:
urls.forEach( (url, i) => { // (1)
promisesList.push(
fetch(url)
.then(handleResponse)
.then(data => console.log(data))
// no catch here
);
});
I would suggest putting your catch only in the caller of xRequire, that way it will see all errors. Your xRequire function can be reduced to:
xRequire(['index.html', 'style.cds'])
.then(nowInitialize)
.catch(reason => 'One or more files failed to load' + reason)
function xRequire(files) {
return Promise.all(
urls.map(handleResponse)
);
}
If you want the body of xRequire to be able to see errors, but you also want to percolate errors up the Promise chain, throw an error in a catch inside xRequire, so that the Promise it resolves to will reject, rather than resolve:
function xRequire(files) {
return Promise.all(
urls.map(handleResponse)
)
.catch((err) => {
console.log('There was an error: ' + err);
throw err;
})
}
I finally solved it in this way --with the only quirk i've found so far: files argument always needs to be an array, therefore always needs brackets when calling the function--
xRequire(['my-file'])
.then(handle success)
.catch(handle error);
async function xRequire(files) {
let promises = [];
let receivedData;
//Iterate over files array and push results of fetch to promises array
files.map(x => promises.push(fetch(x)));
//populate receivedData array from promises array
receivedData = await Promise.all(promises);
//iterate over receivedData to handle each response accordingly
return receivedData.map(x => handleResponse(x));
}

Categories

Resources