I'm currently learning Node and I'm looking for an HTTP library that would allow me to send a GET request without downloading the server's response content (body).
I need to send a very large number of HTTP requests every minute, but I do not need to read their content (and I'd like to save bandwidth). I can't use HEAD for this purpose.
Is there any way to avoid downloading the response body using node-request, or perhaps another library that could be used?
My sample code using node-request:
const options = {
url: "https://google.com",
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
}
}
//How to avoid downloading a whole response?
function callback(err, response, body) {
console.log(response.request.uri.host + ' - ' + response.statusCode);
}
request(options, callback);
HTTP GET, per the standard, fetches the content; you cannot avoid receiving the response, but you can ignore it, which is basically what you are doing.
request(options, (err, response, body)=>{
//just return from here don't need to process anything
});
EDIT1:
To consume just some bytes of the response, you can use http.get and read the data via the data event. From the docs:
http.get('http://nodejs.org/dist/index.json', (res) => {
res.setEncoding('utf8');
let rawData = '';
res.on('data', (chunk) => { rawData += chunk; });
res.on('end', () => {
//this is when the response will end
});
}).on('error', (e) => {
console.error(`Got error: ${e.message}`);
});
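If you only need the status code and want to save bandwidth, a variation of the snippet above (a sketch, not taken from the docs) is to stop the request as soon as the headers have arrived, so the body is never read:
const http = require('http');
// Sketch: log the status code, then tear the connection down instead of
// downloading the body.
const req = http.get('http://nodejs.org/dist/index.json', (res) => {
  console.log(res.statusCode);
  req.destroy(); // drop the rest of the response
});
req.on('error', (e) => {
  // destroying the request yourself may surface a connection error here; log or ignore it
  console.error(`Got error: ${e.message}`);
});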
Related
I'm wondering what I am doing wrong with this Lambda function.
Goal:
Send HTTP options to fetch a PDF and forward it to the consumer from the Lambda service.
Current code:
"use strict";
const http = require("http");
function getPDF(options, event) {
console.log(options);
return new Promise((resolve, reject) => {
let body = "";
let statusCode = 0;
let headers = { };
http
.request(options, (res) => {
statusCode = res.statusCode;
const headersFromReq = res.headers || {};
res.on("data", (chunk) => (body += chunk));
res.on("end", function () {
console.log( statusCode, headers, body);
resolve({
body: Buffer.from(body).toString(),
statusCode,
headers: {
...headersFromReq,
//'Content-type': 'application/pdf',
//'content-disposition': 'attachment; filename=test.pdf'
}
});
})
.on("error", reject)
.end();
});
});
}
exports.handler = async (event) => {
try {
const response = await getPDF(event.options, event);
return response;
} catch (error) {
console.error(error);
return {
statusCode: 500,
body: JSON.stringify(error),
headers: {}
};
}
};
Whatever I've tried, it either times out or does not produce the response I actually need: the Base64-encoded PDF.
Params for testing would look something like this:
{
"options": {
"hostname": "www.africau.edu",
"port": 80,
"path": "images/default/sample.pdf",
"method": "GET",
"headers": {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
"Accept": "application/pdf",
"Accept-encoding": "gzip, deflate, br"
}
}
}
Current logs -
Function Logs
START RequestId: 8d6be86c-788d-4f49-8305-8caf377cd32e Version: $LATEST
2021-09-28T09:01:21.507Z 8d6be86c-788d-4f49-8305-8caf377cd32e INFO {
hostname: 'www.africau.edu',
port: 80,
path: 'images/default/sample.pdf',
method: 'GET',
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
Accept: 'application/pdf',
'Accept-encoding': 'gzip, deflate, br'
}
}
END RequestId: 8d6be86c-788d-4f49-8305-8caf377cd32e
REPORT RequestId: 8d6be86c-788d-4f49-8305-8caf377cd32e Duration: 11011.54 ms Billed Duration: 11000 ms Memory Size: 128 MB Max Memory Used: 54 MB Init Duration: 152.43 ms
2021-09-28T09:01:32.494Z 8d6be86c-788d-4f49-8305-8caf377cd32e Task timed out after 11.01 seconds
Your approach has an underlying conceptual problem: it may take time to execute, and time is exactly what you don't have when you run things in Lambda. A Lambda "technically" has a maximum of 15 minutes to finish execution (although you have to configure that explicitly; the default timeout is only a few seconds), but if you trigger it from AWS API Gateway, that drops to 30 seconds, and that is not a limit you can configure - it is the total maximum. Moreover, your Lambda response cannot be larger than 6 MB and is normally supposed to be JSON, so you would have to convert your file to Base64; and if you serve that file via API Gateway, the effective limit shrinks once again. What you're trying to do simply cannot be done reliably with Lambda in this way. There is, however, a different approach that AWS would actually recommend:
You send a request to API Gateway that triggers a lambda
Lambda looks up if the requested file already exists in S3
If it doesn't exist:
The Lambda downloads the file and puts it into S3. Note that you can set up an S3 lifecycle rule so that the file stays in S3 only for a certain amount of time. You probably don't want to keep it there forever, but it is nice to keep it cached for a while in case the user tries to re-download your PDF; that way they get the response much faster.
The lambda then generates a pre-signed S3 URL to the freshly downloaded file (a special URL that you can request from S3 that will be valid for another few minutes only) and returns it in the response
If it already exists:
the lambda just generates the pre-signed S3 URL and returns it in the response
Your client (a UI application, I presume) then makes a follow-up request to the pre-signed URL it received in the response, so it talks directly to S3. This way, even if your user has a slow internet connection and needs 20 minutes to download the file, you don't get any timeouts. You can still hit the Lambda timeout if the file is really large and the Lambda cannot download it quickly enough, but that would require a longer discussion; here I'm assuming your file is under 15 MB. A rough sketch of this flow is shown below.
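This sketch assumes the AWS SDK v2 that ships with the Node.js Lambda runtimes; the bucket name, event fields and the 5-minute expiry are placeholders, not values from your setup:
const AWS = require("aws-sdk");
const https = require("https");
const s3 = new AWS.S3();
const BUCKET = "my-pdf-cache-bucket"; // placeholder

// Download the source file into a Buffer (binary-safe).
const download = (url) =>
  new Promise((resolve, reject) => {
    https.get(url, (res) => {
      const chunks = [];
      res.on("data", (chunk) => chunks.push(chunk));
      res.on("end", () => resolve(Buffer.concat(chunks)));
    }).on("error", reject);
  });

exports.handler = async (event) => {
  const key = event.key;             // e.g. "sample.pdf" (placeholder)
  const sourceUrl = event.sourceUrl; // where the PDF lives (placeholder)

  // 1. Does the file already exist in S3?
  const exists = await s3.headObject({ Bucket: BUCKET, Key: key }).promise()
    .then(() => true)
    .catch(() => false);

  // 2. If not, download it and put it into S3.
  if (!exists) {
    const body = await download(sourceUrl);
    await s3.putObject({
      Bucket: BUCKET,
      Key: key,
      Body: body,
      ContentType: "application/pdf"
    }).promise();
  }

  // 3. Return a short-lived pre-signed URL; the client fetches the PDF directly from S3.
  const url = s3.getSignedUrl("getObject", { Bucket: BUCKET, Key: key, Expires: 300 });
  return { statusCode: 200, body: JSON.stringify({ url }) };
};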
I'm having a little problem with my request when getting the HTML from https://readnovelfull.com/beauty-and-the-beast-wolf-hubby-xoxo/chapter-1-i-would-not-be-responsible.html, as an example.
I can get all the HTML from the other URLs, e.g. the novel details, latest updated, etc.,
but not when I'm getting the details for the chapters.
I tested those URLs in Postman and also on https://codebeautify.org/source-code-viewer, and there is no problem getting the content of the chapter, which lives under the div #chr-content.
So I am a bit lost now; what am I doing wrong?
Here is my fetch call, which works on other novel sites.
static async getHtml(
url: string
): Promise<HTMLDivElement> {
console.log(`Sending html request to ${url}`);
var container = parse('<div>test</div>') as any;
try {
let headers = new Headers({
Accept: '*/*',
'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'
});
var data = await fetch(url, {
method: 'GET',
headers: headers,
});
if (!data.ok) {
const message = `An error has occurred: ${data.status}`;
console.log(message);
} else {
var html = await data.text();
console.log('Data is ok. proceed to parse it');
container = parse('<div>' + html + '</div>') as any;
}
} catch (e) {
console.log(e);
}
return container as HTMLDivElement;
}
I should mention that I am not getting any error whatsoever; it's just that the HTML I am getting is not the same as what Postman and the other site are getting.
Update
OK, so I did some research on the site and this is what I came up with.
The site needs an X-CSRF-TOKEN, and I was able to extract it and find these values:
const csrf = 'x09Q6KGqJOJJx2iHwNQUa_mYfG4neV9EOOMsUBKTItKfNjSc0thQzwf2HvCR7SQCqfIpC2ogPj18jG4dQPgVtQ==';
const id = 774791;
which I need to send in a request to https://readnovelfull.com/ajax/increase-chapter-views with the values above, and this will send back true/false.
Now I tried to include the csrf in my fetch call afterwards, but it's still the same as before: no data.
Any idea if I am still doing something wrong?
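To illustrate, this is roughly how I imagine that ajax call should look; I'm guessing at the field names the endpoint expects:
// Illustrative only: the field names (chapterId/csrf) are my guess.
async function increaseChapterViews(csrf, id) {
  const res = await fetch('https://readnovelfull.com/ajax/increase-chapter-views', {
    method: 'POST',
    headers: {
      'X-CSRF-TOKEN': csrf,
      'Content-Type': 'application/x-www-form-urlencoded'
    },
    body: `chapterId=${id}&csrf=${encodeURIComponent(csrf)}`
  });
  return res.text(); // the site reportedly answers with true/false
}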
Looks like you have an issue with CORS. To make sure, just try to send the request through a CORS proxy. One quick way to do that is to prefix the URL:
https://cors-anywhere.herokuapp.com/https://readnovelfull.com/beauty-and-the-beast-wolf-hubby-xoxo/chapter-1-i-would-not-be-responsible.html
NOTE: Using this CORS proxy in production is not recommended, because it is not secure.
If you receive data after that, it means you are indeed dealing with CORS, and you need to figure out how to solve it in your specific case.
Reproducible example:
const parse = (str) => str;
const getHtml = async (url) => {
console.log(`Sending html request to ${url}`);
var container = parse('<div>No content =(</div>')
try {
let headers = new Headers({
Accept: '*/*',
'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'
});
var data = await fetch(url, {
method: 'GET',
headers: headers,
});
if (!data.ok) {
const message = `An error has occurred: ${data.status}`;
console.log(message);
} else {
var html = await data.text();
console.log('Data is ok. proceed to parse it');
container = parse('<div>' + html + '</div>');
}
} catch (e) {
console.log(e);
}
return container;
}
getHtml('https://cors-anywhere.herokuapp.com/https://readnovelfull.com/beauty-and-the-beast-wolf-hubby-xoxo/chapter-1-i-would-not-be-responsible.html').then(htmlContent => document.querySelector('div').innerHTML = htmlContent);
<div>loading...</div>
If that doesn't help, please provide a reproducible React Native example, but I believe there is no difference between RN and web environments in this case.
I have a project that requires the use of request and is written in JavaScript run by Node. I am having trouble storing the body of a request in a global variable so I can access it in other functions. Is there a way to save the response in a global variable? Thanks.
var request = require('request');
var globalBody = "";
var options = {
url: 'http://www.google.com/',
headers: {
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 6_1_4 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10B350 Safari/8536.25'
}
}
request(options, function (error, response, body) {
if(error != null){
console.log('error:', error);
}
if(response.statusCode != 200){
console.log('statusCode:', response && response.statusCode);
}else{
globalBody = body;
}
});
console.log(globalBody)
The last line "console.log(globalBody)" results in "" but I want it to display the body of the request. Is there any way to do this?
use window.localStorage to store globally
console.log(globalBody) gets called before the request callback runs. The globalBody value does get updated, but only after you have already tried to print it. Try the following code; it will work.
var request = require('request');
var globalBody = "";
var options = {
url: 'http://www.google.com/',
headers: {
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 6_1_4 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10B350 Safari/8536.25'
}
};
request(options, function (error, response, body) {
if(error != null){
console.log('error:', error);
}
if(response.statusCode != 200){
console.log('statusCode:', response && response.statusCode);
}else{
globalBody = body;
console.log(globalBody);
}
});
You need to call the other functions after the async request has completed successfully if they are to use the global variable.
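For example, a minimal sketch that wraps the same request in a Promise (reusing the options and globalBody variables from the snippet above), so the code that needs globalBody only runs once the response has arrived:
var request = require('request');

function fetchBody(options) {
  return new Promise(function (resolve, reject) {
    request(options, function (error, response, body) {
      if (error) return reject(error);
      if (response.statusCode != 200) return reject(new Error('statusCode: ' + response.statusCode));
      resolve(body);
    });
  });
}

// Everything that depends on the body goes after the request has completed.
fetchBody(options).then(function (body) {
  globalBody = body;
  console.log(globalBody); // now prints the actual response body
  // call your other functions from here
}).catch(function (err) {
  console.log('error:', err);
});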
My application: when you send a request from a browser to my Node server, my Node server requests an origin website, downloads all of its static files (including code) and serves them back to the user. The next time you visit my Node server, it serves all the content back from Node instead of requesting the origin.
When I make a request for a Font Awesome file from Node,
http://example.com/modules/megamenu/fonts/fontawesome-webfont.woff?v=4.2.0
the file's content is different from when I request the same URL with curl.
This causes the following error in the browser when I return the file from Node back to the browser:
Failed to decode downloaded font: http://nodeDomain.test/modules/megamenu/fonts/fontawesome-webfont.woff?v=4.2.0
If I copy and paste the content from the file I requested via curl into the file stored on my Node server, the error disappears and all the Font Awesome stuff works.
Here are the headers I am sending with the request to the origin server from Node.
{
connection: 'keep-alive',
pragma: 'no-cache',
'cache-control': 'no-cache',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36',
accept: '*/*',
referer: 'http://example.com/modules/megamenu/css/font-awesome.min.css',
'accept-language': 'en-US,en;q=0.8',
cookie: 'PrestaShop-a30a9934ef476d11b.....'
}
I tried to see what headers were being sent when doing the curl request from the command line, but I cannot figure out how to do it.
______Node code used to fetch file_______
url: in options is the URL stated above
headers: are the browser's request headers
var options = {
url: originRequestPath,
headers: requestHeaders
}
var originPage = rquest(options);
var responseBody = '';
var resHeads = '';
originPage.on('response', function(res)
{
//store response headers locally
});
originPage.on('data', function(chunk)
{
responseBody += chunk;
});
originPage.on('end', function()
{
storeData.storePageData(storeFilePath, responseBody);
});
__________Store Function below________________
exp.storePageData = function(storePath, pageContent)
{
fs.outputFile(storePath, pageContent, function(err) {
if(err){ console.log(err)}
});
}
I believe the problem with your code is that you are converting your buffer output to a UTF-8 string. Since you are concatenating the buffer onto a string (responseBody += chunk;), each buffer chunk is converted to a UTF-8 string, and you lose data for binary files. Try it this way:
var originPage = rquest(options);
var chunks = []
originPage.on('response', function(res)
{
//store response headers locally
});
originPage.on('data', function(chunk)
{
chunks.push(chunk)
});
originPage.on('end', function()
{
var data = Buffer.concat(chunks)
//send data to browser and store content locally
});
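And for the "store content locally" part, keep the data as a Buffer all the way down; fs-extra's outputFile (which your storePageData appears to use) accepts a Buffer directly, so a sketch would be:
originPage.on('end', function ()
{
    var data = Buffer.concat(chunks);
    // hand the Buffer over as-is; do NOT convert it to a string first
    storeData.storePageData(storeFilePath, data);
    // ...send data to the browser here as well
});

// storePageData can stay unchanged: outputFile happily writes a Buffer
exp.storePageData = function(storePath, pageContent)
{
    fs.outputFile(storePath, pageContent, function(err) {
        if (err) { console.log(err); }
    });
};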
I'm currently working on a sort of web proxy for Node.js, but I am having trouble submitting forms: on most sites I am able to submit a form successfully, but on some others I am not so fortunate, and I can't pinpoint whether there is anything I'm doing wrong.
Is there a possible better way of doing this?
Also, how would I be able to handle multipart forms using the Express.js bodyparser?
At the moment, this is what I have in the way of form processing:
function proxy(req, res,request)
{
var sess = req.session;
var onUrl_Parse = function(url){
var Uri= new URI.URI(url);//Parses incoming url
var options = {
uri: url,
method: req.method
}
options.headers={"User-Agent": "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0", "Cookie":req.session.cook};
if(req.body) //If x-www-form-urlencoded is posted.
{
var options = {
uri: url,
method: req.method,
body: req.rawBody
}
options.headers={"User-Agent": "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0", "Cookie":req.session.cook, "Content-Type":"application/x-www-form-urlencoded"};
}
onRequestOptions(options, url);
}
,onRequestOptions = function(options, url)
{
request(options, function(error, response, body)
{
if(!error){
if(response.headers['set-cookie'])
req.session.cook=response.headers['set-cookie'];
Proxy_Parser.Parser(body, url, async, onParse);// Parses returned html return displayable content
}
});
}
,onParse = function(HTML_BODY)
{
if(HTML_BODY=="")
res.end();
res.write(HTML_BODY);
res.end();
console.log("DONEEEEE");
}
Url_Parser.Url(req, URI, onUrl_Parse);
}
I am not sure exactly what you are trying to accomplish, but https://github.com/felixge/node-formidable is recommended anyway!
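For the multipart part of the question, a minimal sketch using formidable's classic IncomingForm API (assuming an Express app; the route and response shape are placeholders):
var formidable = require('formidable');

// Handle a multipart form on a single route instead of the body parser.
app.post('/upload', function (req, res) {
  var form = new formidable.IncomingForm();
  form.parse(req, function (err, fields, files) {
    if (err) { res.statusCode = 500; return res.end(err.message); }
    // fields holds the ordinary inputs, files the uploaded file metadata
    res.json({ fields: Object.keys(fields), files: Object.keys(files) });
  });
});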
I would start with something like node-http-proxy. All the hard work is done for you and you can just define the routes you want to proxy and put in some handlers for the custom response info.
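A minimal sketch of that idea (using http-proxy's newer 1.x API; the target URL and port are placeholders, so check the http-proxy docs for the options you actually need):
var http = require('http');
var httpProxy = require('http-proxy');

// One proxy instance; request/response hooks let you tweak cookies or headers.
var proxy = httpProxy.createProxyServer({ changeOrigin: true });

proxy.on('proxyRes', function (proxyRes, req, res) {
  // e.g. capture set-cookie headers here instead of doing it by hand
});

http.createServer(function (req, res) {
  proxy.web(req, res, { target: 'http://example.com' }, function (err) {
    res.statusCode = 502;
    res.end('Proxy error: ' + err.message);
  });
}).listen(8000);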