Retrieving accurate filesize data using node.js - javascript

I am trying to retrieve accurate file size information for an image URL using Node.js (specifically the http module). Every time I run the code below (with any image URL) I get '4061' bytes as the response. The example below should return about 3000 bytes.
I am open to corrections to my existing method of calculation or an alternative method to handle this in Node. Thanks.
var http = require('http');

var options = {
  host: 'www.subway.com',
  path: '/menu/Images/Menu/Categories_Main/menu-category-featured-products.jpg'
};

// http.get calls req.end() automatically, so no explicit end() is needed
http.get(options, function (res) {
  var filesize = res.headers['content-length'];
  console.log(filesize + " bytes");
});
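One way to sanity-check that header is to count the bytes actually received and compare. Here is a minimal diagnostic sketch, not a fix, using the same URL as above:

var http = require('http');

var options = {
  host: 'www.subway.com',
  path: '/menu/Images/Menu/Categories_Main/menu-category-featured-products.jpg'
};

http.get(options, function (res) {
  var bytes = 0;
  // sum the sizes of the chunks actually received
  res.on('data', function (chunk) {
    bytes += chunk.length;
  });
  res.on('end', function () {
    console.log('content-length header: ' + res.headers['content-length']);
    console.log('bytes received: ' + bytes);
  });
});

If the two numbers agree, the server really is sending 4061 bytes for that URL; for instance, the image may have changed since, or the response may be an error or redirect page rather than the expected ~3000-byte image.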

Related

not able to fetch text data from web url using javascript

I need to extract text data from a web URL (http://www.africau.edu/images/default/sample.pdf).
I used two node modules:
1) crawler-request
it('Read Pdf Data using crawler', function () {
  const crawler = require('crawler-request');
  function response_text_size(response) {
    response["size"] = response.text.length;
    return response;
  }
  crawler("http://www.africau.edu/images/default/sample.pdf", response_text_size).then(function (response) {
    // handle response
    console.log("Response = " + response.size);
  });
});
What happens with this is that it does not print anything to the console.
2) pfd2json/pdfparser
it('Read Data from url', function () {
  var request = require('request');
  var pdf = require('pfd2json/pdfparser');
  var fs = require('fs');
  var pdfUrl = "http://www.africau.edu/images/default/sample.pdf";
  let databuffer = fs.readFileSync(pdfUrl);
  pdf(databuffer).then(function (data) {
    var arr:Array<String> = data.text;
    var n = arr.includes('Thursday 02 May');
    console.log("Print Array " + n);
  });
});
Failed: ENOENT: no such file or directory, open 'http://www.africau.edu/images/default/sample.pdf'
I am able to access the data from a local path, but not able to extract it from a URL.
The issue here is that you are using the fs module (File System) to read a file on a distant server.
You also mistyped the pdf2json module, which should give you an error.
You did require the request module, which makes it possible to access that distant file. Here's one way to do this:
it('Read Data from url', function () {
  var request = require('request');
  var PDFParser = require('pdf2json');
  var pdfUrl = 'http://unec.edu.az/application/uploads/2014/12/pdf-sample.pdf';
  var pdfParser = new PDFParser(this, 1);

  // executed if the parser fails for any reason
  pdfParser.on("pdfParser_dataError", errData => console.error(errData.parserError));

  // executed when the parser has finished
  pdfParser.on("pdfParser_dataReady", pdfData => console.log(pdfParser.getRawTextContent()));

  // request the pdf's file content, then call the pdf parser on the retrieved buffer
  request({ url: pdfUrl, encoding: null }, (error, response, body) => pdfParser.parseBuffer(body));
});
This makes it possible to load the distant .pdf file into your program.
I'd recommend looking at the pdf2json documentation if you want to do more. This simply outputs the textual content of the .pdf file when the parser has finished reading the data.
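As a small follow-up, to reproduce the check from the question's second snippet (whether the extracted text contains a given string), the dataReady handler from the code above could, for instance, be written as:

// executed when the parser has finished; check the raw text for a substring
pdfParser.on("pdfParser_dataReady", pdfData => {
  var text = pdfParser.getRawTextContent();
  console.log("Print Array " + text.includes('Thursday 02 May'));
});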

How to handle JSON data from XMLHttpRequest POST, using nodeJS

The overarching goal is to save some JSON data I create on a webpage to my files locally. I am definitely sending something to the server, but not in a format I seem to be able to access.
JsonData looks like:
{MetaData: {Stock: "UTX", Analysis: "LinearTrend2"},
 Projections: [2018-10-12: 127.62, 2018-10-11: 126.36000000000001, 2018-10-10: 132.17, 2018-10-09: 140.12, 2018-10-08: 137.73000000000002, …]}
XMLHttpRequest on my webpage:
function UpdateBackTestJSON(JsonUpdate) { // JsonUpdate being the JSON object from above
  var request = new XMLHttpRequest();
  request.open('POST', 'UpdateBackTestJSON');
  request.setRequestHeader("Content-Type", "application/json;charset=UTF-8");
  // request.setRequestHeader("Content-Type", "text/plain;charset=UTF-8");
  request.onload = function () {
    console.log("Updated JSON File");
  };
  console.log("about to send request");
  console.log(JsonUpdate);
  request.send(JSON.stringify(JsonUpdate));
}
and I handle POSTs on my server (rather carelessly, I realize; just going for functionality as a start here):
var http = require('http')
  , fs = require('fs')
  , url = require('url')
  , port = 8008;

var server = http.createServer(function (req, res) {
  var uri = url.parse(req.url);
  var qs = require('querystring');
  if (req.method == 'POST') {
    var body = '';
    req.on('data', function (data) {
      body += data;
      // 1e6 === 1 * Math.pow(10, 6) === 1 * 1000000 ~~~ 1MB
      if (body.length > 1e6) {
        // FLOOD ATTACK OR FAULTY CLIENT, NUKE REQUEST
        req.connection.destroy();
      }
    });
    req.on('end', function () {
      var POST = qs.parse(body);
      console.log(POST); // PARSED POST IS NOT THE RIGHT FORMAT... or something, idk what's going on
      UpdateBackTestData(POST);
    });
  }

  function UpdateBackTestData(TheJsonData) {
    console.log("UpdateBackTestData");
    console.log(TheJsonData);
    JsonUpdate = JSON.parse(TheJsonData);
    console.log(JsonUpdate["MetaData"]);
    // var Stock = JsonUpdate["MetaData"]["Stock"];
    // var Analysis = JsonUpdate["MetaData"]["Analysis"];
    fs.writeFile("/public/BackTestData/" + Analysis + "/" + Stock + ".json", TheJsonData, function (err) {
      if (err) {
        console.log(err);
      }
      console.log("updated BackTest JSON!!!");
    });
  }
});

// start the server on the configured port
server.listen(port);
Most confusing to me is that when I run this, the JSON object I'm trying to pass does go through to the server, but the entirety of the data ends up as a string used as a key for a blank value in an object. When I parse the body of the POST, I get: {'{MetaData:{'Stock':'UTX','Analysis:'LinearTrend2'},'Projections':[...]}': ''}. So my data is there... but not in a practical format.
I would prefer not to use Express or other server tools, as I have a fair amount of other services set up on my server that I don't want to go back and change if I can avoid it.
Thanks for any help
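Since the client sets Content-Type: application/json and sends JSON.stringify output, the body arrives as a single JSON string; querystring.parse then treats that entire string as one key with an empty value, which is exactly the {'{MetaData:...}': ''} shape observed. A minimal sketch of the end handler using JSON.parse instead, against the same server code as above:

req.on('end', function () {
  // the body is a JSON string, not URL-encoded form data,
  // so parse it with JSON.parse rather than querystring.parse
  var JsonUpdate = JSON.parse(body);
  console.log(JsonUpdate["MetaData"]); // { Stock: 'UTX', Analysis: 'LinearTrend2' }
  UpdateBackTestData(body); // pass the raw string on, since that function calls JSON.parse itself
});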

NodeJs streaming File from request to Filesystem not Memory

I have a strange issue. I'm using request for file downloads in Node.js, and every time I download larger files (>250 MB) they end up in memory instead of being streamed directly to the filesystem. Maybe I'm doing something wrong, but I made a test case and the file is still not getting streamed.
var request = require('request');
var fs = require('fs');

var writable = fs.createWriteStream("1GB.zip");
var stream = request.get({
  uri: "http://ipv4.download.thinkbroadband.com/1GB.zip",
  encoding: null
}, function (error, response, body) {
  console.log("code:", response.statusCode);
  if (response.statusCode >= 500) {
    log.err(response.statusCode, " Servererror", file.url);
  }
}).pipe(writable);
In this test case I'm downloading a sample 1 GB file, and if you watch the Node process in the task manager it grows to over 1 GB as the file downloads.
I want my Node application to use no more than 200 MB of RAM.
The issue is that you're passing a callback, which implicitly enables buffering inside request because one of the parameters for the callback is the entire body of the response.
If you want to know when the response is available, just listen for the response event instead:
var request = require('request');
var fs = require('fs');

var writable = fs.createWriteStream("1GB.zip");
var stream = request.get({
  uri: "http://ipv4.download.thinkbroadband.com/1GB.zip",
  encoding: null
}).on('response', function (response) {
  console.log("code:", response.statusCode);
  if (response.statusCode >= 500) {
    log.err(response.statusCode, " Servererror", file.url);
  }
}).pipe(writable);
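A possible refinement, not part of the original answer: attach error handlers so a failed download or write doesn't go unnoticed. pipe returns the destination stream, so the second handler ends up attached to writable:

request.get({
  uri: "http://ipv4.download.thinkbroadband.com/1GB.zip",
  encoding: null
}).on('error', function (err) {
  console.error('download failed:', err);
}).pipe(writable).on('error', function (err) {
  console.error('write failed:', err);
});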

Express.js proxy pipe translate XML to JSON

For my front-end (Angular) app, I need to connect to an external API which does not support CORS.
So my way around this is to have a simple proxy in Node.js/Express.js to pass the requests through. The additional benefit is that I can set my API credentials at the proxy level and don't have to pass them to the front-end, where a user might steal/abuse them.
This is all working perfectly.
Here's the code, for the record:
var request = require('request');
var config = require('./config');

var url = config.api.endpoint;
var uname = config.api.uname;
var pword = config.api.pword;

var headers = {
  "Authorization": 'Basic ' + new Buffer(uname + ':' + pword).toString('base64'),
  "Accept": "application/json"
};

exports.get = function (req, res) {
  var api_url = url + req.url;
  var r = request({ url: api_url, headers: headers });
  req.pipe(r).pipe(res);
};
The API endpoint I have to use offers XML as its only output format, so I use xml2js on the front-end to convert the XML response to JSON.
This is also working great, but I would like to lighten the load on the client and do the XML -> JSON parsing step on the server.
I assume I will have to create something like:
req.pipe(r).pipe(<convert_xml_to_json>).pipe(res);
But I don't have any idea how do create something like that.
So basically I'm looking to create an XML to JSON proxy as a layer on top of an already existing API.
There are a lot of questions on SO regarding "how do I make a proxy" and "how do I convert XML to JSON" but I couldn't find any that combine the two.
You need to use a Transform stream, and for the XML to JSON conversion you need a library; I use xml2json.
Then you use it like this (simplified, but it should work with request too):
var http = require('http');
var fs = require('fs');
var parser = require('xml2json');
var Transform = require('stream').Transform;

function xmlParser() {
  var transform = new Transform();
  transform._transform = function (chunk, encoding, done) {
    chunk = parser.toJson(chunk.toString());
    console.log(chunk);
    this.push(chunk);
    done();
  };
  transform.on('error', function (err) {
    console.log(err);
  });
  return transform;
}

var server = http.createServer(function (req, res) {
  var stream = fs.createReadStream(__dirname + '/data.xml');
  stream.pipe(xmlParser()).pipe(res);
});

server.listen(8000);
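Plugged into the proxy from the question, that would presumably look like this:

exports.get = function (req, res) {
  var api_url = url + req.url;
  var r = request({ url: api_url, headers: headers });
  // convert the XML body to JSON on its way through the proxy
  req.pipe(r).pipe(xmlParser()).pipe(res);
};

One caveat: this naive transform parses each chunk on its own, so it only works if every chunk contains well-formed XML. For larger responses the XML may be split across chunks, in which case you would need to buffer until the stream ends before parsing.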

NodeJS HTTP request POST ERROR socket hang up

Hi, I'm having problems performing an HTTP request from Node.js with a large array of JSON objects. The request works fine with a small array, but if I increase the size of the array I receive Error: socket hang up {"error":{"code":"ECONNRESET"}}. Is it required to perform multiple writes? Or is something going wrong at the other end?
Thanks in advance for taking your time here!
// data is a json object
var post_data = JSON.stringify(data);
var buf = new Buffer(post_data);
var len = buf.length;

var options = {
  hostname: address,
  port: port,
  path: pathName,
  method: 'PUT',
  headers: {
    'Content-Type': 'application/json',
    'Content-Length': len,
    'Transfer-Encoding': 'chunked'
  }
};

// http call to REST API server
var req = restHttp.request(options, function (res) {
  console.log('server PUT response received.');
  var resData = '';
  res.on('data', function (replyData) {
    // Check reply data for error.
    console.log(replyData.toString('utf8'));
    if (replyData !== 'undefined')
      resData += replyData;
  });
  res.on('end', function () {
    callback(JSON.parse(resData));
  });
});

req.write(buf);
req.end();
You can stream the request body.
If the data in buf were in a readable stream, then you could just do buf.pipe(req).
For example, if the current directory contains a file data.json with the JSON, you can do
var buf = fs.createReadStream(__dirname + '/data.json');
to create a ReadStream object. Then you can pipe this to your req:
buf.pipe(req);
The pipe command will call req.end once it's done streaming.
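Putting those fragments together, a minimal sketch, assuming a data.json file next to the script and the same address, port, pathName and callback as in the question (the core http module is used here in place of the question's restHttp wrapper):

var fs = require('fs');
var http = require('http');

var options = {
  hostname: address,
  port: port,
  path: pathName,
  method: 'PUT',
  // no Content-Length header: the body is streamed, so chunked transfer is used
  headers: { 'Content-Type': 'application/json' }
};

var req = http.request(options, function (res) {
  var resData = '';
  res.on('data', function (chunk) { resData += chunk; });
  res.on('end', function () { callback(JSON.parse(resData)); });
});

// pipe calls req.end() once the file has been fully streamed
fs.createReadStream(__dirname + '/data.json').pipe(req);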
