How to end() a file stream - javascript

I am having a weird issue with a piece of sample code that I got here, the central part being this:
// Question code: streams the request body into copy.csv while reporting
// progress, with manual backpressure (pause on full buffer, resume on
// 'drain'). BUG being asked about: file.end() is never called, so buffered
// data is never flushed and copy.csv ends up empty.
server.on('request', function(request, response) {
var file = fs.createWriteStream('copy.csv');
var fileSize = request.headers['content-length'];
var uploadedSize = 0;
request.on('data', function (chunk) {
uploadedSize += chunk.length;
// NOTE(review): uploadProgress is an implicit global (no var declaration).
uploadProgress = (uploadedSize/fileSize) * 100;
response.write(Math.round(uploadProgress) + "%" + " uploaded\n" );
// write() returns false when the stream's internal buffer is full
var bufferStore = file.write(chunk);
console.log(bufferStore);
console.log(chunk);
if(!bufferStore)
{
request.pause();
}
});
// resume the paused request once the file stream has flushed its buffer
file.on('drain', function() {
request.resume();
});
request.on('end', function() {
response.write('Upload done!');
response.end();
});
});
The problem is, the file copy.csv does not contain anything after the process is finished.
I tried to add file.end(); in the request.on('end'-callback, but it did not do the trick. However, if I add faulty code in said callback that causes an exception, the file is being written just fine (although this of course can't be the final solution).

To notify the stream that there are no more chunks to be read, you can simply call your_stream.push(null). You can read more about streams and push(null) from the excellent substack's stream guide.

Try this structure:
// Streams2 (Node 0.10) structure: pull chunks with 'readable'/read() and
// close the destination stream on 'end' so buffered data is flushed to disk.
var file = fs.WriteStream('copy.csv');
// fixed: fileSize/uploadedSize were implicit globals — the var statement
// ended on the previous line, leaving these as bare assignments.
var fileSize = request.headers['content-length'];
var uploadedSize = 0;
request.on('readable', function () { // Node.js 0.10 (Streams2 interface)
var newData = this.read() || new Buffer(0); // Sometimes may come null
file.write(newData);
uploadedSize += newData.length;
response.write(Math.round((uploadedSize / fileSize) * 100) + "%" + " uploaded\n" );
});
request.on('end', function () {
response.write('Upload done!');
response.end();
file.end(); // flush and close copy.csv — without this the file stays empty
});

Related

Formidable/NodeJS HTTP Server JPG save

Stack Overflow JS geniuses!
I have an issue with my current project, it's using node's HTTP createServer, using Formidable to parse the body data.
See code below. (http-listener.js)
// Question code: HTTP listener that uses Formidable to parse a camera's
// multipart POST. The XML field parses fine, but the JPEGs saved from the
// "fields" come out unreadable.
var listenport = 7200;
const server = http.createServer((req, res) => {
// Set vars ready
var data = '';
var plateImg = '';
var overview1 = '';
var overview2 = '';
new formidable.IncomingForm().parse(req)
// I originally thought it was sent in files, but it isnt, it's fields.
.on('file', function(name, file) {
console.log('Got file:', name);
})
// This is the correct procedure for my issue.
.on('field', function(name, field) {
console.log('Got a field:', name);
if(name.toLowerCase() === "anpr.xml")
{
// DO PARSE INTO JSON! This works, all is well.
xml2js.parseString(field, {explicitArray:false, ignoreAttrs:true}, function (err, result)
{
if(err)
{
// NOTE(review): alert() does not exist in Node — this branch would throw.
alert('Parse: '+err);
}
// Console log parsed json data.
console.log("Read: "+result.EventNotificationAlert.ANPR.licensePlate);
console.log(result);
data = result;
});
}
if(name.toLowerCase() === "licenseplatepicture.jpg")
{
plateImg = field
// This doesnt work?
// NOTE(review): Formidable hands non-file parts over as decoded text
// strings; pushing binary JPEG bytes through a text decode presumably
// corrupts the image — verify against the raw request dump.
// I need to store these fields as an image. ? Is this possible with it being sent as a field and not as a file upload.
// This is the only option I have as I can't control the client sending this data (It's a camera)
fs.writeFile(config.App.ImageDir+'/Plate.jpg', plateImg, function(err) {
if(err)console.log(err);
});
}
if(name.toLowerCase() === "detectionpicture.jpg")
{
if(overview1 == '')
{
overview1 = field;
}
else if(overview2 == '')
{
overview2 = field;
}
else
{
// do nothing else.
console.log("Couldn't send images to variable.");
}
}
})
.on('error', function(err) {
// NOTE(review): alert() is undefined in Node — console.error would be needed.
alert(err);
})
.on('end', function() {
// Once finished, send to ANPR data to function to handle data and insert to database. WORKS
// Call anpr function.
ANPR_ListenData(data, plateImg, overview1, overview2, function(result) {
if(result.Status > 0)
{
console.log("Accepted by: "+result.Example);
// reset var
data = '';
plateImg = '';
overview1 = '';
overview2 = '';
res.writeHead(200, {'content-type':'text/html'});
res.end();
}
});
});
});
server.listen(listenport, () => {
console.log('ANPR Server listening on port: ' + listenport);
});
Basically the images that are sent in the fields: licenseplatepicture.jpg etc I want to store them directly into my app image directory.
Unfortunately I have no control over how the chunks are sent to this server due to it being a network camera, I simply need to write a procedure.
The full request chunk is quite large so I will upload the file to OneDrive for you to glance at and understand the request.
Any help with this will be appreciated. I've tried everything I can possibly think of, but the file saves unreadable :(. I don't know where else to look or what else I can try, other than what I've already done & tried.
Request Txt File: https://1drv.ms/t/s!AqAIyFoqrBTO6hTwCimcHDHODqEi?e=pxJY00
Ryan.
I fixed this by using Busboy package instead of Formidable.
This is how my http listener looks like using Busboy.
// Working replacement using Busboy: the camera's JPEG parts arrive as
// multipart *files*, so they can be piped straight to disk as binary
// streams instead of being mangled through text-decoded fields.
var inspect = util.inspect;
var Busboy = require('busboy');
http.createServer(function(req, res) {
if (req.method === 'POST') {
//vars
// random per-request reference used to build unique image filenames
var ref = Math.random().toString(36).substring(5) + Math.random().toString(36).substring(2, 15);
var xml = '';
var parseXml = '';
var over1, over2 = '';
var i = 0; // counts "detectionpicture.jpg" parts seen so far
var busboy = new Busboy({ headers: req.headers });
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
console.log('File [' + fieldname + ']: filename: ' + filename + ', encoding: ' + encoding + ', mimetype: ' + mimetype);
if(filename.toLowerCase() === "licenseplatepicture.jpg")
{
var saveTo = config.App.ImageDir+"/"+ref+"_Plate.jpg";
if (!fs.existsSync(saveTo)) {
// file does not exist yet — stream it to disk as-is (binary safe)
file.pipe(fs.createWriteStream(saveTo));
}
}
if(filename.toLowerCase() === "detectionpicture.jpg")
{
i++;
var saveTo = config.App.ImageDir+"/"+ref+"_Front_"+i+".jpg";
if (!fs.existsSync(saveTo)) {
// file does not exist yet — stream it to disk as-is (binary safe)
file.pipe(fs.createWriteStream(saveTo));
}
}
file.on('data', function(data) {
if(filename.toLowerCase() === "anpr.xml")
{
xml += data;
}
console.log('File [' + fieldname + '] got ' + data.length + ' bytes');
});
file.on('end', function() {
console.log('File [' + fieldname + '] Finished');
});
});
busboy.on('field', function(fieldname, val, fieldnameTruncated, valTruncated, encoding, mimetype) {
console.log('Field [' + fieldname + ']: value: ' + inspect(val));
// No fields according to busboy
});
busboy.on('finish', function() {
// DO PARSE INTO JSON! This works, all is well.
xml2js.parseString(xml, {explicitArray:false, ignoreAttrs:true}, function (err, result)
{
if(err)
{
// fixed: alert() does not exist in Node — log the parse error instead
console.error('Parse: '+err);
}
// Set parsed var
parseXml = result;
});
var images = '';
// fixed: `if(i = 2)` assigned 2 to i, so the condition was always truthy
// and the else branch was unreachable
if(i === 2)
{
images = `{"Plate":"${ref}_Plate.jpg", "Front":"${ref}_Front_1.jpg", "Overview":"${ref}_Front_2.jpg"}`;
} else {
images = `{"Plate":"${ref}_Plate.jpg", "Front":"${ref}_Front_1.jpg", "Overview":"null"}`;
}
// Once parsed, send on to ANPR listen function.
ANPR_ListenData(ref, parseXml, images, function(result) {
if(result.Status == 1)
{
console.log('Data transfered for: '+parseXml.EventNotificationAlert.ANPR.licensePlate);
console.log('Accepted Camera: '+result.Example);
res.writeHead(200, { Connection: 'close', Location: '/' });
res.end();
}
});
});
req.pipe(busboy);
}
}).listen(7200, function() {
console.log('Listening for requests');
});
Hope this helps someone else in the future. Certainly caused me a lot of a wasted time.
Busboy was the better package to use when I was reading into it more, it makes more sense for what I was attempting to achieve.
Ryan :).
All the best.

send binary response in node from phantomjs child process

I have created a node endpoint to create rasterised version for my svg charts.
// Express endpoint: hands the chart-export request off to PhantomJS.
app.post('/dxexport', function(req, res){
node2Phantom.createPhantomProcess(req,res);
});
My node to phantom function uses spawn to run phantomjs
var spawn = require('child_process').spawn;
// Spawns PhantomJS to rasterise the chart described in req.body and relays
// the child's stdout back to the HTTP client.
exports.createPhantomProcess = function(req,res){
var userRequest = JSON.stringify(req.body);
var bin = "node_modules/.bin/phantomjs"
// NOTE(review): `res` cannot cross a process boundary — spawn stringifies
// each argument, so PhantomJS receives "[object Object]". This is the root
// cause of the error described in the question.
var args = ['./dxexport/exporter-server.js', userRequest, res];
var cspr = spawn(bin, args);
cspr.stdout.on('data', function (data) {
var buff = new Buffer(data);
res.send(data);
});
cspr.stderr.on('data', function (data) {
data += '';
console.log(data.replace("\n", "\nstderr: "));
});
cspr.on('exit', function (code) {
console.log('child process exited with code ' + code);
process.exit(code);
});
};
when rendering is completed and file is successfully created I call the renderCompleted function inside phantomjs:
// Question code, runs INSIDE PhantomJS: tries to stream the rendered file
// back through `parameters.response`, but that value is only the stringified
// remnant of Node's res object (see the spawn call), so Node-only response
// methods are not available here.
var renderCompleted = function (parameters) {
var exportFile = fileSystem.open(parameters.exportFileName, "rb"),
exportFileContent = exportFile.read();
parameters.response.statusCode = 200;
parameters.response.headers = {
"Access-Control-Allow-Origin": parameters.url,
"Content-Type": contentTypes[parameters.format],
"Content-Disposition": "attachment; fileName=" + parameters.fileName + "." + parameters.format,
"Content-Length": exportFileContent.length
};
// NOTE(review): setEncoding/write/close are Node http.ServerResponse
// methods, not PhantomJS ones — this line is where the
// "'undefined' is not a function" TypeError comes from.
parameters.response.setEncoding("binary");
parameters.response.write(exportFileContent);
/////// somehow send exportFileContent as node res object for download \\\\\\\\
exportFile.close();
parameters.format !== "svg" && fileSystem.remove(parameters.exportFileName);
for (var i = 0; i < parameters.filesPath.length; i++)
fileSystem.remove(parameters.filesPath[i]);
parameters.filesPath = [];
parameters.response.close()
};
this response is passed from nodejs however apparently this code is calling phantomjs methods so I get errors like
TypeError: 'undefined' is not a function (evaluating 'parameters.response.setEncoding("binary")')
How can I send the binary file response somehow to the node function so it can be sent with my node server to the user?
Any help is appreciated.
Ok after some struggle here is the working solution if someone stumbles on this post.
As Artjom B. mentioned I found that the easiest way was delegate the rendering and file creation of the visualisation to phantomjs. Then send all the parameters related to those operations once done through the console.
Also updated the answer based on #ArtjomB.'s advice to wrap the console message sent in a unique beginning and end string so the risk of the other possible future outputs being mistaken for the intended rendered file object is mitigated.
// Emits the render parameters on stdout, wrapped in unique sentinel markers
// so the parent Node process can pick them out of any other stdout noise.
var renderCompleted = function (parameters) {
  var payload = JSON.stringify(parameters);
  console.log("STRTORNDRD" + payload + "ENDORNDRD");
};
This then gets picked up by stdout and usable like this:
exports.exportVisual = function (req, res) {
var userRequest = JSON.stringify(req.body);
var bin = "node_modules/.bin/phantomjs"
var args = ['./dxexport/exporter-server.js', userRequest, res];
var cspr = spawn(bin, args);
var contentTypes = {
pdf: "application/pdf",
svg: "image/svg+xml",
png: "image/png"
};
cspr.stdout.on('data', function (data) {
var buff = new Buffer(data).toString('utf8');
var strData = buff.match(new RegExp("STRTORNDRD" + "(.*)" + "ENDORNDRD"));
if (strData) {
var parameters = JSON.parse(strData[1]);
var img = fs.readFileSync(parameters.exportFileName);
var headers = {
"Access-Control-Allow-Origin": parameters.url,
"Content-Type": contentTypes[parameters.format],
"Content-Disposition": "attachment; fileName=" + parameters.fileName + "." + parameters.format,
"Content-Length": img.length
};
res.writeHead(200, headers);
res.end(img, 'binary');
// delete files after done
if (parameters.format != "svg") {
fs.unlink(parameters.exportFileName);
}
for (var i = 0; i < parameters.filesPath.length; i++)
fs.unlink(parameters.filesPath[i]);
// done. kill it
cspr.kill('SIGINT');
}
});
cspr.stderr.on('data', function (data) {
data += '';
console.log(data.replace("\n", "\nstderr: "));
});
cspr.on('exit', function (code) {
console.log('child process exited with code ' + code);
process.exit(code);
});
};

Nodejs with Phantom-Cluster just stops

I'm running nodejs, not as a webserver, but from the command line against a pretty heavily modified version of the example.js which comes with the phantom-cluster package. Server is Ubuntu 13.10 in an AWS instance.
My goal is to "ping" more than 64000 urls to test for 404 or 500 http errors. If there is an error, then log that url with the error for later processing.
Here is my code:
(function() {
var WEBSITES, cluster, enqueueRequests, main, phantomCluster;
var fs = require('fs');
phantomCluster = require("./index");
cluster = require("cluster");
WEBS = [];
// Reads the URL list (one per line) from disk and passes it to
// callback(err, urls).
// fixed: the loop index `i` was an implicit global, shared with the other
// undeclared `i` loops in this file; the manual copy loop was also
// redundant — split("\n") already produces the desired array.
function loadUrls(callback)
{
console.log("starting loaded");
var fs = require('fs');
fs.readFile("/home/ubuntu/phantom-cluster/theurls.txt", 'utf8', function (err, data)
{
if (err) throw err;
callback(null, data.split("\n"));
});
}
// Queues every URL from theurls.txt into the phantom-cluster engine,
// logging timeouts to error_log.log and counting completed responses.
enqueueRequests = function(engine)
{
// NOTE(review): `fulfilled` is an implicit global (no var declaration).
fulfilled = 0;
loadUrls(function(err,WEBS)
{
console.log(">>" + WEBS.length + " urls to process");
var enqueuer, i, key, _i, _results;
enqueuer = function(request)
{
var item;
item = engine.enqueue(request);
item.on("timeout", function()
{
fs.appendFile("/home/ubuntu/error_log.log", "TIMEOUT: " + request + "\r\n", function (err) {});
});
return item.on("response", function()
{
fulfilled++;
console.log(fulfilled);
});
};
_results = [];
// NOTE(review): `i = i = 0` is a doubled assignment and this outer loop
// runs exactly once — the inner loop over WEBS does the real enqueueing.
for (i = i = 0;i < 1; i++)
{
_results.push((function()
{
var _results1;
_results1 = [];
// NOTE(review): `x` is another implicit global loop index.
for(x=0;x<WEBS.length;x++)
{
_results1.push(enqueuer(WEBS[x]));
}
return _results1;
})());
}
return _results;
});
};
// Drives the crawl: master enqueues the URL list, then 20 phantom workers
// each open one queued URL and log any 404/500 resource responses.
main = function()
{
var engine;
// 20 workers, one iteration each, phantom ports allocated from 54321 up
engine = phantomCluster.createQueued(
{
workers: 20,
workerIterations: 1,
phantomBasePort: 54321
});
if (cluster.isMaster)
{
enqueueRequests(engine);
}
// Fired for each dequeued url ("realUrl|extra" format, split below).
engine.on("queueItemReady", function(url)
{
var _this = this;
var retVal;
// NOTE(review): urlArray (and myUrl further down) are implicit globals
// shared across concurrently-running callbacks.
urlArray = url.split("|");
var phantom = this.ph;
var curPage = phantom.createPage(function(page)
{
page.set('settings.loadImages', false);
page.set('settings.javascriptEnabled', false);
page.set('settings.resourceTimeout', 5000);
page.set('settings.userAgent','Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101');
page.set('onError', function(msg, trace)
{
var msgStack = ['ERROR: ' + msg];
if (trace && trace.length)
{
msgStack.push('TRACE:');
trace.forEach(function(t)
{
msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function +'")' : ''));
});
}
console.error(msgStack.join('\n'));
});
page.set('onResourceReceived', function(response)
{
// log any 404/500 whose url matches the page we asked for
if((response.status == "404") || (response.status == "500"))
{
myUrl = decodeURI(response.url);
if(myUrl == urlArray[0])
{
retVal = response.status + "|" + url;
fs.appendFile("/home/ubuntu/error_log.log", response.status + "|" + url + "\r\n", function (err) {});
return retVal;
}
}
});
page.open(urlArray[0], function(status)
{
_this.next(); // _this is a PhantomQueuedClusterClient object
return _this.queueItemResponse(status);
});
});
});
return engine.start();
};
main();
}).call(this);
The file which is referenced as index.js is here:
https://github.com/dailymuse/phantom-cluster/blob/master/index.js
and I have not modified it at all.
This works great, and sparks up 20 worker processes which go out and get the initial response code for the queued urls.
Here is the problem:
After processing anywhere from 960-990 urls, the whole thing just stops. no error code, no nothing.
I've tried everything I can think of from some sort of node timeout, to an issue with a given url to banging my head against my desk. The first two would return an error when I create a test for it. The third just makes my head hurt.
Anyone have any help or experience working with this?
EDIT I made an update to the code and added the on.response callback and then called the nextTick method to remove the item from the queue. Still have the same issue.
Have you taken a look at link-crawler? It uses phantom-cluster and prerender to do almost exactly what you're looking for.
If all you're looking to do is check HTTP status codes, you don't need a headless browser to do that. Node can do that on it's own using http.request() or something that utilizes promises like request-promise.
Unless you're needing to verify something in the rendering of the pages that you're crawling, there's no need to render the page in a browser, just make HTTP calls to the URLs and introspect their statuses.

With node.js, how do I set a var to the response from an HTTP client?

I know this is probably Asynchronous Javascript 101 and I do have some books on the Kindle I could consult, but I am nowhere near my device.
I have a node app with a variable being assigned to a module that I'm loading. The module has one function that downloads a string of data from a URL.
The problem is, how do I not set the variable until the request has returned?
My code looks like this:
Downloader.js:
var http = require('http');
// Question code: the `return` inside the 'end' handler returns to the event
// emitter, not to downloadString's caller, so the downloaded text is lost.
// The function needs a callback (or Promise) to hand the data back.
exports.downloadString = function(str) {
console.log("Downloading from " + str);
http.get(str, function(res) {
var data = [];
console.log("Got response: " + res.statusCode);
res.on('data', function (chunk) {
data.push(chunk);
});
res.on('end', function() {
return data.join('');
});
}).on('error', function(e) {
console.log("Got error: " + e.message);
});
}
app.js:
var downloader = require('./lib/Downloader')
, dateParser = require('./lib/DateParser')
, eventIdGenerator = require('./lib/EventIdGenerator');
var report = downloader.downloadString("http://exampleapi.com");
console.log(report);
I need to wait until the variable named "report" is populated.
Obviously this means my Mocha tests are also failing as I'm still unsure of how to tell the test to wait until the variable is filled.
I'm sure this is extremely simple, but I am drawing a blank!
Thanks!
Node.js is (mostly) asynchronous, so you'd need to pass a callback function to your module:
Downloader.js:
var http = require('http');
exports.downloadString = function(str, callback) {
console.log("Downloading from " + str);
http.get(str, function(res) {
var data = [];
console.log("Got response: " + res.statusCode);
res.on('data', function (chunk) {
data.push(chunk);
});
res.on('end', function() {
callback(data.join(''));
});
}).on('error', function(e) {
console.log("Got error: " + e.message);
});
};
app.js:
var downloader = require('./lib/Downloader')
, dateParser = require('./lib/DateParser')
, eventIdGenerator = require('./lib/EventIdGenerator');
// The result arrives asynchronously, so it is consumed inside the callback
// rather than assigned from a return value.
downloader.downloadString("http://exampleapi.com", function(report) {
console.log(report);
});

Download large file with node.js avoiding high memory consumption

I'm trying to create a file downloader as a background service, but when a large file is scheduled, it's first put in memory and then, at the end of the download, the file is written to disk.
How can I make the file be wrote gradually to the disk preserving memory considering that I may have lots of files being downloaded at the same time?
Here's the code I'm using:
// Question code: buffers the entire download into the `body` string and
// writes it once at the end — memory use grows with the file size.
var sys = require("sys"),
http = require("http"),
url = require("url"),
path = require("path"),
fs = require("fs"),
events = require("events");
var downloadfile = "http://nodejs.org/dist/node-v0.2.6.tar.gz";
var host = url.parse(downloadfile).hostname
var filename = url.parse(downloadfile).pathname.split("/").pop()
var theurl = http.createClient(80, host);
var requestUrl = downloadfile;
sys.puts("Downloading file: " + filename);
sys.puts("Before download request");
var request = theurl.request('GET', requestUrl, {"host": host});
request.end();
var dlprogress = 0;
// report progress once a second
setInterval(function () {
sys.puts("Download progress: " + dlprogress + " bytes");
}, 1000);
request.addListener('response', function (response) {
response.setEncoding('binary')
sys.puts("File size: " + response.headers['content-length'] + " bytes.")
var body = '';
response.addListener('data', function (chunk) {
dlprogress += chunk.length;
// NOTE(review): accumulating into a string keeps the whole file in RAM
body += chunk;
});
response.addListener("end", function() {
fs.writeFileSync(filename, body, 'binary');
sys.puts("After download finished");
});
});
I changed the callback to:
// Fixed response handler: append each chunk to disk as it arrives instead
// of buffering the whole body in memory.
request.addListener('response', function (response) {
var downloadfile = fs.createWriteStream(filename, {'flags': 'a'});
sys.puts("File size " + filename + ": " + response.headers['content-length'] + " bytes.");
response.addListener('data', function (chunk) {
dlprogress += chunk.length;
// fixed: `encoding='binary'` assigned an implicit global named `encoding`;
// the encoding is simply the second positional argument to write().
downloadfile.write(chunk, 'binary');
});
response.addListener("end", function() {
downloadfile.end(); // flush and close the file stream
sys.puts("Finished downloading " + filename);
});
});
This worked perfectly.
does the request package work for your uses?
it lets you do things like this:
request(downloadurl).pipe(fs.createWriteStream(downloadtohere))
Take a look at http-request:
// Examples for the third-party `http-request` package (not Node's `http`).
// shorthand syntax, buffered response
http.get('http://localhost/get', function (err, res) {
if (err) throw err;
console.log(res.code, res.headers, res.buffer.toString());
});
// save the response to 'myfile.bin' with a progress callback
http.get({
url: 'http://localhost/get',
progress: function (current, total) {
console.log('downloaded %d bytes from %d', current, total);
}
}, 'myfile.bin', function (err, res) {
if (err) throw err;
console.log(res.code, res.headers, res.file);
});
When downloading a large file, please use fs.write and not fs.writeFile, as fs.writeFile would overwrite the previously written content on each call.
// Streams an HTTP download straight to sai.tar.gz chunk-by-chunk via
// fs.write, reporting progress back to the client through sendstatus().
function downloadfile(res) {
var size = 0; // fixed: `size` was never declared — `size +=` threw a ReferenceError
var requestserver = http.request(options, function(r) {
console.log('STATUS: ' + r.statusCode);
console.log('HEADERS: ' + JSON.stringify(r.headers));
var fd = fs.openSync('sai.tar.gz', 'w');
r.on('data', function (chunk) {
size += chunk.length;
console.log(size+'bytes received');
sendstatus(res,size);
// write each chunk at the file's current position (position = null)
fs.write(fd, chunk, 0, chunk.length, null, function(er, written) {
});
});
r.on('end',function(){
console.log('\nended from server');
fs.closeSync(fd);
sendendstatus(res);
});
});
// fixed: without end() the request is never actually sent
requestserver.end();
}
Instead of holding the content into memory in the "data" event listener you should write to the file in append mode.
Use streams like Carter Cole suggested. Here is a more complete example
// Streaming example with the `request` + `filed` packages: pipe the HTTP
// response straight into a temp file so nothing is buffered in memory.
var inspect = require('eyespect').inspector();
var request = require('request');
var filed = require('filed');
var temp = require('temp');
var downloadURL = 'http://upload.wikimedia.org/wikipedia/commons/e/ec/Hazard_Creek_Kayaker.JPG';
var downloadPath = temp.path({prefix: 'singlePageRaw', suffix: '.jpg'});
var downloadFile = filed(downloadPath);
var r = request(downloadURL).pipe(downloadFile);
r.on('data', function(data) {
inspect('binary data received');
});
downloadFile.on('end', function () {
inspect(downloadPath, 'file downloaded to path');
});
downloadFile.on('error', function (err) {
inspect(err, 'error downloading file');
});
You may need to install modules which you can do via
npm install filed request eyespect temp

Categories

Resources