Stack Overflow JS geniuses!
I have an issue with my current project. It uses Node's HTTP createServer, with Formidable parsing the body data.
See the code below (http-listener.js).
var listenport = 7200;
const server = http.createServer((req, res) => {
// Set vars ready
var data = '';
var plateImg = '';
var overview1 = '';
var overview2 = '';
new formidable.IncomingForm().parse(req)
// I originally thought it was sent in files, but it isn't; it's sent in fields.
.on('file', function(name, file) {
console.log('Got file:', name);
})
// This is the correct procedure for my issue.
.on('field', function(name, field) {
console.log('Got a field:', name);
if(name.toLowerCase() === "anpr.xml")
{
// DO PARSE INTO JSON! This works, all is well.
xml2js.parseString(field, {explicitArray:false, ignoreAttrs:true}, function (err, result)
{
if(err)
{
alert('Parse: '+err);
}
// Console log parsed json data.
console.log("Read: "+result.EventNotificationAlert.ANPR.licensePlate);
console.log(result);
data = result;
});
}
if(name.toLowerCase() === "licenseplatepicture.jpg")
{
plateImg = field;
// This doesn't work?
// I need to store these fields as images. Is this possible with the data being sent as a field rather than as a file upload?
// This is the only option I have, as I can't control the client sending this data (it's a camera).
fs.writeFile(config.App.ImageDir+'/Plate.jpg', plateImg, function(err) {
if(err)console.log(err);
});
}
if(name.toLowerCase() === "detectionpicture.jpg")
{
if(overview1 == '')
{
overview1 = field;
}
else if(overview2 == '')
{
overview2 = field;
}
else
{
// do nothing else.
console.log("Couldn't send images to variable.");
}
}
})
.on('error', function(err) {
alert(err);
})
.on('end', function() {
// Once finished, send the ANPR data to the function that handles it and inserts it into the database. WORKS
// Call anpr function.
ANPR_ListenData(data, plateImg, overview1, overview2, function(result) {
if(result.Status > 0)
{
console.log("Accepted by: "+result.Example);
// reset var
data = '';
plateImg = '';
overview1 = '';
overview2 = '';
res.writeHead(200, {'content-type':'text/html'});
res.end();
}
});
});
});
server.listen(listenport, () => {
console.log('ANPR Server listening on port: ' + listenport);
});
Basically, I want to store the images that are sent in the fields (licenseplatepicture.jpg etc.) directly into my app's image directory.
Unfortunately I have no control over how the chunks are sent to this server, since the client is a network camera; I simply need to write a procedure to handle them.
The full request is quite large, so I will upload the file to OneDrive for you to glance at and understand the request.
Any help with this will be appreciated. I've tried everything I can possibly think of, but the file saves as unreadable :(. I don't know where else to look or what else I can try beyond what I've already done and tried.
Request Txt File: https://1drv.ms/t/s!AqAIyFoqrBTO6hTwCimcHDHODqEi?e=pxJY00
Ryan.
I fixed this by using the Busboy package instead of Formidable.
This is what my HTTP listener looks like using Busboy.
// Core requires (config, xml2js and ANPR_ListenData come from elsewhere in my app).
var util = require('util');
var inspect = util.inspect;
var Busboy = require('busboy');
var http = require('http');
var fs = require('fs');
http.createServer(function(req, res) {
if (req.method === 'POST') {
//vars
var ref = Math.random().toString(36).substring(5) + Math.random().toString(36).substring(2, 15);
var xml = '';
var parseXml = '';
var over1 = '', over2 = '';
var i = 0;
var busboy = new Busboy({ headers: req.headers });
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
console.log('File [' + fieldname + ']: filename: ' + filename + ', encoding: ' + encoding + ', mimetype: ' + mimetype);
if(filename.toLowerCase() === "licenseplatepicture.jpg")
{
var saveTo = config.App.ImageDir+"/"+ref+"_Plate.jpg";
if (!fs.existsSync(saveTo)) {
// only write the file if it doesn't already exist
file.pipe(fs.createWriteStream(saveTo));
}
}
if(filename.toLowerCase() === "detectionpicture.jpg")
{
i++;
var saveTo = config.App.ImageDir+"/"+ref+"_Front_"+i+".jpg";
if (!fs.existsSync(saveTo)) {
// only write the file if it doesn't already exist
file.pipe(fs.createWriteStream(saveTo));
}
}
file.on('data', function(data) {
if(filename.toLowerCase() === "anpr.xml")
{
xml += data;
}
console.log('File [' + fieldname + '] got ' + data.length + ' bytes');
});
file.on('end', function() {
console.log('File [' + fieldname + '] Finished');
});
});
busboy.on('field', function(fieldname, val, fieldnameTruncated, valTruncated, encoding, mimetype) {
console.log('Field [' + fieldname + ']: value: ' + inspect(val));
// No fields according to busboy
});
busboy.on('finish', function() {
// DO PARSE INTO JSON! This works, all is well.
xml2js.parseString(xml, {explicitArray:false, ignoreAttrs:true}, function (err, result)
{
if(err)
{
console.error('Parse: ' + err);
}
// Set parsed var
parseXml = result;
});
var images = '';
if(i === 2)
{
images = `{"Plate":"${ref}_Plate.jpg", "Front":"${ref}_Front_1.jpg", "Overview":"${ref}_Front_2.jpg"}`;
} else {
images = `{"Plate":"${ref}_Plate.jpg", "Front":"${ref}_Front_1.jpg", "Overview":"null"}`;
}
// Once parsed, send on to ANPR listen function.
ANPR_ListenData(ref, parseXml, images, function(result) {
if(result.Status == 1)
{
console.log('Data transfered for: '+parseXml.EventNotificationAlert.ANPR.licensePlate);
console.log('Accepted Camera: '+result.Example);
res.writeHead(200, { Connection: 'close', Location: '/' });
res.end();
}
});
});
req.pipe(busboy);
}
}).listen(7200, function() {
console.log('Listening for requests');
});
Hope this helps someone else in the future. It certainly cost me a lot of wasted time.
When I read into it more, Busboy was the better package to use; it makes more sense for what I was trying to achieve.
All the best,
Ryan :)
I am writing a backup script that simply downloads all the blobs in all the blob containers of a specific Azure account.
The script uses async.js to make sure only so many downloads can run at the same time, so it doesn't overload the server. When I run this script it works fine, but when it hits large files it runs out of memory. I'm guessing the download runs faster than the disk can write, and it eventually fills up the in-memory buffer so badly that I run out of memory entirely, but debugging the exact cause has been impossible so far.
The specific function which appears to use a lot of memory is called as follows:
blobService.getBlobToStream(
containerName,
blob.name,
fs.createWriteStream(fullPath),
function(error) {
if(error){ //Something went wrong, write it to the console but finish the queue item and continue.
console.log("Failed writing " + blob.name + " (" + error + ")");
callback();
}
else if(!error) { //Write the last modified date and finish the queue item silently
fs.writeFile(fullPath + ".date", blobLastModified, function(err)
{ if(err) console.log("Couldn't write .date file: " + err); });
callback();
}
});
Even a single 700MB download will easily fill up 1GB of memory on my side.
Is there any way around this? Am I missing a parameter which magically prevents the Azure SDK from buffering everything and the kitchen sink?
Full code:
#!/usr/bin/env node
//Requires
var azure = require('azure');
var fs = require('fs');
var mkdirp = require('mkdirp');
var path = require('path');
var async = require('async');
var maxconcurrency = 1; //Max amount of simultaneous running threads of getBlobsAndSaveThem() running through async.js.
var blobService = azure.createBlobService();
var backupPrefix = '/backups/azurebackup/'; //Always end with a '/'!!
//Main flow of the script is near the bottom of the file.
var containerProcessingQueue = async.queue(
function getBlobsAndSaveThem(containerName) {
console.log(containerName); //DEBUG
blobService.listBlobs(containerName,
function(error, blobs) {
if(!error){
var blobProcessingQueue =
async.queue(function(index,callback) {
var blob = blobs[index];
console.log(blob); //DEBUG
var fullPath = backupPrefix + containerName + '/' + blob.name;
var blobLastModified = new Date(blob.properties['last-modified']);
//Only create if the directory doesn't exist, since mkdirp fails if the directory exists.
if(!fs.existsSync(path.dirname(fullPath))){ //And do it sync, because otherwise it'll check 99999 times if the directory exists simultaneously, doesn't find it, then fails to create it 99998 times.
try { mkdirp.sync(path.dirname(fullPath)); } catch(err) { console.log('Failed to create directory ' + path.dirname(fullPath) + " (" + err + ")"); }
}
if(fs.existsSync(fullPath + ".date")){
if(blobLastModified == fs.readFileSync(fullPath + ".date").toString()) {
callback();
return; //If the file is unmodified, return. No this won't exit the program, because it's called within a function definition (async.queue(function ...))
}
}
blobService.getBlobToStream(
containerName,
blob.name,
fs.createWriteStream(fullPath),
function(error) {
if(error){ //Something went wrong, write it to the console but finish the queue item and continue.
console.log("Failed writing " + blob.name + " (" + error + ")");
callback();
}
else if(!error) { //Write the last modified date and finish the queue item silently
fs.writeFile(fullPath + ".date", blobLastModified, function(err)
{ if(err) console.log("Couldn't write .date file: " + err); });
callback();
}
});
},maxconcurrency);
for(var blobindex in blobs){
blobProcessingQueue.push(blobindex);
} //Push new items to the queue for processing
}
else {
console.log("An error occurred listing the blobs: " + error);
}
});
},1);
blobService.listContainers(function(err, result){
for(var i=0;i<result.length;i++) {
containerProcessingQueue.push(result[i].name);
}
});
For all those now curious: the option names for the start and end of the range have changed. They are now just rangeStart and rangeEnd.
Here is the Azure Node documentation for more help:
http://dl.windowsazure.com/nodestoragedocs/BlobService.html
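As a rough sketch of what the ranged call looks like with the renamed options (only the option names are assumed here; everything else matches the answer code further down, and you should check the linked docs for your SDK version):
// Hypothetical sketch using the renamed range options; the original answer
// below uses rangeStartHeader / rangeEndHeader instead.
blobService.getBlobToStream(containerName, blobName, stream,
  { "rangeStart": startPos, "rangeEnd": endPos - 1 },
  function (error) {
    if (error) throw error;
    // move startPos forward and download the next chunk here
  });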
One thing that you could possibly do is read only a chunk of data into the stream instead of the whole blob, append that to the file, and then read the next chunk. The Blob Storage service supports that. If you look at the source code for getBlobToStream (https://github.com/WindowsAzure/azure-sdk-for-node/blob/master/lib/services/blob/blobservice.js), you can specify from/to bytes in the options: rangeStartHeader and rangeEndHeader. See if that helps.
I have hacked together some code which does just that (as you can see from my code, my knowledge of node.js is quite primitive :)). [Please use this code just to get an idea of how you can do a chunked download, as I think it still has some glitches.]
var azure = require('azure');
var fs = require('fs');
var blobService = azure.createBlobService("account", "accountkey");
var containerName = "container name";
var blobName = "blob name";
var blobSize;
var chunkSize = 1024 * 512;//chunk size -- we'll read 512 KB at a time.
var startPos = 0;
var fullPath = "D:\\node\\";
var blobProperties = blobService.getBlobProperties(containerName, blobName, null, function (error, blob) {
if (error) {
throw error;
}
else {
blobSize = blob.contentLength;
fullPath = fullPath + blobName;
console.log(fullPath);
doDownload();
}
}
);
function doDownload() {
var stream = fs.createWriteStream(fullPath, {flags: 'a'});
var endPos = startPos + chunkSize;
if (endPos > blobSize) {
endPos = blobSize;
}
console.log("Downloading " + (endPos - startPos) + " bytes starting from " + startPos + " marker.");
blobService.getBlobToStream("test", blobName, stream,
{ "rangeStartHeader": startPos, "rangeEndHeader": endPos-1 }, function(error) {
if (error) {
throw error;
}
else if (!error) {
startPos = endPos;
if (startPos <= blobSize - 1) {
doDownload();
}
}
});
}
I am having a weird issue with a piece of sample code that I got here, the central part being this:
server.on('request', function(request, response) {
var file = fs.createWriteStream('copy.csv');
var fileSize = request.headers['content-length'];
var uploadedSize = 0;
request.on('data', function (chunk) {
uploadedSize += chunk.length;
uploadProgress = (uploadedSize/fileSize) * 100;
response.write(Math.round(uploadProgress) + "%" + " uploaded\n" );
var bufferStore = file.write(chunk);
console.log(bufferStore);
console.log(chunk);
if(!bufferStore)
{
request.pause();
}
});
file.on('drain', function() {
request.resume();
});
request.on('end', function() {
response.write('Upload done!');
response.end();
});
});
The problem is that the file copy.csv does not contain anything after the process is finished.
I tried adding file.end(); in the request.on('end', ...) callback, but it did not do the trick. However, if I add faulty code to that callback that causes an exception, the file is written just fine (although this of course can't be the final solution).
To notify the stream that there are no more chunks to be read, you can simply call your_stream.push(null). You can read more about streams and push(null) in substack's excellent stream guide.
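As a tiny, self-contained sketch of that idea (not tied to the question's code): a custom Readable stream that emits one chunk and then signals the end by pushing null.
var Readable = require('stream').Readable;

// One-shot readable source: push a single chunk, then push(null) to say
// that no more data will follow.
var source = new Readable();
source._read = function () {
  this.push('only chunk\n');
  this.push(null); // end of stream
};

source.pipe(process.stdout);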
Try this structure:
var file = fs.createWriteStream('copy.csv'),
fileSize = request.headers['content-length'],
uploadedSize = 0;
request.on('readable', function () { // Node.js 0.10 (Streams2 interface)
var newData = this.read() || new Buffer(0); // Sometimes may come null
file.write(newData);
uploadedSize += newData.length;
response.write(Math.round((uploadedSize / fileSize) * 100) + "%" + " uploaded\n" );
});
request.on('end', function () {
response.write('Upload done!');
response.end();
file.end();
});
I want to download a zip file from the internet and unzip it in memory without saving to a temporary file. How can I do this?
Here is what I tried:
var url = 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip';
var request = require('request'), fs = require('fs'), zlib = require('zlib');
request.get(url, function(err, res, file) {
if(err) throw err;
zlib.unzip(file, function(err, txt) {
if(err) throw err;
console.log(txt.toString()); //outputs nothing
});
});
[EDIT]
As suggested, I tried using the adm-zip library and I still cannot make this work:
var ZipEntry = require('adm-zip/zipEntry');
request.get(url, function(err, res, zipFile) {
if(err) throw err;
var zip = new ZipEntry();
zip.setCompressedData(new Buffer(zipFile.toString('utf-8')));
var text = zip.getData();
console.log(text.toString()); // fails
});
You need a library that can handle buffers. The latest version of adm-zip will do:
npm install adm-zip
My solution uses the http.get method, since it returns Buffer chunks.
Code:
var file_url = 'http://notepad-plus-plus.org/repository/7.x/7.6/npp.7.6.bin.x64.zip';
var AdmZip = require('adm-zip');
var http = require('http');
http.get(file_url, function(res) {
var data = [], dataLen = 0;
res.on('data', function(chunk) {
data.push(chunk);
dataLen += chunk.length;
}).on('end', function() {
var buf = Buffer.alloc(dataLen);
for (var i = 0, len = data.length, pos = 0; i < len; i++) {
data[i].copy(buf, pos);
pos += data[i].length;
}
var zip = new AdmZip(buf);
var zipEntries = zip.getEntries();
console.log(zipEntries.length)
for (var i = 0; i < zipEntries.length; i++) {
if (zipEntries[i].entryName.match(/readme/))
console.log(zip.readAsText(zipEntries[i]));
}
});
});
The idea is to create an array of buffers and concatenate them into a new one at the end. This is due to the fact that buffers cannot be resized.
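As a side note, the manual copy loop can also be written with Buffer.concat, which performs the same concatenation in one call (a small sketch, reusing the data array and dataLen total from the snippet above):
// Equivalent to the copy loop above: join the collected chunks into one buffer.
var buf = Buffer.concat(data, dataLen);
var zip = new AdmZip(buf);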
Update
This is a simpler solution that uses the request module to obtain the response in a buffer, by setting encoding: null in the options. It also follows redirects and resolves http/https automatically.
var file_url = 'https://github.com/mihaifm/linq/releases/download/3.1.1/linq.js-3.1.1.zip';
var AdmZip = require('adm-zip');
var request = require('request');
request.get({url: file_url, encoding: null}, (err, res, body) => {
var zip = new AdmZip(body);
var zipEntries = zip.getEntries();
console.log(zipEntries.length);
zipEntries.forEach((entry) => {
if (entry.entryName.match(/readme/i))
console.log(zip.readAsText(entry));
});
});
The body of the response is a buffer that can be passed directly to AdmZip, simplifying the whole process.
Sadly you can't pipe the response stream straight into the unzip job the way the node zlib lib allows you to; you have to cache it and wait for the end of the response. I suggest you pipe the response to an fs stream in the case of big files, otherwise you will fill up your memory in a blink!
I don't completely understand what you are trying to do, but imho this is the best approach. You should keep your data in memory only for the time you really need it, and then stream it to the csv parser.
If you want to keep all your data in memory, you can replace the csv parser method fromPath with from, which takes a buffer instead, and have getData return the unzipped content directly (a rough sketch of that variant follows the code below).
You can use adm-zip (as @mihai said) instead of node-zip; just pay attention, because adm-zip is not yet published on npm, so you need:
$ npm install git://github.com/cthackers/adm-zip.git
N.B. Assumption: the zip file contains only one file
var request = require('request'),
fs = require('fs'),
csv = require('csv'),
NodeZip = require('node-zip')
function getData(tmpFolder, url, callback) {
var tempZipFilePath = tmpFolder + new Date().getTime() + Math.random()
var tempZipFileStream = fs.createWriteStream(tempZipFilePath)
request.get({
url: url,
encoding: null
}).on('end', function() {
fs.readFile(tempZipFilePath, 'base64', function (err, zipContent) {
var zip = new NodeZip(zipContent, { base64: true })
Object.keys(zip.files).forEach(function (filename) {
var tempFilePath = tmpFolder + new Date().getTime() + Math.random()
var unzipped = zip.files[filename].data
fs.writeFile(tempFilePath, unzipped, function (err) {
callback(err, tempFilePath)
})
})
})
}).pipe(tempZipFileStream)
}
getData('/tmp/', 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip', function (err, path) {
if (err) {
return console.error('error: %s', err.message)
}
var metadata = []
csv().fromPath(path, {
delimiter: '|',
columns: true
}).transform(function (data){
// do things with your data
if (data.NAME[0] === '#') {
metadata.push(data.NAME)
} else {
return data
}
}).on('data', function (data, index) {
console.log('#%d %s', index, JSON.stringify(data, null, ' '))
}).on('end',function (count) {
console.log('Metadata: %s', JSON.stringify(metadata, null, ' '))
console.log('Number of lines: %d', count)
}).on('error', function (error) {
console.error('csv parsing error: %s', error.message)
})
})
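And here is a rough sketch of the in-memory variant mentioned above. It assumes getData has been changed to call back with the unzipped content itself instead of a temp file path, and that your csv version exposes from as the buffer-taking counterpart of fromPath; treat it as an idea, not tested code.
// Hypothetical in-memory variant: getData is assumed to pass the unzipped
// content to the callback instead of writing it to a temp file.
getData('/tmp/', 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip', function (err, unzipped) {
  if (err) {
    return console.error('error: %s', err.message)
  }
  csv().from(unzipped.toString(), {
    delimiter: '|',
    columns: true
  }).on('data', function (data, index) {
    console.log('#%d %s', index, JSON.stringify(data, null, ' '))
  }).on('error', function (error) {
    console.error('csv parsing error: %s', error.message)
  })
})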
If you're on macOS or Linux, you can use the unzip command to unzip from stdin.
In this example I'm reading the zip file from the filesystem into a Buffer object but it works
with a downloaded file as well:
// Get a Buffer with the zip content
var fs = require("fs")
, zip = fs.readFileSync(__dirname + "/test.zip");
// Now the actual unzipping:
var spawn = require('child_process').spawn
, fileToExtract = "test.js"
// -p tells unzip to extract to stdout
, unzip = spawn("unzip", ["-p", "/dev/stdin", fileToExtract ])
;
// Write the Buffer to stdin
unzip.stdin.write(zip);
// Handle errors
unzip.stderr.on('data', function (data) {
console.log("There has been an error: ", data.toString("utf-8"));
});
// Handle the unzipped stdout
unzip.stdout.on('data', function (data) {
console.log("Unzipped file: ", data.toString("utf-8"));
});
unzip.stdin.end();
Which is actually just the node version of:
cat test.zip | unzip -p /dev/stdin test.js
EDIT: It's worth noting that this will not work if the input zip is too big to be read in one chunk from stdin. If you need to read bigger files, and your zip file contains only one file, you can use funzip instead of unzip:
var unzip = spawn("funzip");
If your zip file contains multiple files (and the file you want isn't the first one), I'm afraid to say you're out of luck. unzip needs to seek within the .zip file, since zip files are just a container, and reading from a pipe may only let it unzip the last file in it. In that case you have to save the file temporarily (node-temp comes in handy).
Two days ago the module node-zip was released, which is a wrapper for the JavaScript-only version of Zip: JSZip.
var NodeZip = require('node-zip')
// zipBuffer is a Buffer containing the downloaded zip content
, zip = new NodeZip(zipBuffer.toString("base64"), { base64: true })
, unzipped = zip.files["your-text-file.txt"].data;
I'm trying to create a file downloader as a background service, but when a large file is scheduled, it's first put in memory and then, at the end of the download, the file is written to disk.
How can I make the file be written to disk gradually, preserving memory, considering that I may have lots of files being downloaded at the same time?
Here's the code I'm using:
var sys = require("sys"),
http = require("http"),
url = require("url"),
path = require("path"),
fs = require("fs"),
events = require("events");
var downloadfile = "http://nodejs.org/dist/node-v0.2.6.tar.gz";
var host = url.parse(downloadfile).hostname
var filename = url.parse(downloadfile).pathname.split("/").pop()
var theurl = http.createClient(80, host);
var requestUrl = downloadfile;
sys.puts("Downloading file: " + filename);
sys.puts("Before download request");
var request = theurl.request('GET', requestUrl, {"host": host});
request.end();
var dlprogress = 0;
setInterval(function () {
sys.puts("Download progress: " + dlprogress + " bytes");
}, 1000);
request.addListener('response', function (response) {
response.setEncoding('binary')
sys.puts("File size: " + response.headers['content-length'] + " bytes.")
var body = '';
response.addListener('data', function (chunk) {
dlprogress += chunk.length;
body += chunk;
});
response.addListener("end", function() {
fs.writeFileSync(filename, body, 'binary');
sys.puts("After download finished");
});
});
I changed the callback to:
request.addListener('response', function (response) {
var downloadfile = fs.createWriteStream(filename, {'flags': 'a'});
sys.puts("File size " + filename + ": " + response.headers['content-length'] + " bytes.");
response.addListener('data', function (chunk) {
dlprogress += chunk.length;
downloadfile.write(chunk, 'binary');
});
response.addListener("end", function() {
downloadfile.end();
sys.puts("Finished downloading " + filename);
});
});
This worked perfectly.
Does the request package work for your use case?
It lets you do things like this:
request(downloadurl).pipe(fs.createWriteStream(downloadtohere))
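For completeness, a self-contained sketch of that one-liner with error handling; the URL and destination filename are just placeholders borrowed from the question:
var fs = require('fs');
var request = require('request');

// Placeholder values; substitute your own URL and destination path.
var downloadurl = 'http://nodejs.org/dist/node-v0.2.6.tar.gz';
var downloadtohere = 'node-v0.2.6.tar.gz';

request(downloadurl)
  .on('error', function (err) { console.log('Download failed: ' + err); })
  .pipe(fs.createWriteStream(downloadtohere))
  .on('finish', function () { console.log('Saved to ' + downloadtohere); });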
Take a look at http-request:
// assuming: var http = require('http-request');
// shorthand syntax, buffered response
http.get('http://localhost/get', function (err, res) {
if (err) throw err;
console.log(res.code, res.headers, res.buffer.toString());
});
// save the response to 'myfile.bin' with a progress callback
http.get({
url: 'http://localhost/get',
progress: function (current, total) {
console.log('downloaded %d bytes from %d', current, total);
}
}, 'myfile.bin', function (err, res) {
if (err) throw err;
console.log(res.code, res.headers, res.file);
});
When downloading a large file, please use fs.write and not writeFile, as writeFile will overwrite the previous content.
// `options`, `sendstatus` and `sendendstatus` are defined elsewhere in my code.
function downloadfile(res) {
var size = 0;
var requestserver = http.request(options, function(r) {
console.log('STATUS: ' + r.statusCode);
console.log('HEADERS: ' + JSON.stringify(r.headers));
var fd = fs.openSync('sai.tar.gz', 'w');
r.on('data', function (chunk) {
size += chunk.length;
console.log(size+'bytes received');
sendstatus(res,size);
fs.write(fd, chunk, 0, chunk.length, null, function(er, written) {
});
});
r.on('end',function(){
console.log('\nended from server');
fs.closeSync(fd);
sendendstatus(res);
});
});
}
Instead of holding the content in memory in the "data" event listener, you should write to the file in append mode, as sketched below.
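A minimal sketch of that append-mode approach (plain http.get; the URL and filename are placeholders):
var http = require('http');
var fs = require('fs');

// Open the destination in append mode so each chunk goes straight to disk.
var out = fs.createWriteStream('download.bin', { flags: 'a' });

http.get('http://example.com/file.bin', function (res) {
  res.on('data', function (chunk) {
    out.write(chunk); // append the chunk instead of buffering it in memory
  });
  res.on('end', function () {
    out.end();
  });
});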
Use streams like Carter Cole suggested. Here is a more complete example:
var inspect = require('eyespect').inspector();
var request = require('request');
var filed = require('filed');
var temp = require('temp');
var downloadURL = 'http://upload.wikimedia.org/wikipedia/commons/e/ec/Hazard_Creek_Kayaker.JPG';
var downloadPath = temp.path({prefix: 'singlePageRaw', suffix: '.jpg'});
var downloadFile = filed(downloadPath);
var r = request(downloadURL).pipe(downloadFile);
r.on('data', function(data) {
inspect('binary data received');
});
downloadFile.on('end', function () {
inspect(downloadPath, 'file downloaded to path');
});
downloadFile.on('error', function (err) {
inspect(err, 'error downloading file');
});
You may need to install these modules, which you can do via:
npm install filed request eyespect temp