Send binary response in Node from PhantomJS child process - JavaScript

I have created a Node endpoint to create rasterised versions of my SVG charts:
app.post('/dxexport', function(req, res){
    node2Phantom.createPhantomProcess(req, res);
});
My node-to-phantom function uses spawn to run PhantomJS:
var spawn = require('child_process').spawn;

exports.createPhantomProcess = function(req, res){
    var userRequest = JSON.stringify(req.body);
    var bin = "node_modules/.bin/phantomjs";
    var args = ['./dxexport/exporter-server.js', userRequest, res];
    var cspr = spawn(bin, args);

    cspr.stdout.on('data', function (data) {
        var buff = new Buffer(data);
        res.send(data);
    });

    cspr.stderr.on('data', function (data) {
        data += '';
        console.log(data.replace("\n", "\nstderr: "));
    });

    cspr.on('exit', function (code) {
        console.log('child process exited with code ' + code);
        process.exit(code);
    });
};
When rendering is complete and the file has been successfully created, I call the renderCompleted function inside PhantomJS:
var renderCompleted = function (parameters) {
    var exportFile = fileSystem.open(parameters.exportFileName, "rb"),
        exportFileContent = exportFile.read();

    parameters.response.statusCode = 200;
    parameters.response.headers = {
        "Access-Control-Allow-Origin": parameters.url,
        "Content-Type": contentTypes[parameters.format],
        "Content-Disposition": "attachment; fileName=" + parameters.fileName + "." + parameters.format,
        "Content-Length": exportFileContent.length
    };
    parameters.response.setEncoding("binary");
    parameters.response.write(exportFileContent);
    /////// somehow send exportFileContent as node res object for download \\\\\\\\
    exportFile.close();

    parameters.format !== "svg" && fileSystem.remove(parameters.exportFileName);
    for (var i = 0; i < parameters.filesPath.length; i++)
        fileSystem.remove(parameters.filesPath[i]);
    parameters.filesPath = [];
    parameters.response.close();
};
This response is passed in from Node.js, but this code is apparently calling PhantomJS methods on it, so I get errors like:
TypeError: 'undefined' is not a function (evaluating 'parameters.response.setEncoding("binary")')
How can I send the binary file response back to the Node function, so my Node server can serve it to the user?
Any help is appreciated.

OK, after some struggle, here is the working solution, in case someone stumbles on this post.
As Artjom B. mentioned, I found that the easiest way was to delegate the rendering and file creation of the visualisation to PhantomJS, then, once done, send all the parameters related to those operations through the console.
I also updated the answer based on #ArtjomB.'s advice to wrap the console message in unique beginning and end strings, so the risk of other possible future outputs being mistaken for the intended rendered file object is mitigated.
var renderCompleted = function (parameters) {
    console.log("STRTORNDRD" + JSON.stringify(parameters) + "ENDORNDRD");
};
This then gets picked up by stdout and used like this:
exports.exportVisual = function (req, res) {
    var userRequest = JSON.stringify(req.body);
    var bin = "node_modules/.bin/phantomjs";
    var args = ['./dxexport/exporter-server.js', userRequest, res];
    var cspr = spawn(bin, args);
    var contentTypes = {
        pdf: "application/pdf",
        svg: "image/svg+xml",
        png: "image/png"
    };

    cspr.stdout.on('data', function (data) {
        var buff = new Buffer(data).toString('utf8');
        var strData = buff.match(new RegExp("STRTORNDRD" + "(.*)" + "ENDORNDRD"));
        if (strData) {
            var parameters = JSON.parse(strData[1]);
            var img = fs.readFileSync(parameters.exportFileName);
            var headers = {
                "Access-Control-Allow-Origin": parameters.url,
                "Content-Type": contentTypes[parameters.format],
                "Content-Disposition": "attachment; fileName=" + parameters.fileName + "." + parameters.format,
                "Content-Length": img.length
            };
            res.writeHead(200, headers);
            res.end(img, 'binary');
            // delete files after done
            if (parameters.format != "svg") {
                fs.unlink(parameters.exportFileName);
            }
            for (var i = 0; i < parameters.filesPath.length; i++)
                fs.unlink(parameters.filesPath[i]);
            // done. kill it
            cspr.kill('SIGINT');
        }
    });

    cspr.stderr.on('data', function (data) {
        data += '';
        console.log(data.replace("\n", "\nstderr: "));
    });

    cspr.on('exit', function (code) {
        console.log('child process exited with code ' + code);
        process.exit(code);
    });
};
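One caveat worth noting: stdout "data" events are not guaranteed to deliver the whole marker-wrapped JSON in a single chunk, so for larger payloads the match above may never fire. A minimal sketch of a more defensive variant (assuming the same STRTORNDRD/ENDORNDRD markers) accumulates output before matching:

var stdoutBuffer = '';
cspr.stdout.on('data', function (data) {
    // accumulate until both markers are present, then parse once
    stdoutBuffer += data.toString('utf8');
    var strData = stdoutBuffer.match(/STRTORNDRD([\s\S]*?)ENDORNDRD/);
    if (strData) {
        stdoutBuffer = ''; // reset so the same message is not matched twice
        var parameters = JSON.parse(strData[1]);
        // ...same response-writing and cleanup logic as above...
    }
});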

Related

Client request to server

A client function requests data from a server. I imported this function into another file where I need to use it, and it works fine. But I don't understand why I am seeing "undefined" on the command prompt. I have commented out all the console.log calls, but it still appears. Could the export/import be the problem?
Here is my code:
// tstReq.js
function getData(iccid) {
    toString(iccid);
    var http = require('http');
    var jasParseData;
    var options = {
        host: 'demo8620001.mockable.io',
        port: 80,
        path: '/Api3',
        method: 'get'
    };
    http.request(options, function(res) {
        //console.log('STATUS: ' + res.statusCode);
        //console.log('HEADERS: ' + JSON.stringify(res.headers));
        res.setEncoding('utf8');
        res.on('data', function (chunk) {
            //console.log('BODY: ' + chunk);
            josParseData = JSON.parse(chunk);
            for (i = 0, len = Object.keys(josParseData.iccid[i]).length; i <= len; i++) {
                //console.log('JSON.parse:', josParseData.iccid[i]);
                //console.log("iccid: ", iccid);
                if (josParseData.iccid[i] === iccid) { // Only print matched iccid
                    console.log('JSON.parse:', josParseData.iccid[i]);
                    console.log("iccid: ", iccid);
                }
                if (josParseData.iccid[i] === iccid) {
                    console.log("Valid Jasper", i + 1);
                    console.log('\n');
                }
                else {
                    // console.log("Invalid Jasper");
                }
                //console.log('\n');
            }
            //console.log('\n');
        });
    }).end();
};
module.exports = getData;
Here is the code where I am using the exported function:
const fs = require('fs');
var parse = require('csv-parse');
var validateICCID = require('./funcValidateId.js');
var getData = require('./tstReq.js');

fs.createReadStream('iccid2.csv')
    .pipe(parse({delimiter: ':'}))
    .on('data', function(csv) {
        csvrow = csv.toString();
        console.log('\n');
        console.log(getData(csvrow));
        console.log('\n');
    });
You're probably getting undefined because the function getData doesn't have a return statement. That doesn't mean your function is broken; it just doesn't directly return a result.
If you want to log the result of getData, you'll need to pass the value back out of the http.request callback, since the response only arrives after getData itself has already returned.
Also, I noticed you declared var jasParseData; you probably meant josParseData.
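A minimal sketch of the callback approach, assuming you are willing to change getData's signature (the done parameter is hypothetical):

// tstReq.js - sketch: pass the parsed body out through a callback
var http = require('http');

function getData(iccid, done) {
    var options = {
        host: 'demo8620001.mockable.io',
        port: 80,
        path: '/Api3',
        method: 'get'
    };
    http.request(options, function (res) {
        res.setEncoding('utf8');
        res.on('data', function (chunk) {
            done(JSON.parse(chunk)); // deliver the result asynchronously
        });
    }).end();
}

module.exports = getData;

The caller then logs inside the callback instead of logging the (undefined) return value:

getData(csvrow, function (result) {
    console.log(result);
});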

Node.js HTTP - TypeError: Header name must be a valid HTTP Token

When I set the header in the following way, it works absolutely fine:
response.setHeader('Content-Type', 'application/json');
but when I try to pass a variable instead of the exact header name/value in the following way, it shows an error:
response.setHeader(result.headers);
If I console.log("headers -> " + result.headers), the output is:
headers -> 'Content-Type', 'application/json'
The following is the exact error I'm getting; I'm not able to figure out how to get around it.
_http_outgoing.js:487
throw new TypeError(`Header name must be a valid HTTP Token ["${name}"]`);
^
TypeError: Header name must be a valid HTTP Token ["'Content-Type', 'application/json'"]
at validateHeader (_http_outgoing.js:487:11)
at ServerResponse.setHeader (_http_outgoing.js:498:3)
at C:\service-mocker\src\main\app.js:54:22
at FSReqWrap.readFileAfterClose [as oncomplete] (fs.js:511:3)
Below is the whole code I'm trying to implement:
var http = require('http');
var fs = require('fs');
var path = require('path');
var repl = require('repl');
var map = {};
var key;
var value;

//create a server object:
var server = http.createServer(function(request, response) {
    fs.readFile("./resources/FileData1.txt", function(err, data) {
        if (err) throw err;
        content = data.toString().split(/(?:\r\n|\r|\n)/g).map(function(line){
            return line.trim();
        }).filter(Boolean)
        var result = processFile(content);
        console.log("url -> " + result.url);
        console.log("status -> " + result.status);
        console.log("headers -> " + result.headers);
        console.log("body -> " + result.body);

        function objToString (obj) {
            var str = '';
            for (var p in obj) {
                if (obj.hasOwnProperty(p)) {
                    str += obj[p] + '\n';
                }
            }
            return str;
        }

        function processFile(nodes) {
            nodes.forEach(function(node) {
                if (node.startsWith("//")) {
                    key = node.substring(2, node.length - 2).toLowerCase().trim();
                    return;
                }
                else {
                    value = node;
                }
                // map[key] = value;
                if (key in map) {
                    map[key].push(value);
                } else {
                    map[key] = [value];
                }
            });
            return map;
            // console.log(map);
        }

        if (request.url == result.url) {
            response.setHeader(result.headers);
            // response.setHeader('Content-Type', 'application/json');
            response.write(objToString(result.body));
            response.statuscode = parseInt(result.status);
            response.end();
        } else {
            // response.writeHead(404, {"Content-Type": "text/html"});
            response.end("No Page Found");
        }
    });
});

var port = process.env.PORT || 8080;
server.listen(port, function() {
    console.log('Listening on port ' + port);
});
It looks like result.headers is returning a single string, but response.setHeader needs two arguments: the header name and the header value.
Based on the error message, you're doing the equivalent of:
response.setHeader("'Content-Type', 'application/json'");
Instead, you need to split out the header into key and value:
//n.b. make sure that your result only has the one header!
var headerSplit = result.headers.split(',');
var headerKey = headerSplit[0];
var headerVal = headerSplit[1];
response.setHeader(headerKey, headerVal);
If you don't want to set headers one at a time with response.setHeader, you can use response.writeHead, which will write the status code and your headers into the response.
response.writeHead(200, headers)
Here headers must be a JavaScript object.
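For instance, a minimal sketch with a hard-coded header object:

response.writeHead(200, { 'Content-Type': 'application/json' });
response.end(JSON.stringify({ ok: true }));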
Replace
response.setHeader(result.headers);
response.statuscode = parseInt( result.status );
with
response.writeHead(parseInt(result.status), parseHeaders(result.headers));
where parseHeaders is a function that parses result.headers into an object.
function parseHeaders(headers) {
    var o = {};
    headers.forEach(header => { o[header.split(',')[0]] = header.split(',')[1]; });
    return o;
}
In this answer I've assumed result.headers is formatted like so
[
    "header1, header1value",
    "header2, header2value"
]
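With that assumed format, a quick check of what parseHeaders produces (note the leading space that split(',') leaves in the value):

parseHeaders(["Content-Type, application/json"]);
// => { "Content-Type": " application/json" }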
Template literals use a back-tick, not quotes.

Nodejs with Phantom-Cluster just stops

I'm running Node.js, not as a webserver, but from the command line, against a pretty heavily modified version of the example.js which comes with the phantom-cluster package. The server is Ubuntu 13.10 on an AWS instance.
My goal is to "ping" more than 64,000 URLs to test for 404 or 500 HTTP errors. If there is an error, log that URL with the error for later processing.
Here is my code:
(function() {
    var WEBSITES, cluster, enqueueRequests, main, phantomCluster;
    var fs = require('fs');
    phantomCluster = require("./index");
    cluster = require("cluster");
    WEBS = [];

    function loadUrls(callback)
    {
        console.log("starting loaded");
        var fs = require('fs');
        var urls = [];
        fs.readFile("/home/ubuntu/phantom-cluster/theurls.txt", 'utf8', function (err, data)
        {
            if (err) throw err;
            var myArray = data.split("\n");
            for (i = 0; i < myArray.length; i++)
            {
                urls.push(myArray[i]);
            }
            callback(null, urls);
        })
    }

    enqueueRequests = function(engine)
    {
        fulfilled = 0;
        loadUrls(function(err, WEBS)
        {
            console.log(">>" + WEBS.length + " urls to process");
            var enqueuer, i, key, _i, _results;
            enqueuer = function(request)
            {
                var item;
                item = engine.enqueue(request);
                item.on("timeout", function()
                {
                    fs.appendFile("/home/ubuntu/error_log.log", "TIMEOUT: " + request + "\r\n", function (err) {});
                });
                return item.on("response", function()
                {
                    fulfilled++;
                    console.log(fulfilled);
                });
            };
            _results = [];
            for (i = i = 0; i < 1; i++)
            {
                _results.push((function()
                {
                    var _results1;
                    _results1 = [];
                    for (x = 0; x < WEBS.length; x++)
                    {
                        _results1.push(enqueuer(WEBS[x]));
                    }
                    return _results1;
                })());
            }
            return _results;
        });
    };

    main = function()
    {
        var engine;
        engine = phantomCluster.createQueued(
        {
            workers: 20,
            workerIterations: 1,
            phantomBasePort: 54321
        });
        if (cluster.isMaster)
        {
            enqueueRequests(engine);
        }
        engine.on("queueItemReady", function(url)
        {
            var _this = this;
            var retVal;
            urlArray = url.split("|");
            var phantom = this.ph;
            var curPage = phantom.createPage(function(page)
            {
                page.set('settings.loadImages', false);
                page.set('settings.javascriptEnabled', false);
                page.set('settings.resourceTimeout', 5000);
                page.set('settings.userAgent', 'Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101');
                page.set('onError', function(msg, trace)
                {
                    var msgStack = ['ERROR: ' + msg];
                    if (trace && trace.length)
                    {
                        msgStack.push('TRACE:');
                        trace.forEach(function(t)
                        {
                            msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
                        });
                    }
                    console.error(msgStack.join('\n'));
                });
                page.set('onResourceReceived', function(response)
                {
                    if ((response.status == "404") || (response.status == "500"))
                    {
                        myUrl = decodeURI(response.url);
                        if (myUrl == urlArray[0])
                        {
                            retVal = response.status + "|" + url;
                            fs.appendFile("/home/ubuntu/error_log.log", response.status + "|" + url + "\r\n", function (err) {});
                            return retVal;
                        }
                    }
                });
                page.open(urlArray[0], function(status)
                {
                    _this.next(); // _this is a PhantomQueuedClusterClient object
                    return _this.queueItemResponse(status);
                });
            });
        });
        return engine.start();
    };

    main();
}).call(this);
The file which is referenced as index.js is here:
https://github.com/dailymuse/phantom-cluster/blob/master/index.js
and I have not modified it at all.
This works great, and spins up 20 worker processes which go out and get the initial response code for the queued URLs.
Here is the problem:
After processing anywhere from 960 to 990 URLs, the whole thing just stops: no error code, nothing.
I've tried everything I can think of, from some sort of Node timeout, to an issue with a given URL, to banging my head against my desk. The first two would return an error when I create a test for them. The third just makes my head hurt.
Anyone have any help or experience working with this?
EDIT: I updated the code, added the on("response") callback, and then called the nextTick method to remove the item from the queue. Still the same issue.
Have you taken a look at link-crawler? It uses phantom-cluster and prerender to do almost exactly what you're looking for.
If all you're looking to do is check HTTP status codes, you don't need a headless browser for that. Node can do it on its own using http.request(), or something that utilizes promises, like request-promise.
Unless you need to verify something in the rendering of the pages you're crawling, there's no need to render them in a browser; just make HTTP calls to the URLs and inspect their statuses.
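For example, a minimal sketch of a plain status check with the core http module (the log-file name is a placeholder):

var http = require('http');
var fs = require('fs');

function checkStatus(url) {
    http.get(url, function (res) {
        if (res.statusCode === 404 || res.statusCode === 500) {
            fs.appendFile('error_log.log', res.statusCode + '|' + url + '\n', function (err) {});
        }
        res.resume(); // discard the body; only the status code matters
    }).on('error', function (err) {
        fs.appendFile('error_log.log', 'ERROR|' + url + '|' + err.message + '\n', function (err) {});
    });
}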

Node.js script would break when requesting HTTP responses from a site that does not exist

Using Node.js, when one makes an HTTP request, under optimal circumstances the request comes back with an HTTP response.
However, sometimes the request breaks because the site, for example, serves a 404 code, or the site does not exist at all. When requesting a batch of URLs, if there is an error on, say, the 200th URL out of 1000 URLs requested, the entire script breaks. Here is my code:
var hostNames = ['www.gizmodo.com', 'www.sitethatdoesnotexist123.com', 'www.google.com'];

for (i; i < hostNames.length; i++) {
    var options = {
        host: hostNames[i],
        path: '/'
    };
    (function (i) {
        http.get(options, function(res) {
            var obj = {};
            obj.url = hostNames[i];
            obj.statusCode = res.statusCode;
            obj.headers = res.headers;
            db.scrape.save(obj);
        }).on('error', function(e) {
            console.log("Error Site: " + hostNames[i]);
        });
    })(i);
};
Is there a way that, if a site does not exist, I can simply skip to the next URL instead of having the script break?
EDIT: Fixed. Thanks user DavidKemp
Use a try/catch block to catch any errors that might occur, and then continue on from there.
For example:
var hostNames = ['www.gizmodo.com', 'www.sitethatdoesnotexist123.com', 'www.google.com'];

//moved the function out so we do not have to keep redefining it:
var get_url = function (i) {
    http.get(options, function(res) {
        var obj = {};
        obj.url = hostNames[i];
        obj.statusCode = res.statusCode;
        obj.headers = res.headers;
        console.log(JSON.stringify(obj, null, 4));
    })
};

for (i; i < hostNames.length; i++) {
    var options = {
        host: hostNames[i],
        path: '/'
    };
    try {
        get_url(i);
    }
    catch (err) {
        //do something with err
    }
};
You need to bind an error handler to your request. I also cleaned up the code a bit.
hostNames.forEach(function(hostName) {
    var req = http.get({host: hostName}, function(res) {
        var obj = {
            url: hostName,
            statusCode: res.statusCode,
            headers: res.headers
        };
        console.log(JSON.stringify(obj, null, 4));
    });
    req.on('error', function(err) {
        console.log('Failed to fetch', hostName);
    });
});
You can use the uncaughtException event; this lets the script keep running even after an exception (see the Node process documentation).
process.on('uncaughtException', function(err) {
    console.log('Caught exception: ' + err);
});

var hostNames = ['www.gizmodo.com', 'www.sitethatdoesnotexist123.com', 'www.google.com'];

for (i; i < hostNames.length; i++) {
    var options = {
        host: hostNames[i],
        path: '/'
    };
    (function (i) {
        http.get(options, function(res) {
            var obj = {};
            obj.url = hostNames[i];
            obj.statusCode = res.statusCode;
            obj.headers = res.headers;
            db.scrape.save(obj);
        }).on('error', function(e) {
            console.log("Error Site: " + hostNames[i]);
        });
    })(i);
};
Added a callback for when there's an error. It logs the site that returns an error to the console. This error is usually triggered by a 404 or a request that takes too long.
The full docs are at http://nodejs.org/api/http.html#http_http_get_options_callback at the time of writing. loganfsmyth's answer provides a useful example.

Download large file with node.js avoiding high memory consumption

I'm trying to create a file downloader as a background service, but when a large file is scheduled, it's first put in memory and then, at the end of the download, the file is written to disk.
How can I make the file be written gradually to disk, preserving memory, considering that I may have lots of files being downloaded at the same time?
Here's the code I'm using:
var sys = require("sys"),
    http = require("http"),
    url = require("url"),
    path = require("path"),
    fs = require("fs"),
    events = require("events");

var downloadfile = "http://nodejs.org/dist/node-v0.2.6.tar.gz";
var host = url.parse(downloadfile).hostname;
var filename = url.parse(downloadfile).pathname.split("/").pop();

var theurl = http.createClient(80, host);
var requestUrl = downloadfile;
sys.puts("Downloading file: " + filename);
sys.puts("Before download request");
var request = theurl.request('GET', requestUrl, {"host": host});
request.end();

var dlprogress = 0;
setInterval(function () {
    sys.puts("Download progress: " + dlprogress + " bytes");
}, 1000);

request.addListener('response', function (response) {
    response.setEncoding('binary');
    sys.puts("File size: " + response.headers['content-length'] + " bytes.");
    var body = '';
    response.addListener('data', function (chunk) {
        dlprogress += chunk.length;
        body += chunk;
    });
    response.addListener("end", function() {
        fs.writeFileSync(filename, body, 'binary');
        sys.puts("After download finished");
    });
});
I changed the callback to:
request.addListener('response', function (response) {
    var downloadfile = fs.createWriteStream(filename, {'flags': 'a'});
    sys.puts("File size " + filename + ": " + response.headers['content-length'] + " bytes.");
    response.addListener('data', function (chunk) {
        dlprogress += chunk.length;
        downloadfile.write(chunk, 'binary');
    });
    response.addListener("end", function() {
        downloadfile.end();
        sys.puts("Finished downloading " + filename);
    });
});
This worked perfectly.
Does the request package work for your use case?
It lets you do things like this:
request(downloadurl).pipe(fs.createWriteStream(downloadtohere))
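A slightly fuller sketch with error handling, in case that helps (the URL and file name are placeholders):

var fs = require('fs');
var request = require('request');

request('http://example.com/file.zip')
    .on('error', function (err) { console.error('download failed:', err); })
    .pipe(fs.createWriteStream('file.zip'))
    .on('finish', function () { console.log('saved to disk'); });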
Take a look at http-request:
// shorthand syntax, buffered response
http.get('http://localhost/get', function (err, res) {
    if (err) throw err;
    console.log(res.code, res.headers, res.buffer.toString());
});

// save the response to 'myfile.bin' with a progress callback
http.get({
    url: 'http://localhost/get',
    progress: function (current, total) {
        console.log('downloaded %d bytes from %d', current, total);
    }
}, 'myfile.bin', function (err, res) {
    if (err) throw err;
    console.log(res.code, res.headers, res.file);
});
When downloading a large file, please use fs.write and not writeFile, as the latter will override the previous content.
function downloadfile(res) {
    var size = 0; // track bytes received
    var requestserver = http.request(options, function(r) {
        console.log('STATUS: ' + r.statusCode);
        console.log('HEADERS: ' + JSON.stringify(r.headers));
        var fd = fs.openSync('sai.tar.gz', 'w');
        r.on('data', function (chunk) {
            size += chunk.length;
            console.log(size + ' bytes received');
            sendstatus(res, size);
            fs.write(fd, chunk, 0, chunk.length, null, function(er, written) {
            });
        });
        r.on('end', function() {
            console.log('\nended from server');
            fs.closeSync(fd);
            sendendstatus(res);
        });
    });
}
Instead of holding the content in memory in the "data" event listener, you should write to the file in append mode.
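A minimal sketch of that advice, inside the response listener (the file name is a placeholder; flags: 'a' opens the stream in append mode):

var out = fs.createWriteStream('download.bin', {flags: 'a'});
response.on('data', function (chunk) {
    out.write(chunk); // each chunk goes straight to disk instead of into memory
});
response.on('end', function () {
    out.end();
});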
Use streams, as Carter Cole suggested. Here is a more complete example:
var inspect = require('eyespect').inspector();
var request = require('request');
var filed = require('filed');
var temp = require('temp');

var downloadURL = 'http://upload.wikimedia.org/wikipedia/commons/e/ec/Hazard_Creek_Kayaker.JPG';
var downloadPath = temp.path({prefix: 'singlePageRaw', suffix: '.jpg'});
var downloadFile = filed(downloadPath);
var r = request(downloadURL).pipe(downloadFile);

r.on('data', function(data) {
    inspect('binary data received');
});
downloadFile.on('end', function () {
    inspect(downloadPath, 'file downloaded to path');
});
downloadFile.on('error', function (err) {
    inspect(err, 'error downloading file');
});
You may need to install the modules, which you can do via:
npm install filed request eyespect temp
