I created a python script that parses a website(IMDB) and organizes it into a dataframe.
I also have a node.js app that allows me to find a variable (movie ID based on movie name in the code called pyvar) that I would include in the python script. So how can I include this variable that I get after running javascript app into python script, run the script and then send the result back to the node.js app? (that would be dataframe converted to lets say json)
Node.js app
var express = require("express")
var app = express()
var request = require("request")
app.set("view engine", "ejs")
app.get("/", function(req, res){
res.render("search")
})
app.get("/results", function(req, res){
var query = req.query.search
var url = "http://www.omdbapi.com/?s=" + query + "&apikey=thewdb"
request(url, function(error, response, body){
if(!error && response.statusCode == 200){
var data = JSON.parse(body)
res.render("results", {data: data})
var pyvar = data["Search"][0]["imdbID"]
}
})
})
app.listen(process.env.PORT, process.env.IP, function(){
console.log("Movie App has started!!!");
})
The python script in a nutshell looks like this:
url = 'website.org/' + pyvar + '/blah'
parse(url)
return dataframe
After that I would send the dataframe in some form back to the node.js app and display the results or even better if it would allow me to download the dataframe converted to xlsx but it might be too complicated.
You can use child_process spawn to execute your python script, as Felix Kling suggest in his comment, and return it result to your nodejs app. Then you could use a package like node-xlsx to transform the data to an Excel file.
Something like that:
app.js
// ...
const { spawn } = require('child_process');
const xlsx = require('node-xlsx');
// ...
app.get("/results", (req, res) => {
let query = req.query.search;
let url = "http://www.omdbapi.com/?s=" + query + "&apikey=thewdb";
request(url, (error, response, body) => {
if (!error && response.statusCode == 200) {
let data = JSON.parse(body);
let pyvar = data["Search"][0]["imdbID"];
// Call the python script
let pythonScript = spawn('./script.py', [pyvar]);
pythonScript.stdout.on('data', data => {
// Here transform the datatable to xls sheet
let xlsx = xlsx.build([{ name: "myXlsxSheet", data: data.toString() }])
// And send the file
res.end(new Buffer(xlsx, 'binary'));
});
}
})
})
// ...
script.py
#!/usr/bin/python
import sys
import pandas
pyvar = sys.argv[1]
# Here the script that parse the website
url = 'website.org/' + pyvar + '/blah'
data = parse(url)
print pandas.DataFrame(data)
Related
I know this has been gone over before but I've spent hours on this and can't seem to figure out how to post data to a node.js server... I've started the project using cloud9 and node.js so I know I have a good install and that I'm starting with a working site.
In swift I'm sending a post request like
func post (){
let url = URL(string: "https://the-game-stevens-apps.c9users.io/index.html/")!
var request = URLRequest(url: url)
request.setValue("application/x-www-form-urlencoded", forHTTPHeaderField: "Accept")
request.setValue("application/x-www-form-urlencoded", forHTTPHeaderField: "Content-Type")
request.httpMethod = "POST"
let postString = "name=henry&message=HelloWorld"
request.httpBody = postString.data(using: .utf8)
let task = URLSession.shared.dataTask(with: request) { data, response, error in
guard let data = data, error == nil else {
print("error=\(error)")
return
}
if let httpStatus = response as? HTTPURLResponse, httpStatus.statusCode != 200 { // check for http errors
print("statusCode should be 200, but is \(httpStatus.statusCode)")
print("response = \(response)")
}
let responseString = String(data: data, encoding: .utf8)
print("responseString = \(responseString)")
}
task.resume()
}
Then in my server.js file I have
var router = express();
router.use(express.urlencoded());
router.post('/index.html', function(req, res) {
var obj = {name:req.body.name,text:req.body.message};
});
I have plenty of experience with javascript but I'm a noob when it comes to node.js and have just been poking around with it, any help is really appreciated
The only part I can correct in your node.js code is
To get router you need to call express.Router();
const express = require('express');
const router = express.Router();
router.use((req,res,next) => {
console.log("/" + req.method, "host"+ req.host, "Params" + req.params);
next();
});
router.post('/index.html', (req, res) => {
const obj = {name:req.body.name,text:req.body.message};
//insert obj into database
});
Im trying to figure it out for past 6 hours, but Im out of ideas..
What Im trying to accomplish:
I want to display a JSON data that looks like this
movie {title: " xxxxx", seed: "number", url: "zzzzzzzzz"}
I want to display it on my Node server(via jade), but what I accomplished till now is to send it from the website to my node server via POST request using this code:
My JS script
var http = new XMLHttpRequest();
var url = "http://localhost:8080/";
var params = arr; <------ My JSON data
http.open("POST", url, true);
//Send the proper header information along with the request
http.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
http.onreadystatechange = function() {//Call a function when the state changes.
if(http.readyState == 4 && http.status == 200) {
console.log(http.responseText);
}
}
http.send(params);
After using above code in my google chrome developer tools on the website I actually have that data, I receive the JSON array in my node, here is my node code:
My app.js node server
const http = require("http");
const express = require('express');
const app = express();
const myParser = require('body-parser');
app.set('views', __dirname + '/views')
app.set('view engine', 'jade')
app.use(express.static(__dirname + '/public'))
app.use(myParser.urlencoded({ extended: false }));
app.use(myParser.json())
var allowCrossDomain = function (req, res, next) {
res.header('Access-Control-Allow-Origin', "*");
res.header('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE');
res.header('Access-Control-Allow-Headers', 'Content-Type');
next();
}
app.use(allowCrossDomain);
app.get('/', function (req, res, next) {
res.render('index');
})
app.get('/tl', function (req, res, next) {
res.render('tl');
})
app.post("/", function (req, res) {
response = {
first_name: req.body
};
console.log('SHOW ME BODY')
console.log(req.body);
res.send('You sent: this to Express');
});
app.listen(8080);
And this is what Im receiving in my node command prompt:
{ '[{"title":" SOME-MOVE-TITLE","seed":"NUMBER","url":"https://SOMEURLS.COM', etc. etc. etc.
And finally here is my layout.jade file
doctype
html
head
title Bolo Node Server
link(rel="stylesheet", type="text/css", href="stylesheet/style.css")
body
header
h1 My Site
block content
footer
p Running on node with Express, Jade and Stylus
And index.jade
extend layout
block content
p= 'Block content works'
script.
if req.body != undefined
div(id='data')= req.body
I really run out of ideas on how to display the json array Im receiving...help me out please
Update
My index.jade
extend layout
block content
p= 'Block content works'
div(id='data')
pre
test= jsonString
My app.js looks now like this:
app.get('/', function (req, res, next) {
res.render('index');
})
app.post("/", function (req, res) {
// Get string representation
var jsonString = JSON.stringify(req.body || {}); // use JSON.stringify(req.body || {}, null, 2) for formatted JSON
console.log(jsonString);
res.render('index', {test: jsonString});
//res.send('You sent: this to Express');
});
I see the data in my node command prompt, but I dont see it on my local website http://localhost:8080/ the div(id='data') is showing me empty.. nothing, how do I get the jsonString there?? I want it to show me the data on my local website..
**
UPDATE
**
I ended up just putting the data into the sqlite3 database and then retrieving the data via GET request and finally putting it into my jade template. I thought I can go around and not use sqlite3 but I couldnt figure out how.
When you say that you want to display the json, if you just want to see the contents of the json you can use res.json.
app.post("/", function (req, res) {
// Send back the request body if present or else, an empty object
res.json(req.body || {});
});
If you want it to be displayed inside a template, you can get a string representation of the json using JSON.stringify(req.body) and render that in your template by passing it to it as a local variable.
app.post("/", function (req, res) {
// Get string representation
var jsonString = JSON.stringify(req.body || {}); // use JSON.stringify(req.body || {}, null, 2) for formatted JSON
res.render('jsonView',{jsonString});
});
And in your template:
div(id='data')
pre
code = jsonString
You should pass the data in the template.
res.render('index', {data: 'data'});
And show it with:
data = data
p #{data}
First you should parse your incoming data, as is application/x-www-form-urlencoded. You'll need to JSON.parse req.body first and encode your response as json too
app.post("/", function (req, res) {
var response = try { JSON.parse(req.body) } catch(e) { console.error('Invalid Data') };
res.json(response || {});
});
You could also send your data as 'application/json' from you client JS and save receive a JSON directly to the req.body.
Hope it helps
UPDATE (if you want to append new data via async requests on the client)
In this post you can see the use of XmlHttpRequest with jquery $.ajax() which is basically the same concept of async requests after the DOM is rendered on your server.
Imagine the step 3 being your Jade rendered HTML
I ended up just putting the data into the sqlite3 database and then retrieving the data via GET request and finally putting it into my jade template. I thought I can go around and not use sqlite3 but I couldnt figure out how.
Here is the code
app.post("/", function (req, res) {
var jsonString = JSON.stringify(req.body || {});
db.serialize(function () {
var stmt = db.prepare("INSERT INTO movies (id, title, seed, url) VALUES (?,?,?,?)");
for (var i = 0; i < req.body.length; i++) {
var d = new Date();
var data = req.body;
var n = d.toLocaleTimeString();
stmt.run(i, req.body[i].title, req.body[i].seed, req.body[i].url);
}
stmt.finalize();
});
res.send('You sent: this to Express');
});
Retrieving the data from the database
app.get('/tl', function (req, res, next) {
db.all('select * from movies', function (err, rows) {
if (err)
return next(err);
var dataO = [];
rows.forEach(function (row) {
dataO.push(row);
})
res.render('tl', { dataO: dataO });
})
})
I'm writing an application that scrapes fan sites for characters as a practice exercise. Currently I have an array of URLs that I am looping through and scraping the data I want, then outputting this data to a output.json file to store for later. I am having issues with my formatting when writing to this file.
Maybe I should store my data differently, I am open to suggestions on best practices/other methods. I would just like this data accessible later.
server.js
var express = require('express');
var cheerio = require('cheerio');
var app = express();
var rp = require('request-promise');
var fsp = require('fs-promise');
app.get('/', function(req, res){
urls = [
'fansite.com/boss1', 'fansite.com/boss2'
];
function parse(html) {
var bosses = require('./output.json');
var $ = cheerio.load(html);
$('.page-header__title').filter(function () {
var data = $(this);
name = data.text();
bosses.name = name;
})
return bosses;
}
var append = file => content => fsp.appendFile(file, JSON.stringify(content, null, 2));
urls.forEach(function (url) {
rp(url)
.then(parse)
.then(append('output.json'))
.then(() => console.log('Success'))
.then(res.send('Bosses Updated.'))
.catch(err => console.log('Error:', err));
});
})
app.listen('8081')
console.log('Running on port 8081');
exports = module.exports = app;
output.json
{
}{
"name": "Boss1"
}{
"name": "Boss2"
}
You're better off just modifying the in-memory javascript object, and then saving it all to the file in an overwrite / replace kind of approach, rather than appending to the file (unless you expect the file to become so huge that it breaks memory limits).
To do that, just maintain an in-memory copy of the data and then just write it out: fs.writeFile(fileName, JSON.stringify(content, null, 4));
Otherwise, you have to figure out how to insert the new object inside the old one, or risk making it invalid json.
I'm building my first node/express app and am following this tut.
I am at a point where I am trying to get all JSON data and put it in an array to be sent to the template and rendered. When I try to run the app via CLI, I get the following error:
Directory Structure
The data output at the var blogsurlall location
hellotest.js
var routes = require('./routes/index');
var express = require('express');
var app = express();
var request = require("request");
var blogsurlall = "https://[JSON export URL location configured in a Drupal 8 view]";
app.set('view engine','ejs');
var server = app.listen (2000, function(){ console.log('Waiting for you on port 2000'); });
/* Get all global blogs data */
request({
url: blogsurlall,
json: true
}, function (error, response, body) {
if (!error && response.statusCode === 200) {
blogsdata_all = body;
}
// Create blogs array for footer.
var blogs = [];
// Fill up the array with blogs.
blogsdata_all.blogs.forEach(function(item){
blogs = blogs.concat(item);
});
app.locals.blogsdata = blogs;
});
app.use('/', routes);
index.js
var express = require('express');
var routes = express.Router();
routes.get('/', function(req, res){ res.render('default',{title: 'Home', body: 'blogsdata'}); });
routes.get('/about-us', function(req, res){ res.send('<h1>Lucius Websystems</h1>Amsterdam, The Netherlands'); });
routes.get('/about/:name?', function(req, res){ var name = req.params.name; res.send('<h1>' +name +'</h1>About text'); });
/* GET Blog detail page. */
routes.get('/blog/:blogid', function(req, res, next) {
// Place json data in a var.
var blogsdata = req.app.locals.blogsdata;
// Create array.
var blogItem = [];
// Check and build current URL
var currentURL = '/blog/' + req.params.blogid;
// Lop through json data and pick correct blog-item based on current URL.
blogsdata.forEach(function (item) {
if (item.title == currentURL) {
blogItem = item;
}
});
if (blogItem.length == 0) {
// Render the 404 page.
res.render('404', {
title: '404',
body: '404'
});
} else {
// Render the blog page.
res.render('blog-detail', {
blog: blogItem
});
}
});
module.exports = routes;
From the CLI error, it appears no blog data is even returned to be read into the array.
I have carefully gone through the tutorial several times and I think there are steps that may be implied that I am missing.
Can someone please help me understand how to get the blog data so that it can be read into the array and output to my template?
Also open to troubleshooting suggestions in comments.
Thanks for reading!
The error is raising in this line:
blogsdata_all.blogs.forEach(function(item){
As the error says, blogs is undefined.
If there is an error in the request or status code isn't 200, the body is not assigned to the variable, but you are not finishing the execution, so the variable in that case would be undefined.
Other possible problem is the json received doesn't have blogs as key of the body.
Check this both things and let us know if you found the problem
I am trying to scrape a website using nodejs and it works perfectly on sites that do not require any authentication. But whenever I try to scrape a site with a form that requires username and password I only get the HTML from the authentication page (that is, if you would click 'view page source' on the authentication page it self, that is the HTML I get). I am able to get the desired HTML using curl
curl -d "username=myuser&password=mypw&submit=Login" URL
Here is my code...
var express = require('express');
var fs = require('fs'); //access to file system
var request = require('request');
var cheerio = require('cheerio');
var app = express();
app.get('/scrape', function(req, res){
url = 'myURL'
request(url, function(error, response, html){
// check errors
if(!error){
// Next, we'll utilize the cheerio library on the returned html which will essentially give us jQuery functionality
var $ = cheerio.load(html);
var title, release, rating;
var json = { title : "", release : "", rating : ""};
$('.span8 b').filter(function(){
// Let's store the data we filter into a variable so we can easily see what's going on.
var data = $(this);
title = data.first().text();
release = data.text();
json.title = title;
json.release = release;
})
}
else{
console.log("Error occurred: " + error);
}
fs.writeFile('output.json', JSON.stringify(json, null, 4), function(err){
console.log('File successfully written! - Check your project directory for the output.json file');
})
res.send('Check your console!')
})
})
app.listen('8081')
console.log('Magic happens on port 8081');
exports = module.exports = app;
I have tried the following...
var request = require('request',
username:'myuser',
password:'mypw');
This just returns the authentication page's HTML
request({form: {username:myuser, password:mypw, submit:Login}, url: myURL}, function(error, response, html){
...
...
...
}
This also just returns the authentication page's HTML
So my question is how do I achieve this using nodejs?
you shouldn't use .get but .post and put the post param (username and password) in your call
request.post({
headers: {'content-type' : 'application/x-www-form-urlencoded'},
url: url,
body: "username=myuser&password=mypw&submit=Login"
}, function(error, response, html){
//do your parsing...
var $ = cheerio.load(html)
});