I'm writing an application that scrapes fan sites for characters as a practice exercise. Currently I have an array of URLs that I am looping through and scraping the data I want, then outputting this data to an output.json file to store for later. I am having issues with the formatting when writing to this file.
Maybe I should store my data differently, I am open to suggestions on best practices/other methods. I would just like this data accessible later.
server.js
var express = require('express');
var cheerio = require('cheerio');
var app = express();
var rp = require('request-promise');
var fsp = require('fs-promise');

// GET / — scrape every boss page, collect the results in memory, then
// overwrite output.json once. Appending a JSON.stringify per page (the
// original approach) concatenates objects ("}{") and corrupts the file;
// rewriting the whole document keeps it valid JSON.
app.get('/', function (req, res) {
  // `var` keeps this request-scoped — the original leaked `urls` (and
  // `name` inside parse) as implicit globals.
  var urls = [
    'fansite.com/boss1', 'fansite.com/boss2'
  ];

  // Extract the boss name from one page's HTML.
  function parse(html) {
    var $ = cheerio.load(html);
    return { name: $('.page-header__title').first().text() };
  }

  // Fetch and parse all pages in parallel.
  Promise.all(urls.map(function (url) {
    return rp(url).then(parse);
  }))
    .then(function (bosses) {
      // One write, one valid JSON array.
      return fsp.writeFile('output.json', JSON.stringify(bosses, null, 2));
    })
    .then(function () {
      console.log('Success');
      // Exactly one response per request — the original called res.send
      // inside the per-URL loop, which throws after the first send.
      res.send('Bosses Updated.');
    })
    .catch(function (err) {
      console.log('Error:', err);
      res.status(500).send('Update failed.');
    });
});

app.listen('8081');
console.log('Running on port 8081');

exports = module.exports = app;
output.json
{
}{
"name": "Boss1"
}{
"name": "Boss2"
}
You're better off just modifying the in-memory javascript object, and then saving it all to the file in an overwrite / replace kind of approach, rather than appending to the file (unless you expect the file to become so huge that it breaks memory limits).
To do that, just maintain an in-memory copy of the data and then just write it out: fs.writeFile(fileName, JSON.stringify(content, null, 4));
Otherwise, you have to figure out how to insert the new object inside the old one, or risk making it invalid json.
Related
So the file uploaded is an excel file that sheetJS needs to read, otherwise it will show as {}.
// POST /sendExcel — attempts to parse the uploaded body with SheetJS.
// NOTE(review): this fragment is truncated in the post (the handler is
// never closed) and the console.log line's string literal is
// unterminated ('Sheet1), so it does not parse as-is. Reading req.body
// directly yields '[object Object]' for a multipart upload — see the
// working formidable-based answer further down.
app.post('/sendExcel', function(req, res) {
let data = req.body;
var workbook = sheetJS.read(data, {type: 'buffer'});
console.log(workbook.Sheets['Sheet1); //prints... "{ A1: { t: 's', v: '[object Object]' }, '!ref': 'A1' }"
let excel = workbook.Sheets['Sheet1']['A1']['v'][0]; //prints... "["
So I've tried various things including changing the type client side as I had problems with it being of type buffer. So now it works partially, but I still can't access the data in the sheet.
As an example, I used the file path instead here, and it's shown to work as normal.
// GET /excel — reads the workbook from disk with SheetJS and returns the
// value of cell A1 ("vehicle" in the sample file).
// Express route handlers receive (req, res, next); the original named the
// first parameter `err`, which is misleading — it is the request object.
app.get('/excel', function(req, res) {
var wb = sheetJS.readFile("data.xlsx");
let excel = wb.Sheets['Sheet1']['A1']['v'];
console.log(excel); //this prints "vehicle", which is what is supposed to happen, not "[".
res.send(excel)
});
I am supposed to get the excel data from the form upload. That's the issue. It is now successful when sending to the db, but it will not access the whole data. I believe I need to change it back to an array.
You can use:
// Read the uploaded workbook into memory as an ArrayBuffer.
const reader = new FileReader();
reader.readAsArrayBuffer(workbook);
But this will not run in app.js
Here is my other answer with client-side and server-side. It might be helpful to others.
Javascript Read Excel file on server with SheetJS
Don't use the file reader. Append the excel sheet to the form in the body normally.
Client side:
// Client side: grab the chosen file from the <input>, wrap it in a
// FormData, and POST it as multipart/form-data — no FileReader needed.
let excelInput = document.getElementById("fileToUpload");
//excelInput: this html element allows you to upload the excel sheet to it
let excelFile = excelInput.files[0];
let form = new FormData();
form.append("excel", excelFile);
fetch('/sendExcel', {method: "POST", body: form})
  .then((response) => {
    // fetch only rejects on network failure; HTTP errors (4xx/5xx)
    // still resolve, so check response.ok explicitly.
    if (!response.ok) {
      throw new Error('Upload failed: ' + response.status);
    }
    console.log('Success:', response);
  })
  .catch((error) => {
    console.error('Error:', error);
  });
Then use formidable server side.
Server side:
const sheetJS = require('xlsx');
const formidable = require('formidable');

// POST /excel — let formidable parse the multipart upload, then read the
// uploaded temp file with SheetJS and send the workbook back.
// (The original also read req.body, but with a multipart form the data
// must go through formidable — req.body was unused.)
app.post('/excel', function(req, res) {
  const form = formidable({ multiples: true });
  // formidable's parse callback receives (err, fields, files) only; the
  // original declared a fourth `next` parameter, which is always
  // undefined here, so calling next(err) would itself have thrown.
  form.parse(req, (err, fields, files) => {
    if (err) {
      res.status(500).send('Upload failed.');
      return;
    }
    // Grab the first uploaded file, whatever its form field name was.
    var f = files[Object.keys(files)[0]];
    // NOTE(review): formidable v3 exposes f.filepath instead of f.path —
    // confirm against the installed version.
    var workbook = sheetJS.readFile(f.path);
    res.send(workbook);
  });
});
So formidable has to be used otherwise it won't work. Then you can use sheetJS.readFile instead of sheetJS.read.
Recently I picked up Node.js + Express.js in order to pair them with Socket.io and make a real time chat application. The problem is that since I'm relatively inexperienced with Node.js and Express.js, I'm having some trouble figuring out where to put my logic and how to separate it in different files. Right now, the logic that creates my Socket.io namespaces is in the www file and I'm trying to figure out where to place it. Right now it looks like this:
www File:
var app = require('../app');
var http = require('http');
// HTTP server wrapping the Express app; Socket.IO attaches to it so both
// share one port.
var server = http.createServer(app);
let io = require('socket.io')(server);
// For each configured chat server, expose a Socket.IO namespace at its
// endpoint and wire up the message/room events.
// NOTE(review): `servers` is not declared in this snippet — it presumably
// comes from elsewhere in www; entries appear to have the shape
// { name, endpoint, room: [{ name, history }] } — confirm.
servers.forEach((server) => {
console.log(server.name)
io.of(server.endpoint).on('connection',(socket) => {
socket.on('messageToServer', (message) => {
// The first key of socket.rooms is the socket's own id, so index [1]
// is the joined room (Socket.IO v2 behavior — in v3+ rooms is a Set,
// so this would need [...socket.rooms][1]; verify the version used).
let roomName = Object.keys(socket.rooms)[1]
let room = server.room.find((room) => {
return room.name == roomName
})
// Record the message, then broadcast it to everyone in that room on
// this namespace.
room.history.push(message)
io.of(server.endpoint).to(roomName).emit('messageToClient', message)
})
socket.on('joinRoom', (roomToJoin) => {
// Leave the current room (index [1], see above) before joining the
// requested one, so a socket is only ever in one chat room at a time.
let roomToLeave = Object.keys(socket.rooms)[1]
socket.leave(roomToLeave)
socket.join(roomToJoin)
let room = server.room.find((room) => {
return room.name == roomToJoin
})
// Replay the new room's backlog to just this socket.
socket.emit('chatHistory', room.history)
})
})
});
What I tried is the following - I created a socket.js file and put it in a folder called utility, moved the code from www to the socket.js file and tried exporting the server from my www file to my socket.js file so that Socket.io can access it:
socket.js File:
// socket.js — attaches the Socket.IO namespaces to the HTTP server.
// www does `module.exports = server`, i.e. it exports the server itself,
// so require the module directly — the original read `.server`, which is
// undefined, leaving Socket.IO with no server to attach to.
// NOTE(review): www also requires this file, making the two modules
// circular; passing the server in (createSocketServers(server)) would
// remove the cycle — confirm load order before relying on this.
let server = require('../bin/www')
let io = require('socket.io')(server);

// Build one namespace per configured chat server and wire up its events.
// NOTE(review): `servers` is not defined in this file — it must be
// declared or required elsewhere for this to run.
function createSocketServers() {
  servers.forEach((server) => {
    console.log(server.name)
    io.of(server.endpoint).on('connection', (socket) => {
      socket.on('messageToServer', (message) => {
        // Index [1]: the first room key is the socket's own id.
        let roomName = Object.keys(socket.rooms)[1]
        let room = server.room.find((room) => {
          return room.name == roomName
        })
        // Store and broadcast the message to the room on this namespace.
        room.history.push(message)
        io.of(server.endpoint).to(roomName).emit('messageToClient', message)
      })
      socket.on('joinRoom', (roomToJoin) => {
        // Swap rooms, then replay the new room's history to this socket.
        let roomToLeave = Object.keys(socket.rooms)[1]
        socket.leave(roomToLeave)
        socket.join(roomToJoin)
        let room = server.room.find((room) => {
          return room.name == roomToJoin
        })
        socket.emit('chatHistory', room.history)
      })
    })
  });
}

module.exports = createSocketServers
www File:
var app = require('../app');
var http = require('http');
var server = http.createServer(app);
// Export the server first so modules required below can see it.
module.exports = server
// socket.js exports the createSocketServers function itself
// (module.exports = createSocketServers), so invoke the require result.
// The original wrote require(...).createSocketServers — reading a
// property that does not exist on the function, and never calling it.
require('../utility/socket')()
As soon as I did that, my code stopped working, so I can only assume I haven't done it correctly, hence why I'm here asking for help. Thanks!
Everything looks fine except this line
require('../utility/socket').createSocketServers
When you do module.exports = createSocketServers in the socket.js file, you are actually exporting the function itself.
If you run what you require, it should work;
require('../utility/socket')()
If you want to keep it like you do now, you have to export an object from socket.js file;
module.exports = { createSocketServers }
After exporting your module like above, you have to run it like below;
require('../utility/socket').createSocketServers()
I was just wondering how to add(push) data into an array via an express router endpoint. Suppose I have an array inside data/data.js directory and my router code look this:
const express = require('express');
const bodyParser = require('body-parser');
const productRouter = express.Router();
//! bring data
const { products } = require('../data/data');
productRouter.use(bodyParser.json());

// POST / — append the posted product to the in-memory array and respond
// with the updated list.
// NOTE: this mutates only this process's copy of the required module —
// nothing is written back to data.js on disk, so the addition is lost on
// restart (persist with fs if it must survive — see the answers below).
productRouter
  .route('/')
  .post((req, res, next) => {
    // The original had an extra dangling `} else {` (a syntax error) and
    // pushed inside the null-body branch; push when a body IS present.
    if (req.body !== null) {
      products.push(req.body);
      res.statusCode = 200;
      res.setHeader('Content-Type', 'application/json');
      res.json(products);
    } else {
      // handle error: no product supplied in the request body
      const err = new Error('Body did not contain product information');
      err.status = 400;
      next(err);
    }
  });

module.exports = productRouter;
When I invoke the POST method on my route's endpoint, it pushes the new data and responds with an updated array. But when I check my express file, it still contains the old array. Why did I lose the data?
I would be very grateful if anyone could help me figure this out.
As #Aditya toke and #Andre Nuechter suggested I update my code like this:
// Updated POST handler: reject duplicate ids, otherwise append the raw
// JSON body to data.js on disk.
// NOTE(review): appendFileSync writes the JSON *after* the JS module
// source, so it never becomes part of exports.products — exactly the
// stray trailing line shown in the output below.
.post((req, res, next) => {
if (req.body !== null) {
if (products.some((product) => product.id === req.body.id)) {
// NOTE(review): `err` is an implicit global here — declare it with
// const; 409 Conflict would suit a duplicate better than 404.
err = new Error('Product with id:' + req.body.id + ' already exit');
err.status = 404;
return next(err);
} else {
// `fs` and `path` must be required at the top of the module.
const data_path = path.join(__dirname, '../data/data.js');
fs.appendFileSync(data_path, JSON.stringify(req.body));
res.statusCode = 200;
res.setHeader('Content-Type', 'application/json');
res.json(products);
}
} else {
// NOTE(review): 400 Bad Request is the conventional status for a
// missing body; 404 means "resource not found".
err = new Error('Body didnot contain product information');
err.status = 404;
return next(err);
}
});
But it pushes the new data like this:
// data.js after the append: the stringified object on the last line was
// written verbatim after the module source, so it is dead text — it is
// not part of exports.products, which is why requiring the file still
// yields the old array. (The `...` is the post's elision, not code.)
exports.products = [
{
title: 'camera',
imageUrl: 'https://source.unsplash.com/gGgoDJRD2WM/300x300',
id: 1,
},
...
]
exports.orders = [];
{"title":"mens","imageUrl":"https://source.unsplash.com/1-nx1QR5dTE/300x300","id":"6"}
Which is not what I want. Is there any way to add this to the products array? Or any better approach?
As you mentioned, saving data in a file
is only possible by using the filesystem to write data to it,
I would recommend using the JSON extension file, as it's easy to parse and read.
You have to use the filesystem to write data in the file.
And to add further it can be utilised as a global variable all over the project.
There are multiple ways to play with the filesystem using node js.
https://nodejs.org/api/fs.html
Updated The Answer
#falamiw Follow these steps
1. Don't use data.js start using data.json
Structure inside data.json will be like this
{
  "products": [
    {"title":"mens","imageUrl":"https://source.unsplash.com/1-nx1QR5dTE/300x300","id":"6"},
    {"title":"women","imageUrl":"https://source.unsplash.com/1-nx1QR5dTE/300x300","id":"7"}
  ]
}
Code to make changes in this JSON file
const fs = require('fs');

// Read and parse the current data.json.
let rawdata = fs.readFileSync('data.json');
let productData = JSON.parse(rawdata);

// Push the new entry onto the "products" array. The key must match the
// JSON structure above — it is "products" (plural); the original indexed
// 'product', which is undefined and would throw on .push.
productData['products'].push({ title: 'shoes', imageUrl: 'https://example.com/300x300', id: '8' });

// Serialize and overwrite data.json so the addition persists on disk.
let data = JSON.stringify(productData, null, 2);
fs.writeFileSync('data.json', data);
That's it now you can see the changes in the file.
I have not tested this code, but I am sure it will work; you just need to debug it and check that it works.
The data is lost, because push does not alter the file on your disk.
To do that you need to use something like fs.writeFile.
I have a utility module that creates an instance of a multer-gridfs storage engine for uploading files to my Mongo database. I use this module inside of any API route that requires the need to upload files.
I need to be able to update the metadata property value with a unique identifier. More than likely this will be the mongoose _id of the user uploading the file, but for now I am not concerned with that aspect of it. I really just want to know if I can change the metadata property dynamically.
Here is the storage engine gridFs_upload_engine.js:
const mongoose = require('mongoose');
const path = require('path');
const crypto = require('crypto');
const multer = require('multer');
const GridFsStorage = require('multer-gridfs-storage');
const Grid = require('gridfs-stream');
//Init Upload Engine
let gfs;
//Global instance of the DB connection
const database = mongoose.connection;
const mongoDb = process.env.MONGODB_URI || process.env.MLAB_URL;
database.once('open', () => {
//Init Stream
gfs = Grid(database.db, mongoose.mongo);
gfs.collection('uploads');
});
//Create Storage Engine
const storage = new GridFsStorage({
url: mongoDb,
file: (res, file) => {
return new Promise((resolve, reject) => {
crypto.randomBytes(16, (err, buf) => {
if (err) {
return reject(err);
}
const filename = buf.toString('hex') + path.extname(file.originalname);
const fileInfo = {
filename: filename,
bucketName: 'uploads',
metadata: 'NEED TO UPDATE THIS'
};
resolve(fileInfo);
});
});
}
});
const uploadEngine = multer({ storage });
module.exports = {
uploadEngine,
gfs
};
Above you can see the metadata property that I need to be able to dynamically change with some undetermined unique identifier. Is it possible to do that with an exported file?
Here is how I am utilizing it inside of an API route:
const express = require('express');
const router = express.Router();
//Controllers
const upload_controller = require('../../controllers/uploader');
//Utilities
// Shared multer/GridFS storage engine (see gridFs_upload_engine.js).
const upload = require('../../utils/gridFs_upload_engine');
const { uploadEngine } = upload;
//Upload Single File
// uploadEngine.single('file') stores the one file posted under the form
// field 'file' into GridFS before the controller runs.
router.post(
'/single',
uploadEngine.single('file'),
upload_controller.upload_single_file
);
//Upload Multiple Files
//Max file uploads at once set to 30
router.post(
'/multiple',
uploadEngine.array('file', 30),
upload_controller.upload_multiple_files
);
module.exports = router;
I pass the uploadEngine into the API route here, so that the route controller can use it, and that works with no issue. I am just having quite a time trying to figure out how to update metatdata dynamically and I am leaning towards my current implementation not allowing for that.
I don't know much about node and have no idea what multer-gridfs is, but I can answer How can I pass options into an imported module?
You can export an function that returns another function. And you would import it like
// Usage: the module exports a factory; calling it with options returns
// the function configured with those options.
const configFunction = require('nameoffile')
// this returns a functions with the config you want
const doSomethingDependingOnTheConfig = configFunction({...someConfig})
And in the file you are importing you would have a function returning another function like
// Factory: the outer call captures the configuration via rest
// destructuring; the returned function can then act on its own two
// arguments using that captured config.
function configFunction({ ...someConfig }) {
  return (your, func) => {
    // do what you want depending on the config
  };
}
module.exports = configFunction
I know this doesn't answer your question the way you want, but it answers your question's title, and I hope it gives you a better understanding of how to do what you want to do.
If this doesn't help, just let me know.
You would need to pass a parameter to the module gridFs_upload_engine.js and do the magic there.
An example could be:
In gridFs_upload_engine.js file:
// Upload engine that receives the caller's unique id (e.g. the mongoose
// _id taken from req.params) along with the file, so metadata can be set
// per request.
function uploadEngine (id, file) {
// update what you want
}
// NOTE(review): the bare `...` below is the answer's placeholder for the
// module's other exports, not valid syntax.
module.exports = {
...
uploadEngine: uploadEngine
}
In your router:
const upload = require('../../utils/gridFs_upload_engine')
...
// Route the id in on the URL, hand it to the engine, then continue to the
// controller. (The `...` lines are the answer's placeholders.)
router.post('/single/:id', function(req, res, next) {
...
upload.uploadEngine(req.params.id, file)
next()
}, upload_controller.upload_single_file)
In other words, when you are exposing gfs and uploadEngine inside your module, you could instead expose a function that would receive the arguments needed to perform the upload.
I created a python script that parses a website(IMDB) and organizes it into a dataframe.
I also have a node.js app that allows me to find a variable (movie ID based on movie name in the code called pyvar) that I would include in the python script. So how can I include this variable that I get after running javascript app into python script, run the script and then send the result back to the node.js app? (that would be dataframe converted to lets say json)
Node.js app
var express = require("express")
var app = express()
var request = require("request")
app.set("view engine", "ejs")
// GET / — render the search form.
app.get("/", function(req, res){
res.render("search")
})
// GET /results — query OMDb for the search term and render the matches.
app.get("/results", function(req, res){
var query = req.query.search
var url = "http://www.omdbapi.com/?s=" + query + "&apikey=thewdb"
request(url, function(error, response, body){
if(!error && response.statusCode == 200){
var data = JSON.parse(body)
res.render("results", {data: data})
// imdbID of the first hit — the value to hand to the Python script.
// NOTE(review): it is computed after res.render and never used; the
// response is already on its way by this point.
var pyvar = data["Search"][0]["imdbID"]
}
})
})
// PORT/IP come from the hosting environment.
app.listen(process.env.PORT, process.env.IP, function(){
console.log("Movie App has started!!!");
})
The python script in a nutshell looks like this:
url = 'website.org/' + pyvar + '/blah'
parse(url)
return dataframe
After that I would send the dataframe in some form back to the node.js app and display the results or even better if it would allow me to download the dataframe converted to xlsx but it might be too complicated.
You can use child_process spawn to execute your python script, as Felix Kling suggest in his comment, and return it result to your nodejs app. Then you could use a package like node-xlsx to transform the data to an Excel file.
Something like that:
app.js
// ...
const { spawn } = require('child_process');
const xlsx = require('node-xlsx');
// ...
// GET /results — look the title up on OMDb, hand the first hit's imdbID
// to the Python scraper, and return the script's output as an .xlsx file.
app.get("/results", (req, res) => {
  let query = req.query.search;
  let url = "http://www.omdbapi.com/?s=" + query + "&apikey=thewdb";
  request(url, (error, response, body) => {
    if (!error && response.statusCode == 200) {
      let data = JSON.parse(body);
      let pyvar = data["Search"][0]["imdbID"];
      // Call the python script (relies on its shebang and exec bit).
      let pythonScript = spawn('./script.py', [pyvar]);
      // stdout may arrive in several chunks — collect them all and build
      // the sheet only once the script has finished.
      let chunks = [];
      pythonScript.stdout.on('data', chunk => chunks.push(chunk));
      pythonScript.on('close', () => {
        // The original wrote `let xlsx = xlsx.build(...)`, shadowing the
        // module inside its own initializer — a temporal-dead-zone
        // ReferenceError at runtime. Use a distinct name.
        // node-xlsx expects sheet data as rows (array of arrays), not a
        // bare string.
        let sheet = xlsx.build([{ name: "myXlsxSheet", data: [[Buffer.concat(chunks).toString()]] }]);
        // Buffer.from replaces the deprecated new Buffer(...).
        res.end(Buffer.from(sheet, 'binary'));
      });
    }
  })
})
// ...
script.py
#!/usr/bin/env python3
"""Scrape the site for the movie id given on the command line and print the
resulting table on stdout (captured by the Node app via spawn)."""
import sys

import pandas

# The imdbID handed over by the Node app as the first CLI argument.
pyvar = sys.argv[1]

# Here the script that parses the website.
url = 'website.org/' + pyvar + '/blah'
data = parse(url)  # NOTE(review): parse() must be defined/imported elsewhere.

# The original used the Python 2 print statement, which is a SyntaxError on
# any supported interpreter; print() works on Python 3.
print(pandas.DataFrame(data))