I want to set up a server that scrapes data off an existing API, cleans it up and outputs it to my own server where I can manipulate the data further.
So far I've got all the pieces working.
My server runs.
I can scrape the data into the correct JSON format
I have a function that runs when a POST https request is called that writes JSON to my database.
Now I need to connect the two.
There are 3 main components:
the server.js file
The Post controller & schema file. This contains the function that handles the http request and constructs the object and writes to the database.
I have a separate file that contains a bunch of functions that scrape the data and prepare it in the proper JSON format.
My problem is getting the scraping file to run. At the moment, to test it, I just run `node filename.js`; that worked fine when I was outputting the JSON with console.log. However, now that I've written in the HTTP request, the problem is that when I run the server to open the HTTP endpoint, I can't also run the file.js that contains the scraper.
Essentially I want the scraper to run on a schedule, and send the data to the endpoint I've set up on the server.
Is there a way to replicate calling node filename.js in code? As per my understanding when you run node filename.js it just runs down the file, executing code as it finds it. Is there a way to do this? Or do I need to call each function 1 by 1 from my server file? Or do I just encapsulate the whole js file in a master function (my worry would then be that variables become siloed)?
I've posted the code below for reference:
server.js
require("dotenv").config(); // ALLOWS ENVIRONMENT VARIABLES TO BE SET ON PROCESS.ENV SHOULD BE AT TOP
const express = require("express");
const app = express();
const postRoutes = require("./routes/postRoutes.js");
const path = require('path');
// Middleware
app.use(express.json()); // parse json bodies in the request object
// Redirect requests to endpoint starting with /posts to postRoutes.js
app.use("/posts", postRoutes);
// Global Error Handler. IMPORTANT function params MUST start with err
// (the unused `next` parameter is kept on purpose so the handler keeps its four-argument signature).
app.use((err, req, res, next) => {
  console.log(err.stack);
  console.log(err.name);
  console.log(err.code);
  res.status(500).json({
    message: "Something went rely wrong",
  });
});
// Listen on pc port
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
  console.log(`Server running on PORT ${PORT}`);
  // Requiring the scraper module runs it top to bottom, exactly like `node battlesGetData.js`.
  // It is loaded only once the server is listening because the scraper POSTs its
  // results to the /posts endpoint of this very server.
  // The file name casing matches the module's heading (battlesGetData.js) so the
  // path also resolves on case-sensitive filesystems.
  require(path.resolve(__dirname, "./battlesGetData.js"));
});
battlesGetData.js
const fetch = require("node-fetch");
const postController = require("./controllers/postControllers");
//const fs = require('fs');
/**
 * Predicate for Array.prototype.filter that keeps an element only when it is
 * the first occurrence of its value in the array being filtered.
 */
const distinct = (item, position, list) => list.indexOf(item) === position;
/**
 * Download the battle history of one player from the Splinterlands API.
 *
 * Failures are treated as best-effort: a network error, a non-OK HTTP status,
 * an unparsable body or a payload without a `battles` array is logged and
 * reported as an empty list, so callers can always iterate the result.
 *
 * @param {string} [player=''] - Player name; URL-encoded before being sent.
 * @param {object} [data={}] - Unused; kept so existing call sites stay valid.
 * @returns {Promise<Array>} The `battles` array of the response, or `[]` on failure.
 */
async function getBattleHistory(player = '', data = {}) {
  const url = 'https://api2.splinterlands.com/battle/history?player=' + encodeURIComponent(player);
  try {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error('Network response was not ok');
    }
    const battleHistory = await response.json();
    // Guard against payloads that parse fine but carry no battle list.
    return Array.isArray(battleHistory.battles) ? battleHistory.battles : [];
  } catch (error) {
    console.error('There has been a problem with your fetch operation:', error);
    return [];
  }
}
/**
 * Pick the general, team-independent fields out of a battle record.
 * Any field that is missing or falsy is replaced by an empty string.
 */
const extractGeneralInfo = (x) => {
  const fieldNames = ['created_date', 'match_type', 'mana_cap', 'ruleset', 'inactive'];
  const info = {};
  for (const fieldName of fieldNames) {
    info[fieldName] = x[fieldName] || '';
  }
  return info;
}
/**
 * Flatten a team into one row: the summoner's id and level followed by the
 * id, level and abilities of the first six monsters. Slots the team does not
 * fill are reported as empty strings.
 */
const extractMonster = (team) => {
  const SLOT_COUNT = 6;
  // Read the six slots first; absent entries stay undefined.
  const slots = Array.from({ length: SLOT_COUNT }, (_, index) => team.monsters[index]);
  const row = {
    summoner_id: team.summoner.card_detail_id,
    summoner_level: team.summoner.level,
  };
  slots.forEach((monster, index) => {
    const prefix = `monster_${index + 1}`;
    row[`${prefix}_id`] = monster ? monster.card_detail_id : '';
    row[`${prefix}_level`] = monster ? monster.level : '';
    row[`${prefix}_abilities`] = monster ? monster.abilities : '';
  });
  return row;
}
let battlesList = [];
// Declared with `const`: a bare assignment creates an implicit global and is a
// ReferenceError in strict mode / ES modules.
const usersToGrab = ["rus48-bot", "sbalani"];

/**
 * Turn one raw battle record into a flat row describing the winning team.
 * Returns undefined for surrendered battles and for battles whose winner is
 * neither player_1 nor player_2; those entries are filtered out before posting.
 */
const toWinnerRow = (battle) => {
  const details = JSON.parse(battle.details);
  if (details.type === 'Surrender' || !battle.winner) {
    return undefined;
  }
  let winnerSlot;
  if (battle.winner === battle.player_1) {
    winnerSlot = 1;
  } else if (battle.winner === battle.player_2) {
    winnerSlot = 2;
  } else {
    return undefined;
  }
  return {
    ...extractMonster(details[`team${winnerSlot}`]),
    ...extractGeneralInfo(battle),
    battle_queue_id: battle[`battle_queue_id_${winnerSlot}`],
    player_rating_initial: battle[`player_${winnerSlot}_rating_initial`],
    player_rating_final: battle[`player_${winnerSlot}_rating_final`],
    winner: battle[`player_${winnerSlot}`],
  };
};

// One promise per user; each appends that user's rows to battlesList.
const battles = usersToGrab.map(user =>
  getBattleHistory(user)
    .then(history => history.map(toWinnerRow))
    .then(rows => battlesList = [...battlesList, ...rows])
);

Promise.all(battles)
  .then(() => {
    const cleanBattleList = battlesList.filter(x => x != undefined);
    return fetch("http://localhost:3000/posts/", {
      method: "post",
      // The body must be serialised explicitly: passing the array itself does
      // not produce the JSON document announced by the Content-Type header.
      body: JSON.stringify(cleanBattleList),
      headers: {"Content-Type": "application/json"}
    });
    /* fs.writeFile(`data/history.json`, JSON.stringify(cleanBattleList), function (err) {
    if (err) {
    console.log(err);
    }
    }); */
  })
  .then(response => console.log(response))
  // Also catches failures while collecting the battles, not only the POST itself.
  .catch(err => console.log(err));
This is the POST function that gets called
// Express handler for the POST route: reads the parsed JSON request body, builds one
// PostBattle per entry and saves the entries one after another.
// NOTE(review): the code shown here ends before the handler's closing brace, sends no
// response and does not catch errors from save() — confirm against the full file.
exports.createNewPost = async (req, res, next) => {
/*
let { summoner_id, summoner_level,
monster_1_id, monster_1_level, monster_1_abilities,
monster_2_id, monster_2_level, monster_2_abilities,
monster_3_id, monster_3_level, monster_3_abilities,
monster_4_id, monster_4_level, monster_4_abilities,
monster_5_id, monster_5_level, monster_5_abilities,
monster_6_id, monster_6_level, monster_6_abilities,
created_date, match_type, mana_cap, ruleset, inactive,
battle_queue_id, player_rating_initial, player_rating_final, winner
} = req.body; // using postman this is what allows us to post JSON
let post = new PostBattle(summoner_id, summoner_level,
monster_1_id, monster_1_level, monster_1_abilities,
monster_2_id, monster_2_level, monster_2_abilities,
monster_3_id, monster_3_level, monster_3_abilities,
monster_4_id, monster_4_level, monster_4_abilities,
monster_5_id, monster_5_level, monster_5_abilities,
monster_6_id, monster_6_level, monster_6_abilities,
created_date, match_type, mana_cap, ruleset, inactive,
battle_queue_id, player_rating_initial, player_rating_final, winner); // the title & body defined in the previous line taken from the JSON are now deposited here.
*/
// The body is expected to be a collection of battle rows (presumably the array sent by the scraper — TODO confirm).
let json = req.body;
// `for...in` iterates the KEYS of the body (index strings when the body is an array),
// so each row is reached as json[obj].
// NOTE(review): `for...of` over the array would be the more idiomatic form.
for (var obj in json) {
console.log(obj + ": " + json[obj]);
// Positional constructor: the argument order must match PostBattle's parameter order.
let post = new PostBattle(json[obj].summoner_id, json[obj].summoner_level,
json[obj].monster_1_id, json[obj].monster_1_level, json[obj].monster_1_abilities,
json[obj].monster_2_id, json[obj].monster_2_level, json[obj].monster_2_abilities,
json[obj].monster_3_id, json[obj].monster_3_level, json[obj].monster_3_abilities,
json[obj].monster_4_id, json[obj].monster_4_level, json[obj].monster_4_abilities,
json[obj].monster_5_id, json[obj].monster_5_level, json[obj].monster_5_abilities,
json[obj].monster_6_id, json[obj].monster_6_level, json[obj].monster_6_abilities,
json[obj].created_date, json[obj].match_type, json[obj].mana_cap, json[obj].ruleset, json[obj].inactive,
json[obj].battle_queue_id, json[obj].player_rating_initial, json[obj].player_rating_final, json[obj].winner);
console.log(post);
//let post = new PostBattle(json);
// Awaited inside the loop, so the rows are saved sequentially.
post = await post.save();
console.log("a post is happening");
}
I've set up a route to upload a .csv file, validate the data received, check that none of the rows exist in the database and finally upload the data once all validation checks have been performed.
Below is the route in question:
...
const Customer = require('../models/Customer');
const fs = require('fs');
const multer = require('multer');
const csv = require('fast-csv');
router.post('/customers/csv', upload.single('file'), async (req, res) => {
const errors = [];
const fileRows = [];
const existRows = [];
const count = 1;
fs.createReadStream(req.file.path)
.pipe(csv.parse({ headers: true }))
.validate(data =>
// Data validation for each field...
)
.on('error', error => console.error(error))
.on('data-invalid', (row, rowNumber) =>
{
errors.push(`Invalid [rowNumber=${rowNumber}] [row=${JSON.stringify(row)}]`)
console.log(`Invalid [rowNumber=${rowNumber}] [row=${JSON.stringify(row)}]`)
}
)
.on('data', async function (row) {
if(await Customer.exists({$or: [
{srn: row.srn},
{telephone: row.telephone},
{claim_id: row.claim_id},
{receiver_id: row.receiver_id}
]})) {
// This doesn't store data so long as I'm using async/await
// but I need the await for the mongoose query
existRows.push(row)
} else {
fileRows.push(row) // This doesnt work so long as I'm using async
}
count++;
})
.on('end', (rowCount) => {
fs.unlinkSync(req.file.path);
if(errors.length !== 0) {
req.flash(
'error_msg',
`Upload failed! Parsed ${rowCount} rows with ${errors.length} bad rows at rows:`
)
return res.redirect('/admin/customers/csv')
}
if(existRows.length !== 0) {
req.flash(
'error_msg',
`Error: ${existRows.length} rows already exist. Kindly edit and try again.`
)
return res.redirect('/admin/customers/csv')
}
fileRows.forEach(async customer => {
//Execute an insertion or save
})
})
})
Unfortunately, it would seem that the existRows and fileRows data I store inside the .on('data', ...) handler is not available when I try to use it in the .on('end', ...) handler.
I assume it must be an issue with how I've set up my async/await in there?
I'm uploading files from the browser via a multipart request to a GraphQL-API which is powered by graphql-yoga which is powered by express.
Now I want to forward this exact same request body to another GraphQL-API.
const fetch = require('node-fetch');
async passThrough(args, opts) {
const { body, getRawBody, headers, method } = opts.request;
var rawBody;
if (body.files && body.files.length) {
rawBody = await getRawBody;
} else {
rawBody = typeof body == 'string' ? body : JSON.stringify(body)
}
let options = {
body: rawBody,
method, headers
};
var res = await fetch(otherApiUrl, options).then((res) => {
return res.json();
});
return res;
}
In this function I get the body as an object. But it includes "files" as promises which I can't simply forward (Couldn't find anything to do it). So I tried to get the raw body through a express middleware and access it like above with await getRawBody.
/**
 * Express middleware that starts collecting the request body and exposes the
 * eventual result as the promise `req.getRawBody`, then hands over to `next`.
 *
 * Every chunk is converted to a string as it arrives and the pieces are joined
 * when the request ends, so the promise resolves to a string.
 * NOTE(review): converting chunks to text is not byte-preserving for binary uploads.
 */
function getRawBody(req, res, next) {
  const pieces = [];
  req.getRawBody = new Promise((resolve) => {
    req.on('data', (chunk) => {
      pieces.push(String(chunk));
    });
    req.on('end', () => resolve(pieces.join('')));
  });
  next();
}
server.express.use(getRawBody);
It passes the request to the other API, but the files are no longer valid JPEGs. I found out that the uploaded file is shifted by some bits compared to the original file. What might I be doing wrong?
I found a solution here and adapted the function to get the raw body. Now the file contents are not shifted anymore on the target host.
const concatStream = require('concat-stream');
/**
 * Express middleware that pipes the request into a concat-stream collector and
 * exposes the collected body as the promise `req.getRawBody`, then calls `next`.
 * The promise resolves with whatever the collector hands to its callback.
 */
function getRawBody(req, res, next) {
  req.getRawBody = new Promise((resolve) => {
    const collector = concatStream((data) => {
      resolve(data);
    });
    req.pipe(collector);
  });
  next();
}
FIXED: use storageEngine: "wiredTiger"
I use Mocha / Chai / Supertest and Mongodb-Memory-Server to test my app, but I received the error: Transaction numbers are only allowed on storage engines that support document-level locking
In real database and test by postman, it's working well.
My code:
In database.js
const mongoose = require('mongoose')
const { MongoMemoryReplSet } = require('mongodb-memory-server')
mongoose.set('useFindAndModify', false);
/**
 * Connect mongoose to the database.
 * Under NODE_ENV === 'test' an in-memory replica set is started and used;
 * otherwise the connection goes to process.env.MONGO_URL.
 */
const connect = async () => {
  try {
    const url = process.env.MONGO_URL
    const options = {
      //Something
    }
    if (process.env.NODE_ENV === 'test') {
      // Transactions require a storage engine with document-level locking.
      // Requesting wiredTiger explicitly avoids the error "Transaction numbers are
      // only allowed on storage engines that support document-level locking".
      const replSet = new MongoMemoryReplSet({
        replSet: { storageEngine: 'wiredTiger' },
      });
      await replSet.waitUntilRunning();
      const uri = await replSet.getUri();
      await mongoose.connect(uri, options)
      //log connected
    } else {
      await mongoose.connect(url, options)
      //log connected
    }
  } catch (error) {
    //error
  }
}
I have two models: Company and User. I wrote a function that adds a member to a company using a transaction. My code:
/**
 * Express handler: add the user given in the request body to the company given
 * in the route params, updating both documents inside one transaction.
 */
const addMember = async (req, res, next) => {
  const { companyId } = req.params
  const { userId } = req.body
  const session = await mongoose.startSession()
  try {
    await session.withTransaction(async () => {
      // Both updates are bound to the same session so they commit or abort together.
      const [company, user] = await Promise.all([
        Company.findOneAndUpdate(
          //Something
        ).session(session),
        User.findByIdAndUpdate(
          //Something
        ).session(session)
      ])
      //Something if... else
      // NOTE(review): withTransaction may re-run this callback on transient errors;
      // consider sending the response after the transaction has committed.
      return res.json({
        message: `Add member successfully!`,
      })
    })
  } catch (error) {
    //error
  } finally {
    // Always release the session, whether the transaction committed or failed.
    await session.endSession()
  }
}
Here's router:
router.post('/:companyId/add-member',
authentication.required,
company.addMember
)
Test file:
const expect = require('chai').expect
const request = require('supertest')
const app = require('../app')
// Integration test for the add-member endpoint: posts a member id with a signed
// token and expects HTTP 200. A failed request or assertion rejects the async
// test, which reports it as a failure.
describe('POST /company/:companyId/add-member', () => {
  it('OK, add member', async () => {
    const res = await request(app)
      .post(`/company/${companyIdEdited}/add-member`)
      .set({ "x-access-token": signedUserTokenKey })
      .send({userId: memberId})

    console.log(res.body)
    expect(res.statusCode).to.equals(200)
  })
})
And I received the error: 'Transaction numbers are only allowed on storage engines that support document-level locking'
How can I fix this?
Add retryWrites=false to your database uri. Example below:
mongodb://xx:xx@xyz.com:PORT,zz.com:33427/database-name?replicaSet=rs-xx&ssl=true&retryWrites=false
I'm using Node and the ws npm package to work with WebSockets. Got the listenKey as stated in the docs (below), but I'm unable to get my account info using User Data Stream. I'd prefer to use a stream to read my most current account info (balances, etc) since using the Rest API to do it incurs a penalty (WEIGHT: 5) each time.
I've tried doing ws.send('outboundAccountInfo') but no joy.
DOCS: https://github.com/binance-exchange/binance-official-api-docs/blob/master/user-data-stream.md
Full code example - does not return any data:
import request from 'request'
import WebSocket from 'ws'
import { API_KEY } from '../../assets/secrets'
const DATA_STREAM_ENDPOINT = 'wss://stream.binance.com:9443/ws'
const BINANCE_API_ROOT = 'https://api.binance.com'
const LISTEN_KEY_ENDPOINT = `${BINANCE_API_ROOT}/api/v1/userDataStream`
/**
 * Open the user data stream for the account and log what arrives on it.
 *
 * Obtains a listen key, connects to `${DATA_STREAM_ENDPOINT}/${listenKey}` and
 * attaches a single 'message' listener. The socket only emits 'message' for
 * incoming frames; the stream's event type travels inside the JSON payload
 * (field `e`), so account updates are recognised there rather than through a
 * separate event name.
 *
 * @throws {Error} When the websocket cannot be opened; the underlying failure is kept as `cause`.
 */
const fetchAccountWebsocketData = async() => {
  const listenKey = await fetchListenKey()
  console.log('-> ', listenKey) // valid key is returned
  let ws
  try {
    ws = await openWebSocket(`${DATA_STREAM_ENDPOINT}/${listenKey}`)
  } catch (err) {
    throw new Error(`ERROR - fetchAccountWebsocketData: ${err}`, { cause: err })
  }
  ws.on('message', data => {
    console.log(data)
    let event
    try {
      event = JSON.parse(data.toString())
    } catch (err) {
      // Not JSON: the raw frame has already been logged above.
      return
    }
    if (event && event.e === 'outboundAccountInfo')
      console.log(event)
  })
}
/**
 * Open a websocket to `endpoint`.
 *
 * @param {string} endpoint - URL to connect to.
 * @returns {Promise<WebSocket>} Resolves with the socket once it is open and
 *   rejects when the socket reports an error before that.
 */
const openWebSocket = endpoint => {
  const p = new Promise((resolve, reject) => {
    const ws = new WebSocket(endpoint)
    console.log('\n-->> New Account Websocket')
    ws.once('open', () => {
      console.log('\n-->> Websocket Account open...')
      resolve(ws)
    })
    // The error handler needs its own listener: extra arguments passed to
    // on('open', ...) are ignored, which left connection errors unhandled and
    // the promise pending forever.
    ws.once('error', err => {
      console.log('fetchAccountWebsocketData error:', err)
      reject(err)
    })
  })
  // Log failures here as well; callers still receive the rejection through `p`.
  p.catch(err => console.log(`ERROR - fetchAccountWebsocketData: ${err}`))
  return p
}
/**
 * Request a listen key for the user data stream.
 *
 * @returns {Promise<string>} Resolves with the `listenKey` of the response.
 *   Rejects when the request fails, when the body is not valid JSON, or when
 *   the response carries no `listenKey` (for example an API error payload).
 */
const fetchListenKey = () => {
  const p = new Promise((resolve, reject) => {
    const options = {
      url: LISTEN_KEY_ENDPOINT,
      headers: {'X-MBX-APIKEY': API_KEY}
    }
    request.post(options, (err, httpResponse, body) => {
      if (err)
        return reject(err)
      let listenKey
      try {
        // Parsing happens inside an asynchronous callback, so an exception here
        // would not reject the promise on its own; convert it explicitly.
        listenKey = JSON.parse(body).listenKey
      } catch (parseErr) {
        return reject(parseErr)
      }
      if (!listenKey)
        return reject(new Error(`Response did not contain a listenKey: ${body}`))
      resolve(listenKey)
    })
  })
  // Log failures here as well; callers still receive the rejection through `p`.
  p.catch(err => console.log(`ERROR - fetchListenKey: ${err}`))
  return p
}
export default fetchAccountWebsocketData
Was struggling too .... for hours !!!
https://www.reddit.com/r/BinanceExchange/comments/a902cq/user_data_streams_has_anyone_used_it_successfully/
The binance user data stream doesn't return anything when you connect
to it, only when something changes in your account. Try running your
code, then go to binance and place an order in the book, you should
see some data show up*