Node.js Cheerio Live Update/Instant Refresh page on every HTML body change - javascript

I am using Axios and cheerio to scrape the cricket score website and convert its data to JSON format.
But the problem is that scores and other information are not updating instantly on my API response.
I want something like React's useEffect (I know it is a React Hook and doesn't work in Express) in my Express server, so that whenever the score changes on the main website, my server re-scrapes the page and shows the updated data.
// Fetch the scoreboard page and reply with one JSON entry per scorecard container.
axios(link)
  .then(({ data: html }) => {
    const $ = cheerio.load(html)
    const score = []
    // A regular function (not an arrow) is used so that `this` is the current element.
    $('.scorecard-container', html).each(function () {
      const card = $(this)
      const entry = {
        id: score.length + 1,
        title: card.text(),
        url: link + card.find('a').attr('href')
      }
      score.push(entry)
    })
    res.json(score)
  })
  .catch(err => {
    res.send('Something went wrong')
    console.log(err)
  })
Thanks in Advance :)

example express endpoint
// Scrape the upstream page on every request, so each response reflects the
// page as it is at the moment the client asks.
get("/stats", async (req, res) => {
  try {
    const { data } = await axios.get(someUrl)
    const $ = cheerio.load(data)
    res.json({
      title: $('title').text()
    })
  } catch (err) {
    // Without this, a failed upstream request turns into an unhandled promise
    // rejection and the client never receives a response.
    console.log(err)
    res.status(502).json({ error: 'Failed to fetch upstream page' })
  }
})
in react
// Request the stats and store the parsed JSON in component state.
// The dependency array is empty, so the effect is not re-run on later renders.
useEffect(() => {
  fetch("/stats")
    .then(response => response.json())
    .then(stats => setStats(stats))
}, [])

Related

Looking for a solution to stream dynamic changes in JSON data on to browser without refreshing

I have a node.js / next.js api built that essentially does a bunch of stuff after the user submits text into a form on the front end. One of the things it does is write stage completion messages periodically to a JSON file to signify the completion of certain stages.
my api looks something like this
import dbConnect from '../../../lib/dbConnect'
import Demo from '../../../models/Demo'
import fs from 'fs'
import shell from 'shelljs';
/**
 * API route for demos.
 *
 * GET  - responds 200 with every Demo document.
 * POST - runs two shell check stages, overwriting success-stage.json with a
 *        completion message after each one, then creates a Demo from the
 *        request body and responds 201.
 * Any other method, or any error thrown while handling GET/POST, gets a
 * 400 `{ success: false }` response.
 */
export default async function handler(req, res) {
  const { method, body } = req

  await dbConnect()

  if (method === 'GET') {
    try {
      const demos = await Demo.find({})
      res.status(200).json({ success: true, data: demos })
    } catch (error) {
      res.status(400).json({ success: false })
    }
    return
  }

  if (method === 'POST') {
    try {
      const initialjson = '[]'
      const timestamp = Date.now();

      // Runs one check script, then replaces the contents of success-stage.json
      // with `[label, timestamp]`. A non-zero exit code calls shell.exit(1).
      const runStage = (script, label) => {
        if (shell.exec(script).code !== 0) {
          shell.echo('Sorry stage failed');
          shell.exit(1);
        }
        const entries = JSON.parse(initialjson);
        entries.push(label, +timestamp);
        fs.writeFileSync('success-stage.json', JSON.stringify(entries));
      }

      // stage 1
      runStage('./initial_checks.sh', "Stage 1 complete")
      // stage 2
      runStage('./secondary_checks.sh', "Stage 2 complete")

      const demo = await Demo.create(req.body)
      res.status(201).json({ success: true, data: demo })
    } catch (error) {
      res.status(400).json({ success: false })
    }
    return
  }

  res.status(400).json({ success: false })
}
I am using socket.io, my server.js file is
server.js
// Custom server: Express and socket.io share one HTTP server, and every
// request is handed to the Next.js request handler.
const express = require("express");
const http = require("http");
const socketIo = require("socket.io");
const next = require("next");
const fs = require('fs')

const app = express();
const server = http.Server(app);
const io = socketIo(server);

const dev = process.env.NODE_ENV !== "production";
const nextApp = next({ dev });
const nextHandler = nextApp.getRequestHandler();
const port = 3000;

// Read a single time while this file loads; the same string is then sent to
// every client that connects.
const data = fs.readFileSync('success-stage.json', 'utf8')

io.on("connect", (socket) => {
  socket.emit("now", { message: data });
});

nextApp.prepare().then(() => {
  app.all("*", (req, res) => nextHandler(req, res));
  server.listen(port, (err) => {
    if (err) throw err;
    console.log("> Ready on port: " + port);
  });
});
and here is the pages/index.js file
import { useEffect, useRef, useState } from "react";
import io from "socket.io-client";
export default function IndexPage() {
const socket = useRef();
const [hello, setHello] = useState();
useEffect(() => {
socket.current = io();
socket.current.on("now", (data) => {
setHello(data.message);
});
}, []);
return <h1>{hello}</h1>;
}
so at this point we are seeing the 2nd message from my JSON file match what is rendered on the frontend when I build my application. It looks like this
["Stage 2 complete",1664289144513]
I am wondering how I can stream this data onto the front end for clients without having to refresh the page? I need it to show the current stage's success message... There are 5 total stages, so i guess i am looking for a way to either stream data or maybe to revalidate the browser window like every second without having to refresh... is this possible?
Any help would be greatly appreciated... Thanks in advance for your time everyone...
You've already got a solution implemented that can handle this. What you're describing is exactly what sockets are for -- bidirectional communication between the client and server without refreshing the page.
Just create a new socket listener on the frontend for a new topic, maybe "stageStatus", and then emit messages to that topic on the backend at various stages in the process. That's it!

Need help scraping image from craigslist

I've tried everything I can think of. I'm able to get postUrl, date, title, price and location. If you go to https://sandiego.craigslist.org/search/sss?query=surfboards and paste the code snippet below into the console it returns all the images. But when I try to access in my code it's returning undefined. Any help on this would be greatly appreciated!
// Log the image src found beneath each search-result list item.
$('#search-results > li').each((_, item) => {
  const imageSrc = $(item).children().find('img').attr('src')
  console.log(imageSrc)
})
import axios from 'axios'
import request from 'request-promise'
import cheerio from 'cheerio'
import express from 'express'
import path from 'path'
// Server setup. This file uses ES module imports, where __dirname is not
// predefined, so it is derived from path.resolve() instead.
const __dirname = path.resolve();
const PORT = process.env.PORT || 8000;
const app = express();

// Serve the landing page.
app.get('', (_req, res) => {
  const indexPage = __dirname + '/views/index.html'
  res.sendFile(indexPage)
});
// Listings collected by the scrape below; filled in when the request resolves.
const surfboards = [];

axios("https://sandiego.craigslist.org/search/sss?query=surfboards")
  .then(res => {
    const $ = cheerio.load(res.data);
    $('#search-results > li').each((index, element) => {
      const row = $(element);
      const info = row.children('.result-info');
      const meta = info.children('.result-meta');
      surfboards.push({
        title: info.children('.result-heading').text().trim(),
        postUrl: row.children('a').attr('href'),
        date: info.children('.result-date').text(),
        price: meta.children('.result-price').text(),
        location: meta.children(".result-hood").text().trim(),
        // Why is this not working?!?!?!?!?!
        img: row.children().find('img').attr('src')
      })
    })
    return surfboards
  })
  .catch(err => console.error(err))
// Return whatever listings are currently held in `surfboards`.
app.get('/api/surfboards', (req, res) => {
  return res.status(200).json({ results: surfboards })
})

// Make App listen
app.listen(PORT, () => {
  console.log(`Server is listening to port ${PORT}`)
})
Looks like the page sets the images with JavaScript. Thus axios gets the HTML without actual links to images.
But there seems to be a workaround here. You can generate links to the images by concatenating https://images.craigslist.org with the data-ids value from the parent a tag.
You can get the data-ids like this:
// Read the data-ids attribute from the listing's direct <a> child.
var anchor = $(element).children('a')
var data_ids = anchor.attr('data-ids')
then split it into an array on commas, strip the leading 3: prefix (the first two characters) from each id, and build each URL like this:
`${img_base_url}/${ids}_${resolution_and_extension}`
But if you only need the URL of the first image, there is no need to create a new array each time. Use substring instead (note that sometimes an li doesn't have an image at all):
/**
 * Returns the id of the first image named in a `data-ids` attribute value.
 *
 * Assumes the value is a comma-separated list whose entries carry a `3:`
 * prefix (e.g. "3:abc,3:def") - confirm against the live markup.
 *
 * @param {string|undefined} data_ids - raw attribute value; missing when the
 *   listing has no image.
 * @returns {string|undefined} the first id without its `3:` prefix, or
 *   undefined when there is no attribute value.
 *
 * Usage: const first_id = getFirstImageId($(element).children('a').attr('data-ids'))
 */
function getFirstImageId(data_ids) {
  if (!data_ids) {
    return undefined
  }
  // Skip the "3:" prefix only when it is actually present.
  const marker = data_ids.indexOf('3:')
  const start = marker === -1 ? 0 : marker + 2
  // Stop at the first comma, or take the rest of the string for a single id.
  const comma = data_ids.indexOf(',')
  const end = comma === -1 ? data_ids.length : comma
  return data_ids.substring(start, end)
}

Node/Express: Use a function request from another file

Firstly, I'm a frontend developer so I'm sorry if I use wrong terms in my explanations.
In my company, we are actually building 1 web app and 2 API apps. So the users use the web app which talks to the first API which talks to the second API.
Here, we are in the first API, in the server.js file:
server.js
---------
const cats = require("./api/cats");
var app = express();
// listTheCats() returns an array of cats
app.get("/animals/cats", cats.listTheCats);
In cats.js, we can see with listTheCats() we are sending another request to the second API:
cats.js
-------
// Express handler: forwards the incoming query string to the second API and
// relays the status and data found in its response body. Any failure is
// logged and answered with 503.
const listTheCats = (req, res) => {
  const request = {
    method: "get",
    url: "http://second-api-url.com/animals/cats",
    params: req.query,
  };
  const relay = ({ data: payload }) => {
    res.status(payload.status).json(payload.data);
  };
  const fail = (err) => {
    console.log(err);
    res.sendStatus(503);
  };
  axios(request).then(relay).catch(fail);
};
module.exports = listTheCats;
The code above works fine on the web app. But now, in the first api, in another file called "cuteAnimals.js" I need to call listTheCats(). So I tried to do this but it doesn't work:
cuteAnimals.js
--------------
const { listTheCats } = require("./cats");
// Requests `animals/cats` with type=cute and logs the response.
// Invoked immediately below, so it runs as soon as this file is loaded.
async function fetchCats() {
  const params = { type: "cute" };
  const cuteCats = await axios.get(`animals/cats`, { params });
  // or const cuteCats = await listTheCats(params);
  console.log("cuteCats", cuteCats);
}
fetchCats();
This is the error: "Request failed with status code 400"
In cuteAnimals.js, is it right to use axios from a file to another file of the same server project?
You need to export the function in order to use it in another file, you can do it simply by writing this line at the end of cats.js
// Export as a named property: cuteAnimals.js destructures `{ listTheCats }`
// from the module and server.js reads `cats.listTheCats`.
module.exports = { listTheCats }

How do I call two different REST api endpoints simultaneously and display the data from both on one endpoint of my app?

I am building a simple application for my portfolio using The Movie Database api. In my GET /movie route, I want to get and display the data about the movie, and the names and photos of the cast members, however the data for the movie and the data for the cast members belong two separate endpoints of the api, and I am at a complete loss as to how to access both response data sets under a single endpoint in my app.
I cannot call axios.get() on both endpoints under the /movie route because I will get a "Headers already sent" error, and I have tried to write a function that uses axios.get for 1 endpoint that returns the response and gets called in my GET /movie route, but that causes the entire GET route to return undefined.
Here is my current code for my /movie route that is incorrect, but closely conveys what I am trying to accomplish
const express = require('express');
const router = express.Router();
const axios = require('axios');
const api_key = require('../config/keys').api_key;
const imgURL = "http://image.tmdb.org/t/p/";
const dateFormat = require('../config/dateFormat');
// Requests the credits for `movie_id` from the TMDB API.
// NOTE: `getActors` is assigned without const/let/var.
// NOTE: the axios promise chain below is not returned from the arrow function,
// so a call to getActors() evaluates to undefined; `return res.data` only sets
// the value of the inner promise. Errors are logged via console.log.
getActors = movie_id => {
axios.get(`https://api.themoviedb.org/3/movie/${movie_id}/credits?api_key=${api_key}&language=en-US`)
.then(res => {
return res.data;
}).catch(err => console.log(err.message));
}
// GET /:id - requests the movie with the id from the URL, then tries to
// render the 'movie' view with the movie data and its actors.
router.get('/:id', (req, res) => {
const id = req.params.id
axios.get(`https://api.themoviedb.org/3/movie/${id}?api_key=${api_key}&language=en-US`)
// NOTE(review): this `res` parameter shadows the Express `res` of the
// enclosing handler, so `res.render` below is called on the axios response.
.then(res => {
const movieData = res.data;
const actors = getActors(id); //calling above function here, but returns undefined and hangs the application
res.render('movie', {movieInfo: movieData, imgURL: imgURL, releaseDate: dateFormat(movieData.release_date), actors: actors})
})
// Only logs; no response is sent to the client from this branch.
.catch(err => console.log(err.status_message))
});
module.exports = router;
any help is greatly appreciated.
You can use axios.all to combine several promises and execute them in parallel. Then, once all of the requests have finished, you can handle their results together in the then callback. For instance, in your code:
const express = require('express');
const router = express.Router();
const axios = require('axios');
const api_key = require('../config/keys').api_key;
const imgURL = "http://image.tmdb.org/t/p/";
const dateFormat = require('../config/dateFormat');
// GET /:id - requests the movie and its credits in parallel and renders the
// 'movie' view once both responses have arrived.
router.get('/:id', (req, res) => {
  const id = req.params.id;
  axios.all([
    axios.get(`https://api.themoviedb.org/3/movie/${id}/credits?api_key=${api_key}&language=en-US`),
    axios.get(`https://api.themoviedb.org/3/movie/${id}?api_key=${api_key}&language=en-US`)
  ])
    // axios.spread passes the results as separate arguments, in request order.
    .then(axios.spread((actorsRes, moviesRes) => {
      // Your logic with each response
      const movieData = moviesRes.data;
      res.render('movie', {movieInfo: movieData, imgURL: imgURL, releaseDate: dateFormat(movieData.release_date), actors: actorsRes.data});
    }))
    .catch(err => {
      // Always answer the client; otherwise the request hangs when either call fails.
      console.log(err.message);
      res.sendStatus(500);
    });
});
module.exports = router;
I see that getActors currently returns undefined, because the axios.get promise is never returned from the function. Change it to...
// Returns a promise for the credits payload of the given movie. If the
// request fails, the error message is logged and the promise resolves with
// undefined.
const getActors = movie_id => {
  const creditsUrl = `https://api.themoviedb.org/3/movie/${movie_id}/credits?api_key=${api_key}&language=en-US`;
  return axios
    .get(creditsUrl)
    .then(response => response.data)
    .catch(error => console.log(error.message));
}
Another problem is how getActors is called. It wraps the asynchronous axios.get() call, so it returns a promise and its result is only available inside a then callback. Change that to ..
// GET /:id - requests the movie, then its actors, and renders the 'movie'
// view once both are available.
router.get('/:id', (req, res) => {
  let movieData;
  const id = req.params.id
  axios.get(`https://api.themoviedb.org/3/movie/${id}?api_key=${api_key}&language=en-US`)
    // Named movieRes so it does not shadow the Express `res` used for rendering.
    .then(movieRes => {
      movieData = movieRes.data;
      // Returning the promise makes the next `then` wait for the actors.
      return getActors(id);
    })
    .then(actors => {
      res.render('movie', {movieInfo: movieData, imgURL: imgURL, releaseDate: dateFormat(movieData.release_date), actors: actors})
    })
    .catch(err => {
      // Error objects carry `message`; also answer the client so the request
      // does not hang when a call fails.
      console.log(err.message)
      res.sendStatus(500)
    })
});

Log visitors on Github page?

I have a lil site on github pages and I was wondering if there was a way to see when people(I think 1-3) visit the page?
It's not enough for the traffic tab on github stats to show anything and since it's on github pages I can't write to file or database.
Any way at all? Use javascript to send tweets on an account made for logging visitor(s), writing to database even though people say that's a bad idea.. Just any way at all?
Maybe this would help. I use this to count visitors on my personal portfolio which is hosted on GitHub pages and it's pretty much effective.
const KEY = `YOUR_KEY`;
const NAMESPACE = "YOURDOMAIN.COM";
const COUNT_URL = `https://api.countapi.xyz`;
const counter = document.getElementById("visit-count");

// Writes the given count into the #visit-count element.
function setValue(num) {
  counter.innerText = `Total Unique Visitor: ${num}`;
}

// Calls one counter endpoint ("get" or "hit") for this namespace/key and
// displays the `value` field of the JSON it returns.
async function showCount(action) {
  const response = await fetch(`${COUNT_URL}/${action}/${NAMESPACE}/${KEY}`);
  const { value } = await response.json();
  setValue(value);
}

const getCount = () => showCount("get");
const incrementCount = () => showCount("hit");

// The "hit" endpoint is called only when this browser has no "hasVisited"
// flag in localStorage; otherwise the "get" endpoint is used.
const isFirstVisit = localStorage.getItem("hasVisited") == null;
if (isFirstVisit) {
  incrementCount()
    .then(() => {
      localStorage.setItem("hasVisited", "true");
    })
    .catch((err) => console.log(err));
} else {
  getCount().catch((err) => console.log(err));
}

Categories

Resources