Scrape multiple websites using NodeJS, Express, Cherio and Axios - javascript

I would like to scrape multiple websites using NodeJS, Express, Cheerio and Axios.
I'm able now to scrape 1 website and display the information to the HTML.
But when I try to scrape multiple websites looking for the same element, it doesn't go through the forEach (stops after 1 cycle). Notice my loop which doesn't work correctly:
urls.forEach(url => {
2 files that are the most important:
index.js
const PORT = 8000
const axios = require('axios')
const cheerio = require('cheerio')
const express = require('express')
const app = express()
const cors = require('cors')
app.use(cors())
const urls = ['https://www.google.nl','https://www.google.de']
// const url = 'https://www.heineken.com/nl/nl/'
app.get('/', function(req, res){
res.json('Robin')
})
urls.forEach(url => {
app.get('/results', (req, res) => {
axios(url)
.then(response => {
const html = response.data
const $ = cheerio.load(html)
const articles = []
$('script', html).each(function(){
const link = $(this).get()[0].namespace
if (link !== undefined) {
if (link.indexOf('w3.org') > -1) {
articles.push({
link
})
}
}
})
res.json(articles)
}).catch(err => console.log(err))
})
})
app.listen(PORT, () => console.log('server running on PORT ${PORT}'))
App.js:
const root = document.querySelector('#root')
fetch('http://localhost:8000/results')
.then(response => {return response.json()})
.then(data => {
console.log(data)
data.forEach(article => {
const title = `<h3>` + article.link + `</h3>`
root.insertAdjacentHTML("beforeend", title)
})
})

You're registering multiple route handlers for the same route. Express will only route requests to the first one. Move your URL loop inside app.get("/results", ...)...
app.get("/results", async (req, res, next) => {
try {
res.json(
(
await Promise.all(
urls.map(async (url) => {
const { data } = await axios(url);
const $ = cheerio.load(data);
const articles = [];
$("script", html).each(function () {
const link = $(this).get()[0].namespace;
if (link !== undefined) {
if (link.indexOf("w3.org") > -1) {
articles.push({
link,
});
}
}
});
return articles;
})
)
).flat() // un-nest each array of articles
);
} catch (err) {
console.error(err);
next(err); // make sure Express responds with an error
}
});

Related

Fetch POST - send JSON to backend

Im stuck at something very trivial trying to build my frontend.
My main.js looks like this :
const textfield = document.getElementById('inpTextField')
const btnSendPost = document.getElementById('btnSendPost')
btnSendPost.addEventListener('click', () => {
sendData()
})
function sendData() {
let txtValue = textfield.value
console.log(txtValue)
let sendingJSON = {
"body" : `${txtValue}`
}
fetch("http://localhost:3000/post", {
method: "post",
body: JSON.stringify(sendingJSON),
headers: { "Content-Type": "application.json"}
})
.then(res => res.json())
.then(json => console.log(json))
.catch(err => console.log(err))
}
but my backend (express 4.18) doesnt register POST request, and console gives back enigmatic :
TypeError: Failed to fetch
at <anonymous>:1:876
at sendData (main.js:17:5)
at HTMLButtonElement.<anonymous> (main.js:7:5)
index.js (backend)
const express = require('express')
const app = express()
const port = 3000
const bp = require('body-parser')
let randomJson = {
"apple" : "cider viegar"
}
app.use(bp.json())
app.use(bp.urlencoded(({ extended: true})))
app.get('/',(req,res) => {
res.json(randomJson)
})
app.post('/post', (req, res) => {
var body = req.body
console.log(body)
res.send("ok")
})
app.listen(port, () => {
console.log(`Server running on ${port}`)
})
any ideas ?

Next.js and Express.js give CORS error, API queries only work at build time

I have a project where I use Next.js on the front-end and Express.js on the back.
Front-end side
The 'pages' file contains 'index.js'. In it, I am sending the following request.
import Home from "./home/home";
import axios from "axios";
import { useState } from "react";
export default function Index({ data }) {
const [products, setProducts] = useState(data);
const [start, setStart] = useState(1);
const getMoreProducts = async () => {
setStart(start + 1);
const { newProducts } = await axios.get(
`${process.env.NEXT_PUBLIC_BACK_END}/test`
);
setProducts([...products, ...newProducts]);
};
return (
<div>
<Home data={products} />
<button onClick={getMoreProducts}> Load more {start}</button>
</div>
);
}
export async function getServerSideProps(context) {
// Fetch data from external API
const { data } = await axios.get(
`${process.env.NEXT_PUBLIC_BACK_END}/productlist/pagination`,
{
params: {
page: 1,
limit: 5,
},
}
);
return {
props: {
data: data || {},
},
};
// Pass data to the page via props
}
Back-end side
const express = require("express");
var cors = require('cors')
const app = express();
require("dotenv").config();
const mongoose = require("mongoose");
mongoose.connect(
"*********",
{ useNewUrlParser: true }
);
const db = mongoose.connection;
db.on("error", (err) => console.error(err));
db.once("open", () => console.log("Connected to Database "));
app.use(express.json());
const productlistRouter = require("./routes/productlist");
const test = require("./routes/test");
app.use("/productlist", productlistRouter);
app.use("/test", test);
app.use(cors())
app.listen(3000, () => console.log("Server is running"));
And here is my Route code :
const express = require("express");
const router = express.Router();
const Product = require("../models/product");
const cors = require("cors");
const corsOptions = {
headers: [
{ key: "Access-Control-Allow-Credentials", value: "true" },
{ key: "Access-Control-Allow-Origin", value: "*" },
// ...
],
origin: "*",
optionsSuccessStatus: 200, // some legacy browsers (IE11, various SmartTVs) choke on 204
};
router.get("/showall", async (req, res) => {
try {
const product = await Product.find();
res.json(product);
} catch (err) {
res.status(500).json({ message: err.message });
}
});
router.get("/pagination", cors(corsOptions), async (req, res) => {
const page = req.query.page;
const limit = req.query.limit;
const startIndex = (page - 1) * limit;
const endIndex = page * limit;
const products = await Product.find();
const result = products.slice(startIndex, endIndex);
res.json(result);
});
So, When the page is first built with Next.js, the api works, but when I click the 'Load more' button, it gives a CORS error. When I use the same query with 'postman' and other tools, it does not give any error.
On the Next.js side, it works when I send a query to another 3rd party API., but it doesn't work when I send it to my own back-end. And no matter what page or component I do this in, only the APIs that are created at the build time are working.
What could be the reason for this? and how can i solve it? I've read and searched a few articles about cors, but I still haven't found a solution for days.
CORS should be placed on top level as javascript is executed one by one line. Place app.use(cors()) just above the line of app.use("/productlist", productlistRouter);

Axios NPM not fetching data express.js

I have a express.js route whose code is below: -
const path = require("path");
const express = require("express");
const hbs = require("hbs");
const weather = require("./weather");
const app = express();
app.get("/weather", (req, res) => {
if (!req.query.city) {
return res.send({
error: "City Not Found",
});
}
res.send({
currentTemp: weather.temp,
});
});
And I also have a file to fetch data from api using axios whose code is here
const axios = require("axios");
axios
.get(
"https://api.openweathermap.org/data/2.5/weather?q=samalkha&appid=91645b79f9eac8808153c90472150f2d"
)
.then(function (response) {
module.exports = {
temp: response.data.main.temp
}
})
.catch(function (error) {
console.log("Error Spotter");
});
As I am using res.send I should get a json with currentTemp and the value of current temp should be temperature that I will get from weather.js file but I am getting a blank json array.
Try this.
You'll get the temperature in the localhost:3000
If you want to render the data for EJS (or something) instead of .then((data) => res.json(data.main.temp)) use:
.then((data) => res.render("index", { weather: data })
--
const URL =
"https://api.openweathermap.org/data/2.5/weather?q=samalkha&appid=91645b79f9eac8808153c90472150f2d"
const express = require("express")
const axios = require("axios")
const app = express()
const PORT = 3000
app.get("/", (req, res) => {
axios
.get(URL)
.then((response) => response.data)
.then((data) => res.json(data.main.temp))
.catch((err) => console.log(err))
})
app.listen(PORT, () => {
console.log(`Listening at http://localhost:${PORT}`)
})
module.exports is processed when the module is being defined. when you import weather, it does not exists therefore you get no data.
try to export a function which does the request and add a callback function as argument so you can pass the request result to it.
Change your weather.js to the following example:
const axios = require("axios");
let temperature;
async function getTemp() {
await axios
.get("https://api.openweathermap.org/data/2.5/weather?q=samalkha&appid=91645b79f9eac8808153c90472150f2d")
.then(function (response) {
temperature = response.data.main.temp
})
.catch(function (error) {
console.log("Error Spotter");
});
}
module.exports = {
temp: getTemp
}
This will actually return the fetched temperature.

Network Error when connecting React frontend to

The app is using next.js connected to express, which is in turn connected to AWS MySql.
I'm trying to initially load some products I have stored on my database. However, i get the following network error:-
TypeError: NetworkError when attempting to fetch resource.
I troubleshooted the frontend code by using an external api and that works fine, so it's something to do with the express middleware.
See code excerpts below...
index.js
import '../scss/style.scss';
import NavBar from '../components/Navbar/Navbar';
import fetch from 'isomorphic-unfetch';
const Index = (props) => (
<div>
<NavBar />
<h1>See our products below yeah</h1>
</div>
)
Index.getInitialProps = async function() {
const res = await fetch('localhost:3000/');
const data = await res.json();
return {
posts: data
}
}
export default Index;
server.js
const express = require('express');
const next = require('next');
const port = process.env.PORT || 3000;
const dev = process.env.NODE_ENV !== 'production';
const app = next({ dev });
const handle = app.getRequestHandler();
const db = require('../lib/db')
const escape = require('sql-template-strings')
app.prepare().then(() => {
const server = express()
server.get('/', (req, res, next) => {
let sql = 'SELECT * FROM products'
db.query(sql, [], (err, rows) => {
if (err) {
throw err;
} else {
return res.json({
data: rows
})
}
})
})
...
db.js
const mysql = require('serverless-mysql')
const db = mysql({
config: {
host: "XXX",
database: "XXX",
user: "XXX",
password: "XXX"
}
})
exports.query = async query => {
try {
const results = await db.query(query)
await db.end()
return results
} catch (error) {
return { error }
}
}
whenever you have api request from a different server you have to setup CORS on both frontend and backend
Link: https://expressjs.com/en/resources/middleware/cors.html

How to download a video file into browser

I made the routes below in express to download a video from youtube, but the video file goes into my project folder and it is not downloaded into the browser. It's like it works only in localhost.
This is my project folder:
node_modules
src
-app.js
static
-index.html
-styles.css
package.json
'THE FILE DOWNLOADED'
This is the code i made
const express = require('express')
const router = express.Router()
const fs = require('fs')
const ytdl = require('ytdl-core')
const youtube = require('simple-youtube-api')
router.post('/downloadByUrl', async (req, res) => {
let url = req.body.url
await downloadVideo(url)
return res.redirect('/')
})
router.post('/downloadByName', async (req, res) => {
let videoName = req.body.videoName
let youtubeClient = new youtube('MY YOUTUBE API KEY')
await youtubeClient.searchVideos(videoName, 1)
.then(results => {
downloadVideo('https://youtu.be/' + results[0].id)
})
.catch(console.log)
return res.redirect('/')
})
function downloadVideo(URL) {
let videoReadableStream = ytdl(URL)
ytdl.getInfo(URL, (err, info) => {
let title = info.title.replace('|', '').toString('ascii')
let stream = videoReadableStream.pipe(fs.createWriteStream(title + '.mp4'))
stream.on('finish', () => {
console.log('Finished')
})
})
}
module.exports = router
I want to host my website so i want to make possible the download of the file into the 'download' section of the browser.
How can I do that?

Categories

Resources