I am making a handler library named handle_http.js:
/**
 * Request-handler stub for redirecting plain-HTTP requests.
 *
 * Only the lookup query is defined in this snippet; the redirection logic
 * itself is still a placeholder, so none of the parameters are used yet.
 *
 * @param {*} db - Database handle supplied by the caller.
 * @param {*} req - Incoming request.
 * @param {*} res - Outgoing response.
 */
module.exports.redirectHttpToHttps = (db, req, res) => {
  // The query previously repeated the `exact_match = 1` predicate; one occurrence is enough.
  const sql = "SELECT * from redirect where use_in_http = 1 and exact_match = 1 and url_from = ? LIMIT 1";
  // redirection logic
};
And I made an HTTP server that consumes the library:
const http = require('node:http');
// A simple database connection generator
const db = require('./db.js');
const handler = require('./handle_http.js');
// Keep a reference to the server: `listen` is a method of the instance returned
// by `createServer`, not of the `http` module itself.
const server = http.createServer((req, res) => {
  // Delegate every incoming request to the library handler.
  handler.redirectHttpToHttps(db, req, res);
});
server.listen(80);
But before running the actual code, I want to write some unit tests using Jest (test_redirect.jest.js):
const db = require('../src/db.js');
const redirect = require('../src/handle_http.js');
// Jest test (incomplete): seeds the database and is meant to call the redirect handler.
test("redirect to https",()=>{
// Obtain a database handle for ':memory:'.
const dbHandler = db(':memory:');
// Insert six sample rows into the `redirect` table.
// NOTE(review): neither `database` nor `err_callback` is declared anywhere in this snippet.
database.exec(`
INSERT INTO redirect (url_from,url_to,method,http_status_code,use_in_http,exact_match) VALUES
('http://google.com/mytest','http://yahoo.com','GET',301,1,1),
('http://google.com/mytest2','http://yandex.com','GET',302,1,0),
('http://google.com?q=ikariam','http://yandex.com','GET',302,1,1),
('http://example.com/products','https://fakestoreapi.com/products','POST',308,1,1),
('http://example.net/products','https://fakestoreapi.com/products','POST',308,1,0),
('http://example.net','https://fakestoreapi.com/products','POST',308,1,0);
`,function(error){ err_callback(error); });
// need to make fake request so I can call the `redirectHttpToHttps`
// NOTE(review): the request/response arguments below are still placeholders, and the
// `db` module itself is passed here rather than the `dbHandler` created above.
redirect.redirectHttpToHttps(db,/*mocked_request*/,/*some way to assert the response*/)
});
As you can see, I am able to populate an in-memory database with fake data, but I do not know:
How I can fake an incoming HTTP request.
How I can assert that the HTTP response has the appropriate status code and headers.
The provided example does not cut it in my case because I need to test the HTTP handling logic in my own HTTP server written in Node.js.
An approach is to use the supertest and create an http server on the fly:
const http = require('node:http');
const request = require('supertest');
const db = require('../src/db.js');
const redirect = require('../src/handle_http.js');
// Jest test: seeds the `redirect` table, wraps the redirect handler in a plain
// HTTP server and checks the response status code through supertest.
test("redirect to https", (done) => {
  const dbHandler = db(':memory:');

  // `dbHandler` is the only database handle created in this test, so seed through it
  // (the previous version referenced an undeclared `database`).
  dbHandler.exec(`
INSERT INTO redirect (url_from,url_to,method,http_status_code,use_in_http,exact_match) VALUES
('http://google.com/mytest','http://yahoo.com','GET',301,1,1),
('http://google.com/mytest2','http://yandex.com','GET',302,1,0),
('http://google.com?q=ikariam','http://yandex.com','GET',302,1,1),
('http://example.com/products','https://fakestoreapi.com/products','POST',308,1,1),
('http://example.net/products','https://fakestoreapi.com/products','POST',308,1,0),
('http://example.net','https://fakestoreapi.com/products','POST',308,1,0);
`, (error) => {
    // Report a seeding failure and stop. On success `done` must NOT be called here:
    // doing so would finish the test before the request below is made and would
    // invoke `done` a second time when the assertion completes.
    if (error) {
      done(error);
      return;
    }

    // The server is not bound to a port here; it is handed straight to supertest.
    const server = http.createServer((req, res) => {
      redirect.redirectHttpToHttps(dbHandler, req, res);
    });

    // The request is issued only after the rows have been inserted.
    request(server)
      .get('/mytest')
      .set('Host', 'google.com')
      .expect(301, done);
  });
});
Pay attention to the lines:
// Send GET /mytest with the Host header set to google.com to `server`,
// expect status 301, and pass `done` as the completion callback.
request(server)
.get('/mytest')
.set('Host','google.com')
.expect(301,done);
Using the request function from supertest, I provide a server instance that does not listen on any port:
// Plain HTTP server whose request listener only forwards each request to the handler under test.
const server = http.createServer((req,res)=>{
redirect.redirectHttpToHttps(dbHandler,req,res)
});
During testing, you can avoid HTTPS altogether and create plain non-SSL servers that call the HTTP handling function you want to test.
Miscellaneous
Also, your code has an error in this section:
// Excerpt of the seeding call: six rows are inserted into `redirect`, and the
// completion callback forwards its argument to `err_callback`.
// NOTE(review): `err_callback` is not declared anywhere in this excerpt.
database.exec(`
INSERT INTO redirect (url_from,url_to,method,http_status_code,use_in_http,exact_match) VALUES
('http://google.com/mytest','http://yahoo.com','GET',301,1,1),
('http://google.com/mytest2','http://yandex.com','GET',302,1,0),
('http://google.com?q=ikariam','http://yandex.com','GET',302,1,1),
('http://example.com/products','https://fakestoreapi.com/products','POST',308,1,1),
('http://example.net/products','https://fakestoreapi.com/products','POST',308,1,0),
('http://example.net','https://fakestoreapi.com/products','POST',308,1,0);
`,function(error){ err_callback(error); });
Function err_callback is not defined. Therefore I used Jest's done function, as described in the documentation.
So the refactored part of the test is:
// Seed the `redirect` table through `dbHandler`, the handle the test creates
// (the previous version referenced an undeclared `database`).
// Only failures are reported to Jest here: calling `done` on success would end
// the test before the HTTP assertion runs and would invoke `done` twice.
dbHandler.exec(`
INSERT INTO redirect (url_from,url_to,method,http_status_code,use_in_http,exact_match) VALUES
('http://google.com/mytest','http://yahoo.com','GET',301,1,1),
('http://google.com/mytest2','http://yandex.com','GET',302,1,0),
('http://google.com?q=ikariam','http://yandex.com','GET',302,1,1),
('http://example.com/products','https://fakestoreapi.com/products','POST',308,1,1),
('http://example.net/products','https://fakestoreapi.com/products','POST',308,1,0),
('http://example.net','https://fakestoreapi.com/products','POST',308,1,0);
`, function (error) {
  if (error) {
    done(error);
  }
});
I am trying to scrape a classified ad search result page.
I have tried console logging everything I can to make sure I am getting a response, which I am, but when I actually use cheerio to query something I don't get anything back. For instance if I just query for how many children using $('.listing-group').children('section').length I get back 0 instead of 24 when I console log it.
Here is what I'm doing. Pardon the long URL.
// `express` is called below, so it has to be required as well.
const express = require("express");
const request = require("request");
const cheerio = require("cheerio");

const app = express();

// GET /scrape: downloads the search-results page, counts the <section> elements
// found under #search-results and logs that count to the console.
app.get("/scrape", function(req, res) {
  // Declared with `const` so the handler no longer creates an implicit global `url`.
  const url =
    "http://classifieds.ksl.com/search/?keyword=code&category%5B%5D=Books+and+Media&zip=&miles=25&priceFrom=&priceTo=&city=&state=&sort=";

  request(url, function(error, response, html) {
    if (!error) {
      const $ = cheerio.load(html);
      const test = $("#search-results").find("section").length;
      console.log(test);
    } else {
      console.log("there has been an error");
    }
    // The HTTP response is sent in both the success and the error case.
    res.send("Check the console.");
  });
});

app.listen("8081");
console.log("check localhost:8081/scrape");

exports = module.exports = app;
I'm new to cheerio so I'm assuming I'm probably making a simple error, but with all the tutorials I've checked, I can't seem to find an answer.
Ultimately I want to grab each result on the page (found in the 'section' tags) to display the data for a personal project I'm working on.
It looks like:
// Capture the bracketed array text that follows "listings: " in `html` and parse it as JSON.
// NOTE(review): `match` returns null when the pattern is absent, in which case `[1]` throws.
JSON.parse(html.match(/listings: (\[.*\])/)[1])
Here is the error I am facing when sending input to a child process written in Python.
When I send the data for the first time I get output, but with the second input I get an error. The hint is that the pipe appears to be closed right after I receive the first output.
Can you help me?
Here is the Node code.
var bodyParse = require('body-parser');
var urlencodedParser = bodyParse.urlencoded({extended: false});
var spawn = require('child_process').spawn
// Start the Python script once, when this module is loaded; the POST handler below
// reads from and writes to this single child process.
var py = spawn('python', ['dialogue_management_model.py'])
module.exports = function(app) {
app.get('/', function(req, res) {
res.render('index');
});
app.post('/', urlencodedParser, function(req, res) {
var typed = (JSON.stringify(req.body).substring(2, JSON.stringify(req.body).indexOf(":") - 1));
console.log(typed)
module.exports.typed = typed
var data = typed;
dataString = '';
// Handling the Input data from the Front End With the Post request.
// taking computed/operated data from the python file
py.stdout.on('data', function(data){
dataString += data.toString();
});
// Simply logging it to the Console
py.stdout.on('end', function(){
console.log(dataString);
res.send(dataString);
});
// python doesn't understand the data without string format
py.stdin.write(data);
py.stdin.end();
})
}
The server is started in another file, which passes full control to this module; from here I call the Python code, which takes the input, does the computation and passes the result back to me.
You are ending the input stream entirely after the first call. Move var py = spawn('python', ['dialogue_management_model.py']) into the post request handler, so each request will spawn a child process, write the data, end the input stream, wait for a response, and return a result when the output stream ends.
This gives you the added benefit of making it more thread safe. That is to say, if you have two requests come in at the same time, both will end up adding listeners via py.stdout.on('data', ...), resulting in both getting a mixture of output. Also, I'm fairly certain py.stdout.on('end', ...) would only trigger once, so any request that comes in after the stdout end callback has run for the first request(s) would hang until it times out.
Also, unrelated to your question, but when you do:
var typed = (JSON.stringify(req.body).substring(2, JSON.stringify(req.body).indexOf(":") - 1))
you should assign the JSON.stringify() to a variable so you don't have to run it twice.
ie. var payload = JSON.stringify(req.body); var typed = (payload.substring(2, payload.indexOf(":") - 1))
But even then, if you just need the first key you can do Object.keys(req.body)[0] rather than converting the object to a string and parsing the string.