I am trying JavaScript for the first time and I am having trouble with this example:
https://www.twilio.com/blog/web-scraping-and-parsing-html-with-node-js-and-cheerio
It is a web scraper example that uses got and cheerio, both of which I have installed. But when I run the sample code it gives me a 'cannot convert undefined or null to object' error.
Why is that? I didn't change anything from the example at all.
the code in question:
// Load the fetched page HTML into cheerio and log every anchor's href.
// NOTE(review): this is a fragment — the enclosing got(vgmUrl).then((response) => { ... })
// call is not shown, which is why the braces at the end look unbalanced here.
const $ = cheerio.load(response.body);
// `link` is a raw parsed node, so the attribute is read from its
// `attribs` map rather than via $(link).attr('href').
$('a').each((i, link) => {
const href = link.attribs.href;
console.log(href);
});
}).catch(err => {
console.log(err);
});
What does your index.js file look like? I did the tutorial and my code is working. Maybe you mistyped the URL?
Here is my index.js
const fs = require("fs");
const cheerio = require("cheerio");
const got = require("got");

// Page whose anchor hrefs we want to list.
const vgmUrl = "https://www.vgmusic.com/music/console/nintendo/nes";

got(vgmUrl)
  .then((response) => {
    // Parse the fetched HTML and print the href of every <a> element.
    const $ = cheerio.load(response.body);
    $("a").each((index, anchor) => {
      console.log(anchor.attribs.href);
    });
  })
  .catch((err) => {
    console.log(err);
  });
Related
I've tried everything I can think of. I'm able to get postUrl, date, title, price and location. If you go to https://sandiego.craigslist.org/search/sss?query=surfboards and paste the code snippet below into the console it returns all the images. But when I try to access in my code it's returning undefined. Any help on this would be greatly appreciated!
// For each search-result row, find the first <img> among its descendants
// and log that image's src (undefined when the row contains no <img>
// with a src in the fetched HTML).
$('#search-results > li').each((index, element) => {
console.log( $(element).children().find('img').attr('src') )
})
import axios from 'axios'
import request from 'request-promise'
import cheerio from 'cheerio'
import express from 'express'
import path from 'path'
// ES modules do not provide __dirname, so rebuild it from the cwd.
const __dirname = path.resolve();
const PORT = process.env.PORT || 8000;
const app = express();
// Serve the static landing page.
app.get('', (req, res) => {
res.sendFile(__dirname + '/views/index.html')
});
// Filled asynchronously by the axios scrape below; /api/surfboards
// returns an empty list until that request has completed.
const surfboards = [];
axios("https://sandiego.craigslist.org/search/sss?query=surfboards")
.then(res => {
const htmlData = res.data;
const $ = cheerio.load(htmlData);
// One <li> per search result; extract the fields from each row.
$('#search-results > li').each((index, element) => {
const postUrl = $(element).children('a').attr('href');
const date = $(element).children('.result-info').children('.result-date').text();
const title = $(element).children('.result-info').children('.result-heading').text().trim();
const price = $(element).children('.result-info').children('.result-meta').children('.result-price').text();
const location = $(element).children('.result-info').children('.result-meta').children(".result-hood").text().trim();
// Why is this not working?!?!?!?!?!
// NOTE(review): src comes back undefined here — presumably the images are
// injected client-side, so the HTML axios receives has no src attribute to
// read; confirm against the raw response body.
const img = $(element).children().find('img').attr('src');
surfboards.push({
title,
postUrl,
date,
price,
location,
img
})
})
// NOTE(review): this return value is never consumed — results are shared
// through the outer `surfboards` array instead.
return surfboards
}).catch(err => console.error(err))
// Read endpoint for whatever has been scraped so far.
app.get('/api/surfboards', (req, res) => {
const usedboards = surfboards
return res.status(200).json({
results: usedboards
})
})
// Make App listen
app.listen(PORT, () => console.log(`Server is listening to port ${PORT}`))
Looks like the page sets the images with JavaScript. Thus axios gets the HTML without actual links to images.
But there seems to be a workaround here. You can generate links to images by concatenating https://images.craigslist.org with the data-ids value from the parent a tag.
You can get the data-ids like this:
var data_ids = $(element).children('a').attr('data-ids')
then split it into an array on commas, strip the two-character 3: prefix from each entry, and concatenate it like this:
`${img_base_url}/${ids}_${resolution_and_extension}`
But if you only need the URL of the first image, there is no need to create a new array each time. Use substring instead (note that some li elements have no image at all):
// Derive the id of the FIRST image from the data-ids attribute value.
// NOTE(review): both branches compute the substring and then discard it —
// assign the result (e.g. const firstId = ...) for it to be usable.
if (data_ids && data_ids.includes(',')) {
// Multiple ids: take the text between the leading "3:" and the first comma.
data_ids.substring(data_ids.indexOf('3:') + 2, data_ids.indexOf(','))
} else if (data_ids) {
// Single id: just strip the leading "3:" prefix.
data_ids.substring(data_ids.indexOf('3:') + 2, data_ids.length)
}
I'm trying to use stream-json to read a zip, unzip it, and then write it to file. I don't think I understand how to use the library.
Based on the link above, they have this example:
const {chain} = require('stream-chain');
const {parser} = require('stream-json');
const {pick} = require('stream-json/filters/Pick');
const {ignore} = require('stream-json/filters/Ignore');
const {streamValues} = require('stream-json/streamers/StreamValues');
const fs = require('fs');
const zlib = require('zlib');
// Build the processing pipeline: gunzip the file, parse the JSON token
// stream, keep only the "data" subtree, drop any "_meta" keys, then stream
// the individual values through a final filter step.
const pipeline = chain([
  fs.createReadStream('sample.json.gz'),
  zlib.createGunzip(),
  parser(),
  pick({filter: 'data'}),
  ignore({filter: /\b_meta\b/i}),
  streamValues(),
  (data) =>
    // keep data only for the accounting department
    data.value && data.value.department === 'accounting' ? data : null,
]);

// Count how many records survive the filter, then report the total.
let counter = 0;
pipeline.on('data', () => {
  counter += 1;
});
pipeline.on('end', () =>
  console.log(`The accounting department has ${counter} employees.`));
However I don't want to count anything, I just want to write to file. Here is what I have that works:
// Reads AllPrintings.json.zip, extracts every entry, and writes each
// entry's content to AllPrintings.json (all entries target the same path,
// matching the original behavior — the last entry wins).
// Returns a Promise that settles only after all writes have finished.
// Fixes vs. original: the Promise previously resolve()d immediately,
// before the zip was parsed or any file written; errors from loadAsync /
// the write were stored in a local variable but never rejected; and the
// progress log used a broken interpolation instead of `${filename}`.
function unzipJson() {
  const zipPath = Path.resolve(__dirname, 'resources', 'AllPrintings.json.zip');
  const jsonPath = Path.resolve(__dirname, 'resources', 'AllPrintings.json');
  console.info('Attempting to read zip');
  return new Promise((resolve, reject) => {
    Fs.readFile(zipPath, (err, data) => {
      if (err) {
        console.log(err);
        return reject(err);
      }
      const zip = new JSZip();
      zip.loadAsync(data)
        .then((contents) =>
          // Extract every entry and wait for all writes to complete.
          Promise.all(Object.keys(contents.files).map((filename) => {
            console.info(`Writing ${filename} to disk...`);
            return zip.file(filename).async('nodebuffer')
              .then((content) => { Fs.writeFileSync(jsonPath, content); });
          })))
        .then(() => resolve())
        .catch((zipErr) => {
          console.log(zipErr);
          reject(zipErr);
        });
    });
  });
}
However I can't easily add any processing to this, so I wanted to replace it with stream-json. This is my partial attempt, as I don't know how to finish:
// Partial attempt at a stream-based rewrite (the function body is not
// closed in this snippet as pasted).
function unzipJson() {
const zipPath = Path.resolve(__dirname, 'resources', 'myfile.json.zip');
const jsonPath = Path.resolve(__dirname, 'resources', 'myfile.json');
console.info('Attempting to read zip');
// NOTE(review): parser() emits a token/object stream, not text, so it
// cannot be piped straight into a file write stream — a stringer() stage
// is needed first. Also, zlib handles gzip/deflate, not .zip archives;
// confirm the input format before relying on createGunzip here.
const pipeline = chain([
Fs.createReadStream(zipPath),
zlib.createGunzip(),
parser(),
Fs.createWriteStream(jsonPath),
]);
// use the chain, and save the result to a file
pipeline.on(/*what goes here?*/)
Later on I intend to add extra processing of the json file(s), but I want to learn the basics before I start throwing in extra functionality.
I can't produce a minimal example unfortunately, as I don't know what goes into the pipeline.on function. I'm trying to understand what I should do, not what I've done wrong.
I also looked at the related stream-chain, which has an example that ends like so:
// use the chain, and save the result to a file
dataSource.pipe(chain).pipe(fs.createWriteStream('output.txt.gz'));`
But at no point does the documentation explain where dataSource comes from, and I think my chain creates its own by reading the zip from a file?
How am I supposed to use these streaming libraries to write to file?
I don't want to count anything, I just want to write to file
In that case, you'll need to convert the token/JSON data stream back into a text stream that you can write to a file. You can use the library's Stringer for that. Its documentation also contains an example that seems to be more in line with what you want to do:
// Gunzip, parse, optionally filter, then stringify the token stream back
// to JSON text and write it out (re-gzipped) to a file.
chain([
  fs.createReadStream('data.json.gz'),
  zlib.createGunzip(),
  parser(),
  pick({filter: 'data'}), // omit this if you don't want to do any processing
  stringer(),
  // createGzip() is the documented zlib factory; zlib.Gzip is the class itself.
  zlib.createGzip(), // omit this if you want to write an unzipped result
  fs.createWriteStream('edited.json.gz')
]);
I am currently learning Solidity and creating my first project. I am trying to test the deployment of my contract with Truffle and I keep getting the below error:
TypeError: artifacts.reqiure is not a function
Syntax looks correct and there are no errors appearing. I have also gone into the truffle console and the migration seems to have deployed ok and Color.json is now in my abis folder as well.
Any help will be appreciated, all files are below.
Color.sol
pragma solidity 0.5.0;
import "./ERC721Full.sol";
// ERC-721 non-fungible token contract named "Color" with symbol "COLOR",
// inheriting all behavior from the OpenZeppelin-style ERC721Full base.
contract Color is ERC721Full {
// Initialise function
constructor () ERC721Full("Color", "COLOR") public {
}
}
Color.test.js
// Load the contract abstraction. NOTE: the method is artifacts.require —
// the original `artifacts.reqiure` typo is exactly what produced
// "TypeError: artifacts.reqiure is not a function".
const Color = artifacts.require('./Color.sol')

require('chai')
  .use(require('chai-as-promised'))
  .should()

contract('Color', (accounts) => {
  let contract

  // Deploy the contract once before the tests run.
  before(async () => {
    contract = await Color.deployed()
  })

  describe('deployment,', async () => {
    it('deploys successfully', async () => {
      contract = await Color.deployed()
      const address = contract.address
      console.log(address)
      // A successfully deployed contract has a real, non-empty address.
      assert.notEqual(address, "")
      assert.notEqual(address, 0x0)
      assert.notEqual(address, null)
      assert.notEqual(address, undefined)
    })

    it('has a name', async () => {
      const name = await contract.name()
      assert.equal(name, 'Color')
    })

    it('has a symbol', async () => {
      const symbol = await contract.symbol()
      assert.equal(symbol, 'COLOR')
    })
  })
})
2_deploy_contracts.js
const Color = artifacts.require("Color");
module.exports = function(deployer) {
deployer.deploy(Color);
};
1_init_migration.js
const Migrations = artifacts.require("Migrations");
module.exports = function(deployer) {
deployer.deploy(Migrations);
};
Make sure you have
require('@nomiclabs/hardhat-truffle5');
before you try to call artifacts.require.
You have a typo in Color.test.js
const Color = artifacts.reqiure('./Color.sol')
should be require
I tried out this code and it showed an error message; after looking into it, the problem is just a small spelling error on this particular line in the Color test code:
const Color = artifacts.reqiure('./Color.sol')
Try replacing it with this: -
const Color = artifacts.require('./Color.sol')
I have one scenario where I have to verify the downloaded text file's data against an API response.
Below is the code that I have tried.
Test:
// Cypress test: compare the downloaded text file's content with the
// intercepted API response.
const path = require('path')
const downloadsFolder = Cypress.config('downloadsFolder')
// Clear the downloads folder first so only the new download is present.
cy.task('deleteFolder', downloadsFolder)
// NOTE(review): the file name is hard-coded here, but the real name is
// dynamic (AUADLFA.txt, CIABJPT.txt, ...) — this is the part to replace.
const downloadedFilename = path.join(downloadsFolder, 'ABCDEF.txt')//'*.txt'
....
cy.get('#portmemo').its('response.body')
.then((response) => {
var json = JSON.parse(response);
// Strip CR/LF so the comparison ignores line-ending differences.
const resCon = json[0].content.replaceAll(/[\n\r]/g, '');
cy.readFile(downloadedFilename).then((fc) => {
const formatedfc = fc.replaceAll(/[\n\r]/g, '');
cy.wrap(formatedfc).should('contains', resCon)
})
})
Task in /cypress/plugins/index.js
const { rmdir } = require('fs')
// Cypress plugins entry point: registers node-side tasks.
module.exports = (on, config) => {
console.log("cucumber started")
on('task', {
// Recursively delete folderName, retrying up to 5 times; resolves with
// null because a Cypress task must not resolve with undefined.
deleteFolder(folderName) {
return new Promise((resolve, reject) => {
rmdir(folderName, { maxRetries: 5, recursive: true }, (err) => {
if (err) {
console.error(err);
return reject(err)
}
resolve(null)
})
})
},
})
// NOTE(review): the closing brace for the module.exports arrow function
// is missing in this snippet as pasted.
When I have the downloadedFilename as 'ABCDEF.txt', it works fine [I have hard coded here]. But I need some help to get the (dynamic) file name as it changes every time [eg.: AUADLFA.txt, CIABJPT.txt, SVACJTM.txt, PKPQ1TM.txt & etc.,].
I tried to use '.text' but I get 'Timed out retrying after 4000ms: cy.readFile("C:\Repositories\xyz-testautomation\cypress\downloads/.txt") failed because the file does not exist error.
I referred to this doc as well but no luck yet.
What is the right way to use regex to achieve the same? Also wondering is there a way to get the recently downloaded file name?
You can make use of the task shown in this question How can I verify if the downloaded file contains name that is dynamic
/cypress/plugins/index.js
const fs = require('fs');

on('task', {
  // Return the list of file names currently in the downloads folder.
  downloads(downloadspath) {
    return fs.readdirSync(downloadspath);
  },
});
This returns a list of the files in the downloads folder.
Ideally you'd make it easy on yourself, and set the trashAssetsBeforeRuns configuration. That way, the array will only contain the one file and there's no need to compare arrays before and after the download.
(Just noticed you have a task for it).
I'm trying to use a "lightweight offline database", which stores data in .csv files. Documentation to the module: https://www.npmjs.com/package/csv-db
The module seems not to work for me, tried everything. my actual code is:
index.js:
const CsvDb = require('csv-db');
const db = new CsvDb("test_db.csv");
(async () => {
// BUG under discussion: this inner `const db` shadows the outer `db`,
// so the initializer's `db.get()` refers to the still-uninitialized
// inner binding — hence the ReferenceError at runtime.
const db = await db.get();
await db.insert({
id: 3,
title: "Obj3Title."
})
.then((data) => console.log(data), (err) => console.log(err));
})();
test_db.csv: (lies in same directory as index.js)
id;title;
1;Obj1Title;
2;Obj2Title;
So I get this error:
UnhandledPromiseRejectionWarning: ReferenceError: db is not defined
I also tried the following as the path in "new CsvDb(...)", getting the same error:
const db = new CsvDb(__dirname + "\\test_db.csv");
const db = new CsvDb("./test_db.csv");
thanks in advance for any help!
Here's what you are doing wrong.
You are re-assigning value to the constant "db".
Here's what you should be doing:
const CsvDb = require('csv-db');

// Name the constructor result differently from the data handle so the
// inner `db` no longer shadows itself.
const csvDbInstance = new CsvDb("test_db.csv");

(async () => {
  const db = await csvDbInstance.get();
  try {
    // Insert a row and log the library's response; the original mixed
    // `await` with .then(onOk, onErr) — try/catch keeps the same logging
    // behavior in a single, consistent style.
    const data = await db.insert({
      id: 3,
      title: "Obj3Title."
    });
    console.log(data);
  } catch (err) {
    console.log(err);
  }
})();
Hope this helps.