I have created a handlebars template and compiled it with my custom data. Everything else works fine just the image is not being displayed in the template.
I have done the following:
Made the images folder static using express
app.use(express.static("assets"));
used the image in the template
<img src="sign.png"/>
Here is my code where I'm using the template
const compile = async function (templateName, data) {
const filePath = path.join(process.cwd(), 'Template', `${templateName}.hbs`);
const html = await fs.readFile(filePath, 'utf-8');
return hbs.compile(html)(data);
}
const browser = await puppeteer.launch();
const page = await browser.newPage();
const content = await compile('Passes', jsonData);
await page.setContent(content);
await page.emulateMediaType('screen');
await page.pdf({
path: 'PASSES/' + filenameWE + '.pdf',
format: 'A4',
printBackground: true
});
console.log("Done ---- " + filenameWE);
await browser.close();
Again, I have tried the following:
Made the images folder static using express
used the image in the template
Related
I am trying to download a website as static, I mean without JS, only HTML & CSS.
I've tried many approaches yet some issues still present regarding CSS and Images.
A snippet
const puppeteer = require('puppeteer');
const {URL} = require('url');
const fse = require('fs-extra');
const path = require('path');
(async (urlToFetch) => {
const browser = await puppeteer.launch({
headless: true,
slowMo: 100
});
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on("request", request => {
if (request.resourceType() === "script") {
request.abort()
} else {
request.continue()
}
})
page.on('response', async (response) => {
const url = new URL(response.url());
let filePath = path.resolve(`./output${url.pathname}`);
if(path.extname(url.pathname).trim() === '') {
filePath = `${filePath}/index.html`;
}
await fse.outputFile(filePath, await response.buffer());
console.log(`File ${filePath} is written successfully`);
});
await page.goto(urlToFetch, {
waitUntil: 'networkidle2'
})
setTimeout(async () => {
await browser.close();
}, 60000 * 4)
})('https://stackoverflow.com/');
I've tried using
content = await page.content();
fs.writeFileSync('index.html', content, { encoding: 'utf-8' });
As well as, I download it using CDPSession.
I've tried it using website-scraper
So what is the best approach to come to a solution where I provide a website link, then It downloads it as static website.
Try using this https://www.npmjs.com/package/website-scraper
It will save the website into a local directory
Have you tried something like wget or curl?
wget -p https://stackoverflow.com/questions/67559777/download-website-locally-without-javascript-using-puppeteer
Should do the trick
I'm building a pdf report with handlebars and puppeteer to save into my db. The pdf works fine except that I can't seem to get images that I have stored in an assets directory to load. I am console logging the filePath that I am traversing to when attempting to get the images and they come through appropriately. Just not sure why the images are not loading, any help appreciated.
Here is my code so far.
const puppeteer = require("puppeteer");
const hbs = require("handlebars");
const fs = require("fs-extra");
const path = require("path");
// compiles the handlebars docs
const compile = async (templateName, data) => {
const filePath = path.join(__dirname, "templates", `${templateName}.hbs`);
if (!filePath) {
throw new Error(`Could not find ${templateName}.hbs in generatePDF`);
}
const html = await fs.readFile(filePath, "utf-8");
return hbs.compile(html)(data);
};
// helper for getting the images
hbs.registerHelper("getIntro", (context, idx) => {
const filePath = path.join(
__dirname,
"assets",
`${context}_intro_${idx}.png`
);
return filePath;
});
// use puppeteer to take in compiled hbs doc and create a pdf
const generatePDF = async (fileName, data) => {
const preparedData = prepareDataForPDF(data);
const browser = await puppeteer.launch({
args: ["--no-sandbox"],
headless: true
});
const page = await browser.newPage();
const content = await compile(fileName, preparedData);
await page.goto(`data: text/html;charset=UTF-8, ${content}`, {
waitUntil: "networkidle0"
});
await page.setContent(content);
await page.emulateMedia("screen");
await page.waitFor(100);
const pdf = await page.pdf({
format: "A4",
printBackground: true
});
return pdf;
};
module.exports = generatePDF;
<!-- handlebars partial that handles image loading -->
{{#loop 14}}
<div class="page">
<img src="{{getIntro assessmentType this}}" class="intro-page-content">
</div>
{{/loop}}
<!-- used like this in my main file -->
{{> intro assessmentType="Leadership"}}
I am currently trying to find the number of pages in a single pdf / what is the total size of the pdf file created by puppeteer.page as per requirement
Here's what I did:
try {
const generatedPdfFilePath = `${directory}/feedback-${requestId}.pdf`;
const htmlFilePath = `${directory}/report-${requestId}.html`;
const htmlTemplate =
fs.readFileSync(path.join(process.cwd(), '/data/feedback-template.hbs'), 'utf-8');
const template = handlebars.compile(htmlTemplate);
const htmlFile = minify(template(data), {
collapseWhitespace: true,
});
fs.writeFileSync(htmlFilePath , htmlFile);
const options = {
format: 'A4',
printBackground: true,
path: generatedPdfFilePath ,
};
const browser = await puppeteer.launch({
args: ['--no-sandbox'],
headless: true,
});
const page = await browser.newPage();
await page.goto(`file://${htmlFilePath}`, {
waitUntil: 'networkidle0',
timeout: 300000,
});
await page.pdf(options);
// Do something here to find number of pages in this pdf
await browser.close();
resolve({ file: generatedPdfFilePath });
} catch (error) {
console.log(error);
reject(error);
}
So far what I have done is created an html template for the pdf, then used puppeteer, headless chrome for nodejs to generate the required pdf of the page. But now Im sort of stuck because I want to know how many pages are actually in this pdf file or in other words what is the size of the pdf which I need in further calculations. I have only mentioned the relevant code here for ease.
Also, Im pretty new to puppeteer, Can someone explain how can I get details of this pdf. I have been searching for quite some time now and no luck. Puppeteer's documentation isn't helping in any case no details are there on why we do what we do. All I get is the details on pdf options..
docs
Any help would be much appreciated.
You can use the pdf-parse node module, like this:
const fs = require('fs');
const pdf = require('pdf-parse');
let dataBuffer = fs.readFileSync('path to PDF file...');
pdf(dataBuffer).then(function(data) {
// number of pages
console.log(data.numpages);
});
Your code would become something like:
const pdf = require('pdf-parse');
try {
const generatedPdfFilePath = `${directory}/feedback-${requestId}.pdf`;
const htmlFilePath = `${directory}/report-${requestId}.html`;
const htmlTemplate =
fs.readFileSync(path.join(process.cwd(), '/data/feedback-template.hbs'), 'utf-8');
const template = handlebars.compile(htmlTemplate);
const htmlFile = minify(template(data), {
collapseWhitespace: true,
});
fs.writeFileSync(htmlFilePath , htmlFile);
const options = {
format: 'A4',
printBackground: true,
path: generatedPdfFilePath ,
};
const browser = await puppeteer.launch({
args: ['--no-sandbox'],
headless: true,
});
const page = await browser.newPage();
await page.goto(`file://${htmlFilePath}`, {
waitUntil: 'networkidle0',
timeout: 300000,
});
await page.pdf(options);
// Do something here to find number of pages in this pdf
let dataBuffer = fs.readFileSync(htmlFilePath);
const pdfInfo = await pdf(dataBuffer);
const numPages = pdfInfo.numpages;
await browser.close();
resolve({ file: generatedPdfFilePath });
} catch (error) {
console.log(error);
reject(error);
}
We have some routing logic that kicks you to the homepage if you dont have a JWT_TOKEN set... I want to set this before the page loads/before the js is invoked.
how do i do this ?
You have to register localStorage item like this:
await page.evaluate(() => {
localStorage.setItem('token', 'example-token');
});
You should do it after page page.goto - browser must have an url to register local storage item on it. After this, enter the same page once again, this time token should be here before the page is loaded.
Here is a fully working example:
const puppeteer = require('puppeteer');
const http = require('http');
const html = `
<html>
<body>
<div id="element"></div>
<script>
document.getElementById('element').innerHTML =
localStorage.getItem('token') ? 'signed' : 'not signed';
</script>
</body>
</html>`;
http
.createServer((req, res) => {
res.writeHead(200, { 'Content-Type': 'text/html' });
res.write(html);
res.end();
})
.listen(8080);
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto('http://localhost:8080/');
await page.evaluate(() => {
localStorage.setItem('token', 'example-token');
});
await page.goto('http://localhost:8080/');
const text = await page.evaluate(
() => document.querySelector('#element').textContent
);
console.log(text);
await browser.close();
process.exit(0);
})();
There's some discussion about this in Puppeteer's GitHub issues.
You can load a page on the domain, set your localStorage, then go to the actual page you want to load with localStorage ready. You can also intercept the first url load to return instantly instead of actually load the page, potentially saving a lot of time.
const doSomePuppeteerThings = async () => {
const url = 'http://example.com/';
const browser = await puppeteer.launch();
const localStorage = { storageKey: 'storageValue' };
await setDomainLocalStorage(browser, url, localStorage);
const page = await browser.newPage();
// do your actual puppeteer things now
};
const setDomainLocalStorage = async (browser, url, values) => {
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', r => {
r.respond({
status: 200,
contentType: 'text/plain',
body: 'tweak me.',
});
});
await page.goto(url);
await page.evaluate(values => {
for (const key in values) {
localStorage.setItem(key, values[key]);
}
}, values);
await page.close();
};
in 2021 it work with following code:
// store in localstorage the token
await page.evaluateOnNewDocument (
token => {
localStorage.clear();
localStorage.setItem('token', token);
}, 'eyJh...9_8cw');
// open the url
await page.goto('http://localhost:3000/Admin', { waitUntil: 'load' });
The next line from the first comment does not work unfortunately
await page.evaluate(() => {
localStorage.setItem('token', 'example-token'); // not work, produce errors :(
});
Without requiring to double goTo this would work:
const browser = await puppeteer.launch();
browser.on('targetchanged', async (target) => {
const targetPage = await target.page();
const client = await targetPage.target().createCDPSession();
await client.send('Runtime.evaluate', {
expression: `localStorage.setItem('hello', 'world')`,
});
});
// newPage, goTo, etc...
Adapted from the lighthouse doc for puppeteer that do something similar: https://github.com/GoogleChrome/lighthouse/blob/master/docs/puppeteer.md
Try and additional script tag. Example:
Say you have a main.js script that houses your routing logic.
Then a setJWT.js script that houses your token logic.
Then within your html that is loading these scripts order them in this way:
<script src='setJWT.js'></script>
<script src='main.js'></script>
This would only be good for initial start of the page.
Most routing libraries, however, usually have an event hook system that you can hook into before a route renders. I would store the setJWT logic somewhere in that callback.
I have multiple links in a single page whom I would like to access either sequentially or all at once. What I want to do is open all the links in their respective new tabs and get the page as pdf for all the pages. How do I achieve the same with puppeteer?
I can get all the links with a DOM and href property but I don't know how to open them in new tab access them and then close them.
You can open a new page in a loop:
const puppeteer = require('puppeteer');
(async () => {
try {
const browser = await puppeteer.launch();
const urls = [
'https://www.google.com',
'https://www.duckduckgo.com',
'https://www.bing.com',
];
const pdfs = urls.map(async (url, i) => {
const page = await browser.newPage();
console.log(`loading page: ${url}`);
await page.goto(url, {
waitUntil: 'networkidle0',
timeout: 120000,
});
console.log(`saving as pdf: ${url}`);
await page.pdf({
path: `${i}.pdf`,
format: 'Letter',
printBackground: true,
});
console.log(`closing page: ${url}`);
await page.close();
});
Promise.all(pdfs).then(() => {
browser.close();
});
} catch (error) {
console.log(error);
}
})();
To open a new tab (activate) it you just need to make a call to page.bringToFront()
const page1 = await browser.newPage();
await page1.goto('https://www.google.com');
const page2 = await browser.newPage();
await page2.goto('https://www.bing.com');
const pageList = await browser.pages();
console.log("NUMBER TABS:", pageList.length);
//switch tabs here
await page1.bringToFront();
//Do something... save as pdf
await page2.bringToFront();
//Do something... save as pdf
I suspect you have an array of pages so you might need to tweak the above code to cater for that.
As for generating a single pdf from multiple tabs I am pretty certain this is not possible. I suspect there will be a node library that can take multiple pdf files and merge into one.
pdf-merge might be what you are looking for.
You can also use a for loop.
(async ()=>{
const movieURL= ["https://www.imdb.com/title/tt0234215", "https://www.imdb.com/title/tt0411008"];
for (var i = 0; i < movieURL.length; i++) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(movieURL[i], {waitUntil: "networkidle2"});
const movieData = await page.evaluate(() => {
let movieTitle = document.querySelector('div[class="TitleBlock"] > h1').innerText;
return{movieTitle}
});
await browser.close();
await console.log(movieData);
}
})()