Node.js: share connection object throughout the application - javascript

I am a having issues with implementing generic-pool using puppeteer. Below is my relevant part of the code.
UPDATE
Thanks #Jacob for the help and i am more clear about the concept and how it works and the code is also more readable and clear. I am still having issues where a generic pool is getting created on every request. How do i ensure that the same generic pool is used every time instead of creating new one
browser-pool.js
const genericPool = require('generic-pool');
const puppeteer = require('puppeteer');
class BrowserPool {
static async getPool() {
const browserParams = process.env.NODE_ENV == 'development' ? {
headless: false,
devtools: false,
executablePath: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
}
:
{
headless: true,
devtools: false,
executablePath: 'google-chrome-unstable',
args: ['--no-sandbox', '--disable-dev-shm-usage']
};
const factory = {
create: function() {
return puppeteer.launch(browserParams);
},
destroy: function(instance) {
console.log('closing browser in hrere.....');
instance.close();
}
};
const opts = {
max: 5
};
this.myBrowserPool = genericPool.createPool(factory, opts);
}
static async returnPool() {
if (this.myBrowserPool == "") {
getPool();
}
return this.myBrowserPool.acquire();
}
}
BrowserPool.myBrowserPool = null;
module.exports = BrowserPool;
process-export.js
const BrowserPool = require('./browser-pool');
async function performExport(params){
const myPool = BrowserPool.getPool();
const resp = BrowserPool.myBrowserPool.acquire().then(async function(client){
try {
const url = config.get('url');
const page = await client.newPage();
await page.goto(url, {waitUntil: ['networkidle2', 'domcontentloaded']});
let gotoUrl = `${url}/dashboards/${exportParams.dashboardId}?csv_export_id=${exportParams.csvExportId}`;
//more processing
await page.goto(gotoUrl, {waitUntil: 'networkidle2' })
await myPool().myBrowserPool.release(client);
return Data;
} catch(err) {
try {
const l = await BrowserPool.myBrowserPool.destroy(client);
} catch(e) {
}
return err;
}
}).catch(function(err) {
return err;
});
return resp;
}
module.exports.performExport = performExport;
My understanding is that
1) When the application starts I can spin up for example 2 chromium instances and then when ever i want to visit a page i can use either of the two connections, so the browsers are essentially open and we improve the performance since the browser start can take time. is this correct?
2) Where do I place the acquire() code, I understand this should be in the app.js, so we acquire the instances rite when the app boots, but my pupeteer code is in a different file, how do i pass the browser reference in the file which has my pupeteer code.
When I use the above the code, a new browser instances spins up every time and the max property is not considered and it opens up as many instances are requested.
My apologies if its something very trial and i might have not understood the concept fully. Any help in clarifying this would be really helpful.

When using a pool, you'll need to use .acquire() to obtain an object, and then .release() when you're done so the object is returned to the pool and made available to something else. Without using .release(), you'd might as well have no pool at all. I like to use this helper pattern with pools:
class BrowserPool {
// ...
static async withBrowser(fn) {
const pool = BrowserPool.myBrowserPool;
const browser = await pool.acquire();
try {
await fn(browser);
} finally {
pool.release(browser);
}
}
}
This can be used like this anywhere in your code:
await BrowserPool.withBrowser(async browser => {
await browser.doSomeThing();
await browser.doSomeThingElse();
});
The key is the finally clause makes sure that whether your tasks complete or throw an error, you'll cleanly release the browser back to the pool every time.
It sounds like you might have the concept of the max option backwards as well and are expecting the browser instances to be spawned up to max. Rather, max means "only create up to max number of resources." If you try to acquire a sixth resource without anything having been released, for example, the acquire(...) call will block until one item is returned to the pool.
The min option, on the other hand, means "keep at least this many items on hand at all times", which you can use to pre-allocate resources. If you want 5 items to be created in advance, set min to 5. If you want 5 items and only five items to be created, set both min and max to 5.
Update:
I notice in your original code that you destroy in case of error and release when there isn't an error. Still would prefer the benefit of a wrapper function like mine to centralize all resource acquiring/releasing logic (the SRP approach). Here's how it could be updated to automatically destroy on errors instead:
class BrowserPool {
// ...
static async withBrowser(fn) {
const pool = BrowserPool.myBrowserPool;
const browser = await pool.acquire();
try {
await fn(browser);
pool.release(browser);
} catch (err) {
await pool.destroy(browser);
throw err;
}
}
}
Addendum
Figuring out what's going on in your code will be easier if you embrace the async function instead of mixing async function stuff and Promise callback stuff. Here's how it can be rewritten:
async function performExport(params){
const myPool = BrowserPool.myBrowserPool;
const client = await myPool.acquire();
try {
const url = config.get('url');
const page = await client.newPage();
await page.goto(url, {waitUntil: ['networkidle2', 'domcontentloaded']});
let gotoUrl = `${url}/dashboards/${exportParams.dashboardId}?csv_export_id=${exportParams.csvExportId}`;
//more processing
await page.goto(gotoUrl, {waitUntil: 'networkidle2' })
await myPool.release(client);
return Data;
} catch(err) {
try {
const l = await myPool.destroy(client);
} catch(e) {
}
return err; // Are you sure you want to do this? Would suggest throw err.
}
}

Related

js imported function error "getUser not a function"

I have three files
BotJobActions.js
TestDate.js
CreateCron.js
The BotJobActions file creates a function called getUser that returns the user connected to a specific job, then exports the getUser along with a bunch of other functions.
const getUser = async (jobId) =>{
await mongoConnect(process.env.DB_PWORD)
try {
const user = await User.findOne({pendingJobs:jobId})
return user
} catch (err) {
console.log(err)
}
}
module.exports = { newJob, getUserJobs, getUser, updateUserJob, destroyUserPendingJob, destroyUserCompletedJob, activateJob, deactivateJob, endJob }
TestDate defines a function called runBot which runs a bot Job. In runBot it also calls the getUser function, so I can make changes to a specific user. Then exports the function because it will be used in other files.
const { getUser } = require("../bot/botJobActions");
const runBot = async (todayJobs) =>{
// await mongoConnect(process.env.DB_PWORD)
for(const job of todayJobs){
const clubPassword = decryptToken(job.clubPassword.token, job.clubPassword.iv)
const user = await getUser(job.id)
if(job.proxy){
const proxyConfig = await getProxyConfig(user)
if(proxyConfig.status === "no proxy") console.log("[-] Proxy Config Retrival Error/Running Without Proxy")
// await startBot(member=job.member?job.member : null, proxy=proxyConfig.status === 'success'?proxyConfig:null, job.clubUsername, clubPassword, job.startTime, job.endTime, job.courseList, job.id)
await console.log(member=job.member?job.member : null, proxy=proxyConfig.status === 'success'?proxyConfig:null, job.clubUsername, clubPassword, job.startTime, job.endTime, job.courseList, job.id)
}else{
// await startBot(member=job.member?job.member : null, proxy=null, job.clubUsername, clubPassword, job.startTime, job.endTime, job.courseList, job.id)
await console.log(member=job.member?job.member : null, proxy=null, job.clubUsername, clubPassword, job.startTime, job.endTime, job.courseList, job.id)
}
}
return
}
module.exports = { runBot, getJobs }
CreateCron is a function that runs whenever a job is created with a specific start time. This function will create a cron job for that specified time to run the bot.
const schedule = require('node-schedule');
const { runBot } = require('./testDate');
const createCron = (job) =>{
const startDate = new Date(job.botStartDate)
const startTime = new Date(`09/19/2000 ${job.botStartTime}`)
startDate.setHours(startTime.getHours())
startDate.setMinutes(startTime.getMinutes())
console.log(startDate.toUTCString())
schedule.scheduleJob(startDate, async function(){
console.log('run job')
await runBot([job])
})
}
My problem thought is that whenever I run the createCron function, I get an error saying that the getUser is not a function. Even thought it is.
Any help is appreciated!!
I was able to fix the problem. All I had to do was use the absolute path to the function instead of the relative path. Then the functions worked. Hope this can help somebody!

Is there a way to open multiple tabs simultaneously on Playwright or Puppeteer to complete the same tasks?

I just started coding, and I was wondering if there was a way to open multiple tabs concurrently with one another. Currently, my code goes something like this:
const puppeteer = require("puppeteer");
const rand_url = "https://www.google.com";
async function initBrowser() {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto(rand_url);
await page.setViewport({
width: 1200,
height: 800,
});
return page;
}
async function login(page) {
await page.goto("https://www.google.com");
await page.waitFor(100);
await page.type("input[id ='user_login'", "xxx");
await page.waitFor(100);
await page.type("input[id ='user_password'", "xxx");
}
this is not my exact code, replaced with different aliases, but you get the idea. I was wondering if there was anyone out there that knows the code that allows this same exact browser to be opened on multiple instances, replacing the respective login info only. Of course, it would be great to prevent my IP from getting banned too, so if there was a way to apply proxies to each respective "browser"/ instance, that would be perfect.
Lastly, I would like to know whether or not playwright or puppeteer is superior in the way they can handle these multiple instances. I don't even know if this is a possibility, but please enlighten me. I want to learn more.
You can use multiple browser window as different login/cookies.
For simplicity, you can use the puppeteer-cluster module by Thomas Dondorf.
This module can make your puppeteer launched and queued one by one so that you can use this to automating your login, and even save login cookies for the next launches.
Feel free to go to the Github: https://github.com/thomasdondorf/puppeteer-cluster
const { Cluster } = require('puppeteer-cluster')
(async () => {
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_CONTEXT,
maxConcurrency: 2, // <= this is the number of
// parallel task running simultaneously
}) // You can change to the number of CPU
const cpuNumber = require('os').cpus().length // for example
await cluster.task(async ({ page, data: [username, password] }) => {
await page.goto('https://www.example.com')
await page.waitForTimeout(100)
await page.type('input[id ="user_login"', username)
await page.waitForTimeout(100)
await page.type('input[id ="user_password"', password)
const screen = await page.screenshot()
// Store screenshot, Save Cookies, do something else
});
cluster.queue(['myFirstUsername', 'PassW0Rd1'])
cluster.queue(['anotherUsername', 'Secr3tAgent!'])
// cluster.queue([username, password])
// username and password array passed into cluster task function
// many more pages/account
await cluster.idle()
await cluster.close()
})()
For Playwright, sadly still unsupported by the module above,you can use browser pool (cluster) module to automating the Playwright launcher.
And for proxy usage, I recommend Puppeteer library as the legendary one.
Don't forget to choose my answer as the right one, if this helps you.
There are profiling and proxy options; you could combine them to achieve your goal:
Profile, https://playwright.dev/docs/api/class-browsertype#browser-type-launch-persistent-context
import { chromium } from 'playwright'
const userDataDir = /tmp/ + process.argv[2]
const browserContext = await chromium.launchPersistentContext(userDataDir)
// ...
Proxy, https://playwright.dev/docs/api/class-browsertype#browser-type-launch
import { chromium } from 'playwright'
const proxy = { /* secret */ }
const browser = await chromium.launch({
proxy: { server: 'pre-context' }
})
const browserContext = await browser.newContext({
proxy: {
server: `http://${proxy.ip}:${proxy.port}`,
username: proxy.username,
password: proxy.password,
}
})
// ...

acync await react/redux js async events

https://ibb.co/dsNrnPQ -- screenshot of error
I get a problem with an async event.
When logging into the site, when locale-storage is not ready yet. I can't get it async.
After refreshing the page, the problem goes away.
Unhandled Rejection (SyntaxError): Unexpected token u in JSON at position 0
Problem in string
const userData = await userDataGetter();
export function setBalanseFetch(){
return async dispatch => {
const userData = await userDataGetter();
const userID = await userData.userId;
try{
const respone = await axios.post('/api/profile', {userID})
const json = await respone["data"];
const FetchBalanse = json.items[0].balanse;
dispatch( {type: FETCH_BALANSE, payload: Number(FetchBalanse)})
}catch(e){
console.log(`Axios spend balanse request failed: ${e}`);
}
}
}
code function userDataGetter
async function userDataGetter(){
const userData = await JSON.parse(localStorage.userData);
return userData;
}
export default userDataGetter
You have:
const userData = await JSON.parse(localStorage.userData);
However, JSON.parse is not an asynchronous function. It does not wait for a localStorage key to be present and ready, which is what your code seems to want it to do. (Also, as a comment pointed out, you want localStorage.get('userData')). Nor are you checking that it is present at all.
You also don't show where the localStorage is getting set. But likely your solution will be to then trigger the code that depends on it after you know its been set from the same place that's setting it, and when you need to access it any other time, check for its presence first.

Example script provided by neo4j for JavaScript won't run

I am very new to the graph database ecosystem and for start I am experimenting with the neo4j. I would very much like to work with node and neo4j. So after a quick search I found neo4j-driver that is an officially supported driver for JavaScript and an example provided which is:
const neo4j = require('neo4j-driver')
const driver = neo4j.driver(uri, neo4j.auth.basic(user, password))
const session = driver.session()
const personName = 'Alice'
try {
const result = await session.run(
'CREATE (a:Person {name: $name}) RETURN a',
{ name: personName }
)
const singleRecord = result.records[0]
const node = singleRecord.get(0)
console.log(node.properties.name)
} finally {
await session.close()
}
// on application exit:
await driver.close()
now when I run this code, I immediately get the following error:
SyntaxError: await is only valid in async function
Now I thought I understood the error that I would have to wrap the try-catch block with anonymous async function to get rid of the error. The changed code body is:
const config = {
"neo4j": {
"url": "neo4j://localhost",
"authUser": "neo4j",
"authKey": "adminPassword"
}
}
const neo4j = require("neo4j-driver");
const driver = neo4j.driver(
config.neo4j.url,
neo4j.auth.basic(config.neo4j.authUser, config.neo4j.authKey)
);
const session = driver.session();
(async () => {
try {
const result = await session.run('CREATE (a:Person {name: $name}) RETURN a', { name: 'Alice' });
const singleRecord = result.records[0];
const node = singleRecord.get(0);
console.log(node.properties.name);
} catch (error) {
console.log("Error Body: ", error);
} finally {
await session.close();
}
})();
await driver.close();
But to my dismay, I have run into another error that is very cryptic:
{ Neo4jError: Could not perform discovery. No routing servers available. Known routing table: RoutingTable[database=Sample database, expirationTime=0, currentTime=1592397056399, routers=[], readers=[], writers=[]]
at captureStacktrace (/Users/pc/node_modules/neo4j-driver/lib/result.js:263:15)
at new Result (/Users/pc/node_modules/neo4j-driver/lib/result.js:68:19)
at Session._run (/Users/pc/node_modules/neo4j-driver/lib/session.js:174:14)
at Session.run (/Users/pc/node_modules/neo4j-driver/lib/session.js:135:19)
at /Users/pc/neoNode.js:20:38
at Object.<anonymous> (/Users/pc/neoNode.js:31:3)
at Module._compile (module.js:653:30)
at Object.Module._extensions..js (module.js:664:10)
at Module.load (module.js:566:32)
at tryModuleLoad (module.js:506:12) code: 'ServiceUnavailable', name: 'Neo4jError' }
I also had some problems with this.
First off, Natam Oliveira is correct. You need to use the bolt protocol, and await promises needs to be within an async function. For some reason the neo4j protocol is used in some examples in the docs. Additionally it would seem both examples currently provided by Neo4j—in the driver-manual and javascript-driver section—causes errors if you use them outside of some kind of unspecified environment.
There were some clues on the npmjs pagckage page, though, so by working them into the existing code, I was at least able to spit out some data. However I'm also wondering on how you could make this work inside the async function, so an explanation to how that could work with this driver would be very welcome.
Here's what worked for me:
const neo4j = require('neo4j-driver')
const cnx = {
user: 'neo4j',
password: 'some passphrase',
uri: 'bolt://localhost:7687'
}
const driver = neo4j.driver(cnx.uri, neo4j.auth.basic(cnx.user, cnx.password))
driver.verifyConnectivity()
.then((cnxMsg) => {
console.log(cnxMsg)
})
const session = driver.session({ database: 'neo4j' })
session.run('MATCH (n:Movie) RETURN n LIMIT 5')
.subscribe({
onKeys: keys => {
console.log(keys)
},
onNext: record => {
console.log(record.get('n').properties.title)
},
onCompleted: () => {
session.close()
},
onError: error => {
console.error(error)
}
})
This spits out some movies using the streaming API as seen in the NPM documentation. (Note: It will only work if you started/installed the Movie database, so double check that you didn't delete it, as its deletion is also part of the Neo4j tutorial.) Now just change the MATCH Cypher query to whatever you like, and play around with the output, for instance by piping it to Express.
Sources:
https://neo4j.com/docs/driver-manual/current/client-applications/
https://neo4j.com/developer/javascript/#javascript-driver
https://www.npmjs.com/package/neo4j-driver
https://neo4j.com/docs/api/javascript-driver/current/
first of all, I think your URL should be "url": "bolt://localhost:7687"
And you still with await driver.close() outside an async function
If you are starting to use neo4j, look for an OGM (Object Graph Model) to help you.

How to call Page.navigate and trigger associated Page.loadEventFired?

I am using Chrome stable 60 (https://chromedevtools.github.io/devtools-protocol/1-2/Page/) for headless. I need to be able to do this:
Navigate to page 1
Take screenshot1
Navigate to page 2 (after page 1 is done)
Take screenshot2
However, I can't see to call Page.navigate twice because Page.loadEventFired will pick up on the latest one.
I don't want to use Canary because it's so unstable (screenshot doesn't even work right). So I think Target isn't an option (if it could be).
What is the best way to do url navigation in serial fashion like that?
I looked at https://github.com/LucianoGanga/simple-headless-chrome to see how they do it (await mainTab.goTo) but can't seem to figure out yet.
The link here https://github.com/cyrus-and/chrome-remote-interface/issues/92 gave me some idea:
const fs = require('fs');
const CDP = require('chrome-remote-interface');
function loadForScrot(url) {
return new Promise(async (fulfill, reject) => {
const tab = await CDP.New();
const client = await CDP({tab});
const {Page} = client;
Page.loadEventFired(() => {
fulfill({client, tab});
});
await Page.enable();
await Page.navigate({url});
});
}
async function process(urls) {
try {
const handlers = await Promise.all(urls.map(loadForScrot));
for (const {client, tab} of handlers) {
const {Page} = client;
await CDP.Activate({id: tab.id});
const filename = `/tmp/scrot_${tab.id}.png`;
const result = await Page.captureScreenshot();
const image = Buffer.from(result.data, 'base64');
fs.writeFileSync(filename, image);
console.log(filename);
await client.close();
}
} catch (err) {
console.error(err);
}
}
process(['http://example.com',
'http://example.com',
'http://example.com',
'http://example.com',
'http://example.com',
'http://example.com',
'http://example.com',
'http://example.com']);
Checkout the new library from google team puppeteer

Categories

Resources