I use puppeteer-cluster + node js. I have some script.
I'm a newbie
The script runs halfway through, then stops and does not execute any further. I've been looking for a solution for a week now and I don't understand what the problem is. Please help me. Here is the code:
const { Cluster } = require('puppeteer-cluster-delay');
/**
 * Launches a two-worker cluster and runs the same task twice: visit a fixed
 * list of pages, pausing after each navigation and logging a progress label.
 */
(async () => {
  // Pause after every navigation, in milliseconds.
  const PAUSE_MS = 5000;

  // [url, label] pairs: the page to open and the label logged once the pause is over.
  const steps = [
    ['http://www.google.com/', '1'],
    ['http://www.google.com/', '2'],
    ['http://www.google.com/', '3'],
    ['http://www.google.com/', '4'],
    ['http://www.google.com/', '5'],
    ['http://www.nike.com/', '6'],
    ['http://www.google.com/', '7'],
    ['http://www.google.com/', '8'],
    ['http://www.google.com/', 'end'],
  ];

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const cluster = await Cluster.launch({
    concurrency: Cluster.CONCURRENCY_CONTEXT,
    maxConcurrency: 2,
    // The task spends 9 x 5 s in pauses alone. puppeteer-cluster documents a
    // 30 s per-task default, which is why the run stopped around step 5.
    // NOTE(review): assumes the `puppeteer-cluster-delay` fork keeps the
    // upstream `timeout` option - confirm.
    timeout: 120000,
    puppeteerOptions: { headless: false },
  });

  // Report task failures (including timeouts) instead of ending silently.
  // NOTE(review): `taskerror` is the upstream puppeteer-cluster event name.
  cluster.on('taskerror', (err, data) => {
    console.error(`Task failed (data: ${data}): ${err.message}`);
  });

  await cluster.task(async ({ page }) => {
    for (const [target, label] of steps) {
      await page.goto(target);
      await sleep(PAUSE_MS);
      console.log(label);
    }
  });

  await cluster.queue();
  await cluster.queue();
  // many more pages
  await cluster.idle();
  await cluster.close();
})();
The script reaches the 5th iteration and then ends.
We need to use the `timeout` argument when launching Puppeteer.
Set `timeout` to 0 to turn the timeout off.
Related
I'm fetching data from a website but the process sometimes fails so I make the function to retry it. But I'm using a 15 seconds timeout, so when the timeout is triggered it will stop retrying the function and it will return some error message from the website.
This works fine in my local machine but when I deploy my code to Vercel (running on AWS Lambda as far as I know), the setTimeout is being completely ignored, so the fetchData function keeps running until it gets the correct response or until the server default 60 seconds timeout triggers.
Here's the code:
/**
 * GET /test - types a code into the page, clicks the check button and returns
 * the JSON body of the matching API response. While the body contains
 * `errors`, the page is reloaded and the attempt repeated until the retry
 * budget is used up; the last body is returned either way.
 *
 * Responds with `{ status: 200, message: data }` on success and with HTTP 500
 * and `{ 'Server Error': <error text> }` when anything throws.
 */
router.get('/test', async (req, res) => {
  // Total time during which a failed attempt may still be retried, in milliseconds.
  const RETRY_BUDGET_MS = 15000;
  try {
    const browser = await playwright.launchChromium({ headless: false });
    let data;
    try {
      const context = await browser.newContext();
      const page = await context.newPage();
      // A wall-clock deadline replaces the setTimeout flag: there is no timer
      // to clear, and the check does not depend on a timer callback having run.
      const deadline = Date.now() + RETRY_BUDGET_MS;

      const fetchData = async () => {
        await page.type('#code', '6824498040');
        // Start waiting for the response before clicking so it cannot be missed.
        const [apiResponse] = await Promise.all([
          page.waitForResponse(
            (response) =>
              response.url() === `${env.API_URL2}` && response.status() === 200,
            { timeout: 10000 },
          ),
          page.click('#btn-check'),
        ]);
        const result = await apiResponse.json();
        if (result.errors && Date.now() < deadline) {
          await page.reload();
          return fetchData();
        }
        return result;
      };

      data = await fetchData();
    } finally {
      // Close the browser on the error path too, before any response is sent.
      await browser.close();
    }
    res.json({
      status: 200,
      message: data,
    });
  } catch (error) {
    console.error(error);
    return res.status(500).send({ 'Server Error': `${error}` });
  }
});
I read that you have to wrap the setTimeout into a Promise and return it as an async function. Like this:
/**
 * Waits `t` milliseconds.
 * @param {number} t - Delay in milliseconds.
 * @returns {Promise<string>} Resolves with `Completed in <t>` once the delay has elapsed.
 */
const timeOut = (t) =>
  new Promise((done) => {
    // Extra setTimeout arguments are forwarded to the callback.
    setTimeout(done, t, `Completed in ${t}`);
  });
// Wait the full 15 s, then print the completion message.
console.log(await timeOut(15000));
But this will trigger the 15 seconds wait always. I need to discard the waiting if I get the correct response from fetchData and trigger the timeout if I don't 15 seconds after I start trying.
Any ideas???
The solution was quite simple. Since AWS Lambda requires an async function to properly wait for the timeouts, I just had to wrap the timeout in a Promise. What did the trick, though, was wrapping both functions, the timeout and fetchData, in a Promise.race() call. By doing this, whichever promise settles first decides the result. Note that Promise.race() does not cancel the other operation; it keeps running in the background unless you stop it yourself.
The code is now like this:
/**
 * GET /test - types a code into the page, clicks the check button and returns
 * the JSON body of the matching API response, retrying (with a page reload)
 * while the body contains `error`. The whole operation is raced against a
 * fixed time budget.
 *
 * Responds with `{ status: 200, message: data }` on success and with HTTP 500
 * and `{ 'Server Error': <error text> }` on failure. A timeout is reported as
 * `Error: FUNCTION TIMEOUT`, because the rejection is now a real Error.
 */
router.get('/test', async (req, res) => {
  // Overall time budget for fetchData, in milliseconds.
  const FUNCTION_TIMEOUT_MS = 12000;
  try {
    const browser = await playwright.launchChromium({ headless: false });
    let timer;
    let timedOut = false;
    let data;
    try {
      const context = await browser.newContext();
      const page = await context.newPage();

      const timeout = () =>
        new Promise((resolve, reject) => {
          timer = setTimeout(() => {
            // Promise.race does not cancel the loser; this flag tells
            // fetchData to stop retrying.
            timedOut = true;
            reject(new Error('FUNCTION TIMEOUT'));
          }, FUNCTION_TIMEOUT_MS);
        });

      const fetchData = async () => {
        await page.type('#code', '6824498040');
        // Start waiting for the response before clicking so it cannot be missed.
        const [apiResponse] = await Promise.all([
          page.waitForResponse(
            (response) =>
              response.url() === `${env.API_URL2}` && response.status() === 200,
            { timeout: 10000 },
          ),
          page.click('#btn-check'),
        ]);
        const result = await apiResponse.json();
        if (result.error && !timedOut) {
          await page.reload();
          return fetchData();
        }
        return result;
      };

      const pending = fetchData();
      // If the timeout wins, the browser is closed while an attempt may still
      // be in flight. Mark its late rejection as handled so it does not become
      // an unhandled rejection; the race below still sees the real outcome.
      pending.catch(() => {});
      data = await Promise.race([pending, timeout()]);
    } finally {
      clearTimeout(timer);
      // Close the browser on every path, before any response is sent.
      await browser.close();
    }
    res.json({
      status: 200,
      message: data,
    });
  } catch (error) {
    console.error(error);
    return res.status(500).send({ 'Server Error': `${error}` });
  }
});
I implemented a 12-second timeout instead of 15. I tested this on Vercel (AWS Lambda) and it works fine.
Thanks everybody, especially dandavis, this implementation was his idea.
Anyway, I hope it helps.
I'm trying to encapsulate some initialization / clean-up code in a single Promise. What I want is to execute some code, execute the `then` handler, and then execute some more code. This is what I came up with:
// Logs the setup message, returns a promise that is fulfilled right away, and
// logs 'cleanup' after that promise has been fulfilled.
function initialize() {
// Receives the `resolve` function of `promise` (assigned in the executor below).
let callback;
console.log('intialization');
const promise = new Promise(resolve => callback = resolve);
// This executor takes no resolve/reject parameters, so the second promise never
// settles and is discarded; it only serves to start the async function.
new Promise(async () => {
// `callback` is `resolve`: calling it fulfills `promise` with undefined.
await callback();
// Waits only for `promise` itself, not for promises returned by handlers the
// caller attaches with `.then`.
await promise;
console.log('cleanup');
});
return promise;
}
// Attach a synchronous handler to the returned promise.
initialize().then(() => console.log('execute then'));
which gives me the following output in the terminal:
initialization
execute then
cleanup
- Promise {<fulfilled>: undefined}
All good so far. However, when we make the callback async, it no longer works.
// Same call, but the handler is async: it logs and settles only after a 10 s timer.
// NOTE(review): the zero-argument initialize() shown earlier awaits only its own
// promise, so it has no way to wait for the promise this handler returns.
initialize().then(
async () => {
await new Promise(resolve => {
setTimeout(
() => {
console.log('execute then');
resolve();
},
10000
)
})
}
);
gives me this output:
initialization
cleanup
- Promise {<pending>}
execute then
I would have expected it to look like this:
initialization
- Promise {<pending>}
execute then
cleanup
How can I fix this? Is this even possible at all?
You can accept a callback that defines an asynchronous operation. It can then be inserted into the middle of a promise chain:
/**
 * Logs `message` after `ms` milliseconds.
 * @param {string} message - Text to log.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<undefined>} Fulfilled right after the message is logged.
 */
const delayMessage = (message, ms) =>
  new Promise((resolve) => {
    const announce = () => {
      console.log(message);
      resolve();
    };
    setTimeout(announce, ms);
  });
/** Stand-in for the asynchronous setup step; only logs. */
async function somethingAsync() {
  console.log('intialization');
}

/**
 * Runs setup, then `callback`, then logs 'cleanup'.
 * @param {Function} callback - Middle step; may return a promise, which is awaited by the chain.
 * @returns {Promise<undefined>} Fulfilled after cleanup; rejects if a step fails (cleanup is then skipped).
 */
function initialize(callback) {
  const logCleanup = () => {
    console.log('cleanup');
  };
  return somethingAsync().then(callback).then(logCleanup);
}
// The step to run between initialization and cleanup: one delayed message.
const middleOfProcess = () => {
  return delayMessage('execute then', 2000);
};
initialize(middleOfProcess);
It works even if there are multiple async steps to do in between, since you can simply chain them together:
/**
 * Logs `message` once `ms` milliseconds have passed.
 * @param {string} message - Text to log.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<undefined>} Fulfilled right after the message is logged.
 */
const delayMessage = (message, ms) => {
  const logThen = (settle) => () => {
    console.log(message);
    settle();
  };
  return new Promise((resolve) => {
    setTimeout(logThen(resolve), ms);
  });
};
/** Stand-in for the asynchronous setup step; only logs. */
async function somethingAsync() {
  console.log('intialization');
}

/**
 * Chains setup -> `callback` -> cleanup log.
 * @param {Function} callback - Middle step; a returned promise delays cleanup until it settles.
 * @returns {Promise<undefined>} Fulfilled after 'cleanup' is logged; rejects if a step fails.
 */
function initialize(callback) {
  const setupDone = somethingAsync();
  const middleDone = setupDone.then(callback);
  return middleDone.then(() => {
    console.log('cleanup');
  });
}
// Three delayed messages, run one after another as a single middle step.
const middleOfProcess = () => {
  const first = delayMessage('execute then1', 2000);
  const second = first.then(() => delayMessage('execute then2', 2000));
  return second.then(() => delayMessage('execute then3', 2000));
};
initialize(middleOfProcess);
The same can be done using async/await syntax:
/**
 * Prints `message` after a delay.
 * @param {string} message - Text to log.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<undefined>} Fulfilled right after the message is logged.
 */
const delayMessage = (message, ms) => {
  return new Promise((resolve) => {
    setTimeout(() => {
      console.log(message);
      resolve();
    }, ms);
  });
};
/** Stand-in for the asynchronous setup step; only logs. */
async function somethingAsync() {
  console.log('intialization');
}

/**
 * async/await form: setup, then `callback`, then the cleanup log.
 * @param {Function} callback - Middle step; its returned promise (if any) is awaited.
 * @returns {Promise<undefined>} Fulfilled after 'cleanup' is logged; rejects if a step fails.
 */
async function initialize(callback) {
  const logCleanup = () => {
    console.log('cleanup');
  };
  await somethingAsync();
  await callback();
  logCleanup();
}
// Three delayed messages, awaited one after another.
const middleOfProcess = async () => {
  for (const label of ['execute then1', 'execute then2', 'execute then3']) {
    await delayMessage(label, 2000);
  }
};
initialize(middleOfProcess);
I have a promise chain.
If I receive an error in getServiceCost, I want to repeat the whole chain again (retry) up to 2 times. How can I achieve this when using a promise chain, meaning that getUser, getServiceCost are executed again?
// Look up the user, fetch their services, price them, and print the total.
const costPromise = getUser(100).then(getServices).then(getServiceCost);
costPromise.then(console.log);
/**
 * Simulates a database lookup that takes one second.
 * @param {*} userId - Identifier echoed back in the result.
 * @returns {Promise<{userId: *, username: string}>} The user record, with username 'admin'.
 */
function getUser(userId) {
  const LOOKUP_DELAY_MS = 1000;
  return new Promise((resolve) => {
    console.log('Get the user from the database.');
    const user = { userId, username: 'admin' };
    setTimeout(resolve, LOOKUP_DELAY_MS, user);
  });
}
/**
 * Simulates an API call that takes three seconds.
 * @param {{username: string}} user - User whose name appears in the log line.
 * @returns {Promise<string[]>} The fixed list ['Email', 'VPN', 'CDN'].
 */
function getServices(user) {
  const API_DELAY_MS = 3 * 1000;
  return new Promise((resolve) => {
    console.log(`Get the services of ${user.username} from the API.`);
    const deliver = () => {
      resolve(['Email', 'VPN', 'CDN']);
    };
    setTimeout(deliver, API_DELAY_MS);
  });
}
/**
 * Simulates a cost calculation that takes two seconds.
 * @param {Array} services - Services to price.
 * @returns {Promise<number>} 100 per service.
 */
function getServiceCost(services) {
  const PRICE_PER_SERVICE = 100;
  return new Promise((resolve) => {
    console.log(`Calculate the service cost of ${services}.`);
    const finish = () => {
      resolve(services.length * PRICE_PER_SERVICE);
    };
    setTimeout(finish, 2 * 1000);
  });
}
If I receive an error in getServiceCost, I want to repeat the whole chain again (retry) up to 2 times. How can I achieve this when using a promise chain, meaning that the following are executed again:
getUser, getServiceCost
I'd use an async function (all modern environments support them, and you can transpile for obsolete environments), which lets you use a simple loop. Perhaps as a utility function you can reuse:
/**
 * Calls `fn` until it succeeds or the attempts are used up.
 *
 * @param {Function} fn - Operation to run; may return a promise.
 * @param {number} [retries=3] - Maximum number of attempts in total.
 * @returns {Promise<*>} The value of the first successful attempt.
 * @throws The error of the last failed attempt, or `Error('Out of retries')`
 *   when `retries` allows no attempt at all (zero, negative or NaN).
 */
async function callWithRetry(fn, retries = 3) {
  // Thrown as-is when no attempt is made; previously this Error was
  // *returned*, so the caller saw a fulfilled promise holding an Error.
  let lastError = new Error(`Out of retries`); // Probably using an `Error` subclass
  for (let attempt = 1; attempt <= retries; attempt += 1) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
    }
  }
  throw lastError;
}
Using it:
// Retry the whole chain: each attempt starts again from getUser.
const runChain = () => getUser(100).then(getServices).then(getServiceCost);
callWithRetry(runChain)
  .then(console.log)
  .catch(error => { /*...handle/report error...*/ });
Or
// Same retry, with the chain written as an async function.
callWithRetry(async () => {
  const services = await getServices(await getUser(100));
  return getServiceCost(services);
})
  .then(console.log)
  .catch(error => { /*...handle/report error...*/ });
I'm trying to write a promise but seem to be missing something. Here is my code:
// Update the local state after a short delay, then persist the same values.
const myPromise = new Promise((resolve) => {
  setTimeout(() => {
    console.log("getting here");
    const nextInputs = { ...inputs, images: imageAsUrl };
    setinputs(nextInputs);
    // Without calling resolve the promise stays pending forever and none of
    // the .then handlers below ever run.
    resolve(nextInputs);
  }, 100);
});
myPromise
  .then((nextInputs) => {
    console.log("getting here too");
    // Return the write so the next .then waits for it and .catch sees its failure.
    // NOTE(review): writes the freshly built value instead of `inputs`, which
    // presumably still holds the pre-update state here - confirm.
    return firebase.database().ref(`collection/${idNode}`).set(nextInputs);
  })
  .then(() => {
    console.log("all is set");
  })
  .catch((err) => {
    console.log(err);
  });
If I run the program, the first part of the promise executes, but none of the .then() functions execute. How do I fix this?
In this scheme, the promise callback has one (resolve) or two (resolve,reject) arguments.
// The executor receives `resolve` and `reject`; call one of them to settle the promise.
let p = new Promise((resolve, reject) => {
  // do something, then settle the promise according to its outcome
  if (result !== "ok") {
    reject("Something is wrong");
    return;
  }
  resolve(3);
});
p.then((res) => {
  console.log(res); // "3"
}).catch((err) => {
  console.error(err); // "Something is wrong"
});
Of course, nowadays you can use async + await in a lot of cases.
You need to resolve the promise, using resolve() and also return the promise from firebase so the next .then in the chain works properly.
// Build the merged inputs after a short delay and hand them to the chain.
const myPromise = new Promise((resolve) => {
  const settle = () => {
    console.log("getting here");
    // resolve must be called, otherwise no `.then` handler is ever triggered
    resolve({ ...inputs, images: imageAsUrl });
  };
  setTimeout(settle, 100);
});
myPromise
  .then((inputs) => {
    console.log("getting here too");
    // Returning the write's promise makes the next .then wait for it
    const target = firebase.database().ref(`collection/${idNode}`);
    return target.set(inputs);
  })
  .then(() => {
    console.log("all is set");
  })
  .catch((err) => {
    console.log(err);
  });
I'm familiar with Mocha's support of delaying the root suite to enable performing asynchronous operations before a test is executed, by use of running Mocha with the --delay flag, but this impacts all tests.
Is it possible to do something like this on a test by test basis, without using the --delay flag?
An example of a working asynchronous test is below, but unfortunately not all of our tests are async and call run(). Please note that the example below also dynamically generates a test for each URL that is detected during the asynchronous operation before the suite executes.
// Collect the article links first, then generate one test per URL.
// Meant to run under `mocha --delay`: run() is what starts the root suite.
driver = await new Builder().forBrowser('chrome').build();
await driver.get('http://example.org');
const headerLinks = await driver.findElements(By.css('article header a'));
try {
  // Promise.all keeps the hrefs in document order and rejects on the first failure.
  const hrefs = await Promise.all(
    headerLinks.map((anchor) => anchor.getAttribute('href')),
  );
  urls.push(...hrefs);
} catch (err) {
  console.log('Catch');
  throw err;
}
describe('my suite', function () {
  // Quit only after the generated tests have run; they still need the
  // driver, so quitting before describe() would break every test.
  after(async function () {
    await driver.quit();
  });
  urls.forEach(function (url) {
    it(`Loads ${url}`, async function () {
      await driver.get(url);
      await driver.getTitle();
      // Placeholder assertion kept from the original example.
      assert.strictEqual(1, 1);
    });
  });
});
run();
Following #destroyer's suggestion, I was successful in accomplishing something similar using asynchronous hooks (below), but cannot dynamically generate a separate test for each URL since Mocha does not delay the root suite execution.
// Suite that gathers article links in a `before` hook and then checks that
// each collected URL loads without being redirected elsewhere.
describe('Async test suite', function () {
  // Builds the browser, opens the page and returns the article header links.
  const getAnchors = async function () {
    driver = await new Builder().forBrowser('chrome').build();
    await driver.get('http://example.org');
    return driver.findElements(By.css('article header a'));
  };

  // Appends each anchor's href to the shared `urls` list, in order.
  // Returns `urls`; an empty `anchors` list no longer leaves a promise pending.
  const getUrls = async function (anchors) {
    for (const anchor of anchors) {
      urls.push(await anchor.getAttribute('href'));
    }
    return urls;
  };

  // Visits every URL in turn and fails on the first one whose final
  // location differs from the requested URL.
  const iterateUrls = async function (urls) {
    for (const url of urls) {
      await driver.get(url);
      const thisUrl = await driver.getCurrentUrl();
      assert.strictEqual(thisUrl, url);
    }
  };

  // Runs the two discovery steps one after the other.
  async function asyncController() {
    const pageAnchors = await getAnchors();
    await getUrls(pageAnchors);
  }

  // Trigger async functions here. Returning the promise lets Mocha report
  // a failure instead of waiting for a `done` that is never called.
  before(async function () {
    await asyncController();
  });

  // Close the browser after test completes
  after(async function () {
    await driver.quit();
  });

  describe('Checks URLs', function () {
    // Mocha rejects tests that both take `done` and return a promise,
    // so this test relies on the returned promise only.
    it('Iterates over URLs', async function () {
      await iterateUrls(urls);
    });
  });
});