Node.js how to synchronously read lines from stream.Readable

Node.js how to synchronously read lines from stream.Readable - javascript

I'm interacting with a child process through stdio, and I need to wait for a line from childProcess.stdout each time I write some command to childProcess.stdin.
It's easy to wrap an asynchronous method for writing like below:
async function write(data){
return new Promise(resolve=>{
childProcess.stdin.write(data,()=>resolve());
})
}
However, it turns out quite difficult when it comes to reading, since data from stdout must be processed using listeners. I've tried below:
const LineReader = require("readline")
const reader = LineReader.createInterface(childProcess.stdout);
async function read(){
return new Promise(resolve=>{
reader.once("line",line=>resolve(line));
})
}
But it always returns the first line.
I know I may achieve this using setInterval, And I've already implemented the functionality this way. But it obviously has an impact on the performance, so now I'm trying to optimize it by wrapping it into an asynchronous method.
Any suggestions and solutions will be appreciated!

Well, I ended up with something pretty similar to what you were trying. It makes some assumptions that are mentioned in the code and needs more complete error handling:
const cp = require('child_process');
const readline = require('readline');
const child = cp.spawn("node", ["./echo.js"]);
child.on('error', err => {
console.log(err);
}).on('exit', () => {
console.log("child exited");
});
const reader = readline.createInterface({ input: child.stdout });
// this will miss line events that occurred before this is called
// so this only really works if you know the output comes one line at a time
function nextLine() {
return new Promise(resolve => {
reader.once('line', resolve);
});
}
// this does not check for stdin that is full and wants us to wait
// for a drain event
function write(str) {
return new Promise(resolve => {
let ready = child.stdin.write(str, resolve);
if (!ready) {
console.log("stream isn't ready yet");
}
});
}
async function sendCmd(cmd) {
// get line reader event handler installed so there's no race condition
// on missing the return event
let p = nextLine();
// send the command
await write(cmd);
return p;
}
// send a sequence of commands and get their results
async function run() {
let result1 = await sendCmd("hi\n");
console.log(`Got '${result1}'`);
let result2 = await sendCmd("goodbye\n");
console.log(`Got '${result2}'`);
let result3 = await sendCmd("exit\n");
console.log(`Got '${result3}'`);
}
run().then(() => {
console.log("done");
}).catch(err => {
console.log(err);
});
And, for testing purposes, I ran it with this echo app:
process.stdin.on("data", data => {
let str = data.toString();
let ready = process.stdout.write("return: " + str, () => {
if (str.startsWith("exit")) {
process.exit();
}
});
if (!ready) {
console.log("echo wasn't ready");
}
});

Related

Running two browser instances in parallel for same list of websites in Puppeteer

I wrote javascript code for a web crawler that scraps data from a list of websites (in csv file) in a single browser instance (code below). Now I want to modify the code for the scenario in which every single website in the list runs parallel at the same time in two browser instances. For example, a website www.a.com in the list should run in parallel at the same time on two browser instances and the same goes for the rest of the websites. If anyone can help me, please. I would be very thankful.
(async () => {
require("dotenv").config();
if (!process.env.PROXY_SPKI_FINGERPRINT) {
throw new Error("PROXY_SPKI_FINGERPRINT is not defined in environment.");
}
const fs = require("fs");
const fsPromises = fs.promises;
const pptr = require("puppeteer");
const browser = await pptr.launch({
args: [
"--proxy-server=https://127.0.0.1:8000",
"--ignore-certificate-errors-spki-list=" + process.env.PROXY_SPKI_FINGERPRINT,
"--disable-web-security",
],
// headless: false,
});
const sites = (await fsPromises.readFile(process.argv[2])) // sites list in csv file
.toString()
.split("\n")
.map(line => line.split(",")[1])
.filter(s => s);
for (let i in sites) {
const site = sites[i];
console.log(`[${i}] ${site}`);
try {
await fsPromises.appendFile("data.txt", JSON.stringify(await crawl(browser, site)) + "\n");
} catch (e) {
console.error(e);
}
}
await browser.close();
async function crawl(browser, site) {
const page = await browser.newPage();
try {
const grepResult = [];
page.on("request", async request => {
request.continue();
})
page.on("response", async response => {
try {
if (response.request().resourceType() === "script" &&
response.headers()["content-type"] &&
response.headers()["content-type"].includes("javascript")) {
const js = await response.text();
const grepPartResult = grepMagicWords(js);
grepResult.push([response.request().url(), grepPartResult]);
}
} catch (e) {}
});
await page.setRequestInterception(true);
try {
await page.goto("http://" + site, {waitUntil: "load", timeout: 60000});
await new Promise(resolve => { setTimeout(resolve, 10000); });
} catch (e) { console.error(e); }
const [flows, url] = await Promise.race([
page.evaluate(() => [J$.FLOWS, document.URL]),
new Promise((_, reject) => { setTimeout(() => { reject(); }, 5000); })
]);
return {url: url, grepResult: grepResult, flows: flows};
} finally {
await page.close();
}
function grepMagicWords(js) {
var re = /(?:\'|\")(?:g|s)etItem(?:\'|\")/g, match, result = [];
while (match = re.exec(js)) {
result.push(js.substring(match.index - 100, match.index + 100));
}
return result;
}
}
})();

You can launch multiple browsers and run them in parallel. You would have to restructure your app slighltly for that. Create a wrapper for crawl which launches it with a new browser instance. I created crawlNewInstance which does that for you. You would also need to run crawlNewInstance() in parallel.
Checkout this code:
const sites = (await fsPromises.readFile(process.argv[2])) // sites list in csv file
.toString()
.split("\n")
.map(line => line.split(",")[1])
.filter(s => s);
const crawlerProms = sites.map(async (site, index) => {
try {
console.log(`[${index}] ${site}`);
await fsPromises.appendFile("data.txt", JSON.stringify(await crawlNewInstance(site)) + "\n");
} catch (e) {
console.error(e);
}
}
// await all the crawlers!.
await Promise.all(crawlerProms)
async function crawlNewInstance(site) {
const browser = await pptr.launch({
args: [
"--proxy-server=https://127.0.0.1:8000",
"--ignore-certificate-errors-spki-list=" + process.env.PROXY_SPKI_FINGERPRINT,
"--disable-web-security",
],
// headless: false,
});
const result = await crawl(browser, site)
await browser.close()
return result
}
optional
The above answers basically the question. But If you want to go further I was in a run and had nothing todo :)
If you have plenty of pages, which you wanted to crawl in parallel and for example limit the amount of parallel requests you could use a Queue:
var { EventEmitter} = require('events')
class AsyncQueue extends EventEmitter {
limit = 2
enqueued = []
running = 0
constructor(limit) {
super()
this.limit = limit
}
isEmpty() {
return this.enqueued.length === 0
}
// make sure to only pass `async` function to this queue!
enqueue(fn) {
// add to queue
this.enqueued.push(fn)
// start a job. If max instances are already running it does nothing.
// otherwise it runs a new job!.
this.next()
}
// if a job is done try starting a new one!.
done() {
this.running--
console.log('job done! remaining:', this.limit - this.running)
this.next()
}
async next() {
// emit if queue is empty once.
if(this.isEmpty()) {
this.emit('empty')
return
}
// if no jobs are available OR limit is reached do nothing
if(this.running >= this.limit) {
console.log('queueu full.. waiting!')
return
}
this.running++
console.log('running job! remaining slots:', this.limit - this.running)
// first in, first out! so take first element in array.
const job = this.enqueued.shift()
try {
await job()
} catch(err) {
console.log('Job failed!. ', err)
this.emit('error', err)
}
// job is done!
// Done() will call the next job if there are any available!.
this.done()
}
}
The queue could be utilised with this code:
// create queue
const limit = 3
const queue = new AsyncQueue(limit)
// listen for any errors..
queue.on('error', err => {
console.error('error occured in queue.', err)
})
for(let site of sites) {
// enqueue all crawler jobs.
// pass an async function which does whatever you want. In this case it crawls
// a web page!.
queue.enqueue(async() => {
await fsPromises.appendFile("data.txt", JSON.stringify(await crawlNewInstance(site)) + "\n");
})
}
// helper for watiing for the queue!
const waitForQueue = async () => {
if(queue.isEmpty) return Promise.resolve()
return new Promise((res, rej) => {
queue.once('empty', res)
})
}
await waitForQueue()
console.log('crawlers done!.')
Even further with BrowserPool
It would also be possible to reuse your browser instances, so it would not be necessary to start a new browser instance for every crawling process. This can be done using this Browserpool helper class
var pptr = require('puppeteer')
async function launchPuppeteer() {
return await pptr.launch({
args: [
"--proxy-server=https://127.0.0.1:8000",
"--ignore-certificate-errors-spki-list=" + process.env.PROXY_SPKI_FINGERPRINT,
"--disable-web-security",
],
// headless: false,
});
}
// manages browser connections.
// creates a pool on startup and allows getting references to
// the browsers! .
class BrowserPool {
browsers = []
async get() {
// return browser if there is one!
if(this.browsers.length > 0) {
return this.browsers.splice(0, 1)[0]
}
// no browser available anymore..
// launch a new one!
return await launchPuppeteer()
}
// used for putting a browser back in pool!.
handback(browser) {
this.browsers.push(browser)
}
// shuts down all browsers!.
async shutDown() {
for(let browser of this.browsers) {
await browser.close()
}
}
}
You can then remove crawlNewInstance() and adjust the code to look like this finally:
const sites = (await fsPromises.readFile(process.argv[2])) // sites list in csv file
.toString()
.split("\n")
.map(line => line.split(",")[1])
.filter(s => s);
// create browserpool
const pool = new BrowserPool()
// create queue
const limit = 3
const queue = new AsyncQueue(3)
// listen to errors:
queue.on('error', err => {
console.error('error in the queue detected!', err)
})
// enqueue your jobs
for(let site of sites) {
// enqueue an async function which takes a browser from pool
queue.enqueue(async () => {
try {
// get the browser and crawl a page!.
const browser = await pool.get()
const result = await crawl(browser, site)
await fsPromises.appendFile("data.txt", JSON.stringify(result) + "\n");
// return the browser back to pool so other crawlers can use it! .
pool.handback(browser)
} catch(err) {
console.error(err)
}
})
}
// helper for watiing for the queue!
const waitForQueue = async () => {
// maybe jobs fail in a few milliseconds so check first if its already empty..
if(queue.isEmpty) return Promise.resolve()
return new Promise((res, rej) => {
queue.once('empty', res)
})
}
// wait for the queue to finish :)
await waitForQueue()
// in the very end, shut down all browser:
await pool.shutDown()
console.log('done!.')
Have fun and feel free to leave a comment.

Mocha.run() exits without awaiting async tests

I want to use mocha to run some basic post-deploy tests against live services. I want to programmatically load tests based upon a config object passed to the test script.
The problem I'm encountering is that mocha.run() executes and exits the node process without continuing with the rest of the code. It is not clear to me how to force node to wait for the mocha result and continue with the rest of the code.
mocha-setup.js
const Mocha = require('mocha');
const util = require('util');
const { Test, Suite } = Mocha;
const timeoutMillis = 300000; // five minutes
const mocha = new Mocha({
timeout: timeoutMillis,
reporter: 'mochawesome',
reporterOptions: {
reportDir: '../reports/unit'
},
color: true,
});
const makeSuite = (suiteName) => Suite.create(mocha.suite, suiteName);
const runMochaTests = async (tests, config) => {
const suite = makeSuite('My programmatic test suite');
tests.forEach(({ test, statement }) => {
suite.addTest(new Test(statement, async () => {
await test(config);
}));
});
console.log('This logs as expected.');
const promisifyMocha = util.promisify(() => mocha.run());
const result = await promisifyMocha(); // code seems to exit here
console.log('I am never logged');
console.log(result); // also never logged
// `result` is never returned, and higher level code implementing `runMochaTests` does not continue either
return result; // Eventually, I want to return the number of failures
};
module.exports = {
runMochaTests,
};
The plan is for runMochaTests to return the number of failures. runMochaTests() will be used by different higher order code modules as necessary.
Lastly, in my best life, I would not have to manually promisify mocha at all. If there was a way to use something like const result = await mocha.run(), that feels like the best implementation.

My problem was that Mocha is fundamentally about event listeners, not promises. Using this reference code, I ended up with...
const runMochaTests = async (tests, config) => new Promise((resolve, reject) => {
const testResults = [];
let numberOfFailures = 0;
try {
const suite = makeSuite('Verify Regional Apps');
tests.forEach(({ test, statement }) => {
suite.addTest(new Test(statement, async () => {
await test(config);
}));
});
const runner = mocha.run();
runner.on('test end', (testResult) => {
if (testResult.state === 'failed') numberOfFailures += 1;
testResults.push(testResult);
});
runner.on('end', () => {
runner.removeAllListeners('test end');
runner.removeAllListeners('end');
ClearModule.all(); // Needed to ensure all tests are executed every execution.
resolve(numberOfFailures);
});
} catch (err) {
reject(err);
}
});

Mocha Dynamic test generation in before block not getting executed

As suggested in this post , I tried the steps to create dynamic tests , but I see the actual test(test.getMochaTest()in my below implementation) not getting executed. What's that I'm missing here, the call on test.getMochaTest() does not get executed in the before block.
describe('Dynamic Tests for Mocha', async () => {
let outcome ;
before(async() => {
await init().catch(() => console.error('Puppeteer environment initialization failed'));
return collectTests().then(async(collectTests) => {
console.info('4.Executing tests :');
describe('Dynamic test execution', async() => {
collectTests.forEach(async(test) => {
console.info(`\tModule under test : ${test.name}`);
// impl. of test.getMochaTest() DOES NOT get executed.
it(test.name, async() => outcome = await test.getMochaTest().catch(async () => {
console.error(`error while executing test:\t${test.name}`);
}));
});
}) ;
});
});
after(async () => {
console.info('5. Exiting tests...');
await HelperUtils.delay(10000).then(async () => { await browser.close(); });
console.log('executing after block');
});
it('placeholder', async() => {
await
console.log('place holder hack - skip it');
});
});
Array of tests is returned here :
async function collectTests():Promise<Array<ITest>> {
console.info('3.Collecting tests to execute ...');
testArray = new Array<ITest>();
const login:ITest = new SLogin('Login Check');
testArray.push(login);
return testArray;
}
The below implementation of getMochaTest in SLogin -> does not get executed .
export default class SLogin extends BTest implements ITest {
constructor(name: string) {
super(name);
}
async getMochaTest():Promise<Mocha.Func> {
return async () => {
console.log('Running Login check');
expect(true).to.equal(true);
};
}
}

It doesn't look like you're actually invoking the test.
Calling test.getMochaTest() only returns the async test function in a Promise, it doesn't execute it. So your catch block is catching errors while obtaining the function, not while executing it.
Breaking it out across multiple lines will hopefully make things clearer.
Here's what your code sample does. Notice it never executes the returned test function:
it(test.name, async () => {
const testFn = await test.getMochaTest().catch(() =>
console.error(`error while ***obtaining*** test: \t${test.name}`));
// oops - testFn never gets called!
});
And here's a corrected version where the test actually gets called:
it(test.name, async () => {
const testFn = await test.getMochaTest().catch(() =>
console.error(`error while ***obtaining*** test: \t${test.name}`));
const outcome = await testFn().catch(() =>
console.error(`error while ***executing*** test: \t${test.name}`));
});
Note: I wrote it that way with await and catch() to better match the format of your code sample. However, it's worth pointing out that it mixes async/await and Promise syntax. More idiomatic would be to catch errors with a try/catch block when using async/await.

A Promise with child_process running a Python script in Node.js - Process exits before all data

I'm trying to get sublist3r to run in a node app. It runs, however, it only shows the banner and then exits out in about 5 seconds. The script is supposed to reach out to the web and takes about 30 seconds to run. If I don't use a promise, it will work just fine. Does it have something to do with pyrog.stdout.on('data') not waiting before it outputs? I've read around and tried 'end' with no luck.
Tried everything on this article if it didn't involve editing python script (I don't think I should need to ?!)
Also read the Node.js Child Process but it doesn't mention using promises with spawn, and I believe that's what I need to use for running a Python script.
sublist3r screen shot
Am I using Promises wrong?
Any help is much appreciated
Edit: Added await to runPy, verified same issue. Have also tried making it a variable, let test = await runPy ... with no success
var express = require('express');
var router = express.Router();
let runPy = new Promise((success, nosuccess) => {
const {spawn} = require('child_process')
const pyprog = spawn('python',['/path/to/sublist3r.py', '-d', 'domain.com'] )
pyprog.stdout.on('data', (data) => {
success(data)
})
pyprog.stderr.on('data', (data) => {
nosuccess(data)
})
pyprog.on('close', (code) => {
console.log(`child process ended with ${code}`);
})
})
/* GET home page. */
router.get('/', async (req, res) => {
// EDIT: Added await and verified same issue.
await runPy.then(fromRunPy => {
console.log(fromRunPy.toString());
})
// It works fine below, but I want a promise for no other reason that I can't get it to work...
// const { spawn } = require('child_process');
// const pyProg = spawn('python', ['/home/wayne/BugHunterJS/controllers/Sublist3r/sublist3r.py', '-d', req.body.domain]);
// console.log('inside post');
// pyProg.stdout.on('data', function(data) {
// let sublist3rData = data.toString();
// console.log(sublist3rData);
// });
});
module.exports = router

Not shortly after I asked, I found a solution if anyone is looking. I'm still not really sure how it works, got to figure that out later. I think pushing it to the array is the key.
const {
spawn
} = require('child_process')
const logOutput = (name) => (data) => console.log(`[${name}] ${data}`)
function run() {
return new Promise((resolve, reject) => {
const process = spawn('python', ['/path/sublist3r.py', '-d', 'domain.com']);
const out = []
process.stdout.on(
'data',
(data) => {
out.push(data.toString());
logOutput('stdout')(data);
}
);
const err = []
process.stderr.on(
'data',
(data) => {
err.push(data.toString());
logOutput('stderr')(data);
}
);
process.on('exit', (code, signal) => {
logOutput('exit')(`${code} (${signal})`)
if (code === 0) {
resolve(out);
} else {
reject(new Error(err.join('\n')))
}
});
});
}
(async () => {
try {
const output = await run()
logOutput('main')(output)
process.exit(0)
} catch (e) {
console.error('Error during script execution ', e.stack);
process.exit(1);
}
})();

JS - Interrupt async execution

The actual Problem
The problem I am trying to solve is that a user may not provide enough information on a message (discord). To then get all of the necessary data the user is prompted to react to a bot message.
There are 3 stages of that bot message, "InitialPrompt", "TimeoutWarning" and "TimeoutSuccess(TicketFailure)".
What I wanted the solution to be
With the way I've written my code there is no way to abort the timeout after it has been initialized. I thought that throwing an error would stop the execution of a function. I guess that doesn't happen because async calls get queued up instead of being ran line by line.
Is there a way to do this without adding a boolean and checking infront of each function call?
The solution that I could come up with
const interPtr = {interrupted : false};
interruptSignal(interPtr);
if(interPtr.interrupted) return;
console.log("...");
...
The actual code
JS Fiddle
(async () => {
const sleep = async time => new Promise(resolve => setTimeout(resolve, time));
const interruptSignal = () => new Promise(async (res, rej) => {
console.log("interruptStarted")
await sleep(2000);
console.log("interruptRan");
throw "Interrupted"
});
const timeOutHandler = async () => {
interruptSignal();
console.log("TimeoutStarted")
await sleep(5000);
console.log("TimeoutWarning")
await sleep(5000);
console.log("TimeoutSuccess->TicketFailure")
};
try {
await timeOutHandler();
} catch (e) {
console.log(e);
}
})()

Develop Reference

JavaScript is the programming language of the Web.

Node.js how to synchronously read lines from stream.Readable - javascript

Related

Running two browser instances in parallel for same list of websites in Puppeteer

Mocha.run() exits without awaiting async tests

Mocha Dynamic test generation in before block not getting executed

A Promise with child_process running a Python script in Node.js - Process exits before all data

JS - Interrupt async execution

Categories

Resources