Not able to kill a process in Node.js - JavaScript

I am working on a Node script that opens and closes the browser using child_process. It opens the browser fine, but I am not able to close it by pressing Ctrl + C.
Here's my code:
const { kill } = require('process');

// Pick the platform-appropriate "open a URL" command.
var start = (process.platform == 'darwin' ? 'open' : process.platform == 'win32' ? 'start' : 'xdg-open');

// Open the browser.
var url = 'https://www.google.com';
var cmd = start + ' ' + url;
var child = require('child_process').exec(cmd, function (error, stdout, stderr) {
    if (error) {
        console.log('exec error: ' + error);
    }
});

process.on('SIGINT', function () {
    kill(child.pid, 'SIGINT');
    // fs.writeFile('./scraper.js', ...) (incomplete in the original post; fs was never required)
    process.exit();
});
scraper.js
const cheerio = require('cheerio');
const request = require('request-promise');
const fs = require('fs');

const link = "https://www.imdb.com/chart/top/?ref_=nv_mv_250";

(async () => {
    const html = await request(link);
    const $ = cheerio.load(html);
    const table = $('.lister-list');
    const rows = table.find('tr');
    const data = [];
    for (let i = 0; i < rows.length; i++) {
        const row = rows.eq(i);
        const cols = row.find('td');
        const rank = i + 1;
        const title = cols.eq(1).find('a').text().trim();
        const year = cols.eq(1).find('span').text().trim();
        const rating = cols.eq(2).text().trim();
        const votes = cols.eq(4).text();
        const image = cols.eq(1).find('img').attr('src');
        const movie = {
            rank: rank,
            title: title,
            year: year,
            rating: rating,
            votes: votes,
            image: image
        };
        data.push(movie);
    }
    console.log(data);
    // fs.writeFile needs a callback; without one, newer Node versions throw.
    fs.writeFile('./data.json', JSON.stringify(data), function (err) {
        if (err) console.error(err);
    });
})();
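An aside on the write at the end of scraper.js: since the code already runs inside an async IIFE, the promise-based fs API (available since Node 10) lets you await the write instead of passing a callback. A minimal self-contained sketch with stand-in data:

const fsp = require('fs').promises;

(async () => {
    const data = [{ rank: 1, title: 'Example' }]; // stand-in for the scraped array
    // Awaiting the write surfaces errors via try/catch instead of a callback.
    await fsp.writeFile('./data.json', JSON.stringify(data, null, 2));
})();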

Try using:
setTimeout(function () {
    return process.abort();
}, 5000);
or try:
setTimeout(function () {
    return process.kill(process.pid);
}, 5000);
(process.abort() exits immediately and can generate a core file; process.kill(process.pid) sends SIGTERM by default.)
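Another hedged note on why Ctrl + C may not close the browser: exec runs the command through a shell, and open/start/xdg-open exit as soon as they have asked the OS to launch the browser, so child.pid is not the browser's PID. A sketch of one workaround, assuming a known browser binary (google-chrome here is only an example; substitute your own), is to spawn the browser directly and keep its handle:

const { spawn } = require('child_process');

// Spawning the browser binary directly (not via open/xdg-open) means
// the child PID really is the browser process, so it can be signalled.
const browser = spawn('google-chrome', ['https://www.google.com'], {
    stdio: 'ignore'
});

process.on('SIGINT', function () {
    browser.kill('SIGINT'); // ChildProcess#kill signals this PID directly
    process.exit();
});

Note that a browser which hands the URL off to an already-running instance may still outlive this process.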

Related

Calling a function from a smart contract

I'm making a Node app that writes contracts with the Arbitrum API, and I have successfully accessed the API. I'm new to Node; basically I need to access the Arbitrum website from code and write some contracts. If anything is unclear, ask me to edit the post.
my current code:
var Web3 = require('web3');
// NOTE: HttpProvider needs an RPC endpoint URL; it was empty in the original.
// The URL below is an example public Arbitrum endpoint; substitute your own.
var web3 = new Web3(new Web3.providers.HttpProvider('https://arb1.arbitrum.io/rpc'));
var version = web3.version.api;
var getJSON = require('get-json');

getJSON(
    'https://api.arbiscan.io/api?module=contract&action=getabi&address=0x0000000000000000000000000000000000001004&apikey=YourApiKeyToken',
    function (error, data) {
        // Etherscan-style APIs return the ABI as a JSON string in data.result.
        var contractABI = JSON.parse(data.result);
        if (contractABI != '') {
            var MyContract = web3.eth.contract(contractABI);
            var myContractInstance = MyContract.at(
                '0x604cd9df9e73a792feef9877aa53fc25d1b9baa5'
            );
            var result = myContractInstance.memberId(
                '0xfe8ad7dd2f564a877cc23feea6c0a9cc2e783715'
            );
            console.log('result1 : ' + result);
            var result2 = myContractInstance.members(1);
            console.log('result2 : ' + result2);
        } else {
            console.log('Error');
        }
    }
);
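For reference, the snippet above uses the old web3 0.x contract API (web3.eth.contract(...).at(...) with synchronous calls). A rough equivalent sketch with the web3 1.x API, reusing the same ABI and addresses (the RPC URL is again just an example):

const Web3 = require('web3');

// web3 1.x accepts the provider URL directly; this endpoint is an example.
const web3 = new Web3('https://arb1.arbitrum.io/rpc');

async function readContract(contractABI) {
    // new web3.eth.Contract replaces web3.eth.contract(...).at(...).
    const contract = new web3.eth.Contract(
        contractABI,
        '0x604cd9df9e73a792feef9877aa53fc25d1b9baa5'
    );
    // Read-only calls are asynchronous in 1.x: .methods.<fn>(args).call()
    const memberId = await contract.methods
        .memberId('0xfe8ad7dd2f564a877cc23feea6c0a9cc2e783715')
        .call();
    console.log('result1 : ' + memberId);
    const member = await contract.methods.members(1).call();
    console.log('result2 : ' + member);
}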

Nodejs Scraper isn't moving to next page(s)

Hey guys, this is a follow-on from my other question. I have created a Node.js scraper that doesn't seem to want to go through the pages; it stays on the first. My source code is below.
const rp = require('request-promise');
const request = require('request');
const otcsv = require('objects-to-csv');
const cheerio = require('cheerio');

// URLs to scrape
const baseURL = 'xxx';
const searchURL = 'xxxx';

// scrape info
const getCompanies = async () => {
    // Pagination test
    for (let index = 1; index <= 20; index = index + 1) {
        const html = await rp.get(baseURL + searchURL + index);
        const $ = await cheerio.load(html);
        console.log("Loading Pages....");
        console.log("At page number " + index);
        // end pagination test
        //const htmls = await rp(baseURL + searchURL);
        const businessMap = cheerio('a.business-name', html).map(async (i, e) => {
            const link = baseURL + e.attribs.href;
            const innerHtml = await rp(link);
            const emailAddress = cheerio('a.email-business', innerHtml).prop('href');
            const name = e.children[0].data || cheerio('h1', innerHtml).text();
            const phone = cheerio('p.phone', innerHtml).text();
            return {
                // link,
                name,
                emailAddress: emailAddress ? emailAddress.replace('mailto:', '') : '',
                phone,
            };
        }).get();
        return Promise.all(businessMap);
    }
};

console.log("Finished Scraping.... Now Saving!")

// save to CSV
getCompanies()
    .then(result => {
        const transformed = new otcsv(result);
        return transformed.toDisk('./output.csv');
    })
    .then(() => console.log('Scrape Complete :D '));
As you can see, I have tried a few different ways to make this happen, so any help will be gratefully appreciated.
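One thing that stands out, offered as a guess rather than a confirmed fix: the return Promise.all(businessMap); sits inside the for loop, so getCompanies returns as soon as page 1 has been mapped. A sketch that reuses the same requires and the hypothetical baseURL/searchURL from above, but accumulates results across all pages before returning:

const getCompaniesAllPages = async () => {
    const allBusinesses = [];
    for (let index = 1; index <= 20; index += 1) {
        const html = await rp.get(baseURL + searchURL + index);
        console.log("At page number " + index);
        const businessMap = cheerio('a.business-name', html).map(async (i, e) => {
            const link = baseURL + e.attribs.href;
            const innerHtml = await rp(link);
            const emailAddress = cheerio('a.email-business', innerHtml).prop('href');
            return {
                name: e.children[0].data || cheerio('h1', innerHtml).text(),
                emailAddress: emailAddress ? emailAddress.replace('mailto:', '') : '',
                phone: cheerio('p.phone', innerHtml).text(),
            };
        }).get();
        // Collect this page's rows instead of returning out of the loop.
        allBusinesses.push(...await Promise.all(businessMap));
    }
    return allBusinesses;
};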

Issues with request, and cheerio when web-scraping

I'm trying to write code that makes a request to a website for web scraping.
These are the steps:
Here the FIRST part of the code STARTS.
The program makes the request to the mainURL.
The program selects some objects from the HTML of the mainURL and stores them in an array of objects (advert). One of the properties of each object is its link, which we'll call numberURL; the code selects it automatically using a CSS selector. The number of objects is around 80-90.
The program makes requests to every numberURL (80-90 requests), and for each of them it sets further properties on the same object and selects another link, which we'll call accountURL.
The program creates a CSV file where it writes every object on a separate row.
Here the FIRST part of the code ENDS.
The first part actually works pretty well; it doesn't have any issues. The second part does.
Here the SECOND part of the code STARTS.
The program makes requests to every accountURL from the previous objects.
The program selects some objects from the HTML of the accountURL and stores them in another array of different objects (account), also using CSS selectors.
The program should console.log() all the account objects.
Here the SECOND part of the code ENDS.
But the second part has some bugs: when console.logging the objects, we see that their properties haven't changed from their default values.
So for debugging purposes I took one advert object and set its value manually in the code:
post[0].link = 'https://999.md/ru/profile/denisserj'
When running the code for this object it actually works correctly and shows the changed properties, but for the rest it doesn't.
I tried setting some timeouts, thinking the code tries to read the link before the second request has finished, but with no effect.
I also tried console.logging the link to see if it exists in the array; it does exist there, but still no effect.
Finally here is the code:
// CLASSES
class advert {
    constructor() {
        this.id = 0;
        this.title = new String(); // was "this.tile" in the original, a typo
        this.link = new String();
        this.phone = new String();
        this.account = new String();
        this.accountLink = new String();
        this.text = new String();
        this.operator = new String();
    }
    show() {
        console.log(this.id, this.title, this.link, this.phone, this.account, this.accountLink, this.text, this.operator);
    }
}

class account {
    constructor() {
        this.name = 0;
        this.createdAt = 0;
        this.phone = [];
        this.ads = [];
        this.adsNumber = 0;
    }
    show() {
        console.log(this.name, this.createdAt, this.phone, this.ads, this.adsNumber);
    }
}

// HEADERS
const mainRequest = require('request');
const auxRequest = require('request');
const cheerio1 = require('cheerio');
const cheerio2 = require('cheerio');
const fs = require('fs');
const fs2 = require('fs');
const adFile = fs.createWriteStream('anunturi.csv');
const accFile = fs2.createWriteStream('conturi.csv');

// SETTINGS
const host = 'https://999.md'
const category = 'https://999.md/ru/list/transport/cars'
const timeLimit = 60; // seconds

// VARIABLES
let post = [];
let postNumber = 0;
let acc = [];

// FUNCTIONS
function deleteFromArray(j) {
    post.splice(j, 1);
}

function number(i) {
    let category = post[i].link;
    auxRequest(category, (error, response, html) => {
        if (!error && response.statusCode == 200) {
            const $ = cheerio1.load(html);
            let phone;
            const siteTitle = $('strong').each((id, el) => {
                phone = $(el).text();
            });
            const txt = $('.adPage__content__description').html();
            const person = $('.adPage__header__stats').find('.adPage__header__stats__owner').text();
            const linkToPerson = host + $('.adPage__header__stats').find('.adPage__header__stats__owner').find('a').attr('href');
            post[i].phone = phone;
            post[i].account = person;
            post[i].accountLink = linkToPerson;
            post[i].text = txt;
            if (i == postNumber) {
                console.log('1. Number Putting done')
                writeToFileAd(accountPutter, writeToFileAccount); // arguments are ignored; writeToFileAd takes none
            }
        }
    });
}

function writeToFileAd() {
    adFile.write('ID, Titlu, Link, Text, Cont, LinkCont, Operator\n')
    for (let i = 0; i <= postNumber; i++) {
        adFile.write(`${post[i].id}, ${post[i].title}, ${post[i].link}, ${post[i].phone}, ${post[i].account}, ${post[i].accountLink}, ${post[i].operator}\n`);
    }
    console.log('2. Write To File Ad done')
    accountPutter();
}

function accountAnalyzis(i) {
    let category = post[i].link;
    const mainRequest = require('request');
    category = category.replace('/ru/', '/ro/');
    mainRequest(category, (error, response, html) => {
        if (!error && response.statusCode == 200) {
            const $ = cheerio2.load(html);
            const name = $('.user-profile__sidebar-info__main-wrapper').find('.login-wrapper').text();
            let createdAt = $('.date-registration').text();
            createdAt = createdAt.replace('Pe site din ', '');
            const phones = $('.user-profile__info__data').find('dd').each((id, el) => {
                let phone = $(el).text();
                acc[i].phone.push(phone);
            });
            const ads = $('.profile-ads-list-photo-item-title').find('a').each((id, el) => {
                let ad = host + $(el).attr('href');
                acc[i].ads.push(ad);
                acc[i].adsNumber++;
            });
            acc[i].name = name;
            acc[i].createdAt = createdAt;
            console.log(name)
            if (i == postNumber) {
                console.log('3. Account Putting done')
                writeToFileAccount();
            }
        }
    });
}

function writeToFileAccount() {
    for (let i = 0; i <= postNumber; i++) {
        accFile.write(`${acc[i].name}, ${acc[i].createdAt}, ${acc[i].phone}, ${acc[i].ads}, ${acc[i].adsNumber}\n`);
    }
    console.log('4. Write to file Account done');
}

function numberPutter() {
    for (let i = 0; i <= postNumber; i++) {
        number(i);
    }
}

function accountPutter() {
    for (let i = 0; i <= postNumber; i++) {
        accountAnalyzis(i);
    }
}

// MAIN
mainRequest(category, (error, response, html) => {
    let links = [];
    for (let i = 0; i < 1000; i++) {
        post[i] = new advert();
    }
    for (let i = 0; i < 1000; i++) {
        acc[i] = new account();
    }
    if (!error && response.statusCode == 200) {
        const $ = cheerio2.load(html);
        const siteTitle = $('.ads-list-photo-item-title').each((id, el) => {
            const ref = host + $(el).children().attr('href');
            const title = $(el).text();
            post[id].id = id + 1;
            post[id].title = title;
            post[id].link = ref;
            links[id] = ref;
            postNumber = id;
        });
        post[0].link = 'https://999.md/ru/profile/denisserj' // debug: set manually (see question text)
        numberPutter()
    }
});
You have an error in the line:
const siteTitle = $('.ads-list-photo-item-title').each((id, el) => {
What you actually want is $('.ads-list-photo-item-title').find('a').each(...).
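With that change, el is the anchor itself, so the href can be read straight off it. A sketch of how the main selector loop would look under that fix:

const $ = cheerio2.load(html);
$('.ads-list-photo-item-title').find('a').each((id, el) => {
    const ref = host + $(el).attr('href'); // el is now the <a>, so no .children() hop
    const title = $(el).text();
    post[id].id = id + 1;
    post[id].title = title;
    post[id].link = ref;
    postNumber = id;
});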

Node.JS RemoteExec call not firing properly

I am querying a database for a list of servers to perform a command on. The array is populated properly and echoes out as planned, but none of the connections occur. I tried both passing the array directly into rexec and looping through it with forEachAsync; neither processes the server list properly. Am I referencing the array elements improperly?
Mind the syntax errors at the end; I was just trying to include both methods I tried.
#!/usr/bin/env node
var mysql = require('mysql');
var resultset = require('node-array');
var rexec = require('remote-exec');
var fs = require('fs');
var _ = require('lodash');
//var streamBuffers = require('stream-buffers');
var moment = require('moment');
var util = require('util');

var now = moment().format('YYYYMMDD_HHmmss');
var logStdout = process.stdout;
var errStderr = process.stderr;

console.log = function () {
    logStdout.write(util.format.apply(null, arguments) + '\n');
}
console.error = function () {
    errStderr.write(util.format.apply(null, arguments) + '\n');
}

var connection = mysql.createConnection({
    host     : 'abc',
    user     : 'user',
    password : '******',
    database : 'db'
});

var ssh_options = {
    port: 22,
    username: 'e109gh',
    privateKey: fs.readFileSync('R:/nodeJS/sshkey.priv'),
    stdout: fs.createWriteStream('./out.txt'),
    stderr: fs.createWriteStream('./err.txt')
}

var my_conn_options = _.clone(ssh_options);
var cmds = ['hostname -i'];

connection.query('SELECT name FROM server', function(err, rows) {
    rows.forEachAsync(function(element, index, array) {
        console.log(element.name);
        rexec(element.name, cmds, my_conn_options, function(err) {
            if (err) {
                now = moment().format('YYYY-MM-DD HH:mm:ss');
                console.error(err);
            } else {
                console.log("it worked for " + element.name);
            }
        });
    });
});

// var buffer = new streamBuffers.WritableStreamBuffer();
connection.end(function(err) {});
// my_conn_options.stdout = buffer;
//
// rexec(rows,cmds,my_conn_options,function(err){
//     if (err) {
//         now = moment().format('YYYY-MM-DD HH:mm:ss');
//         console.error(err);
//     } else {
//         console.log()
//     }
// });
//
//});
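Since the question mentions passing the array directly into rexec, one hedged suggestion is to build the full host list from the query results first, hand the whole list to a single rexec call, and end the MySQL connection in the callback once the work is done:

connection.query('SELECT name FROM server', function (err, rows) {
    if (err) return console.error(err);
    // Build the host array first, then hand the whole list to rexec.
    var hosts = rows.map(function (row) { return row.name; });
    rexec(hosts, cmds, my_conn_options, function (err) {
        if (err) {
            console.error(err);
        } else {
            console.log('it worked for ' + hosts.join(', '));
        }
        connection.end(function () {});
    });
});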

Reading a file line by line, parse them and insert them in mongo in node js

I have a tab-separated file with thousands of rows of data. How can I use Node.js to read the file line by line, parse each line, build an object, and insert it into MongoDB?
I am just learning Node and Mongo and come from a different background, so how can this be done? In the end the MongoDB collection has to be populated with proper data. I searched the net but could not find a complete solution.
Thanks.
I had an issue with the answer by Juvenik: the database would not be populated by the time readline had completed, because the lines were being read synchronously while the DB insertion was asynchronous.
Instead, I found a simpler solution with the line-reader package: it reads the lines and waits for a callback before continuing.
var MongoClient = require('mongodb').MongoClient
var dbName = 'yourDbName'
var url = 'mongodb://localhost:27017/' + dbName
var collectionName = 'yourCollectionName'
var filename = 'yourFileName.txt'
var printLine = 1000

MongoClient.connect(url, function(err, db) {
    if (err) {
        console.error('Problem connecting to database')
    } else {
        console.log('Connected correctly to server.')
        var lineReader = require('line-reader')
        var collection = db.collection(collectionName)
        var lineNum = -1
        var headers = []
        lineReader.eachLine(filename, function(line, last, cb) {
            lineNum++
            try {
                var split = line.split('\t')
                var object = {}
                if (lineNum > 0) {
                    // Data row: map each column onto the header captured from line 0.
                    for (var i = 0; i < split.length; i += 1) {
                        object[headers[i]] = split[i]
                    }
                    collection.insert(object, function (insertErr, insertObj) {
                        if (insertErr) console.error(insertErr)
                        if (lineNum % printLine === 0) console.log('Line ' + lineNum)
                        if (last) {
                            console.log('Done with ' + filename + ' (' + lineNum + ' records)')
                            process.exit(0)
                        } else {
                            cb()
                        }
                    })
                } else {
                    // First line holds the column headers.
                    headers = line.split('\t')
                    cb()
                }
            } catch (lineError) {
                console.error(lineError)
            }
        })
    }
})
I came across a similar problem, and this approach worked for me. Have a look; it might be helpful.
var mongoDb = require('mongodb');
var mongoClient = mongoDb.MongoClient;
var dbname = 'YOUR_DB_NAME';
var collectionName = 'YOUR_COLLECTION_NAME';
var url = 'mongodb://localhost:27017/' + dbname;
var filename = 'File_Name.txt';
console.log('***************Process started');

mongoClient.connect(url, function(err, db) {
    if (err) {
        console.log('error on connection ' + err);
    } else {
        console.log('***************Successfully connected to mongodb');
        var collection = db.collection(collectionName);
        var fs = require('fs');
        var readline = require('readline');
        var stream = require('stream');
        var instream = fs.createReadStream(filename);
        var outstream = new stream;
        var rl = readline.createInterface(instream, outstream);
        console.log('***************Parsing, please wait ...');
        rl.on('line', function(line) {
            try {
                var arr = line.split('\t');
                var object = {};
                // Parse them here
                // Example
                object['name'] = arr[0]; // Just an example
                var res = collection.insert(object);
            } catch (err) {
                console.log(err);
            }
        });
        rl.on('close', function() {
            db.close();
            console.log('***************completed');
        });
    }
});
I am a learner too; if someone can make this better, that would be good.
Here is a more performant (inserting batches of objects) and updated version (using async and the latest Mongo driver) of frank-0's answer:
const lineReader = require('line-reader');

// `collection` is assumed to be an already-open MongoDB collection handle
// (it was never defined in the original snippet, so it is passed in here).
async function readFileAndInsertInMongo(file, collection) {
    let total = 0;
    return new Promise((resolve, reject) => {
        let buffer = [];
        lineReader.eachLine(file, (line, last, cb) => {
            // prepare your object based on the line content
            let insertObject = {'some_content': 'some_value'};
            buffer.push(insertObject);
            total++; // the original never incremented this, so every line triggered a flush
            if (total % 10000 === 0 || last) {
                collection.insertMany(buffer, function(err, res) {
                    if (last) {
                        if (err) {
                            reject(err);
                        } else {
                            resolve(res);
                        }
                    } else {
                        buffer = [];
                        return cb();
                    }
                });
            } else {
                return cb();
            }
        });
    });
}
This really is the best solution I have found to parse huge files and insert them in the database without exploding Node's memory. Hope this can help ;)
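A hypothetical usage sketch for the helper above (connection string, database, and collection names are placeholders), using the promise-based driver API:

const { MongoClient } = require('mongodb');

(async () => {
    // Placeholder connection details; adjust to your environment.
    const client = await MongoClient.connect('mongodb://localhost:27017');
    const collection = client.db('yourDbName').collection('yourCollectionName');
    await readFileAndInsertInMongo('yourFileName.txt', collection);
    await client.close();
})();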
