I'm trying to post a # sign so that some words turn into hashtags, but the code doesn't work. It goes via an API, using this code:
true == function()
{
var getMyItem = function (str)
{
if (sessionStorage.getItem(str) == null)
{
setMyItem(str, 0);
return 0;
}
return sessionStorage.getItem(str);
}
var setMyItem = function (key, value)
{
sessionStorage.setItem(key, value);
}
function bot(post)
{
const Http = new XMLHttpRequest();
var Token="xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; // Token
var ID= "xxxxxx";
var url= 'https://bot.xxx/api/'+Token+'/sendMessage?text='+post+'&chat_id='+ID;
Http.open("GET", url);
Http.send();
}
// you need to change the url variable contents to your own Telegram API endpoint (or that of a similar social-network API)
So when it comes time to post, I call it like this:
var myDate = new Date().toTimeString().replace(/.*(\d{2}:\d{2}:\d{2}).*/, "$1");
var post_n = "%23Hi. It's: " + myDate + "\n";
var post = encodeURI(post_n);
if (getMyItem(key + "htr") != "yes")
{
bot(post);
setMyItem((key + "htr"),"yes");
}
return true;
}
}()
, it doesn't support the # sign in my browsers (including Chrome and Firefox), and nothing is actually sent.
If it were for Telegram, the URL would look like this (I don't know whether it works or not; I'm actually talking about another social network):
var url= 'https://api.telegram.org/'+Token+'...
Please tell me how to send the # character. By the way, "%23" doesn't work.
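A note on what is probably going wrong here: encodeURI leaves # alone (it is a URI delimiter) but does encode %, so encodeURI("%23Hi") produces "%2523Hi" and the server receives the literal text "%23" instead of a hash sign. A minimal sketch of the usual fix, assuming the endpoint accepts a percent-encoded text parameter (Token and ID as in the snippet above):
var myDate = new Date().toTimeString().replace(/.*(\d{2}:\d{2}:\d{2}).*/, "$1");
var post_n = "#Hi. It's: " + myDate + "\n"; // keep a literal # in the text
var post = encodeURIComponent(post_n); // # becomes %23 exactly once
var url = 'https://bot.xxx/api/' + Token + '/sendMessage?text=' + post + '&chat_id=' + ID;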
I used this command to export the hard drive ID to drive C:
var command="wmic diskdrive get SerialNumber > C:/idhdd.txt";
app.system("cmd.exe /c \"" + command + "\"");
I get the text file
SerialNumber
2012062914345300
Is there a JavaScript statement to remove the "SerialNumber" header line? I just want the ID by itself in the text file saved on drive C.
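(For the string handling alone, a hedged one-liner: once the file's contents are in a variable, drop the first line and strip the whitespace.)
var text = 'SerialNumber\n2012062914345300\n'; // contents of C:/idhdd.txt
var id = text.split('\n').slice(1).join('').replace(/\s+/g, ''); // drop the header line, strip whitespace
// id === '2012062914345300'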
Here's a ready-to-use getDriveIDs() function that should work in any Adobe app and will return an array of HDD ID strings. I hope this can easily be generalized to other scenarios that need Windows scripting inside Adobe scripting ;-)
//----------------------------------------------------------------------//
// Detects IDs (serial numbers) of connected drives and returns them as array of strings.
var getDriveIDs = function() {
var idFile = File(Folder.temp + '/saved_hdd_serials.txt');
var scriptFile = File(Folder.temp + '/dump_hdd_serials.bat');
var scriptContent = 'wmic diskdrive get SerialNumber > ' + idFile.fsName + '\n';
var ids = [];
withTempFile(scriptFile, scriptContent, function() {
scriptFile.execute();
while (!idFile.exists || idFile.length == 0) $.sleep(100); // wait for the asynchronous script execution to finish
withTempFile(idFile, undefined, function(file, lines) {
ids = lines.slice(1);
});
});
return ids;
};
//----------------------------------------------------------------------//
// utilities
var withTempFile = function(file, content, callback) {
if (undefined == content) { // read temp file
file.open('r');
content = [];
while (!file.eof)
content.push(file.readln());
} else { // write temp file
file.open('w');
file.write(content);
content = undefined;
}
file.close();
callback(file, content);
file.remove();
}
//----------------------------------------------------------------------//
// main: demo
var ids = getDriveIDs();
alert('Drive IDs:\n\t' + ids.join('\n\t'));
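As for the generalization mentioned above: the same pattern (write a temporary .bat, execute it, wait, read the redirected output) can be wrapped in a generic helper. A hedged sketch; runWindowsCommand is a hypothetical name, and it reuses withTempFile from above:
// Runs an arbitrary Windows command via a temp .bat file and returns its output lines.
var runWindowsCommand = function(command) {
    var outFile = File(Folder.temp + '/win_cmd_output.txt');
    var batFile = File(Folder.temp + '/win_cmd.bat');
    var output = [];
    withTempFile(batFile, command + ' > ' + outFile.fsName + '\n', function() {
        batFile.execute();
        while (!outFile.exists || outFile.length == 0) $.sleep(100); // crude wait for the async script
        withTempFile(outFile, undefined, function(file, lines) {
            output = lines;
        });
    });
    return output;
};
// e.g. var serials = runWindowsCommand('wmic diskdrive get SerialNumber').slice(1);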
I get data from MySQL to check whether the login is correct. On success I need to save some data in cookies for 30 minutes and then redirect to my homepage. I tried to do this in the code below, but it didn't work.
And to satisfy another curiosity: can I include a Node.js file (one that connects to the database and executes queries) in an HTML file as a script, and then use the result set to fill the HTML form with jQuery?
// This one is what is in the top of modelAutonomo.js
var dadosJSON = JSON.stringify("{}");
const sql = require('../config/sql');
const requisicao = require('request');
var cookieNome = '';
var cookieEmail = '';
var cookieCPF = '';
var autonomo = {
...
...
// This is the function to login
fazerLogin: async function (entradaJSON) {
var aux;
console.log("Encontrado " + entradaJSON);
dadosJSON = converterParaObjetoJSON(entradaJSON);
aux = await sql.consultarAutonomoCPFeEmail(dadosJSON);
console.log("Aux: " + aux);
aux = JSON.parse(aux);
console.log(dadosJSON.senha + " e " + aux.senha);
if(dadosJSON.senha != aux.senha) {
console.log("Senha incorreta!"); // "Incorrect password!"
} else if (dadosJSON.cpf != aux.cpf) {
console.log("CPF incorreto!"); // "Incorrect CPF!"
} else {
preencher(aux);
console.log("Logado: " + autonomo.nome);
cookieEmail = autonomo.email;
cookieNome = autonomo.nome;
cookieCPF = autonomo.cpf;
}
// I tried this to make the redirect, but it doesn't even print the page's HTML to the console
requisicao(__dirname + '/../html/index.html', function (error, response, body) {
if (!error && response.statusCode == 200) {
console.log(body) // Print the my web homepage.
}
});
}
}
The structure of app is:
index.js
|--model
|--modelAutonomo.js
|--config
|--routes.js (who redirects to another pages/files of code)
|--html
|--index.html (can be redirected by sending a request to "/")
|--controller
And this is its git repository:
https://github.com/diegossilva-1995-01-25/ReformaAqui
I see in your repo that you are using express.js. The express documentation is fairly straightforward:
set a cookie
redirect
res.cookie(cookieName, cookieValue);
res.redirect(redirectPath);
That res variable is available in the request handler.
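Put together for the 30-minute requirement, a minimal sketch of a login route (the route path, cookie names, and the idea that fazerLogin returns the logged-in user or null are assumptions, not taken from the repo):
// routes.js (sketch)
app.post('/login', async function (req, res) {
    var usuario = await autonomo.fazerLogin(req.body); // assumed to return the user object, or null on failure
    if (!usuario) return res.redirect('/login');
    var trintaMinutos = 30 * 60 * 1000; // maxAge is in milliseconds
    res.cookie('nome', usuario.nome, { maxAge: trintaMinutos });
    res.cookie('email', usuario.email, { maxAge: trintaMinutos });
    res.cookie('cpf', usuario.cpf, { maxAge: trintaMinutos });
    res.redirect('/'); // back to the homepage served at "/"
});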
I am currently building a web scraper in NodeJS and I am facing a certain problem. After running my code, I receive this error:
undefined is not a valid uri or options object.
I am not sure how to bypass this error, I've looked at these examples: Example One, Example Two
Here is all my code:
var request = require('request');
var cheerio = require('cheerio');
var URL = require('url-parse');
var START_URL = "http://example.com";
var pagesVisited = {};
var numPagesVisited = 0;
var pagesToVisit = [];
var url = new URL(START_URL);
var baseUrl = url.protocol + "//" + url.hostname;
pagesToVisit.push(START_URL);
setInterval(crawl,5000);
function crawl() {
var nextPage = pagesToVisit.pop();
if (nextPage in pagesVisited) {
// We've already visited this page, so repeat the crawl
setInterval(crawl,5000);
} else {
// New page we haven't visited
visitPage(nextPage, crawl);
}
}
function visitPage(url, callback) {
// Add page to our set
pagesVisited[url] = true;
numPagesVisited++;
// Make the request
console.log("Visiting page " + url);
request(url, function(error, response, body) {
// Check status code (200 is HTTP OK)
console.log("Status code: " + response.statusCode);
if(response.statusCode !== 200) {
console.log(response.statusCode);
callback();
return;
}else{
console.log(error);
}
// Parse the document body
var $ = cheerio.load(body);
collectInternalLinks($);
// In this short program, our callback is just calling crawl()
callback();
});
}
function collectInternalLinks($) {
var relativeLinks = $("a[href^='/']");
console.log("Found " + relativeLinks.length + " relative links on page");
relativeLinks.each(function() {
pagesToVisit.push(baseUrl + $(this).attr('href'));
});
}
Once your pagesToVisit empties, the url will be undefined, since calling pop on an empty array returns undefined.
I would add a check in visitPage that url is not undefined, e.g.
function visitPage(url, callback) {
if (!url) {
// We're done
return;
}
Or in crawl, check that pagesToVisit has elements, e.g.
function crawl() {
var nextPage = pagesToVisit.pop();
if (!nextPage) {
// We're done!
console.log('Crawl complete!');
} else if (nextPage in pagesVisited) {
// We've already visited this page, so repeat the crawl
setInterval(crawl,5000);
} else {
// New page we haven't visited
visitPage(nextPage, crawl);
}
}
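One further caveat: every call to setInterval registers an additional repeating timer, so re-arming it inside crawl on top of the top-level setInterval(crawl, 5000) multiplies timers over time. A sketch of one way to avoid that (my suggestion, not part of the original code): keep a single timer and clear it when the queue is empty.
var timer = setInterval(crawl, 5000); // one repeating timer drives the whole crawl
function crawl() {
    var nextPage = pagesToVisit.pop();
    if (!nextPage) {
        clearInterval(timer); // nothing left to visit: stop the timer
        console.log('Crawl complete!');
    } else if (!(nextPage in pagesVisited)) {
        visitPage(nextPage, function () {}); // the next tick picks up the next page
    }
    // already-visited pages are simply skipped until the next tick
}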
Taking hints from Terry Lennox's answer, I modified the crawl() function slightly:
function crawl() {
var nextPage = pagesToVisit.pop();
if (nextPage in pagesVisited) {
// We've already visited this page, so repeat the crawl
setInterval(crawl, 5000);
} else if(nextPage) {
// New page we haven't visited
visitPage(nextPage, crawl);
}
}
All I am doing is checking whether the popped element exists before calling visitPage().
I get the following output:
Visiting page http://example.com
Status code: 200
response.statusCode: 200
null
Found 0 relative links on page
^C
I'm just finishing off this basic webscraper project for a tshirt website.
It enters through one hardcoded url, the home page. It searches for any product pages and adds them to urlSet. If it finds another link (the remainder), it scrapes that too to find any more product pages. It then scrapes the product pages in urlSet, grabs the tshirt data (price, img, title), converts it, and writes it to a CSV file.
For some reason, this is not working on the second run through of the scrape with 'remainder'.
If I remove the second scrape of url, everything works out fine and the file gets written correctly. But if I want to get the other product pages, it seems to be failing somewhere.
Here is my code. I apologise for posting so much of it, but I don't think it can be understood properly without the full context; hopefully it's commented okay:
//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
//Save the scraped data in a spreadsheet (CSV format).
'use strict';
//Modules being used:
var cheerio = require('cheerio');
var json2csv = require('json2csv');
var request = require('request');
var moment = require('moment');
var fs = require('fs');
//hardcoded url
var url = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = new Set();
var remainder;
var tshirtArray = [];
const requestPromise = function(url) {
return new Promise(function(resolve, reject) {
request(url, function(error, response, html) {
if(error)return reject(error);
if(!error && response.statusCode == 200){
return resolve(html);
}
});
});
}
// Go into webpage via url, load html and grab links shirt in url
function scrape (url) {
console.log("Currently scraping " + url)
return requestPromise(url)
.then(function(html) {
var $ = cheerio.load(html);
var links = [];
//get all the links
$('a[href*=shirt]').each(function(){
var a = $(this).attr('href');
//add into link array
links.push(url + a);
});
// return array of links
return links;
});
}
function nextStep (arrayOfLinks) {
var promiseArray = [];
console.log(arrayOfLinks);
for(var i = 0; i < arrayOfLinks.length; i++){
promiseArray.push(requestPromise(arrayOfLinks[i]));
}
//return both the html of pages and their urls
return Promise.all(promiseArray)
.then(function(arrayOfHtml){
return {arrayOfHtml: arrayOfHtml , arrayOfUrls: arrayOfLinks};
});
}
//go through the html of each url and add to urlSet if there is a checkout button
//add to remainder otherwise to rescrape
function lastStep (obj){
for(var i = 0; i < obj.arrayOfHtml.length; i++){
var $ = cheerio.load(obj.arrayOfHtml[i]);
//if page has a submit it must be a product page
if($('[type=submit]').length !== 0){
//add page to set
urlSet.add(obj.arrayOfUrls[i]);
console.log(obj.arrayOfUrls[i]);
} else if(remainder == undefined) {
//if not a product page, add it to remainder so that another scrape can be performed.
remainder = obj.arrayOfUrls[i];
console.log("The remainder is " + remainder)
}
}
//return remainder for second run-through of scrape
return remainder;
}
//iterate through urlSet (product pages and grab html)
function lastScraperPt1(){
//call lastScraper so we can grab data from the set (product pages)
//scrape set, product pages
var promiseArray = [];
for(var item of urlSet){
var url = item;
promiseArray.push(requestPromise(url));
}
return Promise.all(promiseArray)
.then(function(arrayOfHtml){
return arrayOfHtml;
});
}
//iterate over the html of the product pages and store data as objects
function lastScraperPt2(html){
for(var i = 0; i < html.length; i++){
var $ = cheerio.load(html[i]);
//grab data and store as variables
var price = $('.price').text();
var imgURL = $('.shirt-picture').find('img').attr('src');
var title = $('body').find('.shirt-details > h1').text().slice(4);
var tshirtObject = {};
//add values into tshirt object
tshirtObject.Title = title;
tshirtObject.Price = price;
tshirtObject.ImageURL = imgURL;
tshirtObject.URL = url;
tshirtObject.Date = moment().format('MMMM Do YYYY, h:mm:ss a');
//add the object into the array of tshirts
tshirtArray.push(tshirtObject);
}
convertJson2Csv();
}
//convert tshirt objects and save as CSV file
function convertJson2Csv(){
//The scraper should generate a folder called `data` if it doesn’t exist.
var dir ='./data';
if(!fs.existsSync(dir)){
fs.mkdirSync(dir);
}
var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Date'];
//convert tshirt data into CSV and pass in fields
var csv = json2csv({ data: tshirtArray, fields: fields });
//Name of file will be the date
var fileDate = moment().format('MM-DD-YY');
var fileName = dir + '/' + fileDate + '.csv';
//Write file
fs.writeFile(fileName, csv, {overwrite: true}, function(err) {
console.log('file saved');
if (err) throw err;
});
}
scrape(url) //scrape from original entry point
.then(nextStep)
.then(lastStep)
.then(scrape) //scrape again but with remainder url
.then(nextStep)
.then(lastStep)
.then(lastScraperPt1)
.then(lastScraperPt2)
.catch(function(err) {
// handle any error from any request here
console.log(err);
});
I'm console logging the arrayOfLinks in nextStep, so I can see that they are being grabbed properly; I just cannot work out why they aren't being passed through to 'lastStep' correctly.
Currently scraping http://shirts4mike.com/
[ 'http://shirts4mike.com/shirts.php',
'http://shirts4mike.com/shirts.php',
'http://shirts4mike.com/shirt.php?id=108',
'http://shirts4mike.com/shirt.php?id=107',
'http://shirts4mike.com/shirt.php?id=106',
'http://shirts4mike.com/shirt.php?id=105' ]
The remainder is http://shirts4mike.com/shirts.php
http://shirts4mike.com/shirt.php?id=108
http://shirts4mike.com/shirt.php?id=107
http://shirts4mike.com/shirt.php?id=106
http://shirts4mike.com/shirt.php?id=105
Currently scraping http://shirts4mike.com/shirts.php
[ 'http://shirts4mike.com/shirts.phpshirts.php',
'http://shirts4mike.com/shirts.phpshirt.php?id=101',
'http://shirts4mike.com/shirts.phpshirt.php?id=102',
'http://shirts4mike.com/shirts.phpshirt.php?id=103',
'http://shirts4mike.com/shirts.phpshirt.php?id=104',
'http://shirts4mike.com/shirts.phpshirt.php?id=105',
'http://shirts4mike.com/shirts.phpshirt.php?id=106',
'http://shirts4mike.com/shirts.phpshirt.php?id=107',
'http://shirts4mike.com/shirts.phpshirt.php?id=108' ]
BUT if I choose to only call the first scrape and don't call the second, like this:
scrape(url) //scrape from original entry point
.then(nextStep)
.then(lastStep)
.then(lastScraperPt1)
.then(lastScraperPt2)
.catch(function(err) {
// handle any error from any request here
console.log(err);
});
... Then everything works. I just don't get to all the urls.
What is happening here and how can I fix it? Thank you guys
The issue is that tshirtArray is not defined in convertJson2Csv(). At the end of lastScraperPt2, pass tshirtArray to convertJson2Csv():
convertJson2Csv(tshirtArray)
and in convertJson2Csv:
function convertJson2Csv(tshirtArray) {
// do stuff
}
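Equivalently, you can return the array from lastScraperPt2 and let the promise chain feed it through; a sketch of that wiring:
function lastScraperPt2(html) {
    // ... build tshirtArray exactly as in the original ...
    return tshirtArray; // hand the data to the next .then()
}
// and at the end of the chain:
// .then(lastScraperPt2)
// .then(convertJson2Csv) // convertJson2Csv receives tshirtArray as its argument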
One problem seems to be in your lastStep. It looks like you mean for remainder to be another array of urls; correct me if I'm wrong there. However, what's happening is that the first time the if($('[type=submit]').length !== 0) condition fails, you automatically go down to the next block, because remainder starts out undefined. Whatever the current url is, you assign it to remainder. For the rest of the iterations of your for-loop, you will never again hit the condition where remainder == undefined. So you will only ever end up with one url assigned to remainder, while any more that you were hoping to get are simply passed over.
You might want to define remainder as remainder = [];. And then instead of saying else if (remainder == undefined), you would just say
} else {
remainder.push(obj.arrayOfUrls[i]);
}
However, then you're passing an array of urls to scrape, when scrape is only expecting a single url. If this is what you want, and I am right in assuming that you mean for remainder to be an array of urls, you could define a new function as follows:
function scrapeRemainders(remainders) {
    var promises = [];
    remainders.forEach(function (url) {
        promises.push(requestPromise(url));
    });
    return Promise.all(promises).then(function (results) {
        return _.flattenDeep(results); // note the return, so the flattened results propagate
    });
}
Then you would replace the second scrape in your promise chain with scrapeRemainders. Also, for the _ in the previous function, you would need to npm install lodash and then var _ = require('lodash'). On a side note, lodash has nothing to do with promises, but it is a great tool for data manipulation; you should look into it when you have the chance.
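Pulling those pieces together, lastStep with the array version would look something like this sketch:
var remainder = []; // now an array, collected across the whole loop
function lastStep(obj) {
    for (var i = 0; i < obj.arrayOfHtml.length; i++) {
        var $ = cheerio.load(obj.arrayOfHtml[i]);
        if ($('[type=submit]').length !== 0) {
            urlSet.add(obj.arrayOfUrls[i]); // product page
        } else {
            remainder.push(obj.arrayOfUrls[i]); // queue it for the second pass
        }
    }
    return remainder; // an array of urls, ready for scrapeRemainders
}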
Also, in lastScraperPt1, you can change
return Promise.all(promiseArray)
.then(function(arrayOfHtml){
return arrayOfHtml;
});
to
return Promise.all(promiseArray);
It does the same thing.
Hope this helps. If this does not answer your question, leave a comment and I can change my answer accordingly.
All fixed: it was grabbing the wrong urls in scrape(). Though I only knew this after I logged the status codes to the console:
//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
//Save the scraped data in a spreadsheet (CSV format).
'use strict';
//Modules being used:
var cheerio = require('cheerio');
var json2csv = require('json2csv');
var request = require('request');
var moment = require('moment');
var fs = require('fs');
//hardcoded url
var urlHome = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = [];
var tshirtArray = [];
const requestPromise = function(url) {
return new Promise(function(resolve, reject) {
request(url, function(error, response, html) {
if(error) {
errorHandler(error);
return reject(error);
}
if(!error && response.statusCode == 200){
return resolve(html);
}
if(response.statusCode !== 200){
console.log("response code is " + response.statusCode);
}
return resolve("");
});
});
}
// Go into webpage via url, load html and grab links shirt in url
function scrape (url) {
console.log("Currently scraping " + url)
return requestPromise(url)
.then(function(html) {
var $ = cheerio.load(html);
var links = [];
var URL = 'http://shirts4mike.com/';
//get all the links
$('a[href*=shirt]').each(function(){
var a = $(this).attr('href');
//add into link array
links.push(URL + a);
});
// return array of links
return links;
});
}
function nextStep (arrayOfLinks) {
var promiseArray = [];
console.log(arrayOfLinks);
for(var i = 0; i < arrayOfLinks.length; i++){
promiseArray.push(requestPromise(arrayOfLinks[i]));
}
//return both the html of pages and their urls
return Promise.all(promiseArray)
.then(function(arrayOfHtml){
return {arrayOfHtml: arrayOfHtml , arrayOfUrls: arrayOfLinks};
});
}
//go through the html of each url and add to urlSet if there is a checkout button
//add to remainder otherwise to rescrape
function lastStep (obj){
for(var i = 0; i < obj.arrayOfHtml.length; i++){
var $ = cheerio.load(obj.arrayOfHtml[i]);
//if page has a submit it must be a product page
if($('[type=submit]').length !== 0){
//add page to set
urlSet.push(obj.arrayOfUrls[i]);
console.log(obj.arrayOfUrls[i]);
} else if(remainder == undefined) {
//if not a product page, add it to remainder so that another scrape can be performed.
var remainder = obj.arrayOfUrls[i];
console.log("The remainder is " + remainder)
}
}
//return remainder for second run-through of scrape
return remainder;
}
//iterate through urlSet (product pages and grab html)
function lastScraperPt1(){
//call lastScraper so we can grab data from the set (product pages)
//scrape set, product pages
var promiseArray = [];
for(var item of urlSet){
var url = item;
promiseArray.push(requestPromise(url));
}
return Promise.all(promiseArray)
.then(function(arrayOfHtml){
return arrayOfHtml;
});
}
//iterate over the html of the product pages and store data as objects
function lastScraperPt2(html){
for(var i = 0; i < html.length; i++){
var $ = cheerio.load(html[i]);
//grab data and store as variables
var price = $('.price').text();
var imgURL = $('.shirt-picture').find('img').attr('src');
var title = $('body').find('.shirt-details > h1').text().slice(4);
var tshirtObject = {};
//add values into tshirt object
tshirtObject.Title = title;
tshirtObject.Price = price;
tshirtObject.ImageURL = urlHome + imgURL;
tshirtObject.URL = urlSet[i];
tshirtObject.Date = moment().format('MMMM Do YYYY, h:mm:ss a');
//add the object into the array of tshirts
tshirtArray.push(tshirtObject);
}
return tshirtArray;
}
//convert tshirt objects and save as CSV file
function convertJson2Csv(tshirtArray){
//The scraper should generate a folder called `data` if it doesn’t exist.
var dir ='./data';
if(!fs.existsSync(dir)){
fs.mkdirSync(dir);
}
var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Date'];
//convert tshirt data into CSV and pass in fields
var csv = json2csv({ data: tshirtArray, fields: fields });
//Name of file will be the date
var fileDate = moment().format('MM-DD-YY');
var fileName = dir + '/' + fileDate + '.csv';
//Write file
fs.writeFile(fileName, csv, {overwrite: true}, function(err) {
console.log('file saved');
if (err) errorHandler(err);
});
}
scrape(urlHome) //scrape from original entry point
.then(nextStep)
.then(lastStep)
.then(scrape)
.then(nextStep)
.then(lastStep)
.then(lastScraperPt1)
.then(lastScraperPt2)
.then(convertJson2Csv)
.catch(function(err) {
// handle any error from any request here
console.log(err);
});
//If the site is down, an error message describing the issue should appear in the console.
//This is to be tested by disabling wifi on your device.
//When an error occurs log it to a file scraper-error.log . It should append to the bottom of the file with a time stamp and error
var errorHandler = function (error) {
console.log(error.message);
console.log('The scraper could not scrape data from ' + urlHome + '; there is either a problem with your internet connection or the site may be down');
/**
* create new date for log file
*/
var loggerDate = new Date();
/**
* create message as a variable
*/
var errLog = '[' + loggerDate + '] ' + error.message + '\n';
/**
*when the error occurs, log that to the error logger file
*/
fs.appendFile('scraper-error.log', errLog, function (err) {
if (err) throw err;
console.log('There was an error. The error was logged to scraper-error.log');
});
};