I have an array of strings and I'd like to check whether any of these strings contains a certain substring.
The following code is an example of what I need to achieve: the function gets a message from a user and searches through a series of URLs for the one that contains the user's message in the title:
// Downloads the newest posts of a subreddit as JSON and logs, for every post that
// passes the domain filter, whether the post's link contains the text the user sent.
var redditSubModule = "node";
var http = require('https');
var url = "https://www.reddit.com/r/soccerstreams/new/.json";
var i = 0;
// Drops the first 8 characters of the incoming message; presumably a command prefix — TODO confirm.
var subject = (ctx.message.text).substring(8);
var request = http.get(url, function(response) {
var json = '';
// The body arrives in chunks; accumulate them until 'end' fires.
response.on('data', function(chunk) {
json += chunk;
});
response.on('end', function() {
var redditResponse = JSON.parse(json);
redditResponse.data.children.forEach(function(child) {
if(child.data.domain !== 'self.node') {
// NOTE(review): this overwrites the outer `url` with the post's link, and the search
// below runs against that link, not against the post's title. indexOf is also
// case-sensitive. Confirm which field is meant to be matched.
url = child.data.url;
console.log('URL : ' + url);
if (url.indexOf(subject) > -1) {
console.log('URL : ' + url);
} else {
console.log("nothing...");
}
// Counts the posts that passed the domain filter.
i++;
}
});
})
});
// Log failures reported by the request object.
request.on('error', function(err) {
console.log(err);
});
If you try filling subject with "USA" (because there's a thread with that string in the title), the indexOf check doesn't seem to work and it prints a list of "nothing..." lines.
Logging the typeof url gives string as type so I don't know what is going on here...
What am I doing wrong ?
You can use Array.prototype.find together with String.prototype.includes:
// Looks for the first string in `strs` that contains `search` as a substring.
var strs = ["aaa", "xxx", "yyy_leon"];
var search = "leon";
// find() yields the first element accepted by the predicate, or undefined if none is.
var found = strs.find(function (candidate) {
  return candidate.includes(search);
});
if (!found) {
  console.log("nothing");
} else {
  console.log("found: ", found);
}
Related
Hi, I would like to know whether it is possible to use "break", or any other method, to skip the crawling function after the script has run for the first time, and simply reuse the array variable that already holds the crawled information for the next user request.
let glossary = [];

/**
 * Initialise crawling and save it into glossary array.
 *
 * Requests the glossary page, reads every matching list-item span, splits its text
 * on ' = ' and pushes a { key, value, value2 } entry for each span that has at least
 * two parts. Once the page has been processed it looks up the sample key 'DPDL'.
 */
function init() {
  const glossary_url = 'https://xxxx';
  const headers = {
    cookie: 'cookies:kdkjslkd424'
  };
  const options = {
    url: glossary_url,
    method: 'GET',
    headers: headers
  };
  request(options, function (error, response, body) {
    // A failed request delivers no usable body; report it instead of parsing `undefined`.
    if (error) {
      console.log(error);
      return;
    }
    const newDom = new jsdom(body);
    const $ = require('jquery')(newDom.window);
    $('ul > li > span[style="line-height:1.6;"]').each(function (index, element) {
      const text = element.textContent.trim(); // Get text from html
      const splitText = text.split(' = '); // split text by =
      // Spans without a ' = ' separator are not glossary entries.
      if (splitText.length > 1) {
        glossary.push({
          key: splitText[0].trim(),
          value: splitText[1],
          value2: splitText[2] // undefined when the text has only two parts
        });
      }
    });
    findMatch('DPDL');
  });
}
// NOTE(review): a labelled `break` is only valid inside a statement that carries that
// label. No `init:` label is visible here, so this line looks like a syntax error;
// whether the crawl runs is decided by whether init() is called. Confirm and remove.
break init;
/**
 * Logs every glossary entry whose key equals `key`, formatted as "key = value value2".
 *
 * @param {string} key - Exact (case-sensitive) key to look up in the `glossary` array.
 */
function findMatch (key){
  // Iterating with a block-scoped binding replaces the undeclared counter `i`, which
  // leaked a global in sloppy mode and throws a ReferenceError in strict/module code.
  for (const entry of glossary) {
    if (entry.key === key) {
      // Entries may carry only one value; leave the missing part out instead of
      // printing the word "undefined".
      const parts = [entry.value, entry.value2].filter(function (part) {
        return part !== undefined;
      });
      console.log(key + ' = ' + parts.join(' '));
    }
  }
}
// Start the crawl; init() calls findMatch('DPDL') from inside its request callback.
init();
The goal is to break out of, or skip, the crawl function: if the user wants to search for another value, the lookup should just use the existing glossary array (glossary = []) and not crawl again, because crawling takes a long time.
The only way I have found to do this is through MLCP, where your transformation has to return an array of the format [{"uri": "some_uri1", "value": "newly_created_doc1"},{"uri": "some_uri2", "value": "newly_created_doc2"}]. After applying the transformation with Content Pump, these two documents are created from, say, a single original document. My question is how to achieve the same result via the Node.js API or the Java API. For example, with Node.js I am able to apply the transformation, but it stores the result as a single document containing an array in the style shown above.
My transformation:
const common = require("/ext/home/common.sjs");
/**
 * Reduces an "Over <n>.<m>" name to its whole-number part ("Over 2.5" -> "2").
 * Any other name is returned unchanged.
 */
function stripOverDecimals(name) {
  // "$1" inserts the captured digits; a numeric 1 would insert the literal text "1".
  return name.replace(/^Over (\d+)\.\d+$/, "$1");
}

/**
 * Maps feed market names onto the names used in the generated documents.
 */
function normaliseMarketName(name) {
  if (/^Handicap.+$/.test(name)) {
    return "Handicaps";
  }
  if (/^Match Betting$/.test(name)) {
    return "Win Market";
  }
  return name;
}

/**
 * Builds one { uri, value } document per `//selections` node of the incoming content.
 *
 * @param content - Object whose `value` supports `xpath(...)`; every selection node is
 *   read through relative XPath expressions.
 * @param context - Transform context (not used here).
 * @returns The result of `Sequence.from` over the generated documents; only the first
 *   document for any given uri is kept.
 */
function createDocs(content, context) {
  var bets = content.value.xpath('//selections').toArray();
  var documents = [];
  // Remembering the uris already emitted avoids rescanning `documents` for every bet.
  var seenUris = new Set();
  var bookie_name = "Boylesports";
  var sport_name = "Football";
  var sport_slug = common.convertToSlug(sport_name);

  // for...of walks the array elements; for...in would walk its keys as strings.
  for (var bet of bets) {
    var event_name = stripOverDecimals(bet.xpath('../../../../name').toString());
    // NOTE(review): the sub-event is read from the same node as the event — confirm intended.
    var subevent_name = stripOverDecimals(bet.xpath('../../../../name').toString());
    var subevent_id = bet.xpath('../../../../idfoevent');
    var start_date = xdmp.parseDateTime("[Y0001]-[M01]-[D01]T[h01]:[m01]:[s01]", bet.xpath('../../../../tsstart'));
    var market_name = normaliseMarketName(bet.xpath('../../name').toString());
    var market_id = bet.xpath('../../idfomarket');
    // "Aston Villa" is shortened to the captured "Villa".
    var bet_name = bet.xpath('name').toString().replace(/^Aston (Villa)$/, "$1");
    var bet_id = bet.xpath('idfoselection');

    var event_slug = common.convertToSlug(event_name);
    var subevent_slug = common.convertToSlug(subevent_name);
    var market_slug = common.convertToSlug(market_name);
    var bet_slug = common.convertToSlug(bet_name);

    var document = {};
    document.uri = sport_slug + '/' + event_slug + '/' + subevent_slug + '/' + market_slug + '/' + bet_slug;
    document.value = {
      'bookie_name': bookie_name,
      'sport_name': sport_name,
      'sport_slug': sport_slug,
      'event_name': event_name,
      'event_slug': event_slug,
      'subevent_name': subevent_name,
      'subevent_slug': subevent_slug,
      'subevent_id': subevent_id,
      'start_date': start_date,
      'market_name': market_name,
      'market_slug': market_slug,
      'market_id': market_id,
      'bet_name': bet_name,
      'bet_slug': bet_slug,
      'bet_id': bet_id
    };

    // Skip documents whose uri has already been produced.
    if (!seenUris.has(document.uri)) {
      seenUris.add(document.uri);
      documents.push(document);
    }
  }
  return Sequence.from(documents);
}
exports.transform = createDocs;
My use in node.js:
const axios = require("axios");
const db_client = require("../../database/connect").dbWriter;
// Downloads the feed and writes it to the database as one document, asking the
// server to apply the 'js-example' transform; every outcome is logged.
axios.get("http://cache.boylesports.com/feeds/EUROFOOT2.json")
  .then((feedResponse) => {
    console.log(feedResponse.data);

    const feedDocument = {
      uri: "http://cache.boylesports.com/feeds/EUROFOOT2.json",
      content: feedResponse.data,
      contentType: 'application/json',
      collections: ["test"]
    };

    // Success handler: list the uri of every document the write reports.
    const onWritten = (writeResponse) => {
      writeResponse.documents.forEach((written) => {
        console.log('LOADED: ' + written.uri);
      });
    };

    // Failure handler for the write itself.
    const onWriteFailed = (writeError) => {
      console.log(writeError);
    };

    db_client.documents
      .write({documents: [feedDocument], transform: ['js-example']})
      .result(onWritten, onWriteFailed);
  })
  .catch((downloadError) => {
    console.log(downloadError);
  });
The Client API relies on REST transforms, that do not support the same kind of splitting into multiple documents. I was hoping to say that you could use DMSDK, but it looks like that relies on the same kind of transforms. Here an example that transforms documents after uploading into the database:
http://docs.marklogic.com/guide/java/data-movement#id_22702
Looks like you would have to go against the recommendation, and create a transform that would cause side-effects by doing xdmp:document-insert calls directly.
HTH!
I'm trying to get the first 5 pages of search results with google custom search API ...
So far I've tried to achieve the result using nested function but with no luck.
I know that I'm getting the callbacks wrong, but so far I have not figured out the correct way (without using a promise library) to solve my problem.
Could some of you point me out in the right direction?
Thanks.
// GET /assesment: runs the configured query against Google Custom Search, collects
// up to `number_of_pages` pages of results and sends them back as one HTML string.
app.get('/assesment', function(req, res){
  console.log('route: /assesment');
  var customsearch = google.customsearch('v1');
  var number_of_pages = 5;
  // The API pages by result offset, not by page number; the default page size of 10 is assumed.
  var results_per_page = 10;
  const CX = 'XXXXX';
  const API_KEY = 'XXXXX';
  const SEARCH = 'Test Query';
  console.log('start');

  collectPages(1, '');

  //
  // Functions
  //

  /**
   * Fetches pages one after another and answers the HTTP request exactly once,
   * after the last page (or as soon as the API reports no further page).
   *
   * @param {number} page - 1-based page number to fetch next.
   * @param {string} html - Markup accumulated from the pages fetched so far.
   */
  function collectPages(page, html) {
    if (page > number_of_pages) {
      res.send(html);
      return;
    }
    var start = (page - 1) * results_per_page + 1; // 1, 11, 21, ...
    doSearch(CX, SEARCH, API_KEY, start, function (pageHtml, hasNextPage) {
      var combined = html + pageHtml;
      if (hasNextPage) {
        collectPages(page + 1, combined);
      } else {
        res.send(combined);
      }
    });
  }

  /**
   * Fetches a single page of results.
   *
   * @param {string} _cx - Custom search engine id.
   * @param {string} _search - Query text.
   * @param {string} _api_key - API key.
   * @param {number} _start - 1-based index of the first result to return.
   * @param {function(string, boolean)} callback - Receives the page rendered as HTML
   *   (or the JSON-encoded error) and whether the API announced a further page.
   */
  function doSearch(_cx, _search, _api_key, _start, callback ){
    customsearch.cse.list({ cx: _cx, q: _search, auth: _api_key, start: _start }, function (err, resp) {
      // Local to this page: the route's `res` object must never be reassigned here.
      var response = '';
      var hasNextPage = false;
      if (err) {
        response = JSON.stringify(err);
      } else {
        // Got the response from custom search
        console.log('Result: ' + resp.searchInformation.formattedTotalResults);
        if (resp.items && resp.items.length > 0) {
          console.log('First result of '+resp.items.length+' is ' + resp.items[0].title);
          for (var i = 0; i < resp.items.length; i++) {
            response += resp.items[i].title+"<br>";
            response += resp.items[i].link +"<br><hr>";
          }
        }
        hasNextPage = Boolean(resp.queries && resp.queries.nextPage);
      }
      if (callback && typeof callback === "function") callback(response, hasNextPage);
    });
  }
});
You can use a third-party service like SerpApi to scrape Google and get back structured JSON.
Example using the Node.js library to get the fourth page of results (10 results per page, starting at offset 30):
var gsr = require('GoogleSearchResults')
// Create a client with the "demo" key, run one search and print whatever comes back.
let serp = new gsr.GoogleSearchResults("demo");
serp.json(
  {
    q: "Coffee",
    num: 10,
    start: 30,
    location: "Portland"
  },
  function printResult(result) {
    console.log(result);
  }
);
I have a nodejs program that requests a series of XML files, parses them and then puts the output in an array which is written to disk as a CSV file.
The program mostly works, however occasionally the files end up in the wrong order in the array.
I want the order of the results to be the same as the order of the URLs. The URLs are stored in an array, so when I get the XML file I check what the index of the URL was in the source array and insert the results at the same index in the destination array.
can anyone see the flaw that is allowing the results to end up in the wrong order?
// Stores one parsed reading in `data`, at the position of its URL in config.sources,
// and builds the output file once every source has reported.
// NOTE(review): the caller passes response.request.uri.href. If that string differs
// from the entry stored in config.sources (e.g. after normalisation or a redirect —
// TODO confirm), indexOf returns -1 and the reading lands on data[-1] instead of in
// an array slot.
addResult = function (url, value, timestamp) {
data[config.sources.indexOf(url)] = {
value : value,
timestamp : timestamp,
url : url
};
numResults++;
if (numResults === config.sources.length) { //once all results are in build the output file
createOutputData();
}
}
// Reads config.json, percent-encodes the spaces in every source URL, requests each
// source and hands every successfully parsed reading to addResult.
// Note: the callback parameter `data` (the file contents) shadows any outer `data`.
fs.readFile("config.json", function (fileError, data) {
  var eachSource, processResponse = function (responseError, response, body) {
    if (responseError) {
      console.log(responseError);
      return;
    }
    parseXML(body, {
      explicitArray : false
    }, function (xmlError, result) {
      if (xmlError) {
        // Nothing was parsed, so there is no reading to record; continuing would
        // dereference an undefined `result`.
        console.log(xmlError);
        return;
      }
      addResult(response.request.uri.href, result.Hilltop.Measurement.Data.E.I1, moment(result.Hilltop.Measurement.Data.E.T));
    });
  };
  if (fileError) {
    console.log(fileError);
    return;
  }
  config = JSON.parse(data); //read in config file
  for (eachSource = 0; eachSource < config.sources.length; eachSource++) {
    config.sources[eachSource] = config.sources[eachSource].replace(/ /g, "%20"); //replace every space with %20
    request(config.sources[eachSource], processResponse); //request each source
  }
});
var writeOutputData, createOutputData, numResults = 0, data = [], eachDataPoint, multipliedFlow = 0;

/**
 * Writes a single CSV row to config.outputFile and retries after 500 ms on failure.
 *
 * @param {Array} output - The row to write.
 * @param {number} attempts - Number of attempts already made (0 for the first call).
 */
writeOutputData = function (output, attempts) {
  csv.writeToPath(config.outputFile, [ output ], {
    headers : false
  }).on("finish", function () {
    console.log("successfully wrote data to: ", config.outputFile);
  }).on("error", function (err) { //on write error
    console.log(err);
    if (attempts < 2) { //attempts starts at 0, so this allows three tries in total
      setTimeout(function () {
        writeOutputData(output, attempts + 1);
      }, 500);
    }
  });
};

/**
 * Builds the output row from `data` and passes it to writeOutputData.
 *
 * Row layout: config.plDestVar, config.sources.length + 1, the "HHmm" timestamp,
 * then one scaled value per entry of `data`, in the order the entries are stored.
 * The timestamp comes from the source selected by config.timestampFromSource when
 * that property exists, otherwise from the entry with the oldest timestamp.
 */
createOutputData = function () {
  var csvTimestamp, oldestFirst, multiplier, output = [];
  if (config.hasOwnProperty("timestampFromSource")) {
    csvTimestamp = data.filter(function (a) {
      return a.url === config.sources[config.timestampFromSource];
    })[0].timestamp.format("HHmm");
    console.log("timestamp from source [" + config.timestampFromSource + "]:", csvTimestamp);
  } else {
    // Sort a copy: Array.prototype.sort reorders the array it is called on, and
    // sorting `data` itself would emit the values below in timestamp order instead
    // of in the order of the source URLs.
    oldestFirst = data.slice().sort(function (a, b) { //oldest to newest
      return a.timestamp.unix() - b.timestamp.unix();
    });
    csvTimestamp = oldestFirst[0].timestamp.format("HHmm"); //use the oldest date for the timestamp
    console.log("timestamp from oldest source:", csvTimestamp);
  }
  //build array to represent data to be written
  output.push(config.plDestVar); //pl var head address first
  output.push(config.sources.length + 1); //number of vars to import
  output.push(csvTimestamp); //the date of the data
  //scale by the configured multiplier, or by 10 when none (or 0) is configured
  multiplier = config.flowMultiplier ? config.flowMultiplier : 10;
  for (eachDataPoint = 0; eachDataPoint < data.length; eachDataPoint++) { //add each data point
    multipliedFlow = Math.round(data[eachDataPoint].value * multiplier);
    //clamp to the range 0..32766
    multipliedFlow = Math.min(32766, Math.max(0, multipliedFlow));
    output.push(multipliedFlow);
  }
  console.log(output);
  writeOutputData(output, 0); //write the results, 0 is signalling first attempt
};
I think that the url to index code needs debugging.
Here is an example that uses an object that is pre-populated with keys in the for loop.
`
var http = require('http');
var fs = require("fs");
// Called once with the collected responses, keyed by host in the order of the config.
var allRequestsComplete = function(results){
  console.log("All Requests Complete");
  console.log(results);
};

// Reads urls.json, requests "/" on every configured host and collects each body (or
// "ERROR") in an object whose keys are created up front, so the key order follows
// the order of config.sources no matter when the responses arrive.
fs.readFile("urls.json", function (fileError, data) {
  if (fileError) {
    console.log(fileError);
    return;
  }
  // Declared locally; the bare assignment created an implicit global.
  var config = JSON.parse(data); //read in config file
  var allResponses = {};
  var responseCount = 0;

  var requestComplete = function(url, fileData){
    responseCount++;
    allResponses[url] = fileData;
    if(responseCount===config.sources.length){
      allRequestsComplete(allResponses);
    }
  };

  // With no sources there is nothing to wait for.
  if (config.sources.length === 0) {
    allRequestsComplete(allResponses);
    return;
  }

  config.sources.forEach(function (source) {
    var url = source.replace(/ /g, "%20").replace("http://", "");
    var settled = false;
    // Each source must be counted exactly once, whichever event reports first.
    var finish = function (fileData) {
      if (settled) {
        return;
      }
      settled = true;
      requestComplete(url, fileData);
    };

    allResponses[url] = "Waiting";
    http.get({host: url,path: "/"}, function(response) {
      var str = '';
      response.on('error', function () {
        finish("ERROR");
      });
      response.on('data', function (chunk) {
        str += chunk;
      });
      response.on('end', function () {
        finish(str);
      });
    }).on('error', function () {
      // Connection-level failures are emitted on the request object, not on a
      // response; without this handler such a source would never be counted.
      finish("ERROR");
    });
  });
});
`
I agree with @Kevin B: you cannot assume that async callbacks will return in the same order in which you sent the requests. However, you can preserve the order by passing each source's index through to the code that handles its response.
say you add the following to addResult
// Records one reading at an explicit position in `data` and, when the last
// outstanding reading has arrived, triggers the output build.
addResult = function (index, url, value, timestamp) {
  var reading = {
    value : value,
    timestamp : timestamp,
    url : url
  };
  data[index] = reading;
  numResults += 1;
  // Still waiting for other sources: nothing more to do yet.
  if (numResults !== config.sources.length) {
    return;
  }
  createOutputData(); //once all results are in build the output file
}
and use an extra function to call your request
/**
 * Requests one source and stores its parsed reading under the given position.
 *
 * @param {number} index - Position of this source; passed through to addResult so the
 *   reading does not depend on the order in which responses arrive.
 * @param {string} url - Address of the XML document to fetch.
 */
function doRequest(index, url) {
  request(url, function(responseError, response, body) {
    if (responseError) {
      console.log(responseError);
      return;
    }
    parseXML(body, {
      explicitArray : false
    }, function (xmlError, result) {
      if (xmlError) {
        // No parse result exists in this case; continuing would dereference an
        // undefined `result`.
        console.log(xmlError);
        return;
      }
      addResult(index, response.request.uri.href, result.Hilltop.Measurement.Data.E.I1, moment(result.Hilltop.Measurement.Data.E.T));
    });
  });
}
then you can also change your loop to:
// Percent-encode the spaces in every source URL, then request each source together
// with its position so the reading can be stored in the matching slot.
eachSource = 0;
while (eachSource < config.sources.length) {
  config.sources[eachSource] = config.sources[eachSource].replace(/ /g, "%20"); //replace every space with %20
  doRequest(eachSource, config.sources[eachSource]); //request each source
  eachSource += 1;
}
I would like to replace the if(body.toString().indexOf("404") !== 0) block with some generic error handling code but I can't seem to see where it throws an error when the target host is down. So far, this is the only hacky method I've managed to put together that works.
// GET /: queries every host in `sites` one after another and answers with an array
// holding the parsed JSON body of each host that delivered one.
app.get('/', function(req, res){
  var sites = ["foo.com", "bar.com"];
  var returnObj = [];
  getSites(0);

  /**
   * Recursively add data from each site listed in "sites" array.
   *
   * @param {number} index - Position in `sites` of the host to query next; once it
   *   passes the end of the array the collected data is sent to the client.
   */
  function getSites(index) {
    if (index >= sites.length) {
      res.json(returnObj);
      return;
    }

    var advanced = false;
    // Move on to the next site exactly once, whichever handler reports first.
    var next = function () {
      if (advanced) {
        return;
      }
      advanced = true;
      getSites(index + 1);
    };

    var url = sites[index];
    var _req = http.get({host: url}, function(_res) {
      var bodyChunks = [];
      _res.on('data', function(chunk) {
        bodyChunks.push(chunk);
      }).on('end', function() {
        var body = Buffer.concat(bodyChunks);
        if(body.toString().indexOf("404") !== 0) {
          try {
            returnObj.push(JSON.parse(body));
          } catch (parseError) {
            // A body that is not JSON must not take the whole route down.
            console.log('ERROR: ' + parseError.message);
          }
        }
        next();
      });
    });
    _req.on('error', function(e) {
      console.log('ERROR: ' + e.message);
      // An unreachable host emits 'error' here and never calls the response
      // callback; without advancing, the client would wait for an answer forever.
      next();
    });
  }
});
You can check the status code of the response.
// The status code is a property of the response object handed to the http.get
// callback (`_res` in the question's code), not of the request object `_req`.
if(_res.statusCode === 200) {
//Response okay.
}
Here's a list of the status codes.