I have an array of URLs that I scraped from a webpage, and I make an API call to validate each URL to see if it is malicious. The only problem is that I am limited to a certain number of API calls per day and the array contains duplicate URLs. I am trying to loop through the array and reuse a saved API result for duplicate values, and I am struggling to find the best way to do it since there could be multiple duplicates. If the loop encounters a duplicate value, I want it to skip the API call and just return the values already saved from the previous API call. I included some basic pseudocode inside the code below, and I am unsure what to replace the pseudocode with.
/* Pseudocode sketch: validate each URL through the API, skipping the request for URLs already seen.
   The two placeholder expressions below are not valid JavaScript and still need to be filled in. */
/* urlToValidate is a list of URLS */
urlToValidate = ["ups.com", "redfin.com", "ups.com", "redfin.com", "redfin.com", "redfin.com"];
var isValid = false;
/* API Overview https://www.ipqualityscore.com/documentation/malicious-url-scanner-api/overview */
for (let i = 0; i < urlToValidate.length; i++) {
/* Placeholder: should be true when this URL has not appeared earlier in the array */
if (i == 0 || Is Not A DUPLICATE) {
$.getJSON('https://ipqualityscore.com/api/json/url/<API_KEY>/' + urlToValidate[i], function( json ) {
/* A URL is reported as valid when it is flagged as neither phishing nor spamming and its risk_score is below 80 */
if (!json.phishing && !json.spamming && json.risk_score < 80) {
isValid = true;
returnMessage(isValid, json.risk_score, i)
} else {
isValid = false;
returnMessage(isValid, json.risk_score, i)
}
});
} else {
/* Placeholder: report the values saved from the earlier request for the same URL */
returnMessage(alreadySaved duplicateValue, alreadySaved duplicate risk_score, i)
}
}
Desired Output:
URL Valid: true Risk Rating: 0 Position: 7
or
Duplicate URL: true Risk Rating: 0 Position: 7
This is a simple matter of caching.
Outside of your for loop, maintain some kind of mapping of URLs to their corresponding fetch results. That way, you can store not only whether that URL has been called but also the result, if it exists. An easy way to do that is with a basic object, where the "keys" are strings corresponding to the URLs, and the "values" are the results of your fetches.
const resultCache = {};
Inside of your loop, before you do a fetch you should first check whether the cache already has a result for that URL.
// Look the URL up in the cache first; only fetch when nothing is stored for it yet.
let result;
if (resultCache[urlToFetch]) {
// cache hit: use the previous result
result = resultCache[urlToFetch];
} else {
// cache miss: make the request
result = await fetch(/* whatever */);
// remember to also store result in cache
resultCache[urlToFetch] = result;
}
You have a few options.
First you could convert your urls to a Set which prevents any duplicates from occurring at all.
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set
Another option would be to store the return in an object with the key being the url and in your if statement check to see if the value is not null.
*** UPDATE using a set ***
/* urlToValidate is a list of URLS */
const urlToValidate = ["ups.com", "redfin.com", "ups.com", "redfin.com", "redfin.com", "redfin.com"];
/* A Set removes the duplicates, but it has no .length and cannot be indexed with [i],
   so spread it back into an array before looping. */
const urls = [...new Set(urlToValidate)];
/* API Overview https://www.ipqualityscore.com/documentation/malicious-url-scanner-api/overview */
for (let i = 0; i < urls.length; i++) {
  $.getJSON('https://ipqualityscore.com/api/json/url/<API_KEY>/' + urls[i], function (json) {
    /* Computed per response: a shared variable would be overwritten by whichever callback ran last.
       Valid means not phishing, not spamming and a risk_score below 80. */
    const isValid = !json.phishing && !json.spamming && json.risk_score < 80;
    /* i is the position in the de-duplicated list, not in urlToValidate */
    returnMessage(isValid, json.risk_score, i);
  });
}
Related
So it seems I don't quite understand promises, but I've been using them in low code software my company uses for internal tools as a way to perform the same query on different data for a certain number of times.
Anyway, I'm currently using Promises with a Mailgun query, and when I try to resolve Promise.all(promises), I assume I'm hitting them too quickly and too much. So what I would like to do, without having to refactor the entirety of my code, is take what I have and then resolve those Promises one at a time.
/* Groups the selected table rows' emails by rep, splits each rep's list into batches of up to 1000,
   then triggers the Mailgun query once per batch. */
let query = Mailgun_MailList_Add_Members;
//let arr = testEmailData.value;
let reps = repInfo.value;
let tableData = table1.selectedRow.data;
let finalResult = [];
for(let i = 0; i < reps.length; i++){
let emailArr = [];
let allRepEmails = [];
/* function that takes an array and checks inside for subarrays, pushing all subvalues into one new array */
let getAllRepEmails = (arr) => {
if(arr instanceof Array){
for(let i = 0; i < arr.length; i++){
getAllRepEmails(arr[i]);
}
}
else allRepEmails.push(arr);
}
for(let j = 0; j < tableData.length; j++){
/* check if current records owningrep is equal to current index of repinfos lastName */
if(tableData[j].owningrep.toUpperCase() == reps[i].lastName.toUpperCase()){
/* takes all the emails from table data in the current index and pushes them into array:
   ';' and ':' become ',', spaces are removed, repeated commas are collapsed, then the string is split on ',' */
emailArr.push(tableData[j].Emails.replace(/;/g, ",").replace(/:/g, ",").replace(/ +/g, "").replace(/,+/g, ",").split(','));
}
}
/* check inside emailArr for subarrays of emails, pushing emails into new array */
getAllRepEmails(emailArr);
/* filters array of all emails for current rep to not include empty strings */
let noEmptyEmails = _.filter(allRepEmails, el => el != "");
/* loops over final array of all actual emails, creating objects for each rep with arrays of emails up to 1000 each per API req and pushing them into final array */
while(noEmptyEmails.length){
finalResult.push({
owningrep: reps[i].lastName.toUpperCase(),
/* converts final email array into JSON format as per API req; splice removes the batch from noEmptyEmails, which is what ends the while loop */
Emails: JSON.stringify(noEmptyEmails.splice(0,1000))
});
}
}
/* maps finalResults to create the promises that perform the query for each record */
let promises = finalResult.map((item) => {
/* get lastName from repinfo for address variable */
/* NOTE(review): item.owningrep was upper-cased above, so this lookup only matches when lastName is already upper-case - confirm */
let name = _.filter(repInfo.value, obj => obj.lastName == item.owningrep)[0].lastName.toLowerCase();
/* uses name variable and repinfo fromAddress to make address variable representing alias for the mail list we are adding members to */
let address = _.filter(repInfo.value, obj => obj.lastName == item.owningrep)[0].fromAddress.replace(/^[^#]*/, name + "test");
/* NOTE: the value of query.trigger(...) is not returned from this callback, so every entry of `promises` is undefined */
query.trigger({
additionalScope: {
members: finalResult[finalResult.indexOf(item)].Emails,
alias: address
}
})
}
);
return Promise.all(promises);
I've tried using the different methods on Promise to see what happens, and I've tried splicing the promises and resolving one. I think the only thing I've learned is that I don't understand Promises.
Does anyone have any ideas?
2 things:
Your finalResult.map((item) => { doesn't seem to return any promise, as TJ explained. I think you meant to do return query.trigger. Either way, that map runs instantly (and in parallel), so the function you have written doesn't really wait for anything; it could be that other chained calls to your function are invoked immediately because Promise.all does not really wait for anything.
The let promises = seems to be an array of undefined values, so again Promise.all(promises) does nothing for you.
if you want to run one at the time, then remove finalResult.map((item) => and instead use something like a classic for loop and use async/await:
// Sequential version: each trigger call is awaited before the loop moves on to the next item.
for (const item of finalResult) {
await query.trigger(...)
}
your function is required to have the async keyword if you want to use await, async function foo() { ... }
I'm trying to understand how to perform some action on each element of an array, but by working in portions of that array, until each element has been touched.
As a more specific example, let's assume I have an array of 990 elements and want to perform some action on each element, but in portions of 200. What would be the most efficient way to do this?
/**
 * Skeleton for processing `array` in portions of 200 elements.
 *
 * @param {Array} array - the elements to process.
 * @returns {Array} the collected results (empty until the placeholder loop is filled in).
 */
function foo(array) {
  // Declared locally: assigning to an undeclared `results` creates a global in sloppy mode
  // and throws a ReferenceError in strict mode / ES modules.
  const results = [];
  if (array.length > 200) {
    // Loop over and perform action on first 200 elements, then next 200, and so on...
    // for each element, push result to results array
  }
  return results;
}
EDIT:
For my specific use case, each element in the array is a URL. I'm making a GET request with each URL using Axios. There is potential for my array to contain thousands of URLs, so I don't want to make a request and wait for a response one at a time; however, the server I'm making the requests to can only handle so many requests at one time (about 200).
There are lots of ways to do this, some better than others. I will assume you don't want to modify the elements of the original array and want to handle 200 elements at a time, at different moments:
/**
 * Processes the next portion of `arr`, continuing where the previous call stopped.
 * The position is remembered on the array itself as `arr.myIndex`.
 *
 * @param {Array} arr - the array to walk through; gains a `myIndex` property.
 * @param {number} [chunkSize=200] - maximum number of elements handled per call.
 * @param {Function} [handler=process] - called once with each element; defaults to the
 *   `process` function the surrounding code is expected to provide.
 * @returns {undefined}
 */
function stepArray(arr, chunkSize = 200, handler = process) {
  //... create your custom index on the array object
  //..., so it will know where to continue from
  if (typeof arr.myIndex === 'undefined') { arr.myIndex = 0; }
  const start = arr.myIndex;
  const end = Math.min(arr.length, start + Math.max(0, chunkSize));
  for (let k = start; k < end; k++) {
    handler(arr[k]);
  }
  // Advance the stored position; without this every call would redo the first portion.
  arr.myIndex = end;
}
To make multiple chunks you can use reduce, like that:
var perChunk = 200; // chunk size
var inputArray = []; // your array
// Fold the flat array into an array of sub-arrays holding at most perChunk items each.
const result = inputArray.reduce((chunks, element, position) => {
  const slot = Math.floor(position / perChunk);
  // open a new chunk the first time a slot is reached
  chunks[slot] = chunks[slot] || [];
  chunks[slot].push(element);
  return chunks;
}, []);
console.log(result);
Then you can iterate again over the resulting chunks and make your axios requests.
// Handle one chunk every 3 seconds: chunk 0 immediately, chunk 1 after 3s, chunk 2 after 6s, ...
// `let` gives every timer callback its own i; an undeclared/shared i would already equal
// result.length by the time the first callback runs.
for (let i = 0; i < result.length; i++) {
  const delay = 3000 * i;
  setTimeout(() => {
    // your action with the chunk result[i]
    console.log(result[i]);
  }, delay);
}
While it may not be the most efficient solution per my initial request, I found the following to be easy-to-understand:
// Repeatedly cut the first 200 entries off `array` (emptying it in the process) and visit each one.
while (array.length > 0) {
  const batch = array.splice(0, 200);
  for (const url of batch) {
    // Perform some action
  }
}
I don't use JavaScript much and I know this shouldn't be this difficult. Basically, what I am trying to do is loop through an array of domain names that I am getting from the user's input, e.g. [gmail.com, yahoo.com, xyz.com, etc.]. I am using a for loop and if statements to run through it just to check whether the array has a certain type of email. So if I am searching for yahoo.com, I need to know if there is a gmail.com in there as well.
Here is what I have created so far but I cannot get it to check the second or third or fourth email.
/* Extracts the domain of each email and asks the user to confirm depending on whether it is yahoo.com.
   NOTE: as posted this is not valid JavaScript (the sample addresses are unquoted and the function's
   closing brace is missing). */
function EmailFunction() {
var emailNames = [fdsg#gmail.com, gjitrerh#yahoo.com, jirg#aol.com];
var emailDomains = [];
var arrEmail = emailNames.split(', ');
var len = arrEmail.length;
for (var i = 0; i < len; i++) {
/* the domain is whatever follows the last '#' */
var domain = arrEmail[i].split("#").pop();
emailDomains.push(domain)
/* onlyUnique is not defined in this snippet */
var unique = emailDomains.filter(onlyUnique);
for (var j = 0; j < unique.length; j++) {
/* NOTE: every path through this if/else returns, so the function exits while handling the
   first domain and the remaining emails are never examined */
if (unique[j] == "yahoo.com") {
var answer = confirm("* Please verify your Email addresses are being sent to the correct personnel.\n\n *** Please press CANCEL to Verify.\n *** Press OK to continue.");
if (answer == true) {
return true;
} else {
return false;
}
} else {
var answer = confirm("* Since you have emails that are not yahoo.COM please retype your emails.\n\n *** Please press CANCEL to Verify.\n *** Press OK to continue.");
if (answer == true) {
return true;
} else {
$('.txbx1').show();
return false;
}
}
}
}
> **Edit:**I edited my question with some temp data.
> I am getting the full email from the user ex. xyz#yahoo.com.
I am then splitting that at the # sign so that I get an
array of emails like = yahoo.com, aol.com, gmail.com.
> I am then need to loop through that
array `unique` to check if the array has a
certain email domain.
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
Make a set object and define all the domains you wish in it. Then just use has() method in your if statement like that:
if (mySet.has(unique[j])) {
Syntax here
I have a web page which allows to select geographical areas.
I expect than not more than 5000 areas are selected.
Each area has a code (9 characters long).
I want to load a collection of areas from DB and let the user edit it.
If a user has made any changes, I want to show the 'unsaved changes' message somewhere.
In order to do it, I want to compare the original and the current version of the collection.
Is it possible to calculate a hash code of this collection and compare just hash codes?
Order of elements is not important and should be ignored.
The collection is currently implemented in this way
/**
 * Collection of area codes kept as own properties of a plain object (code -> code).
 * @param {Array} areaIdentifiers - the codes to start with.
 */
function AreaHashTable(areaIdentifiers) {
    // backing store
    this.hash = {};
    this.addKeys(areaIdentifiers);
}

/** Returns the backing object itself (not a copy). */
AreaHashTable.prototype.getKeys = function () {
    return this.hash;
};

/** True when `key` is stored as an own property of the backing object. */
AreaHashTable.prototype.hasKey = function (key) {
    return this.getKeys().hasOwnProperty(key);
};

/** Stores one code, using the code as both property name and value. */
AreaHashTable.prototype.addKey = function (gssCode) {
    this.hash[gssCode] = gssCode;
};

/** Stores every code of the given list, in index order. */
AreaHashTable.prototype.addKeys = function (areaIdentifiers) {
    var position = 0;
    while (position < areaIdentifiers.length) {
        this.addKey(areaIdentifiers[position]);
        position += 1;
    }
};
... etc
}
You can iterate over your string collection, generating a 9 character hash, by x-oring the characters of each string with the respective hash string character. This way, you receive an order independent hash value for your string collection.
something like:
// Order-independent fingerprint of a collection of 9-character codes: XOR is commutative
// and associative, so the order in which the codes are visited does not change the result.
var hash = [0,0,0,0,0,0,0,0,0];
// Accept either an array of codes or an object whose property names are the codes.
var codes = Array.isArray(strings) ? strings : Object.keys(strings);
for (var strg of codes) {
    for (var i = 0; i < 9; i++) {
        // XOR needs the numeric character code; XOR-ing the character itself coerces it
        // to NaN, which counts as 0 and leaves the hash unchanged.
        hash[i] ^= strg.charCodeAt(i);
    }
}
However, maybe encapsulating the data and maintaining a "changed" flag, would be another solution for the underlying problem.
I'm developing a javascript widget using the UWA widget format. Unfortunately this makes it impossible to jsFiddle my code but I've commented it in detail so that, hopefully, you can follow its fairly straightforward sequence.
HighestClass = {};
HighestClass.array = [];
HighestClass.url = "http://our.url.local/frog/pointsByWeek.php?cmd=highestClass&students=";
/* Requests the full group list twice: the first callback counts the groups whose name contains
   groupPrefix, the second passes each matching group to getClassPoints.
   NOTE: as posted, the first onSuccess function is missing its closing brace. */
HighestClass.init = function(groupPrefix) {
var count = 0;
/* Using the group prefix, i.e. "CLS 9", from the drop-down box,
get a list of all of the classes in that year group */
/* First time round, count the number of groups that match this
syntax because there are no parameters available to filter
this API */
Frog.API.get('groups.getAll',{
'onSuccess': function(data){
for (var i = 0; i < data.length; i++) {
if (data[i].name.indexOf(groupPrefix) != -1)
count++;
}
});
/* Now that these classes have been counted, run through the API
call again to push each class ID through to another function */
var run_through = 0;
Frog.API.get('groups.getAll',{
'onSuccess': function(data){
for (var i = 0; i < data.length; i++) {
if (data[i].name.indexOf(groupPrefix) != -1) {
var end = false;
run_through++;
/* When it gets to the last class group, i.e. the run_through
variable becomes equal to count, let the getClassPoints
function know */
/* NOTE(review): count is only filled in by the first request's callback, so this comparison
presumably relies on that callback having already run - confirm */
if( run_through == count )
end = true;
HighestClass.getClassPoints( data[i].name, data[i].id, end );
}
}
}
});
}
/* Builds a comma-separated list of the student ids in group `id`, requests the points total for
   that list and pushes { name, points } onto HighestClass.array; calls display when end is true. */
HighestClass.getClassPoints = function(name, id, end) {
var list = '';
/* Using the ID of the class group, create a comma-separated list
of students for use in our MySQL query */
Frog.API.get("users.search", {
"params": {
"group": id
},
"onSuccess": function (data){
for (var i = 0; i < data.length; i++)
list += data[i].id + ",";
}
});
/* If the list exists... */
/* NOTE(review): if Frog.API.get calls onSuccess asynchronously, list is still '' when this
check runs and the request below is skipped - confirm */
if( typeof list === "string" && list.length > 0 ) {
/* drop the trailing comma */
list = list.slice(0,-1);
/* Run an AJAX call to our PHP script which simply returns an integer
value of the SUM of reward points earned by that list of students */
UWA.Data.getJson(HighestClass.url + list, function(res){
if (res === false || res === "") res = 0;
/* Push this data into an array of objects alongside the class name */
var obj = { "name": name, "points": res };
HighestClass.array.push(obj);
});
}
/* As this function is being called asynchronously multiple times we need to
determine when the last call is run so that we can deal with our array
of data. We do this thanks to the count/run_through variables in earlier
function which will trigger end=true in this function */
/* NOTE: this check sits outside both callbacks above, so it runs as soon as the requests
have been issued */
if( end === true )
HighestClass.display();
}
/* Sorts HighestClass.array by points, highest first, and writes the first entry into #display. */
HighestClass.display = function() {
/* Once we've put our array of objects together, we need to sort it so that
the class with the highest number of points are at array entry 0 */
function compare(a,b) {
if (a.points < b.points)
return 1;
if (a.points > b.points)
return -1;
return 0;
}
/* IF I PUT AN ALERT HERE, INTERNET EXPLORER WORKS, LOL? */
HighestClass.array.sort(compare);
/* We can then display the data of array entry 0 which should be our highest
point-scoring class */
/* NOTE: entry 0 is read without checking that the array is non-empty */
$('#display').html( '<h1>' + HighestClass.array[0].name + '</h1><h3>' + HighestClass.array[0].points + '</h3>' );
}
/* equivalent of document ready */
/* Registers a change handler on the select element(s): shows the loading placeholder and starts
   HighestClass.init with the selected option's value. */
widget.onLoad = function(){
/* Choose the year group from the drop down box */
$("select").change(function(){
var val = $('select option:selected').val();
$("#display").html('<h1><img width="60" height="60" src="http://logd.tw.rpi.edu/files/loading.gif" />Loading...</h1>');
HighestClass.init(val);
});
}
In essence the script does the following:
Retrieve a list of students for each class group
Run an AJAX call to our PHP script/MySQL database to return the SUM of points for those students
Add the name and points info to an array of objects
Sort the array of objects so that the highest point-scoring class is our first array entry
Display the name of the class and their points from array entry 0
The problem is, the only way I can think about doing it (because of limitations of the APIs) is to run asynchronous API calls and chain AJAX calls off these. I then use a counting variable to determine when the last asynchronous call is made.
Now, importantly, this script works perfectly well in FireFox. However, in Internet Explorer - which is where I need it to work - the script displays our "loading" DIV/image and goes no further.
The strange thing is, if I put an alert in the code (where I've commented it in capital letters), Internet Explorer works correctly.
This must be an issue with synchronicity and timing but I have no experience or knowledge of it.
Can anyone suggest a solution? Hacky is fine, if necessary.
Cheers,
First thing: /!\ When you use the callback pattern, your "flow" needs to resume inside the callback function.
I can see that you have problems with the asynchronous, callback-based approach. This applies when you call $.getJSON, but also every time you make a Frog.API call, for example:
Frog.API.get("users.search", {
"params": {
"group": id
},
"onSuccess": function (data){
for (var i = 0; i < data.length; i++)
list += data[i].id + ",";
}
});
Here you retrieve data, and put them in a list with the onSuccess callback function. My guess is that this call is also asynchronous. If this call takes too long:
if( typeof list === "string" && list.length > 0 ) {
won't pass. So nothing will happen, and your display will try to read values from an undefined object => error, the JavaScript stops, and your view is not updated.
You need to call getJSON after your list has been retrieved, in the onSuccess callback. This will also help because you make the same mistake afterwards:
In what follows you ask for the display, but you absolutely don't know whether your calls are finished. The fact that the last call has been issued does not mean any of the calls are finished.
if( end === true )
HighestClass.display();
So you just need to add that after:
HighestClass.array.push(obj);
which is in your $.getJSON call.
Ajax call are usually Asynchronous and your problem is that you try to update the display synchronously with the current flow, without waiting for your server to answer.
/!\ When you use the callback pattern, your "flow" needs to resume inside the callback function. That way, you will always be sure that the code you are running has all the data it needs to do its job.
PS: Here is all the code, modified. I also modified your init function. You do not need to call your API again to redo the same thing: just loop twice over the data, or put only the relevant data aside in an array and then loop over that.
HighestClass = {};
HighestClass.array = [];
HighestClass.url = "http://our.url.local/frog/pointsByWeek.php?cmd=highestClass&students=";
/* Looks up every class group whose name contains groupPrefix (i.e. "CLS 9" from the drop-down
   box), collects the points of each one through getClassPoints, and calls HighestClass.display
   once ALL of them have reported back. */
HighestClass.init = function(groupPrefix) {
    /* Start from an empty result list so that classes from an earlier selection are not
       mixed into this one */
    HighestClass.array = [];
    Frog.API.get('groups.getAll', {
        'onSuccess': function(data) {
            var i = 0,
                group = [],
                pending = 0;
            /* There are no parameters available to filter this API, so keep only the
               groups that match the prefix */
            for (i = 0; i < data.length; i++) {
                if (data[i].name.indexOf(groupPrefix) != -1)
                    group.push(data[i]);
            }
            /* Nothing matched: display straight away instead of waiting forever */
            if (group.length === 0) {
                HighestClass.display();
                return;
            }
            /* The responses can come back in any order, so count completions rather than
               trusting that the request dispatched last is also the one answered last */
            pending = group.length;
            var classDone = function() {
                pending--;
                if (pending === 0)
                    HighestClass.display();
            };
            for (i = 0; i < group.length; i++) {
                HighestClass.getClassPoints(group[i].name, group[i].id, false, classDone);
            }
        }
    });
}
/* Fetches the students of one class group, asks the PHP script for the SUM of their reward
   points and pushes { name, points } onto HighestClass.array.
   name   - class name stored alongside the points
   id     - ID of the class group passed to users.search
   end    - when true and no onDone is given, HighestClass.display is called once this class
            has been handled
   onDone - optional function called once this class has been handled (with or without
            students); when given it takes the place of the end flag */
HighestClass.getClassPoints = function(name, id, end, onDone) {
    var finish = function() {
        if (typeof onDone === "function")
            onDone();
        else if (end === true)
            HighestClass.display();
    };
    /* Using the ID of the class group, create a comma-separated list
       of students for use in our MySQL query */
    Frog.API.get("users.search", {
        "params": {
            "group": id
        },
        "onSuccess": function (data) {
            var ids = [];
            for (var i = 0; i < data.length; i++)
                ids.push(String(data[i].id));
            /* A class without students has no points to fetch, but it still has to be
               reported as handled or the display would never be triggered */
            if (ids.length === 0) {
                finish();
                return;
            }
            /* Run an AJAX call to our PHP script which simply returns an integer
               value of the SUM of reward points earned by that list of students */
            UWA.Data.getJson(HighestClass.url + ids.join(","), function(res) {
                if (res === false || res === "") res = 0;
                /* Push this data into an array of objects alongside the class name */
                HighestClass.array.push({ "name": name, "points": res });
                /* Only now is this class's data really available */
                finish();
            });
        }
    });
}
/* Orders the collected classes by points, highest first, and renders the leader into #display
   (or a fallback message when nothing was collected). */
HighestClass.display = function() {
    var classes = HighestClass.array;
    /* Descending order: the class with the most points ends up at entry 0 */
    classes.sort(function (left, right) {
        if (left.points < right.points)
            return 1;
        return left.points > right.points ? -1 : 0;
    });
    if (classes.length === 0) {
        $('#display').html( '<h1>No data available</h1>' );
        return;
    }
    /* Entry 0 is now the highest point-scoring class */
    var leader = classes[0];
    $('#display').html( '<h1>' + leader.name + '</h1><h3>' + leader.points + '</h3>' );
}
/* equivalent of document ready */
/* equivalent of document ready: reload the display whenever a year group is chosen */
widget.onLoad = function(){
    var loadingMarkup = '<h1><img width="60" height="60" src="http://logd.tw.rpi.edu/files/loading.gif" />Loading...</h1>';
    var failureMarkup = '<h1>Sorry, an error occured while retrieving data</h1>';
    $("select").change(function(){
        /* The chosen option's value is the group prefix handed to init */
        var chosenPrefix = $('select option:selected').val();
        $("#display").html(loadingMarkup);
        try {
            HighestClass.init(chosenPrefix);
        } catch (err) {
            $("#display").html(failureMarkup);
        }
    });
}
The fact that an alert "fixes" the problem does indicate that it's something to do with a timing issue. It looks like one of your functions isn't returning in time and not populating the array variable correctly.
Try making the count and end variables global and seeing if that helps. I think it's something to do with scope.
It's most likely because your Ajax call is async here:
UWA.Data.getJson(HighestClass.url + list, function(res){
if (res === false || res === "") res = 0;
/* Push this data into an array of objects alongside the class name */
var obj = { "name": name, "points": res };
HighestClass.array.push(obj);
});
and HighestClass.array is empty when HighestClass.display(); is called unless you wait for your Ajax call to complete. You can make your Ajax call synchronous or put the HighestClass.display(); call in the Ajax callback.