I am trying to hash the name together with the path of all the files from a specific directory. After hashing I am saving the hash into the array(hash_orig). Here is what I have done.
var fs = require('fs');
var Hashes = require('jshashes');
// Directory to scan is taken from the first CLI argument.
var path = process.argv[2];
var path_elements = new Array();
var hash_orig = new Array();
// NOTE(review): fs.readdir is asynchronous — the callback below runs
// only after the directory listing completes, not immediately.
fs.readdir(path, function(err, items) {
console.log('\nNumber of files in the directory: '+items.length);
console.log('Hashing algorithm: SHA1')
for (var i=0; i<items.length; i++) {
// Build the full path, then hash it (Base64-encoded SHA1).
path_elements.push(path+"/"+items[i])
var SHA1 = new Hashes.SHA1().b64(path_elements[i])
console.log([i+1]+':\t'+items[i]+"\t" + SHA1)
hash_orig.push(SHA1);
}
});
// BUG (the subject of this question): this line runs before the
// readdir callback has fired, so the array is still empty here.
console.log(hash_orig)
The problem:
The problem is that when I am pushing the hash into the hash_orig array and trying to access it outside the function fs.readdir the array hash_orig is empty. (console.log(hash_orig))
I need to access it outside in order to perform further some comparison operation to determine if the hash has been changed to Verifying Integrity of the files name and path.
What I am doing wrong?
Thank you.
fs.readdir is an async function. At the time when console.log(hash_orig) is reached, the callback of readdir has not been called yet.
Put the log-statement at the end of your callback and you will see the results.
Yes, you are simply missing that your fs.readdir function uses an asynchronous callback.
Thus, when you call console.log(hash_orig) from outside, the callback isn't completed yet.
Use a timeout and your array will be filled:
// Workaround: delay reading the array until (hopefully) the readdir
// callback has finished. The 500 ms figure is arbitrary — logging from
// inside the callback itself is the reliable approach.
setTimeout(function(){
console.log(hash_orig);
},500);
Related
I have a Lambda function that is triggered when a folder object -- for example,
67459e53-20cb-4e7d-8b7a-10e4cd165a44
is created in the root bucket.
Also in the root is index.json, the content index -- a simple array of these folders. For example, ["folder1", "folder2", ..., "folderN"].
Every time a folder object (like above) is added, the Lambda function triggers, gets index.json, adds the new folder object to the JSON array, and then puts index.json back.
Obviously, this createObject event is going to trigger the same Lambda function.
My code, below, should only process the event object if it's a folder; i.e., a key object with a / at the end. (A stackoverflow user was kind enough to help me with this solution.)
I have tested this code locally with lambda-local and everything looks good. My concern is (fear of God) that I could have RUNAWAY EXECUTION.
I have scoured the Lambda best practices and googled for "infinite loops" and the like, but cannot find a way to ENSURE that my Lambda won't execute more than, say, 50 times per day.
Yes, I could have the Lambda that actually creates the folder also write to index.json but that Lambda is part of the AWS Video-on-Demand reference example, and I don't really understand it yet.
Two questions: Can I configure notifications in S3 such that it filters on a (random folder key name with a) suffix of / as described
here? And/Or how can I configure this Lambda in the console to absolutely prevent runaway execution?
// dependencies
var async = require('async'); // NOTE(review): appears unused in this handler
var AWS = require('aws-sdk');
var util = require('util'); // NOTE(review): appears unused in this handler
// constants
const VOD_DEST_FOLDER = 'my-triggering-bucket'; //not used bc part of event object
const CONTENT_INDEX_FILENAME = 'index.json';
// get reference to S3 client (created once per container, reused across invocations)
var s3 = new AWS.S3();
exports.handler = async (event) => {
try {
console.log('Event', JSON.stringify(event));
// Bucket name.
const triggerBucket = event.Records[0].s3.bucket.name;
// New folder key added.
const newKey = event.Records[0].s3.object.key;
// Add newKey to content index ONLY if it is a folder object. If any other object
// is added in the bucket root then it won't result in new write.
if (newKey.indexOf('/') > -1) {
// Get existing data.
let existing = await s3.getObject({
Bucket: triggerBucket,
Key: CONTENT_INDEX_FILENAME
}).promise();
// Parse JSON object.
let existingData = JSON.parse(existing.Body);
// Get the folder name.
const folderName = newKey.substring(0, newKey.indexOf("/"));
// Check if we have an array.
if (!Array.isArray(existingData)) {
// Create array.
existingData = [];
}
existingData.push(folderName);
await s3.putObject({
Bucket: triggerBucket,
Key: CONTENT_INDEX_FILENAME,
Body: JSON.stringify(existingData),
ContentType: 'application/json'
}).promise();
console.log('Added new folder name ' + folderName);
return folderName;
} else {
console.log('Not a folder.');
return 'Ignored';
}
}
catch(err) {
return err;
}
};
You can configure the S3 notifications with key name filtering. Here's a step by step guide on how to do it in the web console. I think if you add a / suffix filter to the notification that triggers your Lambda, you will achieve your goal.
I am trying to pull the source code to several webpages at once. The links are fed into the array via a source text file. I am able to iterate through the array and print out the links and confirm they are there, but when trying to pass them through a function, they become undefined after the first iteration.
My ultimate goal is to have it save the source of each page to its own document. It does the first page correctly, but subsequent attempts are undefined. I've searched for hours but would appreciate it if someone could point me in the right direction.
// PhantomJS script: read a list of URLs from input.txt (one per line)
// and save each page's source to its own file.
var fs = require('fs'); // PhantomJS fs module, not Node's
var pageContent = fs.read('input.txt');
var arrdata = pageContent.split(/[\n]/); // one URL per entry
var system = require('system');
var page = require('webpage').create(); // single shared page object
var args = system.args;
var imagelink;
var content = " ";
// Open `file` and write the rendered source to a file named `imagelink`.
// NOTE(review): page.open is asynchronous and every call shares the one
// `page` object, so overlapping opens clobber each other (see answer below).
function handle_page(file, imagelink){
page.open(file,function(){
var js = page.evaluate(function (){
return document;
});
fs.write(imagelink, page.content, 'w');
// BUG: next_page() is invoked immediately (with no argument); only its
// return value is handed to setTimeout.
setTimeout(next_page(),500);
});
}
// Exit when no link remains; otherwise process the given link.
function next_page(imagelink){
var file = imagelink;
if(!file){phantom.exit(0);}
handle_page(file, imagelink);
}
for(var i in arrdata){
next_page(arrdata[i]);
}
I realize now that the for loop will only iterate once, then the other two functions make their own loop, so that makes sense, but I'm still having issues getting it running.
PhantomJS's page.open() is asynchronous (that's why there is a callback). The other thing is that page.open() is a long operation. If two such calls are made the second will overwrite the first one, because you're operating on the same page object.
The best way would be to use recursion:
// Processes arrdata[index], then recurses to the next entry once the
// current page has been saved; shuts PhantomJS down after the last one.
function handle_page(index){
    if (index < arrdata.length) {
        var url = arrdata[index];
        page.open(url, function(){
            // One output file per page, keyed by position in the list.
            fs.write("file_" + index + ".html", page.content, 'w');
            handle_page(index + 1);
        });
    } else {
        phantom.exit();
    }
}
handle_page(0);
Couple of other things:
setTimeout(next_page(),500); immediately invokes next_page() without waiting. You wanted setTimeout(next_page, 500);, but then it also wouldn't work, because without an argument next_page simply exits.
fs.write(imagelink, page.content, 'w') that imagelink is probably a URL in which case, you probably want to define another way to devise a filename.
While for(var i in arrdata){ next_page(arrdata[i]); } works here be aware that this doesn't work on all arrays and array-like objects. Use dumb for loops like for(var i = 0; i < length; i++) or array.forEach(function(item, index){...}) if it is available.
page.evaluate() is sandboxed and provides access to the DOM, but everything that is not JSON serializable cannot be passed out of it. You will have to put that into a serializable format before passing it out of evaluate().
I need to traverse forms on a site and save intermediate results to files. I'm using phantomjs' page.evaluate, but I'm having trouble accessing the filesystem from within page.evaluate's sandboxed environment. I have something like this:
// Pseudo-code: exhaustively walk every combination of the three form
// options, persisting each intermediate result as it is found.
for (var i = 0; i<option1.length; i++){
for (var ii = 0; ii<option2.length; ii++){
for (var iii = 0; iii<option3.length; iii++){
...
//I found what I want to save
// (fs is not reachable from inside page.evaluate — the point of the question)
fs.write("someFileName", someData);
}
}
}
Obviously, I don't have access to nodejs' fs from within page.evaluate, so the above does not work. I seem to have a few options:
Store everything I need to write to an array, and return that from the page.evaluate context into the outer, nodejs context, then save it from there. This would require memory I don't have.
Break up the above logic into smaller page.evaluate methods that return singe pieces of data to save to the filesytem.
Somehow pass into the page.evaluate a magic function to write to the filesystem. This seems to not be possible (if I try to pass in a function that calls fs.writeFile for example, I get that fs is undefined, even if fs is a free variable in the function I passed?)
Return an iterator which, when pulled, yields the next piece of data to be written
Setup a trivial web server on the localhost that simply accepts POST requests and writes their contents into the filesystem. The page.evaluate code would then make those requests to the localhost. I almost try this but I'm not sure I'll be affected by the same-origin policy.
What are my options here?
Your evaluation is sound, but you forgot one type: onCallback. You can register to the event handler in the phantom context and push your data from page context to a file through this callback:
// Runs in the outer PhantomJS context whenever the page calls
// window.callPhantom(...): writes the payload to disk, falling back to
// a default filename and write mode when none are supplied.
page.onCallback = function(data) {
    var file = data.file || "defaultFilename.txt";
    var mode = data.mode || "w";
    fs.write(file, data.str, mode);
};
...
page.evaluate(function(){
// Runs inside the sandboxed page context; only JSON-serializable data
// can cross back to the outer context.
for (var i = 0; i<option1.length; i++){
for (var ii = 0; ii<option2.length; ii++){
for (var iii = 0; iii<option3.length; iii++){
...
// save data
// window.callPhantom ships a serializable payload out of the sandbox
// to the page.onCallback handler registered in the outer context.
if (typeof window.callPhantom === 'function') {
window.callPhantom({ file: "someFileName", str: someData, mode: "a" }); // append
}
}
}
}
});
Note that PhantomJS does not run in Node.js. Although, there are bridges between Node.js and PhantomJS. See also my answer here.
I need to save 4 files in html output.
here is the code in phantomjs:
// Save the first 4 result pages: page 0 uses the bare URL, pages 1-3
// append &p=<i>.
var i = 0;
while (i<4)
{
var page = require('webpage').create();
var fs = {};
fs = require('fs');
if(i==0)
{
var url = 'http://www.lamoda.ru/shoes/dutiki-i-lunohody/?sitelink=leftmenu&sf=16&rdr565=1#sf=16';
} else {
var url = 'http://www.lamoda.ru/shoes/dutiki-i-lunohody/?sitelink=leftmenu&sf=16&rdr565=1#sf=16&p='+i;
}
// NOTE(review): page.open is asynchronous — all four opens start at
// once, each callback sees the loop's final value of i, and
// phantom.exit() kills the process as soon as the first one finishes.
page.open(url, function (status) {
var js = page.evaluate(function () {
return document;
});
console.log(js.all[0].outerHTML);
page.render('export'+i+'.png');
fs.write(i+'.html', js.all[0].outerHTML, 'w');
phantom.exit();
});
i++;
}
It seems that I need to change the FS variable, but I don't know how... I don't want to create fs1, fs2, fs3, fs4... I need to find a better solution; I hope you will help, thank you)
Is it okay if your requests are serial, so page 2 is not requested until page 1 has returned? If so I recommend you base your code of this multi-url sample in the documentation.
If you want the requests to run in parallel then you need to use a JavaScript closure to protect the local variables (see https://stackoverflow.com/a/17619716/841830 for an example of how to do that). Once you are doing that you can then either parse "url" to find out if it ends in p=1, p=2, etc. Or assign i inside the page object, and access it with this.i.
I'm new to JavaScript and creating classes/objects. I'm trying to wrap an open source library's code with some simple methods for me to use in my routes.
I have the below code that is straight from the source (sjwalter's Github repo; thanks Stephen for the library!).
I'm trying to export a file/module to my main app/server.js file with something like this:
var twilio = require('nameOfMyTwilioLibraryModule');
or whatever it is I need to do.
I'm looking to create methods like twilio.send(number, message) that I can easily use in my routes to keep my code modular. I've tried a handful of different ways but couldn't get anything to work. This might not be a great question because you need to know how the library works (and Twilio too). The var phone = client.getPhoneNumber(creds.outgoing); line makes sure that my outgoing number is a registered/paid for number.
Here's the full example that I'm trying to wrap with my own methods:
// Twilio example: once the outgoing number is verified, send `message`
// to every entry in `numbers`, exiting after the last SMS is processed.
var TwilioClient = require('twilio').Client,
    Twiml = require('twilio').Twiml,
    creds = require('./twilio_creds').Credentials,
    client = new TwilioClient(creds.sid, creds.authToken, creds.hostname),
    // Our numbers list. Add more numbers here and they'll get the message
    numbers = ['+numbersToSendTo'],
    message = '',
    numSent = 0;
// Ensures the outgoing number is a registered/paid-for number.
var phone = client.getPhoneNumber(creds.outgoing);
phone.setup(function() {
    for (var i = 0; i < numbers.length; i++) {
        phone.sendSms(numbers[i], message, null, function(sms) {
            sms.on('processed', function(reqParams, response) {
                console.log('Message processed, request params follow');
                console.log(reqParams);
                numSent += 1;
                // BUG FIX: the original compared against the undefined
                // variable `numToSend` (ReferenceError at runtime);
                // the intended count is numbers.length. A stray trailing
                // backtick (paste artifact) was also removed.
                if (numSent == numbers.length) {
                    process.exit(0);
                }
            });
        });
    }
});
Simply add the function(s) you wish to expose as properties on the exports object. Assuming your file was named mytwilio.js and stored under app/ and looks like,
app/mytwilio.js
var twilio = require('twilio');
var TwilioClient = twilio.Client;
var Twiml = twilio.Twiml;
var creds = require('./twilio_creds').Credentials;
var client = new TwilioClient(creds.sid, creds.authToken, creds.hostname);
// keeps track of whether the phone object
// has been populated or not.
var initialized = false;
var phone = client.getPhoneNumber(creds.outgoing);
phone.setup(function() {
// phone object has been populated
initialized = true;
});
/**
 * Send an SMS through the configured Twilio number.
 * @param {string} number - destination phone number
 * @param {string} message - SMS body
 * @param {Function} callback - invoked as (reqParams, response) once
 *   Twilio reports the message as processed
 * @throws {Error} if called before phone.setup() has completed
 */
exports.send = function(number, message, callback) {
// ignore request and throw if not initialized
if (!initialized) {
throw new Error("Patience! We are init'ing");
}
// otherwise process request and send SMS
phone.sendSms(number, message, null, function(sms) {
sms.on('processed', callback);
});
};
This file is mostly identical to what you already have with one crucial difference. It remembers whether the phone object has been initialized or not. If it hasn't been initialized, it simply throws an error if send is called. Otherwise it proceeds with sending the SMS. You could get fancier and create a queue that stores all messages to be sent until the object is initialized, and then sends em' all out later.
This is just a lazy approach to get you started. To use the function(s) exported by the above wrapper, simply include it the other js file(s). The send function captures everything it needs (initialized and phone variables) in a closure, so you don't have to worry about exporting every single dependency. Here's an example of a file that makes use of the above.
app/mytwilio-test.js
// Example: consume the wrapper from another module via its relative path.
var twilio = require("./mytwilio");
twilio.send("+123456789", "Hello there!", function(reqParams, response) {
// do something absolutely crazy with the arguments
});
If you don't like to include with the full/relative path of mytwilio.js, then add it to the paths list. Read up more about the module system, and how module resolution works in Node.JS.