How to hash a download stream in Node js - javascript

I want to know how to hash the download stream of a file using node js
Because I wanna hash the file before I store in to mongo db in order to avoid duplicates , I am using mongo grid fs by the way. https://github.com/aheckmann/gridfs-stream
downloading file
var download = function (url, dest, callback) {
request.get(url)
.on('error', function (err) { console.log(err) })
.pipe(fs.createWriteStream(dest))
.on('close', callback);
};
final_list.forEach(function (str) {
var filename = str.split('/').pop();
console.log('Downloading ' + filename);
download(str, filename, function () { console.log('Finished Downloading' + "" + filename) });
});

function getHash(dest, filename) {
let crypto = require('crypto');
let hash = crypto.createHash('sha256').setEncoding('hex');
let fileHash = "";
let filePath = `${dest}/${filename}`
fs.createReadStream(filePath)
.pipe(hash)
.on('finish', function() {
fileHash = hash.read();
console.log(`Filehash calculated for ${filename} is ${fileHash}.`);
// insert into mongo db here
});
}

Related

nodeJS await to move files then compress images

I wrote this code that checks image files sizes in a folder, if the file are bigger than 30000 bytes then moves to a temporary folder called 'before-compress'. The compressImages() function iterates over the 'before-compress' folder and returns the compressed images to the original folder. My question is: How can i await the process of move the exceeded size files and then call the compressImage() function?, as you can see in the code i handle this with a setTimeout once the forEach reaches the last item. Thanks in advance.
const fs = require('fs');
const path = require('path');
const imagemin = require("imagemin");
const imageminMozjpeg = require("imagemin-mozjpeg");
const imageminPngquant = require("imagemin-pngquant");
const imageminGifsicle = require('imagemin-gifsicle');
const directoryPath = path.join(__dirname, 'uploads');
fs.readdir(`${directoryPath}/products`, function (err, files) {
if (err) {
return console.log('Unable to scan directory: ' + err);
}
files.forEach(function (file) {
console.log(`File: ${file} - Size: ${getFilesizeInBytes(file)} bytes`);
if(getFilesizeInBytes(file) > 30000){
moveFile(file)
}
if(files.indexOf(file) == files.length - 1){
//console.log('last index');
setTimeout(() => compressImages(), 4000);
}
});
});
function getFilesizeInBytes(fileName) {
var stats = fs.statSync(`${directoryPath}/products/${fileName}`);
var fileSizeInBytes = stats.size;
return fileSizeInBytes;
}
function moveFile(file){
var oldPath = `${directoryPath}/products/${file}`;
var newPath = `${directoryPath}/before-compress/${file}`;
fs.rename(oldPath, newPath, function (err) {
if (err) throw err;
console.log(`File ${file} moved!`);
})
}
function compressImages(){
fs.readdir(`${directoryPath}/before-compress`, function (err, files) {
if (err) {
return console.log('Unable to scan directory: ' + err);
}
files.forEach(function (file) {
console.log(`File to compress: ${file}`);
let fileExt = file.split('.')[1];
let compressPlugin = fileExt == 'jpg' || fileExt == 'jpeg' ? imageminMozjpeg({quality: 40}) :
fileExt == 'png' ? imageminPngquant({quality: [0.5, 0.6]}) :
fileExt == 'gif' ? imageminGifsicle() : 0;
(async () => {
const files = await imagemin([`./uploads/before-compress/${file}`], {
destination: './uploads/products',
plugins: [ compressPlugin ]
});
fs.unlink(`${directoryPath}/before-compress/${file}`, err => err ? console.log(err) : 0);
})();
});
});
}
This kind of code would become much more readable if you would convert all the functions from using callbacks to using async.
If you want to keep using callbacks however, there are two options:
Make moveFile() to use fs.renameSync() instead of fs.rename(). Normally I would advise against that, but since you are already using fs.statSync() and I suppose you run this as a script with nothing in parallel, maybe that would be an acceptable solution.
Or make moveFile() accept a callback:
function moveFile(file, callback){
// [...]
fs.rename(oldPath, newPath, callback)
}
Now you can use this callback to detect when the file has been moved, for example like this:
// [...]
var done = 0;
var error = false;
files.forEach(function (file) {
if(error) return;
if(getFilesizeInBytes(file) > 30000){
moveFile(file, function(err) {
if (err) { console.log(err); error = true; }
done++;
});
} else {
done++;
}
if(done == files.length) {
compressImages(), 4000);
}
});
});

How to apply promises in a node.js function

I'm trying to write a program that can unzip a zip file, read the images in the file and apply grayscale to them.
right now i have these two functions :
var fs = require('fs'),
PNG = require('pngjs').PNG
const unzipper = require('unzipper')
PNG = require('pngjs').PNG
const dir = __dirname + "/";
const myFile = (fileName) => {
let createdFile = dir + fileName
fs.createReadStream(createdFile)
.pipe(unzipper.Extract({ path: 'myfile' }));
console.log('file unzipped')
}
myFile("myfile.zip")
function applyFilter(Name) {
fs.readdir(Name, 'utf-8', (err, data) => {
if (err) {
console.log(err)
} else {
data.forEach(function (file) {
if (file.includes('png')) {
let greyPNG = (__dirname + '/' + 'myfile' + '/' + file)
console.log (greyPNG)
fs.createReadStream(greyPNG)
.pipe(new PNG({
colorType: 0,
}))
.on('parsed', function () {
this.pack().pipe(fs.createWriteStream(__dirname + "/" + "myfile" + "/" + file));
});
}
})
}
})
}
applyFilter ('myfile')
these two function works fine individually, however, it will not run together, if I comment out "applyFilter". A zip file will be unzipped. if there is a file in the directory, "applyFilter" will apply grayscale on those pictures. I know that this is because both functions runs at the same time which causes the problem. So how do I implement promises to solve this issue. I know that I can use "Sync" version of the functions. I just want to know how to do it in promises.
There are Example's in official documentation about ‘promisify’:
https://nodejs.org/dist/latest-v12.x/docs/api/util.html#util_util_promisify_original
which will gives you a "promised" version of the same function (as long as the original function has a standard signature / a custom promisified definition).
const util = require('util');
const fs = require('fs');
const stat = util.promisify(fs.stat);
async function callStat() {
const stats = await stat('.');
console.log(`This directory is owned by ${stats.uid}`);
}
You can also implement your own, just return a promise (this example is reading a http request):
function read_request(request) {
request.setEncoding("utf-8");
return new Promise((resolve, reject) => {
var cache = "";
request.on("data", (chunk) => {
cache += cache;
}).on("end", () => {
resolve(cache);
}).on("error", reject);
});
}
You can use the stream finish event to determine when the file unzip is complete. We can then use promises and async / await to ensure we don't try to apply the filter before the files are ready.
const fs = require('fs');
const PNG = require('pngjs').PNG;
const unzipper = require('unzipper');
const dir = __dirname + "/";
function unzipFile(fileName, outputPath) {
return new Promise((resolve, reject) => {
let createdFile = dir + fileName
let stream = fs.createReadStream(createdFile)
.pipe(unzipper.Extract({ path: outputPath }));
stream.on('finish', () => {
console.log('file unzipped');
resolve(outputPath);
});
});
}
function applyFilter(Name) {
fs.readdir(dir, 'utf-8', (err, data) => {
if (err) {
console.log(err)
} else {
data.filter(file => file.includes("png")).forEach(file => {
let greyPNG = (__dirname + '/' + Name + '/' + file)
console.log (greyPNG)
fs.createReadStream(greyPNG)
.pipe(new PNG({
colorType: 0,
}))
.on('parsed', function () {
this.pack().pipe(fs.createWriteStream(greyPNG));
});
})
}
})
}
async function unzipAndApplyFilter(zipFile, outputPath) {
await unzipFile(zipFile, outputPath); // Wait until unzip is complete.
applyFilter(outputPath);
}
unzipAndApplyFilter('myfile.zip', 'myfile');

How to unzip file to disk and save path to database

I'm trying to create an app where a user can upload a zipped file, the app will unzip the file and save it to disk, and a path to the file will be saved to MongoDB for later retrieval.
I'm having a hard time getting the upload from form, unzipping, saving to disk, and uploading path of the unzipped file to the database all in one function. I'm really new to this and and am trying to learn about callbacks and such, I can't find any working solution for what I'm trying to do.
This is what my functions currently looks like:
// Multer is a form handling middleware
var storage = multer.diskStorage({
destination: function (req, file, cb) {
console.log(file)
cb(null, './uploads/unzip')
},
filename: function (req, file, cb) {
cb(null, file.fieldname + '-' + Date.now() + path.extname(file.originalname))
},
})
const upload = multer({ storage }).single('file'); //this is the 1st func in the route
const unzipp = async (req, res, next) => { //second func in route
try {
const dir = 'uploads/unzipped/';
var stream = fs.createReadStream(req.file.path)
stream.pipe(unzip.Extract({path: dir}))
.on('entry', function () {
var fileName = entry.path;
var type = entry.type;
var size = entry.size;
console.log(fileName, type, size)
if (type.isDirectory) {
postfile() //TRYING TO CALL POSTFILE() HERE
console.log('unzipped and path saved')
} else {
res.error('Failed unzipping')
}
fs.unlink(req.file.path, function (e) {
if (e) throw e;
console.log('successfully deleted '+req.file.path);
});
})
} catch (e) {
console.error(e)
}
next();
}
//Upload is a mongoDB cluster Schema
async function postfile () {
try{
let newUpload = new Upload(req.body); //new instance of uplaod based on the model based on req.body
newUpload.title = req.body.title;
newUpload.description = req.body.description;
newUpload.labels = req.body.labels;
newUpload.filePath = fileName; //ASSIGN FILEPATH IN DB SCHEMA TO UNZIPPED FILE PATH
console.log("filePath saved")
newUpload.save()
.then(newUpload => {
res.status(200).json({file: "File added successfully"})
})
.catch(err => {
res.status(400).send('File upload failed to save to DB :(')
})
} catch (e) {
console.error(e);
}
}
As you can see I'm trying to call the function to save the mongo schema in unzipp function. This is the post route in a separate folder:
router.post('/upload', FileCtrl.upload, FileCtrl.unzipp)
I've also tried saving the entry path of the unzipped file as a global var (fileName) and assigning the path in the Schema as fileName, but it doesn't work either:
const unzipp = async (req, res, next) => {
try {
const dir = 'uploads/unzipped/';
var stream = fs.createReadStream(req.file.path)
stream.pipe(unzip.Extract({path: dir}))
.on('entry', function () {
fileName = entry.path;
type = entry.type;
size = entry.size;
console.log(fileName, type, size)
// if (type.isDirectory) {
// console.log('unzipped and path saved')
// } else {
// res.error('Failed unzipping')
// }
result = {
file: fileName,
message:"File has been extracted"
};
//var file = req.file
fs.unlink(req.file.path, function (e) {
if (e) throw e;
console.log('successfully deleted '+req.file.path);
});
res.json(result);
})
} catch (e) {
console.error(e)
}
next();
}
const postfile = async (req, res) => {
try{
console.log("Posting to DB")
let newUpload = new Upload(req.body); //new instance of uplaod based on the model based on req.body
newUpload.title = req.body.title;
newUpload.description = req.body.description;
newUpload.labels = req.body.labels;
newUpload.filePath = fileName;
console.log("Ok so far")
newUpload.save()
.then(newUpload => {
res.status(200).json({file: "File added successfully"})
})
.catch(err => {
res.status(400).send('File upload failed to save to DB :(')
})
} catch (e) {
console.error(e);
}
}
this gives the error " ReferenceError: fileName is not defined "
the new route looks like this:
router.post('/upload', FileCtrl.upload, FileCtrl.unzipp, FileCtrl.postfile)
I've been trying to solve this for a really long time and would really appreciate some advice.
EDIT:
For testing purposes I hardcoded the filepath and it saved to the DB perfectly...
const postfile = async (req, res) => {
try{
console.log("Posting to DB")
//var stream = fs.readdirSync('./uploads/unzipped/Nancy_Collins_118226967_v2')
let newUpload = new Upload(req.body); //new instance of uplaod based on the model based on req.body
newUpload.title = req.body.title;
newUpload.description = req.body.description;
newUpload.labels = req.body.labels;
newUpload.filePath = './uploads/unzipped/Nancy_Collins_118226967_v2';
console.log("Ok so far")
newUpload.save()
.then(newUpload => {
res.status(200).json({file: "File added successfully"})
})
.catch(err => {
res.status(400).send('File upload failed to save to DB :(')
})
} catch (e) {
console.error(e);
}
}
Obviously this isn't practical or dynamic, but it's possible.

How to create object from two different fs methods?

I want to create object with filename and fileStat so in below code i am checking stats with async and for filename i used readDir now once i have values how can i create array of object ? I am trying to get filename and its created date and send it the client objToReturn. Any idea how can i acheive that task ?
app.js
function readDirectory(callback) {
var dirPath = './logs/ditLogs';
//this will get you list of all files. in directory
var files = fs.readdirSync(dirPath);
var objToReturn = [{
fileName: '',
fileStat: ''
}];
//then using async do like this
async.eachSeries(files, function(file, callback) {
var filePath = path.join(dirPath, file);
fs.stat(filePath, function(err, stats) {
objToReturn.fileStat = stats;
//write stats data into objToReturn
fs.readdir(path, function(err, items) {
objToReturn.filename = items;
});
callback();
});
}, function(err) {
//final callback when all files completed here send objToReturn to client
callback(objToReturn);
});
Okay, I think I see what you're trying to do here. You want to read all of the file paths in the given directory and then for each file, collect information before returning this information to a given callback. It appears right now that you are adding these props to the array objToReturn, instead of to each object and pushing it to objToReturn. In this way, you overwrite the properties on each async read.
You also use fs.readdir(path, function(err, items) { where I think you mean to refer to filePath that you declare earlier.
Try something like:
async.eachSeries(files, function(file, callback) {
var filePath = path.join(dirPath, file);
var fileInfo = {};
fs.stat(filePath, function(err, stats) {
fileInfo.fileStat = stats;
//write stats data into objToReturn
fs.readdir(filePath, function(err, items) {
fileInfo.filename = items;
});
objToReturn.push(fileInfo);
callback();
});
}, function(err) {
You can try this:
function readDirectory(callback) {
var dirPath = './logs/ditLogs';
//this will get you list of all files. in directory
var files = fs.readdirSync(dirPath);
var objToReturn = [];
//then using async do like this
async.eachSeries(files, function(file, callback) {
var filePath = path.join(dirPath, file);
fs.stat(filePath, function(err, stats) {
objToReturn.fileStat = stats;
//write stats data into objToReturn
fs.readdir(path, function(err, items) {
objToReturn.filename = items;
objToReturn.push({
fileName: items,
fileStat: stats
})
});
callback();
});
}, function(err) {
//final callback when all files completed here send objToReturn to client
callback(objToReturn);
});
}
Hope this should work.

Upload entire directory tree to S3 using AWS sdk in node js

I currently upload single objects to S3 using like so:
var options = {
Bucket: bucket,
Key: s3Path,
Body: body,
ACL: s3FilePermissions
};
S3.putObject(options,
function (err, data) {
//console.log(data);
});
But when I have a large resources folder for example, I use the AWS CLI tool.
I was wondering, is there a native way to do the same thing with the aws sdk (upload entire folders to s3)?
Old-school recursive way I whipped up in a hurry. Only uses core node modules and standard AWS sdk.
var AWS = require('aws-sdk');
var path = require("path");
var fs = require('fs');
const uploadDir = function(s3Path, bucketName) {
let s3 = new AWS.S3();
function walkSync(currentDirPath, callback) {
fs.readdirSync(currentDirPath).forEach(function (name) {
var filePath = path.join(currentDirPath, name);
var stat = fs.statSync(filePath);
if (stat.isFile()) {
callback(filePath, stat);
} else if (stat.isDirectory()) {
walkSync(filePath, callback);
}
});
}
walkSync(s3Path, function(filePath, stat) {
let bucketPath = filePath.substring(s3Path.length+1);
let params = {Bucket: bucketName, Key: bucketPath, Body: fs.readFileSync(filePath) };
s3.putObject(params, function(err, data) {
if (err) {
console.log(err)
} else {
console.log('Successfully uploaded '+ bucketPath +' to ' + bucketName);
}
});
});
};
uploadDir("path to your folder", "your bucket name");
Special thanks to Ali from this post with helping get the filenames
async/await + Typescript
If you need a solution that uses modern JavaScript syntax and is compatible with TypeScript, I came up with the following code. The recursive getFiles is borrowed from this answer (After all that years, recursion still gives me headache, lol).
import { promises as fs, createReadStream } from 'fs';
import * as path from 'path';
import { S3 } from 'aws-sdk';
async function uploadDir(s3Path: string, bucketName: string) {
const s3 = new S3();
// Recursive getFiles from
// https://stackoverflow.com/a/45130990/831465
async function getFiles(dir: string): Promise<string | string[]> {
const dirents = await fs.readdir(dir, { withFileTypes: true });
const files = await Promise.all(
dirents.map((dirent) => {
const res = path.resolve(dir, dirent.name);
return dirent.isDirectory() ? getFiles(res) : res;
})
);
return Array.prototype.concat(...files);
}
const files = (await getFiles(s3Path)) as string[];
const uploads = files.map((filePath) =>
s3
.putObject({
Key: path.relative(s3Path, filePath),
Bucket: bucketName,
Body: createReadStream(filePath),
})
.promise()
);
return Promise.all(uploads);
}
await uploadDir(path.resolve('./my-path'), 'bucketname');
here is a cleaned up/debugged/working version of #Jim's solution
function uploadArtifactsToS3() {
const artifactFolder = `logs/${config.log}/test-results`;
const testResultsPath = './test-results';
const walkSync = (currentDirPath, callback) => {
fs.readdirSync(currentDirPath).forEach((name) => {
const filePath = path.join(currentDirPath, name);
const stat = fs.statSync(filePath);
if (stat.isFile()) {
callback(filePath, stat);
} else if (stat.isDirectory()) {
walkSync(filePath, callback);
}
});
};
walkSync(testResultsPath, async (filePath) => {
let bucketPath = filePath.substring(testResultsPath.length - 1);
let params = {
Bucket: process.env.SOURCE_BUCKET,
Key: `${artifactFolder}/${bucketPath}`,
Body: fs.readFileSync(filePath)
};
try {
await s3.putObject(params).promise();
console.log(`Successfully uploaded ${bucketPath} to s3 bucket`);
} catch (error) {
console.error(`error in uploading ${bucketPath} to s3 bucket`);
throw new Error(`error in uploading ${bucketPath} to s3 bucket`);
}
});
}
I was just contemplating this problem the other day, and was thinking something like this:
...
var async = require('async'),
fs = require('fs'),
path = require("path");
var directoryName = './test',
directoryPath = path.resolve(directoryName);
var files = fs.readdirSync(directoryPath);
async.map(files, function (f, cb) {
var filePath = path.join(directoryPath, f);
var options = {
Bucket: bucket,
Key: s3Path,
Body: fs.readFileSync(filePath),
ACL: s3FilePermissions
};
S3.putObject(options, cb);
}, function (err, results) {
if (err) console.error(err);
console.log(results);
});
Here's a version that contains a Promise on the upload method. This version allows you to perform an action when all uploads are complete Promise.all().then...
const path = require('path');
const fs = require('fs');
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const directoryToUpload = 'directory-name-here';
const bucketName = 'name-of-s3-bucket-here';
// get file paths
const filePaths = [];
const getFilePaths = (dir) => {
fs.readdirSync(dir).forEach(function (name) {
const filePath = path.join(dir, name);
const stat = fs.statSync(filePath);
if (stat.isFile()) {
filePaths.push(filePath);
} else if (stat.isDirectory()) {
getFilePaths(filePath);
}
});
};
getFilePaths(directoryToUpload);
// upload to S3
const uploadToS3 = (dir, path) => {
return new Promise((resolve, reject) => {
const key = path.split(`${dir}/`)[1];
const params = {
Bucket: bucketName,
Key: key,
Body: fs.readFileSync(path),
};
s3.putObject(params, (err) => {
if (err) {
reject(err);
} else {
console.log(`uploaded ${params.Key} to ${params.Bucket}`);
resolve(path);
}
});
});
};
const uploadPromises = filePaths.map((path) =>
uploadToS3(directoryToUpload, path)
);
Promise.all(uploadPromises)
.then((result) => {
console.log('uploads complete');
console.log(result);
})
.catch((err) => console.error(err));
You might try the node-s3-client.
UPDATE: Available on npm here
From the sync a directory to s3 docs:
UPDATE: Added client inialization code.
var client = s3.createClient({
maxAsyncS3: 20, // this is the default
s3RetryCount: 3, // this is the default
s3RetryDelay: 1000, // this is the default
multipartUploadThreshold: 20971520, // this is the default (20 MB)
multipartUploadSize: 15728640, // this is the default (15 MB)
s3Options: {
accessKeyId: "YOUR ACCESS KEY",
secretAccessKey: "YOUR SECRET ACCESS KEY"
}
});
var params = {
localDir: "some/local/dir",
deleteRemoved: true, // default false, whether to remove s3 objects
// that have no corresponding local file.
s3Params: {
Bucket: "s3 bucket name",
Prefix: "some/remote/dir/",
// other options supported by putObject, except Body and ContentLength.
// See: http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#putObject-property
},
};
var uploader = client.uploadDir(params);
uploader.on('error', function(err) {
console.error("unable to sync:", err.stack);
});
uploader.on('progress', function() {
console.log("progress", uploader.progressAmount, uploader.progressTotal);
});
uploader.on('end', function() {
console.log("done uploading");
});
This works for me (you'll need to add walkSync package):
async function asyncForEach(array, callback) {
for (let index = 0; index < array.length; index++) {
await callback(array[index], index, array);
}
}
const syncS3Directory = async (s3Path, endpoint) => {
await asyncForEach(walkSync(s3Path, {directories: false}), async (file) => {
const filePath = Path.join(s3Path, file);
const fileContent = fs.readFileSync(filePath);
const params = {
Bucket: endpoint,
Key: file,
Body: fileContent,
ContentType: "text/html",
};
let s3Upload = await s3.upload(params).promise();
s3Upload ? undefined : Logger.error("Error synchronizing the bucket");
});
console.log("S3 bucket synchronized!");
};
const AWS = require("aws-sdk");
const fs = require("fs");
const path = require("path");
const async = require("async");
const readdir = require("recursive-readdir");
// AWS CRED
const ID = "<accessKeyId>";
const SECRET = "<secretAccessKey>";
const rootFolder = path.resolve(__dirname, "../");
const uploadFolder = "./sources";
// The name of the bucket that you have created
const BUCKET_NAME = "<Bucket_Name>";
const s3 = new AWS.S3({
accessKeyId: ID,
secretAccessKey: SECRET
});
function getFiles(dirPath) {
return fs.existsSync(dirPath) ? readdir(dirPath) : [];
}
async function uploadToS3(uploadPath) {
const filesToUpload = await getFiles(path.resolve(rootFolder, uploadPath));
console.log(filesToUpload);
return new Promise((resolve, reject) => {
async.eachOfLimit(
filesToUpload,
10,
async.asyncify(async file => {
const Key = file.replace(`${rootFolder}/`, "");
console.log(`uploading: [${Key}]`);
return new Promise((res, rej) => {
s3.upload(
{
Key,
Bucket: BUCKET_NAME,
Body: fs.readFileSync(file)
},
err => {
if (err) {
return rej(new Error(err));
}
res({ result: true });
}
);
});
}),
err => {
if (err) {
return reject(new Error(err));
}
resolve({ result: true });
}
);
});
}
uploadToS3(uploadFolder)
.then(() => {
console.log("upload complete!");
process.exit(0);
})
.catch(err => {
console.error(err.message);
process.exit(1);
});

Categories

Resources