Node.js read a file in a zip without unzipping it - javascript

I have a zip file (actually it's an epub file) I need to loop through the files in it and read them without unzipping them to the disk.
I tried to use a Node.js library called JSZip, but the content of each file is held in memory as a Buffer, and whenever I try to decode that buffer to a string the result is unreadable.
Here's the code I tried:
// Reads the whole epub into memory, loads it with JSZip, and prints every
// entry whose name contains "json".
const zip = new JSZip();
// read a zip file
fs.readFile(epubFile, function (err, data) {
if (err) throw err;
// The `zip` parameter below shadows the outer `zip` instance.
zip.loadAsync(data).then(function (zip) {
// zip.files is keyed by entry name; async.eachOf visits each (value, key) pair.
async.eachOf(zip.files, function (content, fileName, callback) {
if (fileName.match(/json/)) {
// NOTE(review): `_data.compressedContent` is an internal JSZip field and, going
// by its name, presumably holds the still-compressed bytes; that would explain
// the unreadable text described above. Confirm against the JSZip documentation.
var buf = content._data.compressedContent;
console.log(fileName);
// Decodes the raw bytes as UTF-8 without any decompression step.
console.log((new Buffer(buf)).toString('utf-8'));
}
callback();
}, function (err) {
// Final callback: only reports an error if one was passed along.
if (err) {
console.log(err);
}
});
});
});

Since unzip seems to be abandoned, I used node-stream-zip with pretty good success.
npm install node-stream-zip
Reading files be all like:
const StreamZip = require('node-stream-zip');
// Open the archive. `storeEntries: true` lets entries be listed and read by
// name once the 'ready' event has fired.
const zip = new StreamZip({
  file: 'archive.zip',
  storeEntries: true,
});

// An EventEmitter 'error' event with no listener is thrown as an uncaught
// exception, so a missing or corrupt archive must be handled explicitly.
zip.on('error', (err) => {
  console.error('Failed to read archive:', err);
});

zip.on('ready', () => {
  try {
    // Take a look at the files
    console.log('Entries read: ' + zip.entriesCount);
    for (const entry of Object.values(zip.entries())) {
      const desc = entry.isDirectory ? 'directory' : `${entry.size} bytes`;
      console.log(`Entry ${entry.name}: ${desc}`);
    }
    // Read a file in memory
    const zipDotTxtContents = zip.entryDataSync('path/inside/zip.txt').toString('utf8');
    console.log("The content of path/inside/zip.txt is: " + zipDotTxtContents);
  } finally {
    // Do not forget to close the file once you're done, even if reading threw.
    zip.close();
  }
});

npm install unzip
https://www.npmjs.com/package/unzip
// Stream the archive through unzip's parser and deal with each entry as it is emitted.
fs.createReadStream('path/to/archive.zip')
  .pipe(unzip.Parse())
  .on('entry', (entry) => {
    // Metadata available on each entry; `type` is 'Directory' or 'File'.
    const { path: fileName, type, size } = entry;
    const isWanted = fileName === "this IS the file I'm looking for";
    if (!isWanted) {
      // Entries that are not consumed are drained rather than left unread.
      entry.autodrain();
      return;
    }
    entry.pipe(fs.createWriteStream('output/path'));
  });

Related

need to do a file system at particular file location in nodejs

I am trying to create a zip archive and need to save it in a particular folder, zip_folder, which was created with my project folder. This happens when I call an API. I am not able to make it work with the relative path, but if I use __dirname it works properly. Can anyone help me solve this? Thank you.
const fs = require('fs');
const archiver = require('archiver');
// NOTE(review): a relative path handed to fs is resolved against the process's
// current working directory, not against this file's directory. That is the
// likely reason this location "does not work" while the __dirname variant does.
var file1 = '../zip_folder/scorm.zip';
// NOTE(review): the backslashes in this literal are not escaped, so they act as
// escape sequences (e.g. "\n" becomes a newline). `onlyPath` is not used in the
// visible code.
var onlyPath = require('path').dirname('C:\Users\is9115\Desktop\node_moodle');
const mysql = require('../shared/connection');
// create a file to stream archive data to.
const archive = archiver('zip', {
zlib: { level: 9 } // Sets the compression level.
});
// Rewrites the manifest title, then streams a zip containing the manifest,
// a text entry and the schema directory into `output`.
async function createzip()
{
// NOTE(review): `output` is declared twice with `const` in the same scope, which
// is a SyntaxError; presumably only one of the two lines is active at a time.
const output = fs.createWriteStream(file1); // this is not working at file location
const output = fs.createWriteStream(__dirname+'/scorm.zip');//working but creating at root folder itself
// Reads imsmanifest.xml, replaces every "Var_Title" with "Golf" and writes the
// result to imsmanifest1.xml. Both calls are asynchronous.
fs.readFile('imsmanifest.xml', 'utf-8',async function(err, data) {
if (err) throw err;
var newValue = data.replace(/Var_Title/g, 'Golf');
fs.writeFile('imsmanifest1.xml', newValue, 'utf-8', function(err, data) {
if (err) throw err;
console.log('Done!');
})
})
// NOTE(review): everything below runs immediately, without waiting for the
// readFile/writeFile callbacks above, so imsmanifest1.xml may not have been
// (re)written yet when it is appended.
archive.pipe(output);
const file2 = __dirname + '/imsmanifest1.xml';
archive.append(fs.createReadStream(file2), { name: 'imsmanifest.xml' });
archive.append('string cheese!', { name: 'file2.txt' });
archive.directory('scorm12schemadefinition/', false);
// NOTE(review): 'imsmainfest1.xml' is spelled differently from the
// 'imsmanifest1.xml' written above, and this adds a second entry under the
// same name 'imsmanifest.xml'. Confirm which one is intended.
archive.file('imsmainfest1.xml', { name: 'imsmanifest.xml' });
archive.finalize();
}

How could I check If a zip file is corrupted in NodeJS?

I would like to check whether a ZIP file is corrupted in NodeJS, using as little CPU and memory as possible.
How to corrupt a ZIP file:
Download a ZIP file
Open the ZIP file using a text editor optimized like Notepad++
Rewrite the header. Only put random characters.
I am trying to reach this goal using the NPM library "node-stream-zip"
/**
 * Opens the archive at `path` with node-stream-zip's async API and pipes a
 * stream obtained from it to stdout; any failure is rethrown as a bare Error.
 */
private async assertZipFileIntegrity(path: string) {
try {
const zip = new StreamZip.async({ file: path });
// NOTE(review): the same `path` (the archive's location on disk) is passed to
// zip.stream(); presumably stream() expects the name of an entry inside the
// archive. Confirm against the node-stream-zip documentation.
const stm = await zip.stream(path);
stm.pipe(process.stdout);
// The archive is closed only once the stream has ended.
stm.on('end', () => zip.close());
} catch (error) {
// The caught error is discarded, so callers only ever see an empty Error;
// this matches the "[Error]" rejection reported below.
throw new Error();
}
}
However, when I run the unit tests I receive an error inside an array:
Rejected to value: [Error]
import zip from 'yauzl';
import path from 'path';
const invalidZipPath = path.resolve('invalid.zip');
const validZipPath = path.resolve('valid.zip');
/**
 * Checks whether `filePath` can be opened as a ZIP archive by yauzl.
 *
 * yauzl reports the outcome only through its callback, so the result is exposed
 * as a promise. Returning true/false from inside the callback, as before, never
 * reached the caller.
 *
 * @param {string} filePath - Path of the archive to probe.
 * @returns {Promise<boolean>} Resolves to true when the archive opens, false otherwise.
 */
const isValidZipFile = (filePath) =>
  new Promise((resolve) => {
    zip.open(filePath, { lazyEntries: true }, (err, zipfile) => {
      if (err) {
        console.log('fail to read ', filePath);
        resolve(false);
        return;
      }
      console.log('success read ', filePath);
      // No entries are read here, so close the archive explicitly.
      zipfile.close();
      resolve(true);
    });
  });
isValidZipFile(validZipPath);
isValidZipFile(invalidZipPath);

Bulk convert XML to JSON with particular output

How do I convert approximately 3000 XML files into JSON files using node?
I've been able to get the script below to convert a single XML file to JSON in the format that I want, and I've been attempting to promisify the script using bluebird, but I keep getting errors. I've been able to get the script below to list the filenames, but then I get the error "Unhandled rejection Error: ENOENT: no such file or directory, open 'journal-article-10.2307_357359.xml'"
var Promise = require('bluebird');
var fs = require('fs');
var convert = require('xml-js');
/**
 * Promise-returning wrapper around fs.readdir.
 *
 * @param {string} dirname - Directory to list.
 * @returns {Promise<string[]>} Resolves with the entry names, rejects with the fs error.
 */
fs.readdirAsync = function (dirname) {
  return new Promise((resolve, reject) => {
    fs.readdir(dirname, (err, filenames) => {
      if (!err) {
        resolve(filenames);
        return;
      }
      reject(err);
    });
  });
};
/**
 * Promise-returning wrapper around fs.readFile.
 *
 * @param {string} filename - File to read.
 * @param {string} enc - Encoding forwarded to fs.readFile.
 * @returns {Promise<string|Buffer>} Resolves with the file contents, rejects with the fs error.
 */
fs.readFileAsync = function (filename, enc) {
  return new Promise((resolve, reject) => {
    fs.readFile(filename, enc, (err, data) => {
      if (!err) {
        resolve(data);
        return;
      }
      reject(err);
    });
  });
};
/**
 * Reads `filename` as UTF-8 text through the promisified fs.readFileAsync helper.
 *
 * @param {string} filename - Path handed unchanged to fs.readFileAsync.
 * @returns {Promise<string>} Whatever fs.readFileAsync resolves with.
 */
function getFile(filename) {
  const encoding = 'utf8';
  return fs.readFileAsync(filename, encoding);
}
// Lists ./metadata/, reads every listed file as UTF-8, then runs the
// conversion callback once per file's contents.
fs.readdirAsync('./metadata/').then(function (filenames){
console.log(filenames);
// NOTE(review): the names are passed to getFile exactly as readdir returned
// them, i.e. without the './metadata/' prefix. This is where the ENOENT in the
// question comes from.
return Promise.all(filenames.map(getFile));
}).then(function (files){
// The callback parameter is also called `files` and shadows the array; inside
// the callback it holds the text of a single file.
files.forEach(function(files){
// Converts a text value to a native type: anything Number() can parse becomes
// a number, 'true'/'false' (any letter case) become booleans, everything else
// is returned unchanged.
function nativeType(value) {
var nValue = Number(value);
if (!isNaN(nValue)) {
return nValue;
}
var bValue = value.toLowerCase();
if (bValue === 'true') {
return true;
} else if (bValue === 'false') {
return false;
}
return value;
}
// textFn hook: replaces the value stored under the last key of the parent
// element's own parent with the converted text.
var removeJsonTextAttribute = function(value, parentElement) {
try {
var keyNo = Object.keys(parentElement._parent).length;
var keyName = Object.keys(parentElement._parent)[keyNo - 1];
parentElement._parent[keyName] = nativeType(value);
// Any exception raised above is deliberately ignored.
} catch (e) {}
};
// Options handed to the xml-js converter.
var options = {
compact: true,
trim: true,
ignoreDeclaration: true,
ignoreInstruction: true,
ignoreAttributes: true,
ignoreComment: true,
ignoreCdata: true,
ignoreDoctype: true,
textFn: removeJsonTextAttribute,
spaces: 2
};
// NOTE(review): `fileaname` is not defined anywhere in the visible code, and
// xml2json receives only `options`; the file's text is never passed in.
// Confirm against the xml-js signature. fs.writeFile is also called without a
// callback.
fs.writeFile("./json/" + fileaname + ".json", convert.xml2json(options));
});
});
I would like to be able to convert the entire folder of XML files to JSON (to upload to couchDB).
ENOENT is a standard POSIX error code that means the path to the file is not found. You've tried to open the name of a file or directory that doesn't exist. In this case, fs.readdir returns names that are not fully qualified file names, so you'll need to prefix them with the path that you gave it, specifically: './metadata/'. The error message that you're seeing tells you the file that was opened: journal-article-10.2307_357359.xml, but in this case, you probably want to open ./metadata/journal-article-10.2307_357359.xml.
You can see this with the following simple example:
# Create a dummy directory named `garbage` that contains only 3 entries
$ mkdir -p garbage/{foo,bar,baz}
# Run `node` interactively
$ node
> const fs = require('fs');
undefined
> fs.readdirSync('./garbage')
[ 'bar', 'baz', 'foo' ]
It doesn't make sense to open 'bar' since that doesn't exist. You'll need to open './garbage/bar' for it to work correctly.

Extracting zipped files using JSZIP in javascript

In my webpage, a user is supposed to upload a zipped file. Within the zipped file are 2 files: another zip file and a txt file. On my server, after receiving the zip, I want to unzip the zip file to extract the zip & txt file, then move those 2 files to a predefined folder. I have a piece of code that extracts the zip file, but the data doesn't seem correct. Firstly, it unzipped a zip and 2 txt file when there should only be 1 txt file. It created an additional 'undefined' txt file. Also, in my txt file, instead of the original data, it was replaced with the following text: '[undefined] [undefined]'.
Can anyone help me on this? The following is my code:
// Reads the uploaded zip from disk and tries to write each of its entries
// under the `path` prefix.
var JSZip = require('JSZip');
fs.readFile( filePath, function(err, data){
// A read error is ignored: nothing happens when `err` is set.
if (!err){
// NOTE(review): this instance is never used; loadAsync is called on the JSZip
// constructor below and the callback parameter shadows `zip`.
var zip = new JSZip();
JSZip.loadAsync(data).then(function(zip){
// NOTE(review): `object` (lowercase) is not defined in the visible code;
// presumably `Object.keys` was intended.
object.keys(zip.files).forEach(function(filename){
// `content` is the entry taken from zip.files, not the entry's bytes; it is
// passed to writeFileSync as-is. NOTE(review): this presumably explains the
// unexpected file contents described above.
var content = zip.files[filename];
var dest = path + filename;
fs.writeFileSync(dest, content);
});
});
}
});
This is a working version I am using:
var jsZip = require('jszip')
// Load the archive, then decode every entry as a string and print it.
jsZip.loadAsync(file).then((archive) => {
  const entryNames = Object.keys(archive.files);
  entryNames.forEach((entryName) => {
    const entry = archive.files[entryName];
    entry.async('string').then((fileData) => {
      console.log(fileData); // These are your file contents
    });
  });
});
You can get most of the information you need from http://stuk.github.io/jszip/documentation/examples.html but it's a little hard to get in one place, you have to look around a bit.
It took a bit of digging in their documentation but they have an example that shows how to read the file contents from a ZIP.
You are getting the object that describes the ZIP contents but not the actual content. Here is an adjusted version:
var JSZip = require('JSZip');
// Reads the archive at `filePath` and writes every file it contains to
// `path + <entry name>`.
fs.readFile(filePath, function(err, data) {
  if (err) {
    // Report read failures instead of silently doing nothing.
    console.error(err);
    return;
  }
  var zip = new JSZip();
  zip.loadAsync(data).then(function(contents) {
    var writes = Object.keys(contents.files)
      // zip.file(name) yields null for directory entries, so calling .async on
      // them would throw; only real files are extracted.
      .filter(function(filename) {
        return !contents.files[filename].dir;
      })
      .map(function(filename) {
        return zip.file(filename).async('nodebuffer').then(function(content) {
          var dest = path + filename;
          fs.writeFileSync(dest, content);
        });
      });
    // Returning the combined promise routes any decode/write failure to the
    // catch below instead of leaving it as an unhandled rejection.
    return Promise.all(writes);
  }).catch(function(error) {
    console.error(error);
  });
});
Here was my strategy in Angular 10 to write a single file to a zip with a custom extension, then later read that same zip to retrieve the json.
Package the file into a zip (custom file endings supported)
import { saveAs } from 'file-saver';
import * as JSZip from 'jszip';
/**
 * Packs a JSON string into a single-entry zip archive and hands the archive to
 * file-saver's `saveAs`.
 *
 * @param filename - Base name used for both the `.json` entry and the saved file.
 * @param jsonToExport - Already-serialized JSON text to store in the archive.
 * @param fileNameEnding - Extension appended to the saved file name.
 * @returns The file name that was passed to `saveAs`.
 */
export async function exportJson(
  filename: string,
  jsonToExport: string,
  fileNameEnding = '.zip'
): Promise<string> {
  // `new Blob(...)` either returns a Blob or throws, so the previous falsy
  // check could never trigger and has been removed.
  const jsonFile = new Blob([jsonToExport], {
    type: 'application/json',
  });

  const zipper = new JSZip();
  zipper.file(`${filename}.json`, jsonFile);
  const zippedFile = await zipper.generateAsync({ type: 'blob' });

  const exportFilename = `${filename}${fileNameEnding}`;
  saveAs(zippedFile, exportFilename);
  // An async function already wraps its return value in a promise.
  return exportFilename;
}
Read the file contents from the zip
// file parameter retrieved from an input type=file
/**
 * Loads the zip in `file` and returns the text of its first entry.
 *
 * The entry is decoded with `async('string')`, so the result is a string; the
 * return type reflects that rather than `Blob`.
 *
 * @param file - Zip file selected by the user.
 * @returns The first entry's contents as text; rejects with
 *   'No file was found' when the archive has no entries.
 */
export async function readExportedJson(file: File): Promise<string> {
  const zipper = new JSZip();
  const unzipped = await zipper.loadAsync(file);

  const entryNames = Object.keys(unzipped.files);
  if (!entryNames.length) {
    // Kept as a string rejection so existing callers see the same reason.
    return Promise.reject('No file was found');
  }

  const firstEntry = unzipped.files[entryNames[0]];
  return zipper.file(firstEntry.name).async('string');
}
This answer is cordova-plugin-file specific.
As stated in the docs:
Directory entries have to be created successively. For example, the
call fs.root.getDirectory('dir1/dir2', {create:true}, successCallback,
errorCallback) will fail if dir1 did not exist.
I am almost certain that the currently accepted answer cannot guarantee that file content/folders are always retrieved in the same order. This could result in problems with an API such as cordova-plugin-file. Especially when you invoke another async function to asynchronously create the directory on the filesystem.
You may want to filter the directories of your zip archive first and create them in a sync manner before continuing to extract other files as already answered:
// Collect the names of all entries flagged as directories in the archive.
const directoryNames = Object.keys(zip.files).filter(name => zip.files[name].dir);
// Create them one after another: each creation is awaited before the next
// starts, so directories are made in the order the names were listed.
for (const directoryName of directoryNames) {
await this.createDirectory(directoryName, dirEntry);
}
// ...
/**
 * Creates (or opens) the directory `dirName` below `dirEntry`.
 *
 * Adapts the callback-style `getDirectory` API to a promise.
 *
 * @param dirName - Directory path relative to `dirEntry`.
 * @param dirEntry - Parent directory entry to create the directory in.
 * @returns Resolves with the created entry, rejects with the file error.
 */
private createDirectory = (dirName: string, dirEntry: DirectoryEntry) => {
  // The executor's parameter list needs its own parentheses; without them the
  // expression does not parse.
  return new Promise<DirectoryEntry>((resolve, reject) => {
    dirEntry.getDirectory(
      dirName,
      { create: true },
      (createdEntry) => resolve(createdEntry),
      (fileError) => reject(fileError)
    );
  });
}

Looping through files in a folder Node.JS

I am trying to loop through and pick up files in a directory, but I have some trouble implementing it. How to pull in multiple files and then move them to another folder?
const dirname = 'C:/FolderwithFiles';

console.log("Going to get file info!");

// Stat the path itself and report what kind of filesystem entry it is.
fs.stat(dirname, (err, stats) => {
  if (err) {
    console.error(err);
    return;
  }

  console.log(stats);
  console.log("Got file info successfully!");

  // Check file type
  const isFile = stats.isFile();
  console.log("isFile ? " + isFile);
  const isDirectory = stats.isDirectory();
  console.log("isDirectory ? " + isDirectory);
});
Older answer with callbacks
You want to use the fs.readdir function to get the directory contents and the fs.rename function to actually do the renaming. Both of these functions have synchronous versions if you need to wait for them to finish before running the code that follows.
I wrote a quick script that does what you described.
var fs = require('fs');
var path = require('path');
// In newer Node.js versions where process is already global this isn't necessary.
var process = require("process");
var moveFrom = "/home/mike/dev/node/sonar/moveme";
var moveTo = "/home/mike/dev/node/sonar/tome";

// Stats one directory entry, reports its kind, then renames it into moveTo.
function moveEntry(name) {
  // Make one pass and make the file complete
  var source = path.join(moveFrom, name);
  var target = path.join(moveTo, name);

  fs.stat(source, function (statError, info) {
    if (statError) {
      console.error("Error stating file.", statError);
      return;
    }

    if (info.isFile()) {
      console.log("'%s' is a file.", source);
    } else if (info.isDirectory()) {
      console.log("'%s' is a directory.", source);
    }

    fs.rename(source, target, function (renameError) {
      if (renameError) {
        console.error("File moving error.", renameError);
        return;
      }
      console.log("Moved file '%s' to '%s'.", source, target);
    });
  });
}

// Loop through all the files in the temp directory
fs.readdir(moveFrom, function (listError, names) {
  if (listError) {
    console.error("Could not list the directory.", listError);
    process.exit(1);
  }

  names.forEach(function (name) {
    moveEntry(name);
  });
});
Tested on my local machine.
node testme.js
'/home/mike/dev/node/sonar/moveme/hello' is a file.
'/home/mike/dev/node/sonar/moveme/test' is a directory.
'/home/mike/dev/node/sonar/moveme/test2' is a directory.
'/home/mike/dev/node/sonar/moveme/test23' is a directory.
'/home/mike/dev/node/sonar/moveme/test234' is a directory.
Moved file '/home/mike/dev/node/sonar/moveme/hello' to '/home/mike/dev/node/sonar/tome/hello'.
Moved file '/home/mike/dev/node/sonar/moveme/test' to '/home/mike/dev/node/sonar/tome/test'.
Moved file '/home/mike/dev/node/sonar/moveme/test2' to '/home/mike/dev/node/sonar/tome/test2'.
Moved file '/home/mike/dev/node/sonar/moveme/test23' to '/home/mike/dev/node/sonar/tome/test23'.
Moved file '/home/mike/dev/node/sonar/moveme/test234' to '/home/mike/dev/node/sonar/tome/test234'.
Update: fs.promises functions with async/await
Inspired by ma11hew28's answer (shown here), here is the same thing as above but with the async functions in fs.promises. As noted by ma11hew28, this may have memory limitations versus fs.promises.opendir added in v12.12.0.
Quick code below.
//jshint esversion:8
//jshint node:true
const fs = require( 'fs' );
const path = require( 'path' );
const moveFrom = "/tmp/movefrom";
const moveTo = "/tmp/moveto";

/**
 * Moves every entry of moveFrom into moveTo, one entry at a time, logging the
 * kind of each entry before it is renamed.
 */
async function moveEverything() {
  const fsp = fs.promises;

  // Get the files as an array
  const names = await fsp.readdir(moveFrom);

  for (const name of names) {
    // Get the full paths
    const source = path.join(moveFrom, name);
    const target = path.join(moveTo, name);

    // Stat the file to see if we have a file or dir
    const info = await fsp.stat(source);
    if (info.isFile()) {
      console.log("'%s' is a file.", source);
    } else if (info.isDirectory()) {
      console.log("'%s' is a directory.", source);
    }

    // Now move async
    await fsp.rename(source, target);

    // Log because we're crazy
    console.log("Moved '%s'->'%s'", source, target);
  }
}

// Start immediately; anything that goes wrong anywhere above ends up here.
moveEverything().catch((e) => {
  console.error("We've thrown! Whoops!", e);
});
fs.readdir(path[, options], callback) (which Mikey A. Leonetti used in his answer) and its variants (fsPromises.readdir(path[, options]) and fs.readdirSync(path[, options])) each reads all of a directory's entries into memory at once. That's good for most cases, but if the directory has very many entries and/or you want to lower your application's memory footprint, you could instead iterate over the directory's entries one at a time.
Asynchronously
Directories are async iterable, so you could do something like this:
const fs = require('fs')
/**
 * Prints the name of every entry in the directory at `path`, one per line.
 *
 * @param {string} path - Directory to list.
 * @returns {Promise<void>} Settles once every entry has been printed.
 */
async function ls(path) {
  const directory = await fs.promises.opendir(path);
  // The opened directory is async iterable, so entries are visited one by one.
  for await (const { name } of directory) {
    console.log(name);
  }
}
ls('.').catch(console.error)
Or, you could use dir.read() and/or dir.read(callback) directly.
Synchronously
Directories aren't sync iterable, but you could use dir.readSync() directly. For example:
const fs = require('fs')
// Walk the current directory entry by entry using the synchronous Dir API.
const dir = fs.opendirSync('.');
let dirent = dir.readSync();
while (dirent !== null) {
  console.log(dirent.name);
  dirent = dir.readSync();
}
// readSync() returned null, so the loop is done; release the handle.
dir.closeSync();
Or, you could make directories sync iterable. For example:
const fs = require('fs')
/**
 * Patches fs.Dir.prototype so that directory handles work with `for...of`.
 *
 * Installs a generator as `Symbol.iterator` and also exposes it as
 * `entriesSync`, unless an `entriesSync` property already exists. Calling this
 * more than once is a no-op once `Symbol.iterator` is present.
 */
function makeDirectoriesSyncIterable() {
  const p = fs.Dir.prototype;
  const hasOwn = (key) => Object.prototype.hasOwnProperty.call(p, key);

  if (hasOwn(Symbol.iterator)) { return; }

  // Yields entries until readSync() reports the end, and always closes the
  // handle, including when the consumer stops iterating early.
  const entriesSync = function* () {
    try {
      let dirent;
      while ((dirent = this.readSync()) !== null) { yield dirent; }
    } finally { this.closeSync(); }
  };

  // Check by property NAME. Passing the function itself, as before, never
  // matched, so an existing entriesSync was always overwritten.
  if (!hasOwn('entriesSync')) { p.entriesSync = entriesSync; }

  Object.defineProperty(p, Symbol.iterator, {
    configurable: true,
    enumerable: false,
    value: entriesSync,
    writable: true,
  });
}
makeDirectoriesSyncIterable()
And then, you could do something like this:
// Relies on fs.Dir.prototype having a Symbol.iterator, which
// makeDirectoriesSyncIterable() installs; without it `for...of` cannot iterate
// the directory handle.
const dir = fs.opendirSync('.')
for (const dirent of dir) {
console.log(dirent.name)
}
Note: "In busy processes, use the asynchronous versions of these calls. The synchronous versions will block the entire process until they complete, halting all connections."
References:
Node.js Documentation: File System: Class fs.Dir
Node.js source code: fs.Dir
GitHub: nodejs/node: Issues: streaming / iterative fs.readdir #583
Read all folders in a directory
/**
 * Recursively walks `dirMain`, logging each directory's path, its listing, and
 * whether each entry is itself a directory.
 *
 * @param {string} dirMain - Directory to start from.
 */
const readAllFolder = (dirMain) => {
  const readDirMain = fs.readdirSync(dirMain);
  console.log(dirMain);
  console.log(readDirMain);
  readDirMain.forEach((dirNext) => {
    const nextPath = dirMain + "/" + dirNext;
    // Stat once and reuse the answer for both the log line and the recursion
    // decision, instead of hitting the filesystem twice per entry.
    const isDirectory = fs.lstatSync(nextPath).isDirectory();
    console.log(dirNext, isDirectory);
    if (isDirectory) {
      readAllFolder(nextPath);
    }
  });
};
The answers provided are for a single folder. Here is an asynchronous implementation for multiple folders, where all the folders are processed simultaneously but the smaller folders or files get completed first.
Please comment if you have any feedback
Asynchronously Multiple Folders
const fs = require('fs')
const util = require('util')
const path = require('path')
// Multiple folders list
const in_dir_list = [
  'Folder 1 Large',
  'Folder 2 Small', // small folder and files will complete first
  'Folder 3 Extra Large'
];

// BEST PRACTICES:
// (1) Keep the fast loop over the folder list outside the async callbacks, so
//     every folder is started before any of them finishes.
// (2) Keep the slow read/write work inside the async callbacks; whichever item
//     finishes first gets its callback first.
for (const in_dir of in_dir_list) {
  // Each folder is listed asynchronously; its files are read once the listing arrives.
  readdir_async_capture(in_dir, function (files_path) {
    console.log("Processing folders asynchronously ...");
    // Block-scoped bindings matter here: with shared (implicit global)
    // variables every callback below would log whatever path was assigned last.
    for (const file_path of files_path) {
      const file = path.basename(file_path);
      // All files are read simultaneously; the smallest one tends to call back first.
      readFile_async_capture(file_path, file, function (file_string) {
        try {
          console.log(file_path);
          console.log(file_string);
        } catch (error) {
          console.log(error);
          console.log("System exiting first to catch error if not async will continue...");
          process.exit();
        }
      });
    }
  });
}
/**
 * Lists `in_dir` and passes the entries, each joined onto `in_dir`, to `callback`.
 *
 * A listing error is logged and `callback` is not invoked.
 *
 * @param {string} in_dir - Directory to list.
 * @param {(files_path: string[]) => void} callback - Receives the joined paths.
 */
function readdir_async_capture(in_dir, callback) {
  fs.readdir(in_dir, function (error, files) {
    if (error) { return console.log(error); }
    // Declared locally: assigning to an undeclared name creates a global that
    // concurrent listings overwrite, and it throws in strict mode / ES modules.
    const files_path = files.map(function (x) { return path.join(in_dir, x); });
    callback(files_path);
  });
}
/**
 * Reads `file_path` and passes its contents, converted to a string, to `callback`.
 *
 * A read error is logged and `callback` is not invoked.
 *
 * @param {string} file_path - File to read.
 * @param {string} file - File name; accepted for the caller's convenience, not used here.
 * @param {(file_string: string) => void} callback - Receives the file's text.
 */
function readFile_async_capture(file_path, file, callback) {
  fs.readFile(file_path, function (error, data) {
    if (error) { return console.log(error); }
    // Declared locally: assigning to an undeclared name creates a global shared
    // by every in-flight read, and it throws in strict mode / ES modules.
    const file_string = data.toString();
    callback(file_string);
  });
}

Categories

Resources