I am creating a program that...
1. Detects all of the drives on any given system.
2. Scans those drives for files of specific file types. For example, it may search all of the drives for any jpeg, png, and svg files.
3. The results are then stored in a JSON file in the following desired format.
{
  "C:": {
    "jpeg": [
      ...
      {
        "path": "C:\\Users\\John\\Pictures\\example.jpeg",
        "name": "example",
        "type": "jpeg",
        "size": 86016
      },
      ...
    ],
    "png": [],
    "svg": []
  },
  ...
}
The code...
async function scan(path, exts) {
  try {
    const stats = await fsp.stat(path)
    if (stats.isDirectory()) {
      const
        childPaths = await fsp.readdir(path),
        promises = childPaths.map(
          childPath => scan(join(path, childPath), exts)
        ),
        results = await Promise.all(promises)
      // Likely needs to change.
      return [].concat(...results)
    } else if (stats.isFile()) {
      const fileExt = extname(path).replace('.', '')
      if (exts.includes(fileExt)) {
        // Likely needs to change.
        return {
          "path": path,
          "name": basename(path, fileExt).slice(0, -1),
          "type": fileExt,
          "size": stats.size
        }
      }
    }
    return []
  }
  catch (error) {
    return []
  }
}
const results = await Promise.all(
  config.drives.map(drive => scan(drive, exts))
)
console.log(results) // [ Array(140), Array(0), ... ]
// And I would like to do something like the following...
for (const drive of results) {
  const
    root = parse(path).root,
    fileExt = extname(path).replace('.', '')
  data[root][fileExt] = []
}
await fsp.writeFile('./data.json', JSON.stringify(data, null, 2))
The global results array is, of course, divided into individual arrays that correspond to each drive. But currently it combines all of the objects into one giant array per drive, regardless of their file types. There is also currently no way for me to know which array belongs to which drive, especially if a drive's array does not contain any items whose paths I can parse to retrieve the root directory.
I can obviously map or loop through the global results again and sort everything out, as illustrated below, but it would be a lot cleaner to have scan() handle everything from the get-go.
// Initiate scan sequence.
async function initiateScan(exts) {
  let
    [config, data] = await Promise.all([
      readJson('./config.json'),
      readJson('./data.json')
    ]),
    results = await Promise.all(
      // config.drives.map(drive => scan(drive, exts))
      ['K:', 'D:'].map(drive => scan(drive, exts))
    )
  for (const drive of results) {
    let root = false
    for (const [i, file] of drive.entries()) {
      if (!root) root = parse(file.path).root.slice(0, -1)
      if (!data[root][file.type] || !i) data[root][file.type] = []
      data[root][file.type].push(file)
    }
  }
  await fsp.writeFile('./data.json', JSON.stringify(data, null, 2))
}
Due to my lack of experience with asynchronicity and objects in general, I am not quite sure how to best handle the data in map( ... )/scan. I am really not even sure how to best structure the output of scan() so that the structure of the global results is easily manipulable.
Any help would be greatly appreciated.
Mutating an outer object as asynchronously derived results arrive is not particularly clean; however, it can be done fairly simply and safely as follows:
(async function (exts, results) { // async IIFE wrapper
  // Lightly modified version of scan() from the question: the drive root is
  // passed down the recursion so each file is filed under the correct drive.
  async function scan(root, path) {
    try {
      const stats = await fsp.stat(path);
      if (stats.isDirectory()) {
        const childPaths = await fsp.readdir(path);
        const promises = childPaths.map(childPath => scan(root, join(path, childPath)));
        return Promise.all(promises);
      } else if (stats.isFile()) {
        const fileExt = extname(path).replace('.', '');
        if (results[root] && results[root][fileExt]) {
          results[root][fileExt].push({
            'path': path,
            'name': basename(path, fileExt).slice(0, -1),
            'type': fileExt,
            'size': stats.size
          });
        }
      }
    }
    catch (error) {
      console.log(error);
      // swallow error by not rethrowing
    }
  }
  await Promise.all(config.drives.map(path => {
    // Synchronously seed the results object with the required data structure
    results[path] = {};
    for (const fileExt of exts) {
      results[path][fileExt] = []; // will be populated with data, or remain empty if no qualifying data is found
    }
    // Asynchronously populate the results[path] object, and return the Promise to the .map() callback
    return scan(path, path);
  }));
  console.log(results);
  // Here: whatever else you want to do with the results.
})(exts, {}); // pass `exts` and an empty results object to the IIFE.
The results object is synchronously seeded with empty data structures, which are then populated asynchronously.
Everything is wrapped in an async Immediately Invoked Function Expression (IIFE), thus:
- avoiding the global namespace (if not already avoided)
- ensuring availability of await (if not already available)
- making a safe closure for the results object.
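From there, persisting the populated object lines up with the question's goal; a minimal sketch, assuming fsp is the promisified fs from the question:

// e.g. in place of the console.log(results) above
await fsp.writeFile('./data.json', JSON.stringify(results, null, 2));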
This still needs some work, and it iterates through the generated files collection a second time.
// This should get you an object with one property per drive
const results = Object.fromEntries(
  (await Promise.all(
    config.drives.map(async drive => [drive, await scan(drive, exts)])
  ))
  .map(
    ([drive, files]) => [
      drive,
      // we reduce each drive's file array to an object with
      // one property per file extension
      files.reduce(
        (acc, file) => {
          acc[file.type].push(file)
          return acc
        },
        Object.fromEntries(exts.map(ext => [ext, []]))
      )
    ]
  )
)
Node.js supports Object.fromEntries from version 12.0.0, so as long as you can guarantee your application will always run on that version or later, Object.fromEntries should be fine here.
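If you do need to support Node.js versions older than 12, the same seeding can be written with reduce instead; a drop-in sketch for the inner Object.fromEntries call:

exts.reduce(
  (acc, ext) => {
    acc[ext] = [] // seed an empty array per extension
    return acc
  },
  {}
)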
You can use the glob npm library to get all of the matching file names and then transform that array into your object like this:
import {basename, extname} from 'path';
import {stat} from 'fs/promises'; // Or whichever library you use to promisify fs
import glob from "glob";

function searchForFiles() {
  return new Promise((resolve, reject) => glob(
    "/**/*.{jpeg,jpg,png,svg}", // The files to search for and where
    { silent: true, strict: false }, // No error when eg. something cannot be accessed
    (err, files) => err ? reject(err) : resolve(files)
  ));
}
async function getFileObject() {
  const fileNames = await searchForFiles(); // An array containing all file names (eg. ['D:\\my\path\to\file.jpeg', 'C:\\otherfile.svg'])
  // An array containing all objects describing your files
  const fileObjects = await Promise.all(fileNames.map(async filename => ({
    path: filename,
    name: basename(filename, extname(filename)),
    type: extname(filename).replace('.', ''),
    size: (await stat(filename)).size,
    drive: `${filename.split(':\\')[0]}:`
  })));
  // Create your actual object
  return fileObjects.reduce((result, {path, name, type, size, drive}) => {
    if (!result[drive]) { // create eg. { C: {} } if it does not already exist
      result[drive] = {};
    }
    if (!result[drive][type]) { // create eg. { C: { jpeg: [] } } if it does not already exist
      result[drive][type] = [];
    }
    // Push the object to the correct array
    result[drive][type].push({path, name, type, size});
    return result;
  }, {});
}
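From there, a hypothetical usage that persists the result, assuming the data.json target named in the question:

import {writeFile} from 'fs/promises';

getFileObject().then((result) =>
  writeFile('./data.json', JSON.stringify(result, null, 2))
);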
The function must traverse the file system recursively, looking for files that match your criteria. The recursion can be simplified by the fact that the result doesn't need to retain any hierarchy, so we can just carry a flat array (files) as a parameter.
let exts = [...]

async function scan(path, files) {
  const stats = await fsp.stat(path)
  if (stats.isDirectory()) {
    const childPaths = await fsp.readdir(path)
    let promises = childPaths.map(childPath => {
      return scan(join(path, childPath), files)
    })
    return Promise.all(promises)
  } else if (stats.isFile()) {
    const fileExt = extname(path).replace('.', '')
    if (exts.includes(fileExt)) {
      files.push({
        path: path,
        name: basename(path, fileExt).slice(0, -1),
        type: fileExt,
        size: stats.size
      })
    }
  }
}

let files = []
await scan('/', files)
console.log(files)
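If you then want the nested { drive: { ext: [...] } } shape from the question, the flat array is straightforward to group in a single pass afterwards; a sketch, assuming parse from the path module and Windows-style roots as in the question:

const data = {}
for (const file of files) {
  const root = parse(file.path).root.slice(0, -1) // e.g. 'C:\\' -> 'C:'
  data[root] = data[root] || {}
  data[root][file.type] = data[root][file.type] || []
  data[root][file.type].push(file)
}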
I have a Next.js application that needs to read a CSV file from a URL in the same repo in multiple places, but I cannot seem to retrieve this data. You can find the relevant file in my repo here.
Note, the URL I'm trying to pull data from is this: https://raw.githubusercontent.com/ivan-rivera/balderdash-next/main/public/test_rare_words.csv
Here is what I've tried so far:
Approach 1: importing the data
let vocab = {};
...
async function buildVocab() {
  // this works when I point to a folder in my directory, but it does not work when I deploy
  // this app; if I point at the URL address, I get an error saying that it cannot find the module
  const words = await import(VOCAB_URL);
  for (let i = 0; i < words.length; i++) {
    vocab[words[i].word] = words[i].definition;
  }
}
Approach 2: papaparse
const papa = require("papaparse");
let vocab = {};
...
export async function buildVocab() {
await papa.parse(
VOCAB_URL,
{
header: true,
download: true,
delimiter: ",",
step: function (row) {
console.log("Row:", row.data); // this prints data correctly
},
complete: function (results) {
console.log(results); // this returns an object with several attributes among which is "data" and "errors" and both are empty
},
}
);
// this does not work because `complete` does not return anything
vocab = Object.assign({}, ...raw.map((e) => ({ [e.word]: e.definition })));
console.log(vocab);
}
Approach 3: needle
const csvParser = require("csv-parser");
const needle = require("needle");
let vocab = {};
...
let result = [];
needle
  .get(VOCAB_URL)
  .pipe(csvParser())
  .on("data", (data) => {
    result.push(data);
  });
vocab = Object.assign({}, ...result.map((e) => ({ [e.word]: e.definition })));
// This approach also returns nothing; however, I noticed that if I force it to sleep, I do get the results I want:
setTimeout(() => {
  console.log(result);
}, 1000); // now this prints the data I'm looking for
What I cannot figure out is how to force this function to wait for needle to retrieve the data. I've declared it as an async function and I'm calling it with await buildVocab() but it doesn't help.
Any ideas how I can fix this? Sorry, I'm a JS beginner, so it's probably something fundamental that I'm missing :(
After spending hours on this, I think I finally found a solution:
let vocab = {};

export async function buildVocab() {
  await fetch(VOCAB_URL)
    .then((resp) => resp.text())
    .then((text) => {
      papa.parse(text, { header: true }).data.forEach((row) => {
        vocab[row.word] = row.definition;
      });
    });
}
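The same solution can be written without mixing await and .then, if that reads more clearly; a functionally equivalent sketch:

export async function buildVocab() {
  const resp = await fetch(VOCAB_URL);
  const text = await resp.text();
  papa.parse(text, { header: true }).data.forEach((row) => {
    vocab[row.word] = row.definition;
  });
}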
The only oddity that I still can't work out is this: I'm calling my buildVocab function inside another async function, and I noticed that if I do not include a console.log statement in that function, the vocab still does not get populated in time. Here is the function:
export async function sampleWord() {
  await buildVocab();
  const keys = Object.keys(vocab);
  const index = Math.floor(Math.random() * keys.length);
  console.log(`selected word: ${keys[index]}`); // this is important!
  return keys[index];
}
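For completeness, the needle approach from the question can also be made awaitable by wrapping the stream in a Promise that settles on the stream's end and error events; a sketch, assuming the same needle and csv-parser setup as above:

function fetchRows(url) {
  return new Promise((resolve, reject) => {
    const rows = [];
    needle
      .get(url)
      .pipe(csvParser())
      .on("data", (data) => rows.push(data))
      .on("end", () => resolve(rows)) // all rows parsed
      .on("error", reject);
  });
}

// const rows = await fetchRows(VOCAB_URL);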
I'm trying to use stream-json to read a zip, unzip it, and then write it to file. I don't think I understand how to use the library.
Based on the link above, they have this example:
const {chain} = require('stream-chain');
const {parser} = require('stream-json');
const {pick} = require('stream-json/filters/Pick');
const {ignore} = require('stream-json/filters/Ignore');
const {streamValues} = require('stream-json/streamers/StreamValues');
const fs = require('fs');
const zlib = require('zlib');

const pipeline = chain([
  fs.createReadStream('sample.json.gz'),
  zlib.createGunzip(),
  parser(),
  pick({filter: 'data'}),
  ignore({filter: /\b_meta\b/i}),
  streamValues(),
  data => {
    const value = data.value;
    // keep data only for the accounting department
    return value && value.department === 'accounting' ? data : null;
  }
]);

let counter = 0;
pipeline.on('data', () => ++counter);
pipeline.on('end', () =>
  console.log(`The accounting department has ${counter} employees.`));
However, I don't want to count anything; I just want to write to a file. Here is what I have that works:
function unzipJson() {
  const zipPath = Path.resolve(__dirname, 'resources', 'AllPrintings.json.zip');
  const jsonPath = Path.resolve(__dirname, 'resources', 'AllPrintings.json');
  console.info('Attempting to read zip');

  return new Promise((resolve, reject) => {
    let error = null;
    Fs.readFile(zipPath, (err, data) => {
      error = err;
      if (!err) {
        const zip = new JSZip();
        zip.loadAsync(data).then((contents) => {
          Object.keys(contents.files).forEach((filename) => {
            console.info(`Writing ${filename} to disk...`);
            zip.file(filename).async('nodebuffer').then((content) => {
              Fs.writeFileSync(jsonPath, content);
            }).catch((writeErr) => { error = writeErr; });
          });
        }).catch((zipErr) => { error = zipErr; });
        resolve();
      } else if (error) {
        console.log(error);
        reject(error);
      }
    });
  });
}
However I can't easily add any processing to this, so I wanted to replace it with stream-json. This is my partial attempt, as I don't know how to finish:
function unzipJson() {
  const zipPath = Path.resolve(__dirname, 'resources', 'myfile.json.zip');
  const jsonPath = Path.resolve(__dirname, 'resources', 'myfile.json');
  console.info('Attempting to read zip');

  const pipeline = chain([
    Fs.createReadStream(zipPath),
    zlib.createGunzip(),
    parser(),
    Fs.createWriteStream(jsonPath),
  ]);
  // use the chain, and save the result to a file
  pipeline.on(/*what goes here?*/)
Later on I intend to add extra processing of the json file(s), but I want to learn the basics before I start throwing in extra functionality.
I can't produce a minimal example unfortunately, as I don't know what goes into the pipeline.on function. I'm trying to understand what I should do, not what I've done wrong.
I also looked at the related stream-chain, which has an example that ends like so:
// use the chain, and save the result to a file
dataSource.pipe(chain).pipe(fs.createWriteStream('output.txt.gz'));
But at no point does the documentation explain where dataSource comes from, and I think my chain creates its own by reading the zip from file?
How am I supposed to use these streaming libraries to write to file?
I don't want to count anything; I just want to write to a file
In that case, you'll need to convert the token/JSON data stream back into a text stream that you can write to a file. You can use the library's Stringer for that. Its documentation also contains an example that seems to be more in line with what you want to do:
const {chain} = require('stream-chain');
const {parser} = require('stream-json');
const {pick} = require('stream-json/filters/Pick');
const {stringer} = require('stream-json/Stringer');
const fs = require('fs');
const zlib = require('zlib');

chain([
  fs.createReadStream('data.json.gz'),
  zlib.createGunzip(),
  parser(),
  pick({filter: 'data'}), // omit this if you don't want to do any processing
  stringer(),
  zlib.createGzip(), // omit this if you want to write an unzipped result
  fs.createWriteStream('edited.json.gz')
]);
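To know when the file has actually been written, one option (an assumption based on standard Node stream events, not a stream-json-specific API) is to keep a reference to the write stream and resolve a Promise on its finish event. Note also that zlib.createGunzip() handles gzip (.gz) data; for a real .zip archive like the one in the question, an unzip library would still be needed in that slot.

function unzipJson() {
  const out = Fs.createWriteStream(jsonPath); // jsonPath/zipPath as in your partial attempt
  const pipeline = chain([
    Fs.createReadStream(zipPath),
    zlib.createGunzip(),
    parser(),
    stringer(),
    out,
  ]);
  return new Promise((resolve, reject) => {
    out.on('finish', resolve); // write stream has flushed everything
    pipeline.on('error', reject); // stream-chain re-emits component errors
  });
}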
I think this is more of a general async/await loop question, but I'm trying to do it within the bounds of an Airtable API request and within getStaticProps of Next.js, so I thought that was important to share.
What I want to do is create an array of base IDs like ["appBaseId01", "appBaseId02", "appBaseId03"] and output the contents of a page. I have it working with one base, but am failing to get it working for multiple.
Below is the code for one static base, if anyone can help me grok how I'd want to loop over these. My gut says that I need to await each uniquely and then pop them into an array, but I'm not sure.
const records = await airtable
  .base("appBaseId01")("Case Overview Information")
  .select()
  .firstPage();

const details = records.map((detail) => {
  return {
    city: detail.get("City") || null,
    name: detail.get("Name") || null,
    state: detail.get("State") || null,
  };
});

return {
  props: {
    details,
  },
};
EDIT
I've gotten closer to emulating it, but haven't figured out how to loop the initial requests yet.
This yields me an array of arrays that I can at least work with, but it's janky and unsustainable.
export async function getStaticProps() {
  const caseOneRecords = await setOverviewBase("appBaseId01")
    .select({})
    .firstPage();
  const caseTwoRecords = await setOverviewBase("appBaseId02")
    .select({})
    .firstPage();

  const cases = [];
  cases.push(minifyOverviewRecords(caseOneRecords));
  cases.push(minifyOverviewRecords(caseTwoRecords));

  return {
    props: {
      cases,
    },
  };
}
setOverviewBase is a helper that establishes the Airtable connection and sets the table name.
const setOverviewBase = (baseId) =>
  base.base(baseId)("Case Overview Information");
You can map the array of base IDs and await with Promise.all. Assuming you have getFirstPage and minifyOverviewRecords defined as below, you could do the following:
const getFirstPage = (baseId) =>
  airtable
    .base(baseId)("Case Overview Information")
    .select({})
    .firstPage();

const minifyOverviewRecords = (records) =>
  records.map((detail) => {
    return {
      city: detail.get("City") || null,
      name: detail.get("Name") || null,
      state: detail.get("State") || null,
    };
  });

export async function getStaticProps() {
  const cases = await Promise.all(
    ["appBaseId01", "appBaseId02", "appBaseId03"].map(async (baseId) => {
      const firstPage = await getFirstPage(baseId);
      return minifyOverviewRecords(firstPage);
    })
  );
  return {
    props: {
      cases
    }
  };
}
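One detail worth noting: Promise.all preserves the order of its input array, so cases[0] will correspond to appBaseId01 and so on, regardless of which request finishes first.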
I created a function to fetch mp3 files inside the user's storage drives; it takes an array of directories to be searched. After receiving the full list of music files as an array, I use it as an argument to another function that fetches each file's metadata (using music-metadata). So far my code works perfectly: it returns an empty array when there are no files, and otherwise an array of objects containing their metadata. Here is my code:
const find = require('find');
const mm = require('music-metadata');

const directories = ["C:\\Users\\{UserNamehere}\\Music\\"]; // you can add more directories

async function parseMetadata(files) {
  const data = files.map(async (file) => {
    const metadata = await mm.parseFile(file, {duration: true});
    return metadata.common;
  });
  const musicarray = await Promise.all(data);
  return musicarray;
}

function fetchNewSongs(dirs) {
  let res, musicfiles = [];
  dirs.forEach((path) => {
    res = find.fileSync(/\.mp3/i, path);
    musicfiles.push(...res);
  });
  if (musicfiles.length !== 0) {
    return parseMetadata(musicfiles);
  }
  else {
    return Promise.resolve([]);
  }
}

fetchNewSongs(directories).then(value => {
  console.log(value);
});
The problem arises when a music file is corrupted or its metadata cannot be fetched by music-metadata, which stops the whole metadata-parsing flow. I renamed a .txt file to .mp3 to simulate a corrupted file. What I want is for an error while parsing a particular music file to simply yield an empty entry and continue with the other files, and then to remove those empty elements once the process is complete.
I think you are missing a try/catch in your map function:
Mocked version:
const mm = {
  parseFile(file) {
    return Promise.reject("Bad format");
  },
};
async function parseMetadata(files) {
  const promises = files.map(async (file) => {
    try {
      const metadata = await mm.parseFile(file, { duration: true });
      return metadata.common;
    } catch (error) {
      return [];
    }
  });
  return Promise.all(promises);
}
async function fetchNewSongs(dirs = ["foo", "bar", "baz"]) {
  return parseMetadata(dirs);
}

fetchNewSongs().then(console.log, console.error);
// output: [ [], [], [] ]
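Since the failed entries come back as empty arrays while successful ones are plain metadata objects, you can strip the failures afterwards with something like:

const good = (await parseMetadata(files)).filter((m) => !Array.isArray(m));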
As an addition, you might go for a for loop and avoid having to filter your array afterward:
const mm = {
  parseFile(file) {
    return Promise.reject("Bad format");
  },
};

async function parseMetadata(files) {
  const metadataCollection = [];
  for (const file of files) {
    try {
      const metadata = await mm.parseFile(file, { duration: true });
      metadataCollection.push(metadata);
    } catch (error) {
      console.warn(error);
    }
  }
  return metadataCollection;
}

async function fetchNewSongs(dirs = ["foo", "bar", "baz"]) {
  return parseMetadata(dirs);
}

fetchNewSongs().then(console.log, console.error);
// outputs:
// Bad format
// Bad format
// Bad format
// []
I am creating a program in which users can preserve search terms from session to session. If a search term is preserved by the user, then the data that corresponds to that search term is also preserved. At the beginning of each session, the script discards any previously preserved data that no longer corresponds to an active or connected drive.
However, I am new to working with JSON and such objects in general, so I am wondering if there is a better way to accomplish this than the approach below. Specifically, is there anything more efficient, or even prettier, than the first for...of loop and the heavily nested if/for/for/if block?
async function guaranteeData(drives) {
  const
    config = await readJson('./config.json'),
    data = await readJson('./data.json')

  // Is there a better way to write this code?
  let
    json = {}
  for (const [drive] of drives) {
    json[drive] = {}
  }
  // if tags have been preserved
  if (config.fileTypes.length > 0) {
    // loop thru all current system drives
    for (const [drive] of drives) {
      // loop thru all preserved tags
      for (const fileType of config.fileTypes) {
        // if current drive has current tag data
        if (data[drive].hasOwnProperty(fileType)) {
          // preserve this data: data[drive][fileType]
          json[drive][fileType] = data[drive][fileType]
        }
      }
    }
  }
  ////////////////////////////////////////////
  json = JSON.stringify(json, null, 2)
  await fsp.writeFile('./data.json', json, {
    flag: 'w',
    encoding: 'utf8'
  })
    .then(() => {
      return true
    })
    .catch(error => {
      if (error.code === 'EEXIST') {
        return false
      } else {
        throw error
      }
    })
}
I would change two things in this:
- Remove the if (config.fileTypes.length > 0) check, as it is already handled by the for loop that follows: if fileTypes is empty, the loop body simply never runs.
- Move the json[drive] = {} initialization into the outer loop of the nested block, which removes the need for the separate first for loop.
It will look something like:
async function guaranteeData(drives) {
  const config = await readJson("./config.json");
  const data = await readJson("./data.json");
  const json = {};

  // loop thru all current system drives
  for (const [drive] of drives) {
    json[drive] = {};
    // loop thru all preserved tags
    for (const fileType of config.fileTypes) {
      // if current drive has current tag data
      if (data[drive].hasOwnProperty(fileType)) {
        // preserve this data: data[drive][fileType]
        json[drive][fileType] = data[drive][fileType];
      }
    }
  }
  ////////////////////////////////////////////
  const output = JSON.stringify(json, null, 2);
  await fsp
    .writeFile("./data.json", output, {
      flag: "w",
      encoding: "utf8"
    })
    .then(() => {
      return true;
    })
    .catch(error => {
      if (error.code === "EEXIST") {
        return false;
      } else {
        throw error;
      }
    });
}
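One further defensive tweak to consider (an assumption about your data: a newly connected drive may not have an entry in data.json yet, in which case data[drive] is undefined): guard the lookup before probing its properties.

if (data[drive] && Object.prototype.hasOwnProperty.call(data[drive], fileType)) {
  json[drive][fileType] = data[drive][fileType];
}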