Read all files in a directory and parse them to JSON

Read all files in a directory and parse them to JSON - javascript

I have a directory full of txt files containing json content. I would like to read the whole directory and rename the files according to the json tag value label.
I know how to read a single file using the below code but how do you read a whole directory?
/**
 * Fetch a text resource with a blocking XMLHttpRequest and show its body in an alert.
 *
 * The request is opened with async = false, so send() does not return until the
 * response has been handled by the readystatechange listener.
 *
 * @param {string} file - URL or path of the resource to GET.
 */
function readTextFile(file) {
  var request = new XMLHttpRequest();
  request.open("GET", file, false);
  request.onreadystatechange = function () {
    // 4 is the final ready state; earlier states are ignored
    if (request.readyState !== 4) {
      return;
    }
    // accept HTTP 200 as well as a status of 0
    var succeeded = request.status === 200 || request.status == 0;
    if (!succeeded) {
      return;
    }
    var allText = request.responseText;
    alert(allText);
  };
  request.send(null);
}

This code gives you a list of the files in a folder:
// Node's built-in filesystem module.
var fs = require('fs');
// Folder whose entries should be listed.
var photosDir = '/assets/photos/';
// Synchronously collect the name of every entry in that folder.
var files = fs.readdirSync(photosDir);
Then you can iterate over this list and run your code on each file.

Using the node filesystem (fs) module you can do what you want assuming it's all locally accessible and you have permissions. Here's a way it could work:
const fs = require("fs");
const path = require("path");
const dir = "/path/to/the/directory";

// Rename every JSON-bearing file in `dir` to "<label>.json", where <label> is
// the value of the top-level "label" property inside that file.

// get the directory contents
const files = fs.readdirSync(dir);
for (const file of files) {
  // readdirSync returns bare names, so resolve each one against dir before
  // touching the filesystem (otherwise it is looked up in the working directory)
  const fullPath = path.join(dir, file);

  // for each make sure it's a file (not a subdirectory)
  const stat = fs.statSync(fullPath);
  if (!stat.isFile()) {
    continue;
  }

  try {
    // read in the file and parse it as JSON
    const rawdata = fs.readFileSync(fullPath, "utf8");
    const json = JSON.parse(rawdata);
    if (json.label) {
      // build the new filename using 'label'
      const newfile = path.join(dir, `${json.label}.json`);
      fs.renameSync(fullPath, newfile);
    }
  } catch (err) {
    // an unreadable or non-JSON file is reported and skipped; the loop carries on
    console.log(`Error working with ${file}. Err: ${err}`);
  }
}
That's the idea. Additional error checking can be done for safety like making sure the new filename doesn't already exist.

Related

Not able to read files content inside NodeJS

I have some markdown files inside /markdown folder. I am trying to read content of these files. I can see the file names inside the array. But when I try to read it, it doesn't return any data or error. What needs to be done here?
// GET "/" handler as posted in the question (kept as asked; it is the code under discussion).
// NOTE(review): `app` and `fs` are not defined in this snippet — presumably an Express-style app and Node's fs module.
app.get("/", async(req, res) => {
const mdPath = "...path"
// readdirSync is the synchronous variant, so the await here is not needed to obtain the listing.
const data = await fs.readdirSync(mdPath);
console.log(data) // Return Array of files
// The bound is `<=`, so the last iteration would run with i === data.length.
for (let i = 0; i <= data.length; i++) {
// The loop index i itself is passed to readFileSync — not data[i], and not a path under mdPath.
// NOTE(review): Node accepts an integer here as a file descriptor, so 0 would mean stdin — confirm.
const fileContent = fs.readFileSync(i, "utf-8");
// Returning inside the loop ends the handler on the first iteration; nothing is ever written to res.
return fileContent;
}
})

You should use Node's path module to build file paths, and read each file by its name (data[i]) rather than by the loop index.
This could work your way:
const fs = require('fs') // nodejs filesystem lib
const path = require('path') // nodejs path lib
const mdPath = 'md' // name of the local dir

// Absolute location of the markdown folder, resolved next to this script.
const mdDir = path.join(__dirname, mdPath)

// List the folder once, then print every file name followed by its content.
const data = fs.readdirSync(mdDir)
console.log('file names', data) // Return Array of files
for (const fileName of data) {
  console.log('file name:', fileName) // we get each file name
  // join the folder path and the file name, then read the file as UTF-8 text
  const fileContent = fs.readFileSync(path.join(mdDir, fileName), 'utf-8')
  console.log('content:\n' + fileContent) // log its content
}
I created a folder ./md, containing the files one.md, two.md, three.md. The code above logs their content just fine.
>> node .\foo.js
file names [ 'one.md', 'three.md', 'two.md' ]
file name: one.md
content:
# one
file name: three.md
content:
# three
file name: two.md
content:
# two
Note that there is no error handling for anything that could go wrong with reading files.

Convert buffer to file [FileCollection:meteor/ostrio:files]

When I get the image from the input, I have to convert it to a buffer to perform some operations on it, so as a result I have a buffer instead of a file.
I'm using FileCollection in Meteor to store the image in a Mongo collection.
// Change handler from the question: reads the chosen file into a Uint8Array and tries to insert it.
// NOTE(review): the snippet is pasted without the method's final closing brace, and `..` marks omitted options.
uploadIt(e) {
e.preventDefault();
var reader = new FileReader();
// Only assigned inside reader.onload below.
var buffer;
var file = e.currentTarget.files[0];
if (e.currentTarget.files && e.currentTarget.files[0]) {
reader.onload = function(e){
buffer = new Uint8Array(reader.result);
// some operations over the buffer
};
reader.readAsArrayBuffer(file);
// insert() is called right after the read is started, and it receives the buffer variable rather than a File.
// NOTE(review): onload presumably has not fired yet at this point, so buffer would still be undefined — confirm.
if (file) {
let uploadInstance = CourseFilesCollection.insert({
file: buffer,
..
..
})
}
}
but when i insert it got this error
message: "[FilesCollection] [insert] Have you forget to pass a File itself?
the code originally was
// Original version from the question: the File object taken from the input is passed to insert() unchanged.
// `..` marks options omitted by the asker.
if (file) {
let uploadInstance = CourseFilesCollection.insert({
file: file,
..
..
})
}
But since I have to perform operations on the image, I need some way to convert the buffer back to a file.
Any ideas how to solve this?

Short answer
Use the File constructor to wrap the bytes back into a File object:
file: new File([buffer], file.name, file)
you could try using blob also with wider browser support... but if you want to use the latest tech, then:
async uploadIt (evt) {
evt.preventDefault()
const file = evt.currentTarget.files[0]
if (!file) return
const buffer = new Uint8Array(await file.arrayBuffer())
// some operations over the buffer
const uploadInstance = CourseFilesCollection.insert({
file: new File([buffer], file.name, file)
})
}

Node unzip . How to make file name

I'm using the unzip module from npm.
When I extract the archive, the extracted file is named doc.xml, because that is the name of the XML file inside the zip.
I don't want the name doc, so I did this:
// First attempt from the question: extract the zip given on the command line.
'use strict';
var fs = require('fs')
var unzip = require('unzip')
// argv[2] is the zip path and argv[3] the desired output name (see the sample invocation below).
convert(process.argv[2], process.argv[3])
// Streams the archive at `path` into unzip.Extract.
// The `path` option is built from fileName + '.xml'; per the question, that ends up as a directory
// ("fileName.xml/doc.xml") and the entry keeps its own name.
function convert(path, fileName) {
fs.createReadStream(path)
.pipe(unzip.Extract({ path: '/users/*****/desktop/templatexml/' + fileName + '.xml' }))
}
And ran this:
node /Users/*****/Desktop/converter/converter.js /Users/******/Desktop/template/103.zip 103
But the result will always be "fileName.xml/doc.xml"
"fileName.xml" appears as a directory name.
Want to change doc part.
And if possible, want to get the file name from original file path.
Inside process.argv[2] is like
"/Users/*****/Desktop/template/fileName.zip"
Want to get the fileName from here.
Thank you for comment, like this.
But didn't work.
// Second attempt from the question: walk the archive entry by entry with unzip.Parse.
'use strict';
var fs = require('fs')
var unzip = require('unzip')
convert(process.argv[2])
// Streams the archive at `path` through unzip.Parse and inspects each entry as it is emitted.
function convert(path) {
fs.createReadStream(path)
.pipe(unzip.Parse())
.on('entry', function(entry) {
var fileName = entry.path;
var type = entry.type; // 'Directory' or 'File'
var size = entry.size;
// The comparison string is still the placeholder text, so only an entry with exactly that path would match.
if (fileName === "this IS the file I'm looking for") {
// The write stream is given the output directory only; no file name is appended.
entry.pipe(fs.createWriteStream('/users/*****/desktop/templatexml/'));
} else {
// Every other entry is drained.
entry.autodrain();
}
});
}

// Stream the archive through unzip.Parse and save only the wanted entry;
// every other entry is drained.
fs.createReadStream('path/to/archive.zip')
  .pipe(unzip.Parse())
  .on('entry', function (entry) {
    var entryPath = entry.path;
    var entryType = entry.type; // 'Directory' or 'File'
    var entrySize = entry.size;
    if (entryPath !== "this IS the file I'm looking for") {
      entry.autodrain();
      return;
    }
    entry.pipe(fs.createWriteStream('output/path'));
  });

Node.js convert binary file to utf8

I have a jrxml (Jasper report) file stored in a PostgreSQL database in a binary format (bytea). I'm trying to read that file, convert it into a plain jrxml (XML) file and save it to disk.
Here is what i've tried so far
var fs = require('fs');
exports.saveFile = function (pg) {
//pg is the postgres connection to query the db
pg.query('Select data from data_file where id = 123', function (err, result) {
if (err) {
console.log(err);
return;
}
var data = result.rows[0].data;
//Buffer.isBuffer(data) === true
// I can get the data here. Now I try to convert it into text
var file = data.toString('utf8');
fs.writeFile('report.jrxml',file, function (er) {
if (er) {
console.log('an error occurred while saving the file');
return;
}
console.log('file saved');
}}
});
}
If i run the code above, the file is saved but it's somehow binary.
How can i convert this to a plain xml file in text format that i can import in ireport for example?

You might try going through a buffer first. I have used this technique to transform DB BLOBs into base64 strings.
// Wrap the column value in a Buffer, then decode it as UTF-8 text.
// Buffer.from replaces the deprecated `new Buffer(...)` constructor.
var fileBuffer = Buffer.from( result.rows[0].data, 'binary' );
var file = fileBuffer.toString('utf8');

I use 'pako' npm package to resolve that issue:
import { connection, Message } from 'websocket';
import * as pako from 'pako';
/**
 * Decode an incoming websocket message into a parsed JSON value.
 * 'utf8' messages carry the JSON text directly; any other message has its
 * binaryData inflated to a string with pako first.
 */
protected async onCustomMessage(message: Message, con): Promise<void> {
  const text = message.type === 'utf8'
    ? message.utf8Data
    : pako.inflate(message.binaryData, { to: 'string' });
  const data = JSON.parse(text);
}
npm i pako && npm i -D @types/pako

How to download and unzip a zip file in memory in NodeJs?

I want to download a zip file from the internet and unzip it in memory without saving to a temporary file. How can I do this?
Here is what I tried:
// First attempt from the question: download the archive with `request` and hand the body to zlib.unzip.
var url = 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip';
var request = require('request'), fs = require('fs'), zlib = require('zlib');
// No `encoding: null` option is passed here; the accepted answer sets it to receive the body as a Buffer.
request.get(url, function(err, res, file) {
if(err) throw err;
// NOTE(review): zlib.unzip presumably expects a gzip/deflate stream rather than a .zip container — confirm.
zlib.unzip(file, function(err, txt) {
if(err) throw err;
console.log(txt.toString()); //outputs nothing
});
});
[EDIT]
As suggested, I tried using the adm-zip library and I still cannot make this work:
// Second attempt from the question: feed the downloaded body to a single adm-zip ZipEntry.
// Relies on `request` and `url` from the previous snippet.
var ZipEntry = require('adm-zip/zipEntry');
request.get(url, function(err, res, zipFile) {
if(err) throw err;
var zip = new ZipEntry();
// The body is converted to a UTF-8 string and then wrapped in a new Buffer before being set as compressed data.
zip.setCompressedData(new Buffer(zipFile.toString('utf-8')));
var text = zip.getData();
console.log(text.toString()); // fails
});

You need a library that can handle buffers. The latest version of adm-zip will do:
npm install adm-zip
My solution uses the http.get method, since it returns Buffer chunks.
Code:
var file_url = 'http://notepad-plus-plus.org/repository/7.x/7.6/npp.7.6.bin.x64.zip';
var AdmZip = require('adm-zip');
var http = require('http');

// Download the archive into memory, print how many entries it holds, and
// print the text of every entry whose name contains "readme".
http.get(file_url, function(res) {
  // Buffers cannot be resized, so collect the chunks and join them once at the end.
  var chunks = [];
  res.on('data', function(chunk) {
    chunks.push(chunk);
  }).on('end', function() {
    // Buffer.concat allocates the combined buffer and copies every chunk into it
    var buf = Buffer.concat(chunks);
    var zip = new AdmZip(buf);
    var zipEntries = zip.getEntries();
    console.log(zipEntries.length)
    zipEntries.forEach(function(entry) {
      if (entry.entryName.match(/readme/))
        console.log(zip.readAsText(entry));
    });
  });
}).on('error', function(err) {
  // without a listener a failed request would surface as an uncaught 'error' event
  console.error(err);
});
The idea is to create an array of buffers and concatenate them into a new one at the end. This is due to the fact that buffers cannot be resized.
Update
This is a simpler solution that uses the request module to obtain the response in a buffer, by setting encoding: null in the options. It also follows redirects and resolves http/https automatically.
var file_url = 'https://github.com/mihaifm/linq/releases/download/3.1.1/linq.js-3.1.1.zip';
var AdmZip = require('adm-zip');
var request = require('request');

// Fetch the archive as a Buffer (encoding: null), print how many entries it
// holds, and print the text of every entry whose name contains "readme" (case-insensitive).
request.get({url: file_url, encoding: null}, (err, res, body) => {
  // a failed download leaves no body to unzip, so stop here instead of handing undefined to AdmZip
  if (err) throw err;
  var zip = new AdmZip(body);
  var zipEntries = zip.getEntries();
  console.log(zipEntries.length);
  zipEntries.forEach((entry) => {
    if (entry.entryName.match(/readme/i))
      console.log(zip.readAsText(entry));
  });
});
The body of the response is a buffer that can be passed directly to AdmZip, simplifying the whole process.

Sadly, you can't pipe the response stream straight into the unzip step the way Node's zlib library allows; you have to buffer the response and wait for it to end. For big files I suggest piping the response to an fs stream, otherwise you will fill up your memory in a blink!
I don't completely understand what you are trying to do, but imho this is the best approach. You should keep your data in memory only the time you really need it, and then stream to the csv parser.
If you want to keep all your data in memory you can replace the csv parser method fromPath with from that takes a buffer instead and in getData return directly unzipped
You can use AdmZip (as @mihai said) instead of node-zip; just note that AdmZip is not yet published on npm, so you need:
$ npm install git://github.com/cthackers/adm-zip.git
N.B. Assumption: the zip file contains only one file
var request = require('request'),
    fs = require('fs'),
    csv = require('csv'),
    NodeZip = require('node-zip')

/**
 * Download a zip archive to a temp file, unzip it, and write each contained
 * file to its own temp file.
 *
 * @param {string} tmpFolder - folder prefix for temp files (include the trailing slash).
 * @param {string} url - location of the zip archive.
 * @param {function(Error, string=)} callback - called with (err, pathOfExtractedFile) once
 *   per file in the archive, or with an error if the download or a read/write fails.
 */
function getData(tmpFolder, url, callback) {
  var tempZipFilePath = tmpFolder + new Date().getTime() + Math.random()
  var tempZipFileStream = fs.createWriteStream(tempZipFilePath)

  tempZipFileStream.on('error', callback)
  // wait for the write stream's 'finish' so the whole archive is on disk before it is read back
  tempZipFileStream.on('finish', function () {
    fs.readFile(tempZipFilePath, 'base64', function (err, zipContent) {
      if (err) {
        return callback(err)
      }
      var zip = new NodeZip(zipContent, { base64: true })
      Object.keys(zip.files).forEach(function (filename) {
        var tempFilePath = tmpFolder + new Date().getTime() + Math.random()
        var unzipped = zip.files[filename].data
        fs.writeFile(tempFilePath, unzipped, function (err) {
          callback(err, tempFilePath)
        })
      })
    })
  })

  request.get({
    url: url,
    encoding: null
  }).on('error', callback).pipe(tempZipFileStream)
}
// Download and unzip the calendar-spread file, then run the extracted
// pipe-delimited file through the csv parser.
getData('/tmp/', 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip', function (err, path) {
  if (err) {
    // pass the message as a format argument so %s is actually substituted
    return console.error('error: %s', err.message)
  }
  var metadata = []
  csv().fromPath(path, {
    delimiter: '|',
    columns: true
  }).transform(function (data){
    // do things with your data
    // rows whose NAME starts with '#' are collected as metadata and not returned
    if (data.NAME[0] === '#') {
      metadata.push(data.NAME)
    } else {
      return data
    }
  }).on('data', function (data, index) {
    console.log('#%d %s', index, JSON.stringify(data, null, ' '))
  }).on('end',function (count) {
    console.log('Metadata: %s', JSON.stringify(metadata, null, ' '))
    console.log('Number of lines: %d', count)
  }).on('error', function (error) {
    console.error('csv parsing error: %s', error.message)
  })
})

If you're under MacOS or Linux, you can use the unzip command to unzip from stdin.
In this example I'm reading the zip file from the filesystem into a Buffer object but it works
with a downloaded file as well:
// Get a Buffer with the zip content
// Get a Buffer with the zip content
var fs = require("fs");
var zip = fs.readFileSync(__dirname + "/test.zip");

// Now the actual unzipping:
var spawn = require('child_process').spawn;
var fileToExtract = "test.js";
// -p tells unzip to extract to stdout
var unzipArgs = ["-p", "/dev/stdin", fileToExtract];
var unzip = spawn("unzip", unzipArgs);

// Handle errors
unzip.stderr.on('data', function (chunk) {
  console.log("There has been an error: ", chunk.toString("utf-8"));
});

// Handle the unzipped stdout
unzip.stdout.on('data', function (chunk) {
  console.log("Unzipped file: ", chunk.toString("utf-8"));
});

// Write the Buffer to stdin and close it
unzip.stdin.end(zip);
Which is actually just the node version of:
cat test.zip | unzip -p /dev/stdin test.js
EDIT: It's worth noting that this will not work if the input zip is too big to be read in one chunk from stdin. If you need to read bigger files, and your zip file contains only one file, you can use funzip instead of unzip:
var unzip = spawn("funzip");
If your zip file contains multiple files (and the file you want isn't the first one) I'm afraid to say you're out of luck. Unzip needs to seek in the .zip file since zip files are just a container, and unzip may just unzip the last file in it. In that case you have to save the file temporarily (node-temp comes in handy).

Two days ago the module node-zip has been released, which is a wrapper for the JavaScript only version of Zip: JSZip.
var NodeZip = require('node-zip');
// Hand the archive over as a base64 string, flagged with the base64 option.
var zipAsBase64 = zipBuffer.toString("base64");
var zip = new NodeZip(zipAsBase64, { base64: true });
// Content of one named entry from the archive.
var unzipped = zip.files["your-text-file.txt"].data;

Develop Reference

JavaScript is the programming language of the Web.

Read all files in a directory and parse them to JSON - javascript

This code gives you a list of the files in a folder: var fs = require('fs'); var files = fs.readdirSync('/assets/photos/'); Then you can iterate over this list and run your code on each file.

Related

Not able to read files content inside NodeJS

Convert buffer to file [FileCollection:meteor/ostrio:files]

Node unzip . How to make file name

Node.js convert binary file to utf8

How to download and unzip a zip file in memory in NodeJs?

Categories

Resources