fs.readFileSync & path - javascript

I am relatively new to Node.js and have been looking around but cannot find a solution.
I want to read files from a subfolder 'filesPath'. I don't know how to write fs.readFileSync correctly
This is my attempt. It works with let pdffile = fs.readFileSync(files[i]), but it does not work with let pdffile = fs.readFileSync(filesPath, files[i]). Can you help me?
In this example the array is empty, but I collect the file names in a previous step.
var fs = require('fs')
// NOTE(review): `path` is used on the next line, but no require('path') appears in this snippet.
const filesPath = path.join(__dirname, '/downloaded_files')
// Empty in this excerpt; the asker states it is filled in an earlier step.
var files = []
// Reads every entry of `files` and logs the last 23 characters of the parsed text.
function getNumbersAndPin() {
for (let i = 0; i < files.length; i++) {
// Problem line: fs.readFileSync(path[, options]) takes options as its second argument,
// so files[i] is not joined onto filesPath here.
let pdffile = fs.readFileSync(filesPath, files[i])
//let pdffile = fs.readFileSync(files[i]) //It works but looks for files in __dirname
// NOTE(review): pdfparse is not defined in this snippet; presumably it comes from a PDF parsing package — confirm.
pdfparse(pdffile).then(function (data) {
console.log(data.text.slice(-23))
})
}
}
// Schedules a single call of getNumbersAndPin after 3000 ms.
setTimeout(getNumbersAndPin, 3000)

Check the documentation https://nodejs.org/api/fs.html#fsreadfilesyncpath-options. The second argument to readFileSync expects "options", not a file name or alike. Furthermore, your "files" array is empty.

As mentioned in a comment, you need to call path.join again. From this:
let pdffile = fs.readFileSync(filesPath, files[i])
to
let filePath = path.join(filesPath, '/', files[i])
let pdffile = fs.readFileSync(filePath)

Related

Not able to read files content inside NodeJS

I have some markdown files inside /markdown folder. I am trying to read content of these files. I can see the file names inside the array. But when I try to read it, it doesn't return any data or error. What needs to be done here?
// Route handler for GET "/" that attempts to read the files found in mdPath.
app.get("/", async(req, res) => {
const mdPath = "...path"
// readdirSync is synchronous and returns the array directly, so the await adds nothing here.
const data = await fs.readdirSync(mdPath);
console.log(data) // Return Array of files
// NOTE(review): `<=` lets i reach data.length, one past the last valid index.
for (let i = 0; i <= data.length; i++) {
// NOTE(review): the loop counter i is passed instead of data[i]; fs.readFileSync
// interprets a number as a file descriptor, not as a file name.
const fileContent = fs.readFileSync(i, "utf-8");
// Returns during the first iteration; nothing is ever written to `res`.
return fileContent;
}
})
You should use Node's path module (for example path.join()) to better handle the filesystem side.
This could work your way:
const fs = require('fs') // Node.js file-system module
const path = require('path') // Node.js path utilities
const mdPath = 'md' // folder name, relative to this script
const mdDir = path.join(__dirname, mdPath) // full path of the markdown folder
const data = fs.readdirSync(mdDir) // names of the entries inside that folder
console.log('file names', data)
// Print every file name followed by the file's UTF-8 content.
data.forEach((fileName) => {
  console.log('file name:', fileName)
  const fileContent = fs.readFileSync(path.join(mdDir, fileName), 'utf-8')
  console.log('content:\n' + fileContent)
})
I created a folder ./md, containing the files one.md, two.md, three.md. The code above logs their content just fine.
>> node .\foo.js
file names [ 'one.md', 'three.md', 'two.md' ]
file name: one.md
content:
# one
file name: three.md
content:
# three
file name: two.md
content:
# two
Note that there is no error handling for anything that could go wrong with reading files.

Unable to fetch all text file names recursively from a directory

I'm trying to fetch all text files from a directory in a recursive manner (i.e. search all sub-folders):
let fs = require("fs");
// Intended to return the paths of all ".txt" files under dirName, descending into sub-folders.
function getPathNames(dirName) {
let pathNames = [];
for (let fileName of fs.readdirSync(dirName)) {
let pathName = dirName + "/" + fileName;
if (fs.statSync(pathName).isDirectory())
// Array.prototype.concat returns a new array and leaves pathNames unchanged;
// the returned array is discarded here, so paths found in sub-folders are lost.
pathNames.concat(getPathNames(pathName));
else if (pathName.endsWith(".txt"))
pathNames.push(pathName);
}
return pathNames;
}
However, when I call getPathNames("."), I get only the name of the first file.
It works fine if I take the return-value out of the function, and update a global variable instead:
let fs = require("fs");
let pathNames = [];
/**
 * Walks dirName recursively and appends the path of every entry whose
 * name ends in ".txt" to the module-level `pathNames` array.
 *
 * @param {string} dirName - directory to scan
 */
function getPathNames(dirName) {
  fs.readdirSync(dirName).forEach((entry) => {
    const fullPath = `${dirName}/${entry}`;
    if (fs.statSync(fullPath).isDirectory()) {
      // Descend into the sub-folder; results land in the shared array.
      getPathNames(fullPath);
      return;
    }
    if (fullPath.endsWith(".txt")) {
      pathNames.push(fullPath);
    }
  });
}
Does anyone spot anything wrong with the first method?
Well, concat is not an in place mutation, but returns you a new array instead, so I would say you should do this instead
pathNames = pathNames.concat(getPathNames(pathName));

Create Structured Data

lets assume i have this (snippet):
var filesarray = [];
if(req.files){
req.files.forEach(function(file){
var fileName = file.filename;
var filePath = file.path;
filesarray.push(
filePath
);
})
}
And later i push it with mongoose:
// Insert one document whose `filepaths` field holds the collected paths.
// The callback is passed as the second argument of DB.create (in the original
// snippet it sat outside the call's parentheses, which is a syntax error).
DB.create(
  {
    filepaths: filesarray,
  },
  function (err, res) {
    if (err) {
      throw err;
    } else {
      console.log("1 document inserted");
      DB.close();
    }
  }
);
The result i receive is not really what i want, because in mongodb i get a comma separated list, like:
filepaths
/files/1540474914824.png,/files/1540474914828.png,files/1540474914831.png
I would like to have something like:
filepaths
filename -> filepath
filename -> filepath
filename -> filepath
I hope I could make clear what the goal is. I am sure there is an elegant way to reach it, so could someone please give me a direction?
Thanks,
Regards
First off, you can't use the same key more than once in an object, so {filepaths: {file: '1', file: '2'}} will not work. You will need to have a unique name per file/path.
var files = {};
if(req.files){
req.files.forEach(function(file){
var fileName = file.filename;
var filePath = file.path;
files[fileName] = filePath;
})
}
you could use map to have an array of object but this seems more cumbersome to me
var files;
if(req.files){
files = req.files.map(function(file){
var fileName = file.filename;
var filePath = file.path;
return { [fileName]: filePath };
})
}

Create ReadStream from Base64 encoded string by file

I feel I got lack of understanding about Buffer and File Stream, but I can't find any specific idea from other answers which is treating base64 string as a actual file.
I used 'request' package from Npm, to send a file to other server by http, multipart protocol.
The code below is working well, read a file from actual file by 'fs' package and send it by ReadStream object by createReadStream method.
(The codes is coffeescript)
#working
res = request.postSync 'http://anotherUrl/uploadDocument',
formData: file: fs.createReadStream('/path/' + 'myfile.doc')
What I want to do is create the same kind of ReadStream object with the fs module, but from a Base64-encoded string of a file.
I tested something like this, but it's not working properly.
#not working
res = request.postSync 'http://anotherUrl/uploadDocument',
formData: file: new Buffer(base64EncodedString, 'base64')
#not working
res = request.postSync 'http://anotherUrl/uploadDocument',
formData: file: _base64ToArrayBuffer(base64EncodedString)
#not working
res = request.postSync 'http://anotherUrl/uploadDocument',
formData: file: convertDataURIToBinary(base64EncodedString)
##(used function)
# Decodes a base64 string with the 'atob' package and yields the ArrayBuffer
# that backs the decoded bytes.
_base64ToArrayBuffer = (base64) ->
binary_string = require('atob')(base64)
len = binary_string.length
bytes = new Uint8Array(len)
i = 0
# Copy the character code of each decoded character into the byte array.
while i < len
bytes[i] = binary_string.charCodeAt(i)
i++
# Final expression: the underlying ArrayBuffer, not the Uint8Array itself.
bytes.buffer
# Takes the part of dataURI that follows ';base64,', decodes it with the
# 'atob' package and yields the bytes as a Uint8Array.
convertDataURIToBinary = (dataURI) ->
BASE64_MARKER = ';base64,'
# NOTE(review): if the marker is absent, indexOf returns -1 and this index
# becomes BASE64_MARKER.length - 1 — confirm callers always pass a data URI.
base64Index = dataURI.indexOf(BASE64_MARKER) + BASE64_MARKER.length
base64 = dataURI.substring(base64Index)
raw = require('atob')(base64)
rawLength = raw.length
array = new Uint8Array(new ArrayBuffer(rawLength))
i = 0
# Copy the character code of each decoded character into the byte array.
while i < rawLength
array[i] = raw.charCodeAt(i)
i++
# Final expression: the filled Uint8Array.
array
question conclusion
The 'base64EncodedString' is validated by decode & creating file, so i don't doubt it is the matter, so I think I could have this achievement with write file from base64 and read it again with fs module, but I believe that's not the proper way.
The point of question is,
1. How could I send the base64encoded string as a ReadStream object properly in this case
2. How could I figure out relationship clearly between buffer and dataview(uint8array ...) something like that
Thanks in advance.
appendix
From @Alex Nikulin's comment, I've tested the stream-buffers package.
streamBuffers = require('stream-buffers')
myReadableStreamBuffer = new (streamBuffers.ReadableStreamBuffer)(
frequency: 10
chunkSize: 2048)
myReadableStreamBuffer.put base64.decode example
myReadableStreamBuffer.stop()
And when I tried the code below it failed again, and 'AnotherUrl' returned an error message like this: 'unexpected end of part'
res = request.postSync 'http://anotherUrl/uploadDocument',
formData: file: myReadableStreamBuffer
I figured out the myReadableStreamBuffer object is Readable object, so it might be different with ReadStream object. Can I get it as a ReadStream from myReadableStreamBuffer?
Try this
//if you need just a buffer
/**
 * Decodes a base64 string into a Node.js Buffer holding the raw bytes.
 *
 * The previous version allocated a second Buffer and filled a
 * `new Uint8Array(buffer)` byte by byte; constructing a typed array from
 * another typed array copies it, so the Buffer that was returned never
 * received the data. `Buffer.from` decodes directly and also avoids the
 * deprecated `new Buffer()` constructor.
 *
 * @param {string} base64 - base64-encoded data (no data-URI prefix)
 * @returns {Buffer} the decoded bytes
 */
function base64ToBuffer(base64) {
  return Buffer.from(base64, 'base64');
}
//if you need a stream, not a buffer
var stream = require('stream');
// Initiate the source
var bufferStream = new stream.PassThrough();
// Write your buffer
bufferStream.end(base64ToBuffer(base64));
bufferStream.pipe( process.stdout );

Get docx file contents using javascript/jquery

I want to open / read docx file using client side technologies (HTML/JS).
I have found a Javascript library named docx.js but personally cannot seem to locate any documentation for it.
(http://blog.innovatejs.com/?p=184)
The goal is to make a browser based search tool for docx files and txt files.
With docxtemplater, you can easily get the full text of a Word document (works with docx only) by using the doc.getFullText() method.
HTML code:
<body>
<button onclick="gettext()">Get document text</button>
</body>
<!-- docxtemplater, plus PizZip (zip reader) and PizZipUtils (binary download helper).
     unpkg URLs pin a version with "@" (package@version/path). -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/docxtemplater/3.26.2/docxtemplater.js"></script>
<script src="https://unpkg.com/pizzip@3.1.1/dist/pizzip.js"></script>
<script src="https://unpkg.com/pizzip@3.1.1/dist/pizzip-utils.js"></script>
<script>
  // Downloads `url` as binary content and passes (error, content) to `callback`.
  function loadFile(url, callback) {
    PizZipUtils.getBinaryContent(url, callback);
  }

  // Button handler: fetches the sample .docx, extracts its full text,
  // then logs it and shows it in an alert.
  function gettext() {
    loadFile(
      "https://docxtemplater.com/tag-example.docx",
      function (error, content) {
        if (error) {
          throw error;
        }
        var zip = new PizZip(content);
        var doc = new window.docxtemplater(zip);
        var text = doc.getFullText();
        console.log(text);
        alert("Text is " + text);
      }
    );
  }
</script>
I know this is an old post, but docxtemplater has moved on and the accepted answer no longer works. This worked for me:
function loadDocx(filename) {
// Read document.xml from docx document
const AdmZip = require("adm-zip");
const zip = new AdmZip(filename);
const xml = zip.readAsText("word/document.xml");
// Load xml DOM
const cheerio = require('cheerio');
$ = cheerio.load(xml, {
normalizeWhitespace: true,
xmlMode: true
})
// Extract text
let out = new Array()
$('w\\:t').each((i, el) => {
out.push($(el).text())
})
return out
}
You can try docxyz.
let {Document} = require('docxyz');
let fileName = 'yourfile.docx';
let document = new Document(fileName);
let text = document.text;
console.log(text);
No tables.
let {Document} = require('docxyz');
let fileName = 'yourfile.docx';
let document = new Document(fileName);
let a = [];
for(let paragraph of document.paragraphs){
a.push(paragraph.text);
}
let text = a.join('\n');
console.log(text);
This solution will give you an array of strings, one element for each paragraph in the docx :
const PizZip = require("pizzip");
// Scoped npm package: the name starts with "@", not "#".
const { DOMParser, XMLSerializer } = require("@xmldom/xmldom");
const fs = require("fs");
const path = require("path");
/**
 * Parses an XML string into a DOM document.
 *
 * A leading byte-order mark (U+FEFF, char code 65279) is dropped before
 * the text is handed to DOMParser.
 *
 * @param {string} str - XML source text
 * @returns {Document} result of DOMParser#parseFromString(..., "text/xml")
 */
function str2xml(str) {
  const BOM = "\uFEFF";
  const xmlText = str.startsWith(BOM) ? str.slice(1) : str;
  return new DOMParser().parseFromString(xmlText, "text/xml");
}
/**
 * Returns the text of every paragraph in a .docx file.
 *
 * Opens the zip with PizZip, parses "word/document.xml" via str2xml, and
 * for each <w:p> element concatenates the first child node's value of
 * every <w:t> element it contains.
 *
 * @param {string} content - binary content of the .docx file
 * @returns {string[]} one string per <w:p> element ("" when it has no text)
 */
function getParagraphs(content) {
  const zip = new PizZip(content);
  const xml = str2xml(zip.files["word/document.xml"].asText());
  const paragraphsXml = xml.getElementsByTagName("w:p");
  const paragraphs = [];
  for (let i = 0, len = paragraphsXml.length; i < len; i++) {
    let fullText = "";
    const textsXml = paragraphsXml[i].getElementsByTagName("w:t");
    for (let j = 0, len2 = textsXml.length; j < len2; j++) {
      const childNodes = textsXml[j].childNodes;
      // An empty run (<w:t/>) has a childNodes list with no entries; the list
      // itself is still truthy, so its length must be checked before indexing.
      if (childNodes && childNodes.length > 0) {
        fullText += childNodes[0].nodeValue;
      }
    }
    paragraphs.push(fullText);
  }
  return paragraphs;
}
// Load the docx file as binary content
const content = fs.readFileSync(
path.resolve(__dirname, "examples/cond-image.docx"),
"binary"
);
// Will print ['Hello John', 'how are you ?'] if the document has two paragraphs.
console.log(getParagraphs(content));
Source : https://docxtemplater.com/faq/#how-can-i-retrieve-the-docx-content-as-text
If you want to be able to display the docx files in a web browser, you might be interested in Native Documents' recently released commercial Word File Editor; try it at https://nativedocuments.com/test_drive.html
You'll get much better layout fidelity if you do it this way, than if you try to convert to (X)HTML and view it that way.
It is designed specifically for embedding in a webapp, so there is an API for loading documents, and it will sit happily within the security context of your webapp.
Disclosure: I have a commercial interest in Native Documents

Categories

Resources