How to get xml2js results out of the parser in ES6? - javascript

I'm building a server in Node that will search a folder to see if an XML file exists (glob), and if it does, read the file in (fs) as a JSON object (xml2js) and eventually store it in a database somewhere. I want to get the results OUT of the parser and into another variable so I can do other things with the data. From what I can tell, something is running asynchronously, but I can't figure out how to make the code wait until it's finished before moving on.
I'm separating my function out into a controller, separate from app.js:
app.controller.js
const fs = require('fs-extra');
const glob = require('glob');
const xml2js = require('xml2js');

exports.requests = {};

exports.checkFileDrop = async () => {
  console.log('Checking for xml in filedrop...');
  // this is the only place await works...
  await glob('./filedrop/ALLREQUESTS-*.xml', (err, files) => {
    var parser = new xml2js.Parser();
    // this is looking for a specific file now, which I'll address later once I can figure out this issue
    fs.readFile('./filedrop/ALLREQUESTS-20170707.xml', 'utf16le', function (err, data) {
      if (err) {
        console.log('ERROR: ', err);
      } else {
        parser.parseString(data, (err, result) => {
          if (err) {
            console.log('ERROR: ', err);
          } else {
            console.log('data found');
            exports.requests = JSON.stringify(result.Records.Record);
            // data is outputted here correctly
            console.log(exports.requests);
            // this doesn't even seem to want to save to exports.requests anyways...
          }
        });
      }
    });
  });
}
app.js
const appController = require('./controllers/app.controller');
// check if there is file in filedrop
appController.checkFileDrop();
// prints out an empty object
console.log(appController.requests);
// can't do anything if it doesn't exist yet
appController.saveToDB(appController.requests);

await will wait for a Promise value to resolve; otherwise it'll just wrap the value it is given in a promise and resolve that promise right away. In your example,
await glob('./filedrop/ALLREQUESTS-*.xml', (err, files) => {
the call to glob does not return a Promise, so the await is essentially useless; you need to create the promise yourself.
exports.checkFileDrop = async () => {
  console.log('Checking for xml in filedrop...');
  const files = await new Promise((resolve, reject) => glob('./filedrop/ALLREQUESTS-*.xml', (err, files) => {
    if (err) reject(err);
    else resolve(files);
  }));
  const parser = new xml2js.Parser();
  const data = await new Promise((resolve, reject) => fs.readFile('./filedrop/ALLREQUESTS-20170707.xml', 'utf16le', function (err, data) {
    if (err) reject(err);
    else resolve(data);
  }));
  const result = await new Promise((resolve, reject) => parser.parseString(data, (err, result) => {
    if (err) reject(err);
    else resolve(result);
  }));
  console.log('data found');
  const requests = JSON.stringify(result.Records.Record);
  console.log(requests);
}
Note that now this function will reject the promise it returns instead of force-logging the error.
You can also condense this down with a helper. Node 8 for instance includes util.promisify to make code like this easier to write, e.g.
const util = require('util');

exports.checkFileDrop = async () => {
  console.log('Checking for xml in filedrop...');
  const files = await util.promisify(glob)('./filedrop/ALLREQUESTS-*.xml');
  const parser = new xml2js.Parser();
  const data = await util.promisify(fs.readFile)('./filedrop/ALLREQUESTS-20170707.xml', 'utf16le');
  const result = await util.promisify(parser.parseString.bind(parser))(data);
  console.log('data found');
  const requests = JSON.stringify(result.Records.Record);
  console.log(requests);
}
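Either way, app.js then has to wait for the returned promise before using the data, rather than reading exports.requests synchronously. A minimal sketch, assuming you change checkFileDrop to return the parsed requests (saveToDB is the hypothetical function from the question):
const appController = require('./controllers/app.controller');

appController.checkFileDrop()
  .then((requests) => {
    // requests only exists once the file has been read and parsed
    appController.saveToDB(requests);
  })
  .catch((err) => console.error(err));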

You can use async/await:
import fs from 'fs';
import { promisify } from 'util';
import xml2js from 'xml2js';

const xmlToJson = async filePath => {
  const parser = new xml2js.Parser();
  try {
    const data = await fs.promises.readFile(filePath, 'utf8');
    // bind the parser so parseString keeps its `this` when promisified
    const result = await promisify(parser.parseString.bind(parser))(data);
    const requests = JSON.stringify(result.merchandiser.product);
    return requests;
  } catch (err) {
    console.log(err);
  }
};
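Usage is then an ordinary promise call (the path here is just an example):
xmlToJson('./filedrop/ALLREQUESTS-20170707.xml')
  .then(requests => console.log(requests));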

Related

Using async/await syntax with node stream

I'm using Node 18.7 on Ubuntu. I'm trying to parse a bunch of CSV files to objects (using csv-parse), ultimately to load into a database. Because there are large numbers of them I decided to try streams, and I'd like to use the async/await style.
So far I have:
const fs = require('fs');
const { parse } = require('csv-parse');

const path = __dirname + '/file1.csv';
const opt = { columns: true, relax_column_count: true, skip_empty_lines: true, skip_records_with_error: true };
console.log(path);

const { pipeline } = require('stream');
// const pipeline = stream.pipeline;

async function readByLine(path, opt) {
  const readFileStream = fs.createReadStream(path);
  var csvParser = parse(opt, function (err, records) {
    if (err) throw err;
  });
  await pipeline(readFileStream, csvParser, (err) => {
    if (err) {
      console.error('Pipeline failed.', err);
    } else {
      console.log('Pipeline succeeded.');
    }
  });
  for await (const record of csvParser) {
    console.log(record);
  }
}

readByLine(path, opt)
When I run this I see:
Pipeline succeeded.
But the parsed objects are not sent to the console. What am I doing wrong?
Edit 1:
I changed the code to:
async function readByLine(path, opt) {
  const readFileStream = fs.createReadStream(path);
  var csvParser = parse(opt, function (err, records) {
    if (err) throw err;
  });
  await pipeline(readFileStream, csvParser, (err) => {
    if (err) {
      console.error('Pipeline failed.', err);
    } else {
      console.log('Pipeline succeeded.');
    }
  });
  // for await (const record of csvParser) {
  //   console.log(record);
  // }
  return csvParser;
}

(async function () {
  const o = await readByLine(path, opt);
  console.log(o);
})();
The result is an object which has a million properties, but some of them do look set, as in the screenshot.
You can only usefully await a promise.
The pipeline function you are using doesn't return a promise.
If you look at the documentation you will see:
The pipeline API provides a promise version, which can also receive an options argument as the last parameter with a signal <AbortSignal> property. When the signal is aborted, destroy will be called on the underlying pipeline, with an AbortError.
const { pipeline } = require('node:stream/promises');
const fs = require('node:fs');
const zlib = require('node:zlib');

async function run() {
  await pipeline(
    fs.createReadStream('archive.tar'),
    zlib.createGzip(),
    fs.createWriteStream('archive.tar.gz')
  );
  console.log('Pipeline succeeded.');
}

run().catch(console.error);
Note the different value passed to require. Use that version of pipeline instead.
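For the CSV case specifically, you may not even need pipeline: the stream returned by csv-parse is async-iterable, so you can pipe the file into the parser and collect records with for await. A minimal sketch, assuming the same path and opt as in the question:
const fs = require('node:fs');
const { parse } = require('csv-parse');

async function readByLine(path, opt) {
  // Piping into the parser gives back a stream we can iterate record by record.
  const parser = fs.createReadStream(path).pipe(parse(opt));
  const records = [];
  for await (const record of parser) {
    records.push(record);
  }
  return records;
}

readByLine(path, opt).then((records) => console.log(records));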

Sharp Image Metadata Extraction Error - Input file contains unsupported image format

I am seeing the following error when trying to extract an image's metadata information with the Sharp module: "Input file contains unsupported image format".
This is only happening for certain signed image urls, particularly ones that contain xmp information in the metadata.
I am hoping someone can help me spot where the issue might be occurring in this code snippet.
Here is the exact code snippet I am using (insert the signed image URL where specified in the doStuff function to test):
const sharp = require("sharp");
const fs = require('fs');
const fetch = require('node-fetch');
async function storeUrlToLocal(sourceUrl) {
const destPath = './';
const request = {
method: 'GET',
encoding: null,
};
response = await fetch(sourceUrl, request);
if (response.status >= 400)
throw new Error(`Failed to fetch data from ${sourceUrl}, status returned = ${response.status}`);
const localPath = `${destPath}test.png`;
const fileStream = fs.createWriteStream(localPath);
return new Promise((resolve, reject) => {
response.body.pipe(fileStream);
response.body.on("error", reject);
response.body.on("end", async () => {
const fileExists = fs.existsSync(localPath);
console.log(`All the data in the file has been read ${localPath} = ${fileExists}`);
resolve(localPath);
});
response.body.on("finish",() => {
console.log('All writes are now complete.');
});
}).catch(error => {
console.log(error);
});
}
async function doStuff() {
const localFilePath = await storeUrlToLocal('<INSERT_IMAGE_URL_HERE>');
// Read image file and extract metadata
let manipulator;
let imageMetadata;
try {
manipulator = sharp(localFilePath, { limitInputPixels: 5000000000 });
console.log('Manipulator = ', manipulator);
imageMetadata = await manipulator.metadata();
console.log("ImageMetadata = ", imageMetadata);
} catch (error) {
console.log(`Image Metadata Extraction Error: ${error.message}`);
throw error;
}
}
doStuff();
This code snippet above fails with the "Input file contains unsupported image format" on the line that extracts metadata (imageMetadata = await manipulator.metadata();)
So the strange thing is, I am able to properly extract the metadata (with no errors) with this same code if I add a short sleep after this line: const fileStream = fs.createWriteStream(localPath);
So this code snippet (all I'm doing here is adding a short sleep after fs.createWriteStream) allows the image metadata to be extracted without issue:
const sharp = require("sharp");
const fs = require('fs');
const fetch = require('node-fetch');
async function storeUrlToLocal(sourceUrl) {
const destPath = './';
const request = {
method: 'GET',
encoding: null,
};
response = await fetch(sourceUrl, request);
if (response.status >= 400)
throw new Error(`Failed to fetch data from ${sourceUrl}, status returned = ${response.status}`);
const localPath = `${destPath}test.png`;
const fileStream = fs.createWriteStream(localPath);
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
await sleep(1000);
return new Promise((resolve, reject) => {
response.body.pipe(fileStream);
response.body.on("error", reject);
response.body.on("end", async () => {
const fileExists = fs.existsSync(localPath);
console.log(`All the data in the file has been read ${localPath} = ${fileExists}`);
resolve(localPath);
});
response.body.on("finish",() => {
console.log('All writes are now complete.');
});
}).catch(error => {
console.log(error);
});
}
async function doStuff() {
const localFilePath = await storeUrlToLocal('<INSERT_IMAGE_URL_HERE>');
// Read image file and extract metadata
let manipulator;
let imageMetadata;
try {
manipulator = sharp(localFilePath, { limitInputPixels: 5000000000 });
console.log('Manipulator = ', manipulator);
imageMetadata = await manipulator.metadata();
console.log("ImageMetadata = ", imageMetadata);
} catch (error) {
console.log(`Image Metadata Extraction Error: ${error.message}`);
throw error;
}
}
doStuff();
Why would this Sleep resolve my issues? I don't see any asynchronous calls being run that I would need to be waiting for to complete. Perhaps fs.createWriteStream didn't have enough time to complete its operation? But I do not have the option to await the call to fs.createWriteStream, as it is not async.
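A plausible explanation: the promise resolves on the response stream's "end" event, which fires when the data has been read from the network, not when fileStream has flushed it to disk, so sharp can open a still-incomplete file; the sleep just papers over that race. A sketch of the promise body that waits for the write stream's "finish" event instead (same storeUrlToLocal shape as above):
return new Promise((resolve, reject) => {
  response.body.pipe(fileStream);
  response.body.on("error", reject);
  fileStream.on("error", reject);
  // "finish" fires on the write stream once all bytes are flushed to disk,
  // which is the moment it is safe to hand the file to sharp
  fileStream.on("finish", () => resolve(localPath));
});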

How to get value inside foreach in nodejs

I'm trying to develop a simple app: if you pass a parameter on the command line, the application searches inside a directory, and if the text matches in some of the files, those files should be saved in a list. But when I add the console.log, the value is not updated.
Here is my code:
const folder = "./movies/data";
const fs = require("fs");
var args = process.argv.slice(2);
console.log("myArgs: ", args);
var count = 0;
var list = [];
fs.readdir(folder, (err, files) => {
files.forEach((file) => {
fs.readFile(`movies/data/${file}`, "utf8", function (err, data) {
if (err) console.log(err);
if (data.includes("walt disney")) {
count++;
list.push(data);
console.log("Found in: ", data);
}
});
});
console.log(`Foram encontradas ${count} ocorrĂȘncias pelo termo ${args}.`);
});
any suggestions about what i'm doing wrong?
For your program to work, you will have to add some Promise / async/await logic. At the moment you log the count, the asynchronous fs.readFile() callbacks have not run yet, so count and list still hold their initial values.
This should work:
const { resolve } = require('path');
const { readdir } = require('fs').promises;
const fs = require("fs");

var args = process.argv.slice(2);
const pathToFiles = "./movies/";

// Recursively collect the paths of all files under a directory
async function getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  const files = await Promise.all(dirents.map((dirent) => {
    const res = resolve(dir, dirent.name);
    return dirent.isDirectory() ? getFiles(res) : res;
  }));
  return Array.prototype.concat(...files);
}

getFiles(pathToFiles)
  .then(files => {
    console.log(files);
    files.forEach((file) => {
      fs.readFile(file, 'utf8', (err, data) => {
        if (err) return console.log(err);
        if (data.includes(args)) {
          console.log(`${args} found in ${file}.`);
        } else {
          console.log(`${args} not found.`);
        }
      });
    });
  })
  .catch(e => console.error(e));
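If you also want the count and the list the original code was building, fs.promises together with Promise.all lets you wait for all the reads to finish before logging. A minimal sketch, assuming the same folder layout as the question:
const fsp = require('fs').promises;

const folder = './movies/data';
const term = process.argv[2];

async function findMatches() {
  const files = await fsp.readdir(folder);
  // Read every file, then keep only the ones containing the search term
  const contents = await Promise.all(
    files.map((file) => fsp.readFile(`${folder}/${file}`, 'utf8'))
  );
  const matches = contents.filter((data) => data.includes(term));
  console.log(`Found ${matches.length} occurrences of the term ${term}.`);
  return matches;
}

findMatches().catch(console.error);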

I called then() on a TypeScript promise but it is still pending. Why is this? How can I get it to resolve?

Here is the index.ts script I am running (based on something I found on reddit):
const path = require("path");
const sql = require("mssql");
const config = require(path.resolve("./config.json"));
let db1;
const connect = () => {
return new Promise((resolve, reject) => {
db1 = new sql.ConnectionPool(config.db, err => {
if (err) {
console.error("Connection failed.", err);
reject(err);
} else {
console.log("Database pool #1 connected.");
resolve();
}
});
});
};
const selectProjects = async (name) => {
const query = `
select * from [Time].ProjectData where [Name] like concat('%', concat(#name, '%'))`;
const request = new sql.Request(db1);
const result = await request
.input("name", name)
.query(query);
return result.recordset;
};
module.exports = {
connect,
selectProjects
};
connect().then(function() {
console.log(selectProjects('General'));
}).catch(function(err) {
console.log(err);
});
When I run the script using node index (after compiling it of course), I get this in the console:
Database pool #1 connected.
Promise { <pending> }
And then the script hangs.
Apparently an async function always returns a Promise, so selectProjects('General') gives back a pending Promise rather than the recordset itself; I had to change the last function call to:
connect().then(function() {
  selectProjects('General').then(function(data) {
    console.log(data);
  });
}).catch(function(err) {
  console.log(err);
});
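The same flow reads more directly with async/await; a sketch equivalent to the nested then() calls above:
(async () => {
  try {
    await connect();
    const data = await selectProjects('General');
    console.log(data);
  } catch (err) {
    console.log(err);
  }
})();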

How should I download a file in Node? [duplicate]

I have this code that serves every markdown file in the './markdown' folder at '/api/markdown/filename'.
var apiRouter = express.Router();
var markdownFolder = './markdown/';

apiRouter.get('/:markdown_file_noext', function(req, res) {
  fs.readdir(markdownFolder, function(err, markdown) {
    if (err) throw err;
    markdown.forEach(function(file) {
      fs.readFile(markdownFolder + file, 'utf8', function(err, file_content) {
        if (err) throw err;
        var fileNoExtension = file.slice(0, file.indexOf('.'));
        if (req.params.markdown_file_noext == fileNoExtension) {
          res.json({
            'title': fileNoExtension,
            'markdown': marked(file_content)
          });
        }
      });
    });
  });
});
But I end up with a ton of nested callbacks due to the nature of the 'fs' methods. How do I avoid this?
Using Q as promise library:
const Q = require('q');
const fs = require('fs');

const markdownFolder = './markdown/';
const readdir = Q.nfbind(fs.readdir);
const readFile = Q.nfbind(fs.readFile);

readdir(markdownFolder).then(markdown => {
  const promises = [];
  markdown.forEach(file => promises.push(readFile(markdownFolder + file, 'utf8')));
  return Q.all(promises);
}).then(files => {
  // Do your magic.
}).catch(error => {
  // Do something with error.
});
You have different options:
Use named functions instead of anonymous functions. It makes the code a little more readable, but you will still be using callbacks.
Use Promises; you will need to use bluebird (or similar) to wrap the fs module.
For a more advanced option, you can use generators and Promises to make your code read in a synchronous style. Take a look at co or bluebird.coroutine.
With Promises you could do it like this:
const path = require('path');

var apiRouter = express.Router();
const markdownFolder = './markdown/';

apiRouter.get('/:markdown_file_noext', function(req, res) {
  readdir(markdownFolder)
    .then((files) => {
      const tasks = files.map((file) => {
        const filePath = path.resolve(markdownFolder, file);
        // Keep the file name together with its content for the next step
        return readFile(filePath).then((content) => ({ file, content }));
      });
      return Promise.all(tasks); // Read all files
    })
    .then((fileContents) => {
      return fileContents
        .filter(({ file }) => file.slice(0, file.indexOf('.')) == req.params.markdown_file_noext)
        .map(({ file, content }) => ({
          'title': file.slice(0, file.indexOf('.')),
          'markdown': marked(content)
        }));
    })
    .then((results) => {
      // It's better if you aggregate all results in one array and return it,
      // instead of calling res.json for each result
      res.json(results);
    })
    .catch((err) => {
      // All errors are caught here
      console.log(err);
    });
});
function readdir(folderPath) {
  return new Promise((resolve, reject) => {
    fs.readdir(folderPath, (err, files) => {
      if (err) {
        return reject(err);
      }
      resolve(files);
    });
  });
}

function readFile(filePath) {
  return new Promise((resolve, reject) => {
    fs.readFile(filePath, 'utf8', (err, file_content) => {
      if (err) {
        return reject(err);
      }
      resolve(file_content);
    });
  });
}
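On current Node versions you don't need the hand-rolled wrappers at all: fs.promises plus async/await collapses the whole route. A sketch under the same assumptions as the question (express, marked, and the markdownFolder variable):
const fsp = require('fs').promises;
const path = require('path');

apiRouter.get('/:markdown_file_noext', async function(req, res) {
  try {
    const files = await fsp.readdir(markdownFolder);
    // Find the file whose name (without extension) matches the URL parameter
    const match = files.find(
      (file) => file.slice(0, file.indexOf('.')) === req.params.markdown_file_noext
    );
    if (!match) return res.status(404).end();
    const file_content = await fsp.readFile(path.join(markdownFolder, match), 'utf8');
    res.json({
      'title': req.params.markdown_file_noext,
      'markdown': marked(file_content)
    });
  } catch (err) {
    console.log(err);
    res.status(500).end();
  }
});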
