NodeJS Waiting for asynchronous function to complete foreach - javascript

Hi, I have been trying to make this work. I tried using the async module, but I didn't really know how to convert this code to use it. I also tried promisifying it, but that didn't work well (I think I did it wrong), so I reverted the function to the way it was at first.
Basically, I want to wait until the ReadJson() function has finished reading all the JSON files in the array, and only then run other functions such as editjson, etc.
Code:
App.js
// App.js: load the Reader class and kick off reading the JSON files.
const Reader = require('./Reader');
// The call's return value is not used, so nothing here waits for the reads to finish.
Reader.ReadJson();
Reader.js
// Reader.js: reads each file listed in JsonFiles and collects the parsed contents.
const fsp = require('fs-promise');
const JsonFiles = ['json1.json', 'json2.json', 'json3.json', 'json4.json'];
// Parsed contents are appended here as each read completes.
const JsonContents = [];
class Reader {
// Starts one read per file. The .then callbacks run later, after this method has returned.
static ReadJson() {
JsonFiles.forEach(name => {
let FileDir = "D:\\Development\\Java\\" + name;
fsp.readJson(FileDir).then(contents => {
if (contents) {
JsonContents.push(contents);
console.log(`Loaded >> ${name} ${Reader.JsonContents.length}/${JsonFiles.length}`);
}
});
});
// This runs as soon as forEach has started the reads, not when they finish,
// which is why it is printed before the "Loaded" lines.
console.log('Done Reading Json Content!');
//Other functions
}
}
// Expose the shared array as a static property; the log line above reads its length.
Reader.JsonContents = JsonContents;
module.exports = Reader;
So basically the output is:
Done Reading Json Content!
Loaded >> json1.json 1/4
Loaded >> json2.json 2/4
Loaded >> json3.json 3/4
Loaded >> json4.json 4/4
When i need it to be:
Loaded >> json1.json 1/4
Loaded >> json2.json 2/4
Loaded >> json3.json 3/4
Loaded >> json4.json 4/4
Done Reading Json Content!
Thank you :)

Return a promise, track your progress in the forEach and resolve it only when JsonContents length is the same as JsonFiles length.
const fsp = require('fs-promise');
const JsonFiles = ['json1.json', 'json2.json', 'json3.json', 'json4.json'];
const JsonContents = [];
class Reader {
  /**
   * Reads every file listed in JsonFiles and stores the parsed contents in JsonContents.
   *
   * @returns {Promise<Array>} resolves with JsonContents once as many contents have
   *   been stored as there are files; rejects with the error of a failed read.
   */
  static ReadJson() {
    return new Promise((resolve, reject) => {
      JsonFiles.forEach(name => {
        let FileDir = "D:\\Development\\Java\\" + name;
        fsp.readJson(FileDir).then(contents => {
          if (contents) {
            JsonContents.push(contents);
            console.log(`Loaded >> ${name} ${Reader.JsonContents.length}/${JsonFiles.length}`);
          }
          // Progress check: whichever read finishes last resolves the promise.
          // NOTE: falsy contents are not stored, so such a file is never counted here.
          if (JsonContents.length === JsonFiles.length) {
            return resolve(JsonContents);
          }
        }).catch(err => {
          return reject(err);
        });
      });
    });
  }
}
Reader.JsonContents = JsonContents;
module.exports = Reader;
And then use it in your app:
const Reader = require('./Reader');
// ReadJson() returns a promise, so "done" is logged only after it resolves.
Reader.ReadJson()
  .then(() => { console.log('Done Reading Json Content!'); })
  // Report a failed read instead of leaving the rejection unhandled.
  .catch(err => { console.error('Failed to read Json content:', err); });
Another option is using Promise.all, because you are using fs-promise, but although it can be done with forEach, a regular for loop is better here.
const fsp = require('fs-promise');
const JsonFiles = ['json1.json', 'json2.json', 'json3.json', 'json4.json'];
const JsonContents = [];
class Reader {
  /**
   * Starts a read for every file in JsonFiles and returns a single promise for the batch.
   * Truthy contents are appended to JsonContents as each read completes.
   *
   * @returns {Promise<Array>} the Promise.all of the per-file read promises
   */
  static ReadJson() {
    const pending = [];
    for (let i = 0; i < JsonFiles.length; i++) {
      const fileName = JsonFiles[i];
      const read = fsp.readJson("D:\\Development\\Java\\" + fileName);
      pending.push(read.then(contents => {
        if (!contents) {
          return;
        }
        JsonContents.push(contents);
        console.log(`Loaded >> ${fileName} ${Reader.JsonContents.length}/${JsonFiles.length}`);
      }));
    }
    return Promise.all(pending);
  }
}
Reader.JsonContents = JsonContents;
module.exports = Reader;

As an addendum to Ron Dadon's Promise.all method....
The Bluebird promise library provides some helper functions like Promise.map and Promise.filter that can remove a lot of the boiler plate of Promise array processing code.
const Promise = require('bluebird');
const fsp = require('fs-promise');
const path = require('path');
class Reader {
  /**
   * Reads every file in `jsonFiles` (relative to `jsonPath`) and drops falsy contents.
   *
   * @param {string} jsonPath - directory that contains the files
   * @param {string[]} jsonFiles - file names to read
   * @returns {Promise<Array>} the truthy parsed contents
   */
  static readFiles(jsonPath, jsonFiles) {
    return Promise.map(jsonFiles, name => {
      let filePath = path.join(jsonPath, name);
      return fsp.readJson(filePath);
    })
    .filter((content, index, length) => {
      if (!content) return false;
      console.log(`Loaded >> ${jsonFiles[index]} ${index+1} / ${length}`);
      return true;
    })
  }
  /**
   * Reads the files configured on the class (jsonPath / jsonFiles) and stores the
   * result on `jsonContents`.
   *
   * @returns {Promise<Array>} the stored contents
   */
  static readJson() {
    return this.readFiles(this.jsonPath, this.jsonFiles).then(contents => {
      console.log('Done Reading Json Content!', contents);
      return this.jsonContents = contents;
    })
  }
}
Reader.jsonFiles = ['json1.json', 'json2.json', 'json3.json', 'json4.json'];
Reader.jsonPath = 'D:\\Development\\Java';
module.exports = Reader;

Related

Node JS function that return https get request final edited data

Hello everybody I have a problem with the Node JS function that I want it to return https get request final edited data, I know there are a lot of solutions for this async problem but I tried them all and still can't figure out what is wrong with my code?
here is my function without any other solutions editing:
// Requests a page of movies for the genre and picks one at random in the 'end' handler.
// NOTE: getMovie itself has no return statement, so calling it yields undefined.
function getMovie(apiKey, gen) {
const baseUrl = "https://api.themoviedb.org/3/discover/movie?api_key=" + apiKey + "&language=en-US&include_adult=false&include_video=false&page=1&with_genres=" + gen;
https.get(baseUrl, function (responce) {
console.log(responce.statusCode);
// Accumulates the response body chunk by chunk.
var d = "";
responce.on("data", function (data) {
d += data;
});
responce.on("end", () => {
const finalData = [];
const moviesData = JSON.parse(d);
const result = moviesData.results;
const maxx = result.length;
// Random index into the results array.
const rand = Math.floor(Math.random() * maxx);
const title = result[rand].title;
const rDate = result[rand].release_date;
const overview = result[rand].overview;
const imageRoot = result[rand].poster_path;
const movieId = result[rand].id;
const movieRating = result[rand].vote_average;
// here will push those variables to finalData array
// then return it
// NOTE: this returns from the 'end' callback only, not from getMovie.
return finalData;
});
}).on('error', (e) => {
console.error(e);
});
}
and want after this finalData returns:
const finalResult = getMovie(apiKey, genre);
It always returns undefined. How can I fix this? Can anyone please help me with this problem?
thanks in advance.
I solved this problem using promises using this code:
const rp = require('request-promise');
/**
 * Requests `url` and resolves with `results.count` taken from the parsed JSON body.
 */
function getMovie(url) {
  return rp(url)
    .then(body => JSON.parse(body))
    // the count becomes the resolved value of the returned promise
    .then(parsed => parsed.results.count);
}
getMovie(someURL)
  .then(result => {
    // the resolved value is available here
    console.log(`Got result = ${result}`);
  })
  .catch(err => {
    console.log('Got error from getMovie ', err);
  });

How to read a file line by line in Javascript and store it in an array

I have a file in which the data is in the form like
abc@email.com:name
ewdfgwed@gmail.com:nameother
wertgtr@gmsi.com:onemorename
I want to store the emails and names in arrays like
email = ["abc@email.com","ewdfgwed@gmail.com","wertgtr@gmsi.com"]
names = ["name","nameother","onemorename"]
Also, the file is fairly large (around 50 MB), so I want to do this without using a lot of resources.
I have tried the following, but I can't get it to work:
try {
  // read contents of the file
  const data = fs.readFileSync('file.txt', 'UTF-8');
  // split the contents by new line
  const lines = data.split(/\r?\n/);
  // store every line (`names` and `num` must be declared before this snippet runs)
  lines.forEach((line) => {
    names[num] = line;
    num++
  });
} catch (err) {
  console.error(err);
}
Maybe this will help you.
Async Version:
const fs = require('fs')
const emails = [];
const names = [];
// Reads the whole file, then splits every "email:name" line into the two arrays.
fs.readFile('file.txt', (err, file) => {
  if (err) throw err;
  // Accept both \n and \r\n line endings so names do not keep a trailing \r.
  file.toString().split(/\r?\n/).forEach(line => {
    // Skip blank lines (e.g. after a trailing newline) so no ''/undefined pair is stored.
    if (line.trim() === '') return;
    const splitedLine = line.split(':');
    emails.push(splitedLine[0]);
    names.push(splitedLine[1]);
  });
});
Sync Version:
const fs = require('fs')
const emails = [];
const names = [];
// Reads the whole file synchronously and splits every "email:name" line into the two arrays.
// Accept both \n and \r\n line endings so names do not keep a trailing \r.
fs.readFileSync('file.txt').toString().split(/\r?\n/).forEach(line => {
  // Skip blank lines (e.g. after a trailing newline) so no ''/undefined pair is stored.
  if (line.trim() === '') return;
  const splitedLine = line.split(':');
  emails.push(splitedLine[0]);
  names.push(splitedLine[1]);
})
console.log(emails)
console.log(names)
You can directly use line-reader :
fileData.js :
const lineReader = require('line-reader');
/**
 * Collects the e-mail and name parts of a file whose lines look like "email:name".
 */
class FileData {
  /**
   * Starts reading `filePath` line by line and fills `emails` / `names`.
   * NOTE(review): eachLine presumably delivers lines asynchronously, so the arrays may
   * still be empty right after construction — confirm against the line-reader docs.
   *
   * @param {string} filePath - file to read
   */
  constructor(filePath) {
    this.emails = [];
    this.names = [];
    // An arrow function keeps `this` bound to the instance, so lines are stored on
    // this.emails / this.names rather than on undeclared variables.
    lineReader.eachLine(filePath, (line) => {
      console.log(line);
      const splitedLine = line.split(':');
      this.emails.push(splitedLine[0]);
      this.names.push(splitedLine[1]);
    });
  }
  /** @returns {string[]} the e-mail parts collected so far */
  getEmails() {
    return this.emails;
  }
  /** @returns {string[]} the name parts collected so far */
  getNames() {
    return this.names;
  }
}
module.exports = FileData
Wherever you want to use it:
const FileData = require('path to fileData.js');
const fileData = new FileData('test.txt');
console.log(fileData.getEmails())

Pagination in Zapier

I am trying following code to get all records from a paginated API in Zapier.
const limitPerPage = 20;
const apiUrl = "https://myurl.com/data";
var lastCursor = null;
var output = null;
// Fetches one page of results for the given cursor.
// NOTE: the awaited value is stored in apiResults but never returned, so callers get undefined.
const getContent = async function (cursor) {
let actualUrl = apiUrl + `?cursor=${cursor}&limit=${limitPerPage}`;
var apiResults = await fetch(actualUrl)
.then(resp => {
// NOTE: resp.json is referenced without being called, so this yields the method itself.
return resp.json;
});
}
// Recursively follows results.metadata.cursor until it is an empty string,
// concatenating the pages along the way.
const getEntireContentList = async function (cursor) {
const results = await getContent(cursor);
console.log("Retreiving data from API for cursor : " + cursor);
if (results.metadata.cursor !== "") {
return results.concat(await getEntireContentList(results.metadata.cursor));
} else {
return results;
}
};
// The async IIFE is started but nothing awaits it; output is assigned only after
// its own awaits have completed.
(async() => {
const entireList = await getEntireContentList();
console.log(entireList);
output = entireList;
callback(null, entireList);
})();
I get error as
You did not define output! Try output = {id: 1, hello: await Promise.resolve("world")};
How can I fix this?
Your problem is that though you're awaiting in that function, the top-level carries on and execution ends before your code has had a chance to run.
The good news is, Zapier wraps your code in an async function already, so you can use await at the top level (per these docs).
Try this instead:
const limitPerPage = 20;
const apiUrl = "https://myurl.com/data";
let lastCursor = null;
// var output = null; // zapier does this for you already
const getContent = async function (cursor) {
const actualUrl = apiUrl + `?cursor=${cursor}&limit=${limitPerPage}`;
const rawResponse = await fetch(actualUrl)
return resp.json() // async function, you had it as a property
}
const getEntireContentList = async function (cursor) {
const results = await getContent(cursor);
console.log("Retreiving data from API for cursor : " + cursor);
if (results.metadata.cursor !== "") {
return results.concat(await getEntireUserList(results.metadata.cursor)); // should this be named getEntireContentList?
} else {
return results;
}
};
return {
results: await getEntireContentList()
}
I noticed this is a recursive approach. That's fine, but remember that you've got limited execution time. You also might hit memory limits (depending on how many objects you're returning), so keep an eye on that.

module.exports is returning undefined

I am currently new to Node JS, and today I was trying to read data from a file data.json.
Here is the JSON file:
{"username":"rahul_v7","password":"9673"} {"username":"7vik","password":"3248"} {"username":"pradypot_2","password":"6824"} {"username":"ad_1","password":"9284"} {"username":"premchand_4","password":"4346"}
And, I was using the below code present in a file GetData.js, to read the data present in the data.json:
'use strict';
const fs = require('fs');
// res accumulates the raw file text; resObjs receives the parsed objects.
let res = '', resObjs = [];
let fin = fs.createReadStream('F:/RahulVerma/NodeJS/data.json', 'utf-8');
fin.on('data', data => {
if(data.length > 0) res += data;
}).on('end', () => {
if(res.length > 0) {
// The file holds several JSON objects separated by single spaces.
let resArr = res.trim().split(' ');
for(let i = 0; i < resArr.length; i++) {
resObjs.push(JSON.parse(resArr[i]));
}
// The export is assigned only inside the 'end' handler, i.e. once the stream has finished.
module.exports.objects = resObjs;
}
});
As you can see, I am exporting the resObjs array, which is actually an array of objects, to an another file named AppendData.js, which is given below:
'use strict';
const fs = require('fs');
const getObjs = require('./GetData');
console.log(getObjs.objects);
But when I run AppendData.js in Node.js 9.3.0 (ia32), it prints undefined instead of the array of objects.
You're trying to use the objects before they've been read. Remember that your code reading the stream runs asynchronously, and nothing in your code attempts to coordinate it with module loading. So AppendData.js isn't seeing the objects export because it doesn't exist yet as of when that code runs.
Instead, return a promise of the objects that AppendData.js can consume; see *** comments:
'use strict';
const fs = require('fs');
// *** Export the promise
// Resolves with the array of parsed objects (empty when the file has no content);
// rejects on a stream error or when a chunk is not valid JSON.
module.exports.objectsPromise = new Promise((resolve, reject) => {
  let res = '';
  let fin = fs.createReadStream('F:/RahulVerma/NodeJS/data.json', 'utf-8');
  fin.on('data', data => {
    if(data.length > 0) res += data;
  }).on('end', () => {
    const trimmed = res.trim();
    if(trimmed.length === 0) {
      // Settle even when there is nothing to parse, so consumers never wait forever.
      resolve([]);
      return;
    }
    try {
      // The file holds several JSON objects separated by single spaces.
      const resObjs = trimmed.split(' ').map(chunk => JSON.parse(chunk));
      resolve(resObjs); // *** Resolve the promise
    } catch (error) {
      // A throw inside an event handler would not reject the promise by itself.
      reject(error);
    }
  }).on('error', error => {
    reject(error); // *** Reject the promise
  });
});
Note I added a handler for errors.
And then:
'use strict';
const fs = require('fs');
const getObjs = require('./GetData');
// Consume the exported promise; the objects are logged once they have been read.
getObjs.objectsPromise
  .then(console.log)
  .catch(error => {
    // Report the failure rather than swallowing it; replace with real handling as needed.
    console.error(error);
  });
Again note the error handler.
The problem happens because you're trying to use the objects in AppendData.js before they are loaded on GetData.js due to fs.createReadStream being asynchronous. To fix this just make module.exports be a function that expect a callback in GetData.js like:
'use strict';
const fs = require('fs');
module.exports = function(callback) {
let res = '', resObjs = [];
let fin = fs.createReadStream('F:/RahulVerma/NodeJS/data.json', 'utf-8');
fin.on('data', data => {
if(data.length > 0) res += data;
}).on('end', () => {
if(res.length > 0) {
let resArr = res.trim().split(' ');
for(let i = 0; i < resArr.length; i++) {
resObjs.push(JSON.parse(resArr[i]));
}
callback(resObjs); // call the callback with the array of results
}
});
}
Which you can then use like this in AppendData.js:
'use strict';
const fs = require('fs');
const getObjs = require('./GetData'); // getObjs is now a function
getObjs(function(objects) {
console.log(objects);
});

How to correctly extract text from a pdf using pdf.js

I'm new to ES6 and Promise. I'm trying pdf.js to extract texts from all pages of a pdf file into a string array. And when extraction is done, I want to parse the array somehow. Say pdf file(passed via typedarray correctly) has 4 pages and my code is:
// Accumulates the text fragments of every page.
let str = [];
PDFJS.getDocument(typedarray).then(function(pdf) {
// Each iteration starts its own getPage/getTextContent chain; nothing waits for all of them.
for(let i = 1; i <= pdf.numPages; i++) {
pdf.getPage(i).then(function(page) {
page.getTextContent().then(function(textContent) {
for(let j = 0; j < textContent.items.length; j++) {
str.push(textContent.items[j].str);
}
// Runs once per page, when that page's text arrives — hence one parse call per page.
parse(str);
});
});
}
});
It manages to work, but, of course, the problem is my parse function is called 4 times. I just want to call parse only after all 4-pages-extraction is done.
Similar to https://stackoverflow.com/a/40494019/1765767 -- collect page promises using Promise.all and don't forget to chain then's:
/**
 * Extracts the text of every page of a PDF and joins it into one string.
 *
 * @param {*} pdfUrl - whatever pdfjsLib.getDocument accepts as its source
 * @returns {Promise<string>} the text of all pages, in page order
 */
function gettext(pdfUrl){
  var loadingTask = pdfjsLib.getDocument(pdfUrl);
  // Use the loading task's `promise` when it has one; otherwise treat the task as a thenable.
  var documentPromise = loadingTask.promise || loadingTask;
  return documentPromise.then(function(pdf) { // get all pages text
    var maxPages = pdf.numPages; // page count exposed on the document itself
    var countPromises = []; // collecting all page promises
    for (var j = 1; j <= maxPages; j++) {
      countPromises.push(pdf.getPage(j).then(function(page) { // add page promise
        return page.getTextContent().then(function(text){ // return content promise
          return text.items.map(function (s) { return s.str; }).join(''); // value page text
        });
      }));
    }
    // Wait for all pages and join text
    return Promise.all(countPromises).then(function (texts) {
      return texts.join('');
    });
  });
}
// waiting on gettext to finish completion, or error
gettext("https://cdn.mozilla.net/pdfjs/tracemonkey.pdf").then(function (text) {
alert('parse ' + text);
},
function (reason) {
console.error(reason);
});
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
A slightly cleaner version of @async5's answer, updated for the latest version of "pdfjs-dist": "^2.0.943"
import PDFJS from "pdfjs-dist";
import PDFJSWorker from "pdfjs-dist/build/pdf.worker.js"; // add this to fit 2.3.0
PDFJS.disableTextLayer = true;
PDFJS.disableWorker = true; // not available anymore since 2.3.0 (see imports)
/** Resolves with the concatenated text tokens of page `pageNo`. */
const getPageText = async (pdf: Pdf, pageNo: number) => {
  const page = await pdf.getPage(pageNo);
  const { items } = await page.getTextContent();
  return items.map(({ str }) => str).join("");
};
/* see example of a PDFSource below */
/** Resolves with the text of every page, pages separated by a single space. */
export const getPDFText = async (source: PDFSource): Promise<string> => {
  Object.assign(window, {pdfjsWorker: PDFJSWorker}); // added to fit 2.3.0
  const pdf: Pdf = await PDFJS.getDocument(source).promise;
  const pageTexts = await Promise.all(
    Array.from({ length: pdf.numPages }, (_, index) => getPageText(pdf, index + 1))
  );
  return pageTexts.join(" ");
};
This is the corresponding typescript declaration file that I have used if anyone needs it.
// Minimal hand-written typings covering only the members used by the text extraction code.
declare module "pdfjs-dist";
// One text token of a page.
type TokenText = {
str: string;
};
// Result of getTextContent(): the list of tokens on a page.
type PageText = {
items: TokenText[];
};
// A loaded page.
type PdfPage = {
getTextContent: () => Promise<PageText>;
};
// A loaded document.
type Pdf = {
numPages: number;
getPage: (pageNo: number) => Promise<PdfPage>;
};
// What the extraction function accepts as its input.
type PDFSource = Buffer | string;
declare module 'pdfjs-dist/build/pdf.worker.js'; // needed in 2.3.0
Example of how to get a PDFSource from a File with Buffer (from node types) :
file.arrayBuffer().then((ab: ArrayBuffer) => {
const pdfSource: PDFSource = Buffer.from(ab);
});
Here's a shorter (not necessarily better) version:
/**
 * Loads a PDF from `data` and resolves with the text of all pages joined together.
 */
async function getPdfText(data) {
  const doc = await pdfjsLib.getDocument({data}).promise;
  // Resolves with the joined token strings of one page.
  const readPage = async (pageNumber) => {
    const page = await doc.getPage(pageNumber);
    const content = await page.getTextContent();
    return content.items.map(token => token.str).join('');
  };
  const pageCount = doc.numPages;
  const pending = [];
  // Start every page request up front; they are awaited together below.
  for (let pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
    pending.push(readPage(pageNumber));
  }
  const pageTexts = await Promise.all(pending);
  return pageTexts.join('');
}
Here, data is a string or buffer (or you could change it to take the url, etc., instead).
Here's another Typescript version with await and Promise.all based on the other answers:
import { getDocument } from "pdfjs-dist";
import {
  DocumentInitParameters,
  PDFDataRangeTransport,
  TypedArray,
} from "pdfjs-dist/types/display/api";
/**
 * Loads the document, fetches all pages, then all text contents, and resolves with
 * the token strings of every page joined into one string.
 */
export const getPdfText = async (
  src: string | TypedArray | DocumentInitParameters | PDFDataRangeTransport
): Promise<string> => {
  const pdf = await getDocument(src).promise;
  const pageRequests = [];
  for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber += 1) {
    pageRequests.push(pdf.getPage(pageNumber));
  }
  const pages = await Promise.all(pageRequests);
  const contents = await Promise.all(pages.map((page) => page.getTextContent()));
  const pageTexts = contents.map((content) =>
    content.items.map((item) => item.str).join("")
  );
  return pageTexts.join("");
};
If you use the PDFViewer component, here is my solution that doesn't involve any promise or asynchrony:
/**
 * Concatenates the text layer strings of every page view of `viewer`.
 *
 * @param viewer - object exposing `pagesCount` and `getPageView(index)`
 * @returns {string} all `textContentItemsStr` entries, in page order
 */
function getDocumentText(viewer) {
  let collected = '';
  let pageIndex = 0;
  while (pageIndex < viewer.pagesCount) {
    const layer = viewer.getPageView(pageIndex).textLayer;
    for (const fragment of layer.textContentItemsStr) {
      collected += fragment;
    }
    pageIndex += 1;
  }
  return collected;
}
I wouldn't know how to do it either, but thanks to async5 I did it. I copied his code and updated it to the new version of pdf.js.
I made minimal corrections and also took the liberty of not grouping all the pages into a single string. In addition, I used a regular expression that removes many of the empty spaces that PDF unfortunately ends up creating (it does not solve all cases, but the vast majority).
The way I did it should be the way that most will feel comfortable working, however, feel free to remove the regex or make any other changes.
// pdf-to-text.js v1, require pdf.js ( https://mozilla.github.io/pdf.js/getting_started/#download )
// load pdf.js and pdf.worker.js
/**
 * Extracts the text of every page of a PDF.
 *
 * @param {*} url - whatever pdfjsLib.getDocument accepts as its source
 * @param {string} [separator=' '] - string placed between the text items of a page
 * @returns {Promise<string[]>} one whitespace-normalised string per page, in page order
 */
function pdfToText(url, separator = ' ') {
  let pdf = pdfjsLib.getDocument(url);
  return pdf.promise.then(function(pdf) { // get all pages text
    let maxPages = pdf.numPages; // public page count instead of the private _pdfInfo field
    let countPromises = []; // collecting all page promises
    for (let i = 1; i <= maxPages; i++) {
      let page = pdf.getPage(i);
      countPromises.push(page.then(function(page) { // add page promise
        let textContent = page.getTextContent();
        return textContent.then(function(text) { // return content promise
          return text.items.map(function(obj) {
            return obj.str;
          }).join(separator); // value page text
        });
      }));
    }
    // wait for all pages and join text
    return Promise.all(countPromises).then(function(texts) {
      // collapse runs of whitespace and trim each page's text
      return texts.map(function(pageText) {
        return pageText.replace(/\s+/g, ' ').trim();
      });
    });
  });
}
// example of use:
// waiting on pdfToText to finish completion, or error
pdfToText('files/pdf-name.pdf').then(function(pdfTexts) {
console.log(pdfTexts);
// RESULT: ['TEXT-OF-PAGE-1', 'TEXT-OF-PAGE-2', ...]
}, function(reason) {
console.error(reason);
});

Categories

Resources