Generators for asynchronous iteration of a large file - JavaScript

Say I have a function called openShapeFile, which reads a file and produces a Promise wrapping a source object. That source object has a read function, which returns a Promise wrapping the actual value in the shapefile, along with a .done boolean that can be used to tell whether the end of the file has been reached.
In effect, the shapefile.open from here:
https://www.npmjs.com/package/shapefile
If I now want to read a file into a database, I can say:
openShapeFile(`shapefile.shp`).then((source) => source.read()
  .then(function log(result) {
    if (result.done) {
      return
    } else {
      let query = `INSERT INTO geodata(geometry, id, featcode) VALUES(ST_GeomFromGeoJSON('${
        JSON.stringify(Object.assign({}, result.value.geometry, {coordinates: result.value.geometry.coordinates.map(JSON.stringify)}))
      }'), '${
        result.value.properties.ID
      }', ${
        result.value.properties.FEATCODE
      });`
      query = query.split('"[[').join('[[').split(']]"').join(']]')
      return pool.query(query).then((result) => {
        return source.read().then(log)
      })
    }
  })).then(() => console.log(dirCount)).catch(err => 'Problem here')
This just about works, but it relies on a recursive Promise chain (weird).
So as an exercise and/or to see if it would produce more clarity, I decided to rewrite it with generators, producing something like this:
function *insertQuery(query) {
  const result = pool.query(query)
  return result
}

const shapeFileGenerator = co.wrap(function* (source) {
  while (true) {
    const result = yield source.read()
    if (result.done) {
      return yield {}
    } else {
      let query = `INSERT INTO geodata(geometry, id, featcode) VALUES(ST_GeomFromGeoJSON('${
        JSON.stringify(Object.assign({}, result.value.geometry, {coordinates: result.value.geometry.coordinates.map(JSON.stringify)}))
      }'), '${
        result.value.properties.ID
      }', ${
        result.value.properties.FEATCODE
      });`
      query = query.split('"[[').join('[[').split(']]"').join(']]')
      yield* insertQuery(query)
    }
  }
})
openShapeFile(`shapefile.shp`).then((source) => {
  const s = shapeFileGenerator(source)
})
Now this works! It reads all of the data!
However, I kind of hate the infinite loop, and I never call .next directly. How can I rework this? What would be an idiomatic way to do something like this with generators? It seems like I should be able to write a proper generator with s.next() resulting in a source.read()?

I would write
async function readFileToDB(filename) {
  const source = await openShapeFile(filename);
  for (let {value, done} = await source.read(); !done; {value, done} = await source.read()) {
    const query = `INSERT INTO geodata(geometry, id, featcode) VALUES(ST_GeomFromGeoJSON('${
      JSON.stringify(value.geometry)
    }'), '${
      value.properties.ID
    }', ${
      value.properties.FEATCODE
    });`
    const result = await pool.query(query);
  }
  console.log(dirCount);
}
readFileToDB(`shapefile.shp`).catch(err => console.error('Problem here', err));
Though I don't think there's anything wrong with the recursive solution either.
It seems like I should be able to write a proper generator with s.next() resulting in a source.read()?
No, generators are synchronous. You might want to have a look at the async iteration proposal though.
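The async iteration proposal has since landed in the language (async generators and for await...of). As a minimal sketch, assuming the same openShapeFile/source.read() interface described in the question:

async function* readShapeFile(filename) {
  const source = await openShapeFile(filename);
  // keep pulling records until the source reports done
  let result = await source.read();
  while (!result.done) {
    yield result.value;
    result = await source.read();
  }
}

async function main() {
  // for await...of drives the async generator, so each .next() really does
  // translate into a source.read() under the hood
  for await (const record of readShapeFile(`shapefile.shp`)) {
    console.log(record.properties.ID);
  }
}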

You can code your logic as if it were synchronous and execute it via the sequential executor nsynjs. Below is a slightly modified working example, tested on this file:
main.js:
var nsynjs = require('nsynjs');
var shapefile = require('shapefile');
function synchronousCode(shapefile /*, pool */) {
  var source = shapefile.open('UScounties.shp').data;
  var result = source.read().data;
  while (result && !result.done) {
    var query = "INSERT INTO geodata(geometry, id, featcode) VALUES('" +
      JSON.stringify(Object.assign({}, result.value.geometry, {coordinates: result.value.geometry.coordinates.map(JSON.stringify)})) +
      "'), '" +
      result.value.properties.ID +
      "'," +
      result.value.properties.FEATCODE +
      "')";
    console.log(query.length);
    // uncomment line below to sequentially insert to the DB
    // var queryRes = pool.query(query).data;
    result = source.read().data;
  }
}

nsynjs.run(synchronousCode, null, shapefile /*, pool */, function () {
  console.log('all done');
})
Nsynjs automatically detects when a function call returns a promise. If it does, nsynjs waits for the promise to resolve, puts the result into the data property, and only then continues to the next expression.
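As a minimal sketch of that mechanism, using only the nsynjs API shown above and a made-up delayed helper:

var nsynjs = require('nsynjs');

function delayed(ms, value) {
  return new Promise(function (resolve) {
    setTimeout(function () { resolve(value); }, ms);
  });
}

function syncBody(delayed) {
  // nsynjs pauses here until the promise resolves,
  // then exposes the resolved value on .data
  var v = delayed(500, 'hello').data;
  console.log(v); // 'hello'
}

nsynjs.run(syncBody, null, delayed, function () {
  console.log('done');
});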

Related

Why does this recursive function not run asynchronously?

I have a start(node, array) function that should perform a DFS by traversing an object tree via recursive calls to an API through callMsGraph(token, end), until image properties are found at the end of the tree, at which point they are pushed to array. The function seems like it works, but I can't get the output unless I wrap it in a 2-second setTimeout, which indicates the recursion is not being waited on to complete. I would want to play around with async/await more, but it's not at the top level.
I'm not sure if the nextNode.then is doing anything, or whether callMsGraph() needs to be awaited differently from how I'm doing it. A solution would be much appreciated.
shelfdb.data = async (accessToken) => {
  const token = accessToken;
  const endpoint = 'https://graph.microsoft.com/v1.0/sites/webgroup.sharepoint.com,23e7ef7a-a529-4dde-81ba-67afb4f44401,0fa8e0f7-1c76-4ad0-9b6e-a485f9bfd63c/drive/items/01GNYB5KPQ57RHLPZCJFE2QMVKT5U3NYY3/children'

  function start(node, array) {
    if (node.value.length > 0) {
      node.value.forEach(function(child) {
        var end = 'https://graph.microsoft.com/v1.0/sites/webgroup.sharepoint.com,23e7ef7a-a529-4dde-81ba-67afb4f44401,0fa8e0f7-1c76-4ad0-9b6e-a485f9bfd63c/drive/items/' + child.id + '/children';
        var nextNode = callMsGraph(token, end);
        nextNode.then(function(currResult) {
          if (currResult.value.length > 0) {
            if ('image' in currResult.value[0]) {
              currResult.value.forEach(function(imgChild) {
                let img = {
                  'name': imgChild.name,
                  'job': imgChild.parentReference.path.split("/")[6],
                  'path': imgChild.webUrl,
                  'id': imgChild.id
                }
                array.push(img);
              })
              // complete storing images at tail object, go one level up after loop
              return;
            }
            // if no 'image' or value, go into child
            start(currResult, array);
          }
        }).catch(function(e) {
          console.error(e.message);
        })
      })
    }
    return array;
  }

  var res = await callMsGraph(token, endpoint); // start recursion
  var output = start(res, []);
  console.log(output); // only displays value if wrapped in setTimeout
  return output; // empty []
}
Each query to the API via callMsGraph() returns an object like this, where subsequent queries are made with the id of each object/folder (as the new endpoint) in value, until an object with an image property is found. The MS Graph API requires that folders are expanded at each level to access their children.
{
  id: '01GNYB5KPQ57RHLPZCJFE2QMVKT5U3NYY3',
  value: [
    {
      id: '01GNYB5KJMH5T4GXADUVFZRSITWZWNQROS',
      name: 'Folder1',
    },
    {
      id: '01GNYB5KMJKILOFDZ6PZBZYMXY4BGOI463',
      name: 'Folder2',
    }
  ]
}
This is the callMsGraph() helper:
function callMsGraph(accessToken, graphEndpoint) {
  const headers = new Headers();
  const bearer = `Bearer ${accessToken}`;
  headers.append("Authorization", bearer);

  const options = {
    method: "GET",
    headers: headers
  };

  return fetch(graphEndpoint, options)
    .then(response => response.json())
    .catch(error => {
      console.log(error);
      throw error;
    });
}
The rule with promises is that once you opt into one (more likely, are forced into it by a library), all code that needs to block for a result anywhere after it also has to await. You can't "go back" to sync, and if even a single piece of the promise chain between where the promise starts and where you want its result isn't awaited, the result will be unreachable*.
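A minimal illustration of that rule, with made-up names:

function getData() {
  return Promise.resolve(42);
}

function broken() {
  let result;
  getData().then(v => { result = v; }); // fired, but nothing waits for it
  return result; // undefined: the .then callback hasn't run yet
}

async function fixed() {
  const result = await getData(); // suspends this async function until resolved
  return result; // 42 (wrapped in a promise, so callers must await too)
}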
Taking a snippet of the code:
function start(node, array) { // not async!
  // ..
  node.value.forEach(function(child) { // doesn't await!
    // ..
    nextNode.then(function(currResult) {
      // this promise is not hooked up to anything!
      start(...) // recurse without await!
There's no await in front of then, start doesn't return a promise and isn't awaited recursively, and forEach has no way to await its callback's asynchronous results, so each promise in the nextNode.then chain is orphaned into the void forever*.
The solution is a structure like this:
async function start(node, array) {
  // ..
  for (const child of node.value) {
    // ..
    const currResult = await callMsGraph(token, end);
    // ..
    await start(...);
    array.push(currResult);
  }
  // returns a promise implicitly
}

// ..
await start(...);
// `array` is populated here
Or Promise.all, which runs in parallel and returns an array (which could replace the parameter array):
function start(node, array) {
  return Promise.all(node.value.map(async child => {
    const currResult = await callMsGraph(token, end);
    // ..
    await start(...);
    return currResult;
  }));
}
I'd be happy to provide a minimal, runnable example, but the code you've provided isn't runnable, so you'll have to massage this a bit to work for you. If you make sure to await everything, you're good to go (and generally avoid mixing .then and async/await; the latter seems easier for this use case).
* (for all practical intents and purposes)
There are a few places where you are not handling the promises returned in your code. nextNode.then in your forEach loop is just "called": the next line of code will not wait for it to complete, and the forEach loop will finish executing before the then callbacks are called.
I changed your code a bit, but I have no way to check whether it works correctly, since I would need to populate dummy data for callMsGraph. If you encounter any issues, tell me and I'll modify the answer.
shelfdb.data = async (accessToken) => {
  const token = accessToken;
  const endpoint = 'https://graph.microsoft.com/v1.0/sites/webgroup.sharepoint.com,23e7ef7a-a529-4dde-81ba-67afb4f44401,0fa8e0f7-1c76-4ad0-9b6e-a485f9bfd63c/drive/items/01GNYB5KPQ57RHLPZCJFE2QMVKT5U3NYY3/children'
  const images = [];

  async function start(node, array) {
    if (node.value.length <= 0) return array; // or === 0 or whatever
    for (const child of node.value) {
      const end = `https://graph.microsoft.com/v1.0/sites/webgroup.sharepoint.com,23e7ef7a-a529-4dde-81ba-67afb4f44401,0fa8e0f7-1c76-4ad0-9b6e-a485f9bfd63c/drive/items/${child.id}/children`;
      const nextNode = await callMsGraph(token, end);
      if (nextNode.value.length > 0) {
        if ('image' in nextNode.value[0]) {
          const mapped = nextNode.value.map(imgChild => {
            return {
              'name': imgChild.name,
              'job': imgChild.parentReference.path.split("/")[6],
              'path': imgChild.webUrl,
              'id': imgChild.id
            }
          });
          array.push(...mapped);
        }
        // if no 'image' or value, go into child
        await start(nextNode, array);
      }
    }
    return array;
  }

  var res = await callMsGraph(token, endpoint);
  var output = await start(res, []);
  console.log(output);
  return output;
}
Also, please feel free to add try {} catch {} blocks wherever you need them; I skipped them.
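For example, a sketch of one place a try/catch could go, reusing the names from the code above (the shortened end URL is illustrative):

async function start(node, array) {
  for (const child of node.value) {
    const end = `https://graph.microsoft.com/v1.0/sites/.../drive/items/${child.id}/children`;
    try {
      const nextNode = await callMsGraph(token, end);
      await start(nextNode, array);
    } catch (e) {
      // log and skip this child rather than aborting the whole traversal
      console.error('Graph call failed for child', child.id, e);
    }
  }
  return array;
}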

How to wait for the iteration to finish when pushing result of a callback function into an array

What is the correct way to implement array.push so that array_of_results is returned after the forEach iteration is finished?
const postgres = require("./postgres");

function get_array(value) {
  var array_of_results = []
  value.forEach( item => {
    postgres.query(item["id"], function(res) {
      console.log(res) // gives proper res after empty array
      array_of_results.push(res);
    })
  });
  console.log(array_of_results) // prints empty array
  return array_of_results;
}
Edit:
and postgres.js looks like:
const { Pool } = require("pg");
const pool = new Pool();
var query_string = "select...."

function query(id, call) {
  pool.query(query_string, [id], (err, res) => {
    if (err) {
      console.log(err.stack)
    } else {
      call(res.rows[0])
    }
  })
}

module.exports = {
  query
}
There are a few ways to do this, but first you need to understand what is actually happening.
In postgres.query(item["id"], function(res){ you are calling postgres.query with (1) an item ID and (2) a callback function. That call happens, and then execution immediately continues in your calling code. So now you've just sent a bunch of requests to your database and then immediately returned an empty array. Those callbacks (2) have not been called yet.
To get the data back to your calling function, you'll need to either pass a callback instead of using return, or change to async/await.
Using async/await in every iteration of your loop is not as efficient, as you're waiting for each call to return sequentially. For the most efficient method, you will need to fire the requests and wait for them all to complete. You can do this by using promises.
You can modify your code to push a promise into an array for each iteration of the loop, then call (and await) Promise.all on the array of promises.
Here's a basic rewrite for you:
postgres.js:
function query(id) {
  return new Promise((resolve, reject) => {
    pool.query(query_string, [id], (err, res) => {
      if (err) {
        console.log(err.stack)
        reject(err)
      } else {
        resolve(res.rows[0])
      }
    })
  })
}

module.exports = {
  query
}
get_array implementation:
async function get_array(value) {
  var array_of_promises = [], array_of_results = []
  value.forEach( item => {
    array_of_promises.push(postgres.query(item["id"]));
  });
  array_of_results = await Promise.all(array_of_promises);
  console.log(array_of_results) // prints populated array
  return array_of_results;
}
Note that when you call get_array you'll have to use await before the call, e.g. change var array = get_array(items) to var array = await get_array(items). Using await in a function requires it to be declared as an async function.
If you can't declare it as an async function, you may change the calling code to consume the promise:
var arrayPromise = get_array(items);
arrayPromise.then((results) => {
  // do something with results
  // but remember you cannot _return_ from within a callback, as discussed above
});

Sequential execution of Promise.all

Hi, I need to execute promises one after the other. How do I achieve this using Promise.all? Any help would be awesome. Below is a sample of the code I am currently using, but it executes in parallel, so the search will not work properly.
public testData: any = (req, res) => {
  // This method is called first via API and then the promise is triggered
  var body = req.body;
  // set up data, e.g. 2 is repeated twice so insert 2, 5 only once into DB
  // Assuming we can't control the data, and also maybe 3 may be inside the DB
  let arrayOfData = [1,2,3,2,4,5,5];
  const promises = arrayOfData.map(this.searchAndInsert.bind(this));
  Promise.all(promises)
    .then((results) => {
      // we only get here if ALL promises fulfill
      console.log('Success', results);
      res.status(200).json({ "status": 1, "message": "Success data" });
    })
    .catch((err) => {
      // Will catch failure of first failed promise
      console.log('Failed:', err);
      res.status(200).json({ "status": 0, "message": "Failed data" });
    });
}

public searchAndInsert: any = (data) => {
  // There are database operations happening here like searching for other
  // entries in the JSON and inserting to DB
  console.log('Searching and updating', data);
  return new Promise((resolve, reject) => {
    // This is not another function, it's just written here to make the code readable
    if (dataExistsInDB(data) == true) {
      resolve(data);
    } else {
      // This is not another function, it's just written here to make the code readable
      insertIntoDB(data).then(() => resolve(data));
    }
  });
}
I looked it up on Google and saw that reduce will help. I would appreciate any help on how to convert this to reduce, or any method you suggest (concurrency in .map did not work).
Promises unfortunately do not allow any control of their flow. It means: once you create a new Promise, it starts doing its asynchronous work right away.
Promise.all does not change this. Its only purpose is to track all the promises you put into it; it resolves once all of them are finished (or rejects when one of them fails).
To be able to create and control an asynchronous flow, the easiest way is to wrap the creation of each Promise in a function, creating a kind of factory method. Then instead of creating all the promises upfront, you create only one promise when you need it, wait until it is resolved, and then continue in the same fashion.
async function doAllSequentially(fnPromiseArr) {
  for (let i = 0; i < fnPromiseArr.length; i++) {
    const val = await fnPromiseArr[i]();
    console.log(val);
  }
}

function createFnPromise(val) {
  return () => new Promise(resolve => resolve(val));
}

const arr = [];
for (let j = 0; j < 10; j++) {
  arr.push(createFnPromise(Math.random()));
}

doAllSequentially(arr).then(() => console.log('finished'));
PS: It is also possible without async/await, using standard promise chains, but it requires recursion.
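For example, here is a sketch of the same sequential runner built on a plain promise chain with recursion (same fnPromiseArr of promise-returning functions as above):

function doAllSequentiallyChained(fnPromiseArr, i = 0) {
  if (i >= fnPromiseArr.length) return Promise.resolve();
  // run one task, log it, then recurse into the next link of the chain
  return fnPromiseArr[i]().then(val => {
    console.log(val);
    return doAllSequentiallyChained(fnPromiseArr, i + 1);
  });
}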
If anyone else cares about ESLint complaining about the use of for and the "no await in loop" rule, here is a TypeScript, ESLint-friendly version of the above answer:
async function runPromisesSequentially<T>(promises: Array<Promise<T>>): Promise<Array<T>> {
  if (promises.length === 0) return [];
  const [firstElement, ...rest] = promises;
  return [await firstElement, ...(await runPromisesSequentially(rest))];
}
You can then just replace Promise.all by runPromisesSequentially.
#lmX2015's answer is close, but it takes in promises that have already started executing.
A slight modification fixes it:
export async function runPromisesSequentially<T>(functions: (() => Promise<T>)[]): Promise<T[]> {
  if (functions.length === 0) {
    return [];
  }
  const [first, ...rest] = functions;
  return [await first(), ...(await runPromisesSequentially(rest))];
}
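A usage sketch with illustrative tasks: each element is a function that creates its promise only when called, so nothing starts running before its turn:

const tasks = [1, 2, 3].map(n => () => Promise.resolve(n * 2));

runPromisesSequentially(tasks).then(results => {
  console.log(results); // [2, 4, 6]
});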

How to write an arbitrarily long Promise chain

I receive an object bigListFromClient that includes an arbitrary number of objects each of which may have an arbitrary number of children. Every object needs to be entered into my database, but the DB needs to assign each of them a unique ID and child objects need to have the unique ID of their parents attached to them before they are sent off to the DB.
I want to create some sort of Promise or other calling structure that would call itself asynchronously until it reached the last object in bigListFromClient but I'm having trouble figuring out how to write it.
for (let i = 0; i < bigListFromClient.length; i++) {
  makeDbCallAsPromise(bigListFromClient[i].queryString, console.log); // I'm not just accepting anything from a user here, but how I get my queryString is kind of out of scope for this question
  for (let j = 0; j < bigListFromClient[i].children.length; j++) {
    // the line below obviously doesn't work, I'm trying to figure out how to do this with something other than a for loop
    makeDbCallAsPromise(bigListFromClient[i].children[j].queryString + [the uniqueID from the DB to insert this correctly as a child], console.log);
  }
}
//this promise works great
makeDbCallAsPromise = function(queryString) {
  return new Promise((resolve, reject) => {
    const connection = mysql.createConnection(connectionCredentials);
    connection.connect();
    const query = queryString;
    connection.query(query, function (err, rows, fields) {
      if (!err) {
        resolve(rows);
      } else {
        console.log('Error while performing Query.');
        console.log(err.code);
        console.log(err.message);
        reject(err);
      }
    });
    connection.end();
  })
};
My attempts at solving this on my own are so embarrassingly bad that even describing them to you would be awful.
While I could defer all the calls to creating children until the parents have been created in the DB, I wonder if the approach I've described is possible.
There are essentially two ways to do this: one is making the database calls sequential, and the other is making the calls parallel.
JavaScript has a built-in function for the parallel case called Promise.all: you pass it an array of Promise instances, and it returns a Promise that resolves to an array of their results.
In your case your code would look like this:
const result = Promise.all(
  bigListFromClient.map(item =>
    makeDbCallAsPromise(item.queryString).then(result =>
      Promise.all(
        item.children.map(child =>
          makeDbCallAsPromise(child.queryString + [result.someId])
        )
      )
    )
  )
)
result will now contain a Promise that resolves to an array of arrays. These arrays contain the results of inserting the children.
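If you want the child results in one flat array, a small usage sketch (Array.prototype.flat is ES2019):

result.then(nestedResults => {
  console.log(nestedResults.flat());
});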
Using a more modern approach (with async await), sequential and with all results in a flat array:
const result = await bigListFromClient.reduce(
  async (previous, item) => {
    const previousResults = await previous
    const result = await makeDbCallAsPromise(item.queryString)
    const childResults = await item.children.reduce(
      async (acc, child) =>
        [...(await acc), await makeDbCallAsPromise(child.queryString + [result.someId])],
      []
    )
    return [...previousResults, result, ...childResults]
  },
  []
)
Depending on what you want to achieve and how you want to structure your code you can pick and choose from the different approaches.
For this sort of operation, try looking into bulk inserting. If you are intent on performing a single DB query/transaction per iteration, loop recursively over each parent and/or execute the same procedure for each child.
const dbCall = async (elm) => {
  elm.id = Math.random().toString(36).substring(7)
  if (elm.children) {
    await Promise.all(elm.children.map(child => {
      child.parentId = elm.id
      return dbCall(child)
    }))
  }
  return elm
}
const elms = [
  {
    queryString: '',
    children: [
      {
        queryString: ''
      }
    ]
  }
]

Promise.all(elms.map(dbCall)).then(results => /* ... */)

ES6 Dynamic Promise Chaining from array

Scenario
I have an array of URLs that I need to download; however, each must also be supplied with a unique transaction ID that must be requested from the server and only increments when a request is successful.
Problem
As I loop through the array, I need to wait for both the request for the transaction ID and the request for the file to complete before starting the next iteration of the loop, but the number of files is not fixed, so I need to dynamically build a chain of promises.
Pseudocode
Below is some pseudocode. getFilesFromArray() is the problem, because all the requests get the same transaction ID, as they don't wait for the previous request to finish.
function getTransactionId() {
  return new Promise((resolve, reject) => {
    let id = getNextTransactionId();
    if (id != error) {
      resolve(id);
    } else {
      reject(error);
    }
  })
}
function getFile(url, transactionId) {
  return new Promise((resolve, reject) => {
    http.request(url + transactionId, function(err, response) {
      if (err) {
        reject(err);
      } else {
        resolve(response);
      }
    });
  });
}
function getFilesFromArray(urlArray) {
  for (let url of urlArray) {
    getTransactionId().then(id => getFile(url, id), err => console.error(err));
  }
}
Question
How do I chain promises together dynamically?
Answer
Here's a JSFiddle of Ovidiu's answer
A functional approach is to use reduce to iterate and return a final promise chained up from each sub-promise. It also helps with building up the results, e.g. in an array:
function getFilesFromArray(urlArray) {
  const filesPromise = urlArray.reduce((curPromise, url) => {
    return curPromise
      .then(curFiles => {
        return getTransactionId()
          .then(id => getFile(url, id))
          .then(newFile => [...curFiles, newFile]);
      });
  }, Promise.resolve([]));

  filesPromise.then(files => {
    console.log(files);
  });
}
This effectively builds a promise chain that:
- starts with a static Promise with a value of [] representing the initial (empty) set of files: Promise.resolve([])
- on each iteration, returns a promise that waits for the curPromise in the chain and then
- performs getTransactionId and uses the id to getFile
- once the file is retrieved, returns an array containing the curFiles set from curPromise (the previous values) with the newFile concatenated onto it
The end result will be a single promise with all the files collected.
You can do something along these lines:
function getAllFiles(i, results, urlArray) {
  if (i == urlArray.length) return Promise.resolve(results);
  return getTransactionId().then(id => {
    return new Promise((resolve, reject) => {
      http.request(urlArray[i] + id, (err, response) => {
        if (err) {
          reject(err);
        } else {
          results.push(response);
          resolve();
        }
      });
    });
  }).then(() => {
    return getAllFiles(i + 1, results, urlArray);
  });
}
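With the returns added above, the caller can tell when the traversal is done. A usage sketch, assuming urlArray is defined as in the question:

getAllFiles(0, [], urlArray).then(results => console.log(results));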
Try using async/await.
Read more here
async function getFilesFromArray(urlArray) {
  for (let url of urlArray) {
    // wrap this in a try/catch block if you want to continue with execution
    // if you receive an error from one of the functions
    const transactionId = await getTransactionId()
    const file = await getFile(url, transactionId)
  }
}
You can simplify the logic if you run it via the synchronous executor nsynjs. Nsynjs will pause when some function evaluates to a promise and then assign the result to its data property. The code will transform like this:
function getFilesFromArray(urlArray) {
  for (var i = 0; i < urlArray.length; i++) {
    var trId = getTransactionId().data;
    // trId is ready here
    var fileContent = getFile(urlArray[i], trId).data;
    // file data is ready here
    console.log('fileContent=', fileContent);
  };
};

nsynjs.run(getFilesFromArray, {}, urls, function() {
  console.log('getFilesFromArray is done');
});
getFilesFromArray can be further simplified to:
function getFilesFromArray(urlArray) {
  for (var i = 0; i < urlArray.length; i++) {
    var fileContent = getFile(urlArray[i], getTransactionId().data).data;
    console.log('fileContent=', fileContent);
  };
};
