How to persist data through promise chain in NodeJS (Bluebird) - javascript

Follow-up to Swap order of arguments to "then" with Bluebird / NodeJS Promises (the posted answer worked, but immediately revealed a new issue)
This is the first time I've ever used promises in NodeJS so I apologize if some conventions are poorly adhered to or the code is sloppy. I'm trying to aggregate data from multiple APIs, put it in a database, then compute some statistics based on similarities and differences in the data. As a starting point I'm trying to get an API token for a single one of the APIs.
Here is my full code:
var Promise = require('bluebird');
var fs = require('fs');
var request = require('request');
Promise.promisifyAll(fs);
Promise.promisifyAll(request);
// tilde-expansion doesn't follow the callback(err, data) convention
var tilde = function(str) {
var _tilde = require('tilde-expansion');
return new Promise(function(resolve, reject) {
try {
_tilde(str, resolve);
} catch(e) {
reject(e);
}
});
}
var getToken = function() {
return request.getAsync(process.env.token_url, {
headers: {
"Content-Type": "applications/x-www-form-urlencoded"
},
form: {
client_id: process.env.client_id,
client_secret: process.env.client_secret,
grant_type: "client_credentials"
}
})
.then(function(resp) { return resp.body; });
}
var tokenFile = tilde(process.env.token_file)
.catch(function(err) {
console.log("Error parsing path to file... can not recover");
});
var token = tokenFile
.then(fs.readFileAsync) //, "utf8")
.then(function(data) {
console.log("Token (from file): " + data);
return data;
})
.then(JSON.parse)
.catch(function(err) {
console.log("Error reading token from file... getting a new one");
return getToken()
.then(function(data) {
console.log("Token (from API): " + data);
return data;
})
.then(JSON.stringify)
.then(fs.writeFileAsync.bind(null, tokenFile.value()));
});
token.then(function(data) {
console.log("Token (from anywhere): " + token.value);
});
This code is currently logging:
Token: undefined
if I fall back to the API. Assuming I did my promise stuff correctly (.catch() can return a promise, right?) then I would assume the issue is occurring because fs.writeFileAsync returns void.
I would like to append a .return() on the end of this promise, but how would I gain access to the return value of getToken()? I tried the following:
.catch(function(err) {
console.log("Error reading token from file... getting a new one");
var token = "nope";
return getToken()
.then(function(data) {
console.log("Token (from API): " + data);
token = data;
return data;
})
.then(JSON.stringify)
.then(fs.writeFileAsync.bind(null, tokenFile.value()))
.return(token);
});
However this logs "nope".

Over the weekend I continued my research on promises and upon making a pivotal realization I was able to develop the solution to this. Posting here both the realization and the solution:
The Realization
Promises were invented so that asynchronous code could be used in a synchronous manner. Consider the following:
var data = processData(JSON.parse(readFile(getFileName())));
This is the equivalent of:
var filename = getFileName();
var fileData = readFile(filename);
var parsedData = JSON.parse(fileData);
var data = processData(parsedData);
If any one of these functions is asynchronous then it breaks, because the value isn't ready on time. So for those asynchronous bits we used to use callbacks:
var filename = getFileName();
var data = null;
readFile(filename, function(fileData){
data = processData(JSON.parse(fileData));
});
This is not only ugly, but breaks a lot of things like stack traces, try/catch blocks, etc.
The Promise pattern fixed this, letting you say:
var filename = getFileName();
var fileData = filename.then(readFile);
var parsedData = fileData.then(JSON.parse);
var data = parsedData.then(processData);
This code works regardless of whether these functions are synchronous or asynchronous, and there are zero callbacks. It's actually all synchronous code, but instead of passing values around, we pass promises around.
The led me to the realization that: for every bit of code that can be written with promises, there is a synchronous corollary
The solution
Realizing this, I tried to consider my code if all of the functions were synchronous:
try {
var tokenFile = tilde(process.env.token_file)
} catch(err) {
throw new Error("Error parsing path to file... can not recover");
}
var token = null;
try {
token = JSON.parse(readFile(tokenFile));
} catch(err) {
token = getToken();
writeFile(tokenFile, JSON.stringify(token));
}
console.log("Token: " + token.value);
After framing it like this, the promise version follows logically:
var tokenFile = tilde(process.env.token_file)
.catch(function(err) {
throw new Error("Error parsing path to file... can not recover");
});
var token = tokenFile
.then(readFile)
.then(JSON.parse)
.catch(function(err) {
var _token = getToken();
_token
.then(JSON.stringify)
.then(writeFile.bind(null, tokenFile.value));
return _token;
});

Related

Promises structure misunderstood

I have a problem with understanding Promises syntax.
So, what I am trying to do:
getPosts() gets some data from a DB then, I want to get some metadata for each row with another promise call, addMetadata(). Then, once all the metadata is fetched, I want to console it out.
See my attempt below:
var getPosts = function(){
return new Promise(function(resolve, reject){
postModel.find()
.exec(function(err, posts) {
resolve(posts);
);
});
};
var addMetadata = function(posts){
var options = {
host: 'localhost',
port: 3000,
path: '',
method: 'GET'
};
var postPromises = posts.map(function(post) {
return new Promise(function(resolve) {
options.path = '/api/user?id=' + post.profileID;
var req = http.get(options, function(res) {
var bodyChunks = [];
res.on('data', function(chunk) {
bodyChunks.push(chunk);
}).on('end', function() {
var body = Buffer.concat(bodyChunks);
var parsedBody = JSON.parse(body);
post.fullname = parsedBody.user.fullname;
post.profilePic = parsedBody.user.profilePic;
// resolve the promise with the updated post
resolve(post);
});
});
});
});
// Is this the right place to put Promise.all???
Promise.all(postPromises)
.then(function(posts) {
//What should I put here
});
};
getPosts()
.then(function(posts){
return addMetadata(posts);
})
.then(function(posts){//I get a little lost here
console.log();//posts is undefined
});
Of course, my understanding is wrong but I thought I was going the right way. Can someone please guide me to the right direction?
Thanks
Change
// Is this the right place to put Promise.all???
Promise.all(postPromises)
.then(function (posts) {
//What should I put here
});
into
// Is this the right place to put Promise.all???
return Promise.all(postPromises);
This way your addMetadata function will return Promise that resolve when all promises from postPromises resolves or reject if any of postPromises rejects.
The key point to understand the async concept of it and what time the content is available.
Reading this will help to put you in the right direction.
For instance:
var promise = new Promise(function(resolve, reject) {
resolve(1);
});
promise
.then(function(val) {
console.log(val); // 1
return val + 2;
})
.then(function(val) {
console.log(val); // 3
})
After as per your scenario, in order to have all the metadata Promise.all is the way to go.
Promise.all(arrayOfPromises).then(function(arrayOfResults) {
// One result per each promise of AddMetadata
})
What you wanna do here, if I am correct, is called streams, as you wanna call multiple paralel promises as your concept of looping through list of posts using map is not going to work this way
Take a look at this short video introducing streams Streams - FunFunFunction, he is using library for workin with streams called Baconjs
Here is a short example on streams
const stupidNumberStream = {
each: (callback) => {
setTimeout( () => callback(1), 3000 )
setTimeout( () => callback(2), 2000 )
setTimeout( () => callback(3), 1000 )
}
}
stupidNumberStream.each(console.log)
Your getPosts function is good in the sense that its only job is to promisfy the database find. (Though, I think if it's mongo, the exec produces a promise for you).
Your addMetadataToAPost is less good, because it mixes up processing an array of posts and "promisifying" the http.get. Use the same pattern you applied correctly in the first function and return a promise to do a single get and add metadata. (It would be even better to just wrap the get, which you can reuse, and build a simple add-metadata function that returns - rather than creates - a promise)
// renamed pedantically
var addMetadataToAPost = function(post) {
return new Promise(function(resolve) {
options.path = '/api/user?id=' + post.profileID;
var req = http.get(options, function(res) {
var bodyChunks = [];
res.on('data', function(chunk) {
bodyChunks.push(chunk);
}).on('end', function() {
var body = Buffer.concat(bodyChunks);
var parsedBody = JSON.parse(body);
post.fullname = parsedBody.user.fullname;
post.profilePic = parsedBody.user.profilePic;
// resolve the promise with the updated post
resolve(post);
});
});
});
}
Now your batching function is simple:
// also renamed pedantically
var addMetadataToAllPosts = function(posts){
var postPromises = posts.map(function(post) {
return addMetadataToAPost(post)
})
return Promise.all(postPromises)
};
Your original code should work...
getPosts().then(function(posts){
return addMetadataToAllPosts(posts);
})
.then(function(posts){
console.log(posts);//posts should be an array of posts with metadata added
});

Node.js web scraping with loop on array of URLs

I'm trying to build a little script to scrap some data. I'm some basics knowledge in javascript however I'm kind of lost with all the async callback or promises stuff. Here is what I have now :
url = "http://Blablablabla.com";
var shares = function(req, res) {
request(url, function (error, response, body) {
if (!error) {
var $ = cheerio.load(body),
share = $(".theitemIwant").html();
return res.send(url + ":" + share);
} else {
console.log("We've encountered an error: " + error);
}
})
}
So everything is fine with this piece of code. What I would like to do is :
Using an array of url var urls = [url1,url2,url3,etc...]
Storing my scrapped data into another array, something like this data = [{url: url1, shares: share},{url: url2, shares: share},etc...]
I know I need to use something like this data.push({ urls: url, shares: share})})
and I understand that I need to loop over my first url array to push data into my second data array.
however I'm kind of lost with the request method and the way I should deal with async issue in my situation.
thanks !
edit#1 :
I tried this to use promises :
var url = "www.blablabla.com"
var geturl = request(url, function (error, response, body) {
if (!error) { return $ = cheerio.load(body) } else
{ console.log("We've encountered an error: " + error); }
});
var shares = geturl.then( function() {
return $(".nb-shares").html();
})
but got the following error geturl.then is not a function
I think you should use async:
var async = require('async');
var urls = ["http://example.com", "http://example.com", "http://example.com"];
var data = [];
var calls = urls.map((url) => (cb) => {
request(url, (error, response, body) => {
if (error) {
console.error("We've encountered an error:", error);
return cb();
}
var $ = cheerio.load(body),
share = $(".theitemIwant").html();
data.push({ url, share })
})
})
async.parallel(calls, () => { /* YOUR CODE HERE */ })
You could do the same with promises, but I don't see why.
I took a stab at it. You need to install the q library and require it to
var Q = require('q');
//... where ever your function is
//start with an array of string urls
var urls = [ "http://Blablablabla.com", '...', '...'];
//store results in this array in the form:
// {
// url: url,
// promise: <will be resolved when its done>,
// share:'code that you wanted'
// }
var results = [];
//loop over each url and perform the request
urls.forEach(processUrl);
function processUrl(url) {
//we use deferred object so we can know when the request is done
var deferred = Q.defer();
//create a new result object and add it to results
var result = {
url: url,
promise: deferred.promise
};
results.push(result);
//perform the request
request(url, function (error, response, body) {
if (!error) {
var $ = cheerio.load(body),
share = $(".theitemIwant").html();
//resolve the promise so we know this request is done.
// no one is using the resolve, but if they were they would get the result of share
deferred.resolve(share);
//set the value we extracted to the results object
result.share = share;
} else {
//request failed, reject the promise to abort the chain and fall into the "catch" block
deferred.reject(error)
console.log("We've encountered an error: " + error);
}
});
}
//results.map, converts the "array" to just promises
//Q.all takes in an array of promises
//when they are all done it rull call your then/catch block.
Q.all(results.map(function(i){i.promise}))
.then(sendResponse) //when all promises are done it calls this
.catch(sendError); //if any promise fails it calls this
function sendError(error){
res.status(500).json({failed: error});
}
function sendResponse(data){ //data = response from every resolve call
//process results and convert to your response
return res.send(results);
}
Here is another solution I like a lot :
const requestPromise = require('request-promise');
const Promise = require('bluebird');
const cheerio = require('cheerio');
const urls = ['http://google.be', 'http://biiinge.konbini.com/series/au-dela-des-murs-serie-herve-hadmar-marc-herpoux-critique/?src=konbini_home']
Promise.map(urls, requestPromise)
.map((htmlOnePage, index) => {
const $ = cheerio.load(htmlOnePage);
const share = $('.nb-shares').html();
let shareTuple = {};
shareTuple[urls[index]] = share;
return shareTuple;
})
.then(console.log)
.catch((e) => console.log('We encountered an error' + e));

Returning values from Promise(Bluebird) in Node.js

I am learning Node.js and was looking at the Promise module. I am very new at this so please bare with bad code conventions and stuff. I am trying to build a REST Client API using restify. I have attached my Client Code below:
Here is my Client:
// local functions
function loadResult(err, req, res, obj) {
//console.log(err, req, res, obj);
var result = {
'err': err,
'text': obj
};
console.log(result);
return result;
}
function getClient() {
if (client) {
return client;
} else {
client = restify.createJsonClient(options);
return client;
}
};
function RestClient(headers) {
if (headers) {
global_headers.Authorization = headers.Authorization || global_headers.Authorization;
options.headers = global_headers;
}
};
RestClient.prototype.getClient = getClient;
RestClient.prototype.doGET = function doGET(path) {
return new Promise(function(resolve) {
client.get(path, loadResult);
}).then(function(result) {
console.log("before final return:" + result);
return result;
});
};
// exports
module.exports = RestClient;
In the console when I run the following lines:
var Rest = require('./testRest.js');
var restClient = new Rest();
var client = restClient.getClient();
var data = restClient.doGET('/hello/mark');
I can see the result printed just once on the console. But the Data is just what I believe a Promise. Is there a way for me to get the result in data? Also trying to figure out why the result never got printed the second time.
P.S.: I tried using Promise.promisifyAll(require('restify')) but it kept throwing some error saying fn is not a function which force use Promise in this way.
First of all, there is no such error when I do:
var Promise = require("bluebird");
Promise.promisifyAll(require("restify"));
Secondly, restify unfortunately uses some weird getters when exporting the module, as getters can (and do) have side effects, promisifyAll will not visit them, so the one line magic won't work. You need to promisify the classes manually, e.g.
Promise.promisifyAll(require("restify").JsonClient.prototype);
Then you would call:
client.getAsync(path).spread(function(req, res, obj) {
});

how to use deferred with https.request in nodejs

I use github to authenticate in my node application. I have constructed the following code:
var req = request(postOptions, function (res) {
res.on('data', function (d) {
...
var getOptions = parseUrl('https://api.github.com/user?access_token=' + accessToken);
...
var req = request(getOptions, function (resp) {
...
resp.on('data', function (d) {
...
})
.on('end', function () {
...
})
});
req.end();
});
});
req.write(postData);
req.end();
I've removed some code, because the point here is that I have a request in a request. Now, nodejs has deferreds The question is if this can be used to simplify the above code ?
Well, you have no error handling. Promises significantly cleans up code that correctly propagates errors and doesn't leak resources because those become automatic. So it's impossible to make a fair comparison because promise code that doesn't handle errors still propagates them.
var Promise = require("bluebird");
var request = Promise.promisifyAll(require("request"));
function githubAuthenticate() {
return request.postAsync(postOptions, postData)
.spread(function(response, body) {
var accessToken = ...
var getOptions = parseUrl('https://api.github.com/user?access_token=' + accessToken);
return request.getAsync(getOptions);
})
.spread(function(response, body) {
});
}
Now imagine if something failed here? You would add a .catch only once, in one place, and handle it there. Since errors automatically propagate, the code above doesn't need to do anything. The consumer code can just do:
gitHubAuthenticate().then(function() {
}).catch(function(err) {
// Any error that happened with the post, get or your code gets here
// automatically
});

Having trouble with promises in nodejs

I'm trying to use promises with nodejs (I'm trying with node-promise package); however, without any success. See the code below:
var express = require('express'),
request = require('request'),
promise = require('node-promise');
app.get('/promise', function(req, res) {
var length = -1;
new promise.Promise(request(
{uri: "http://www.bing.com"},
function (error, response, body) {
if (error && response.statusCode !== 200) {
console.log("An error occurred when connected to the web site");
return;
}
console.log("I'll return: " + body.length);
length = body.length;
}
)).then(function(result) {
console.log("This is what I got: " + length);
console.log("Done!");
});
res.end();
});
The output of the above code is I'll return: 35857 only and it doesn't go to the then part.
I change the code then to be:
app.get('/promise', function(req, res) {
var length = -1;
promise.when(
request(
{uri: "http://www.bing.com"},
function (error, response, body) {
if (error && response.statusCode !== 200) {
console.log("An error occurred when connected to the web site");
return;
}
console.log("I'll return: " + body.length);
length = body.length;
}
),
function(result) {
console.log("This is what I got: " + length);
console.log("Done!");
},
function(error) {
console.log(error);
}
);
res.end();
});
This time the output is This is what I got: -1 then Done!... looks like the "promise" was not called this time.
So:
What's needed to be done to fix the code above? Obviously I'm not doing it right :)
Is node-promise "the way to go" when I'm doing promises, or is there a better way/package? i.e. simpler and more production-ready.
Thanks.
Try jquery-deferred-for-node.
I'm not an expert but understand that this lib tends to be favoured by programmers who work both server-side and client-side.
Even if you don't already know jQuery's Deferreds, the advantages of going this route are that :
the documentation is excellent (it comprises links to the jQuery docs), though you may struggle to find examples specific to Node.
methods are chainable.
jQuery Callbacks are also included.
when one day you need to do asynchronous stuff client-side, then there's virtually nothing to relearn - the concepts are identical and the syntax very nearly so. See the "Correspondances" section in the github page hyperlinked above.
EDIT
I'm not a node.js person so I'm guessing here but based on your code above, you might want to consider something along the following lines with jquery-deferred-for-node :
var express = require('express'),
request = require('request'),
Deferred = require('JQDeferred');
function fetch(uri, goodCodes) {
goodCodes = (!goodCodes) ? [200] : goodCodes;
var dfrd = Deferred(); // A Deferred to be resolved/rejected in response to the `request()`.
request(uri, function(error, response, body) {
if (!error) {
var isGood = false;
// Loop to test response.statusCode against `goodCodes`.
for (var i = 0; i < goodCodes.length; i++) {
if (response.statusCode == goodCodes[i]) {
isGood = true;
break;
}
}
if (isGood) {
dfrd.resolve(response.statusCode, body);
} else {
dfrd.reject(response.statusCode, "An invalid response was received from " + uri);
}
} else {
dfrd.reject(response.statusCode, "An error occurred attempting to connect to " + uri);
}
});
// Make promise derived from dfrd available to "consumer".
return dfrd.promise();
};
//...
app.get('/promise', function(req, resp) {
fetch("http://www.bing.com").done(function(statusCode, result) {
console.log("Done! This is what I got: " + result.length);
}).fail(function(statusCode, message) {
console.log("Error (" + statusCode + "): " + message);
});
resp.end();
};
Here, I have tried to write a generalized utility for fetching a resource in such a way that the asynchronous response (or error) can be handled externally. I think this is broadly along the lines of what you were trying to achieve.
Out of interest, where do console.log() messages end up with node.js?
EDIT 2
Above, I have given Deferred an initial capital, as is conventional for Constructors
With jQuery Deferreds, there must be any number of ways to fetch() consecutively. The approach below leaves fetch() as it was, and introduces fetch_() to act as its front-end. There may be simpler ways but this allows fetch() to remain a general utility, functionally equivalent to the client-side jQuery.ajax().
function fetch_(uri){
return function(){
return fetch(uri, [200]).then(function(statusCode, result){
console.log("Done! This is what I got: " + result.length);
},function(statusCode, message){
console.log("Error (" + statusCode + "): " + message);
});
};
}
Note that function fetch() returns a function. It has to be like this because where fetch() is called, we want an unexecuted function, not (yet) the result of that function.
Now let's assume an array of uris is available. This can be hard-coded or built dynamically - whatever the application demands.
var uris = [
'http://xxx.example.com',
'http://yyy.example.com',
'http://zzz.example.com'
];
And now, a variety of ways in which fetch_() might be called :
//v1. To call `resp.end()` when the fetching process starts.
app.get('/promise', function(req, resp) {
fetch_(uris[0])().then(fetch_(uris[1])).then(fetch_(uris[2]));
resp.end();
});
//v2. To call `resp.end()` when the fetching process has finished.
app.get('/promise', function(req, resp){
fetch_(uris[0])().then(fetch_(uris[1])).then(fetch_(uris[2])).always(resp.end);
});
//v3. As v2 but building a `.then()` chain of any (unknown) length.
app.get('/promise', function(req, resp){
var dfrd = Deferred().resolve();//
$.each(uris, function(i, uri){
dfrd = dfrd.then(fetch_(uri));
});
dfrd = dfrd.always(resp.end);
});
untested
I have more confidence in v1 and v2. v3 may work.
v2 and v3 should both give exactly the same behaviour but v3 is generalized for any number of uris.
Everything may need debugging.
I would recommend using Q: https://github.com/kriskowal/q. I believe that it's used internally by other frameworks (like jQuery deferred implementation).
I believe that the documentation is "fine"; the syntax is consistent with other promise implementations... and it has a node adapter.
So your deferred style approach:
var deferred = Q.defer();
FS.readFile("foo.txt", "utf-8", function (err, res) {
if (!err) {
deferred.resolve(res);
} else {
deferred.reject(err);
}
});
return deferred.promise;
Can be written more concisely as:
var deferred = Q.defer();
FS.readFile("foo.txt", "utf-8", deferred.makeNodeResolver());
return deferred.promise;

Categories

Resources