Upload data from Node.js stream to ElasticSearch database - javascript

My current Node.js code creates a stream from a very large USPTO patent XML file (approx. 100 MB) and builds a patentGrant object for each grant while parsing the XML stream. Each patentGrant object includes the publication number, publication country, publication date and kind of patent. I am trying to create a database containing all of the patentGrant objects using Elasticsearch. I've successfully added code to connect to the local Elasticsearch DB, but I am having trouble understanding the elasticsearch-js API, and I don't know how I should go about uploading the patentGrant objects to the DB. Judging from the following tutorial and a previous Stack Overflow question I asked here, it seems like I should use the bulk API.
Here's my ParseXml.js code:
// ParseXml.js: streams a patent XML file and logs one summary object per grant.
var CreateParsableXml = require('./CreateParsableXml.js');
var XmlParserStream = require('xml-stream');
// var Upload2ES = require('./Upload2ES.js');
var parseXml;
var es = require('elasticsearch');
// Elasticsearch client for the local node; it is created here but not used anywhere in this snippet.
var client = new es.Client({
host: 'localhost:9200'
});
// create xml parser using xml-stream node.js module
parseXml = new XmlParserStream(CreateParsableXml.concatXmlStream('ipg140107.xml'));
// Runs each time a us-patent-grant element has been fully parsed.
parseXml.on('endElement: us-patent-grant', function(patentGrantElement) {
var patentGrant;
// All four fields are read from the same nested node:
// us-bibliographic-data-grant -> publication-reference -> document-id.
patentGrant = {
pubNo: patentGrantElement['us-bibliographic-data-grant']['publication-reference']['document-id']['doc-number'],
pubCountry: patentGrantElement['us-bibliographic-data-grant']['publication-reference']['document-id']['country'],
kind: patentGrantElement['us-bibliographic-data-grant']['publication-reference']['document-id']['kind'],
pubDate: patentGrantElement['us-bibliographic-data-grant']['publication-reference']['document-id']['date']
};
// The record is only printed at this point; nothing is uploaded yet.
console.log(patentGrant);
});
// Runs when the parser emits 'end'.
parseXml.on('end', function() {
console.log('all done');
});

The bulk api, as it says in the docs you linked, is used for "index" and "delete" operations.
Use create https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/api-reference.html#api-create
// For every parsed us-patent-grant element, build a flat patentGrant record
// and store it in Elasticsearch as its own document.
parseXml.on('endElement: us-patent-grant', function(patentGrantElement) {
  // All four fields live under the same document-id node, so look it up once.
  var documentId = patentGrantElement['us-bibliographic-data-grant']['publication-reference']['document-id'];
  var patentGrant = {
    pubNo: documentId['doc-number'],
    pubCountry: documentId['country'],
    kind: documentId['kind'],
    pubDate: documentId['date']
  };
  // No id is passed in the request.
  client.create({
    index: 'myindex',
    type: 'mytype',
    body: patentGrant
  }, function(err) {
    // Report failed writes instead of silently discarding them.
    if (err) {
      console.error('Failed to store patent ' + patentGrant.pubNo + ':', err);
    }
  });
  console.log(patentGrant);
});
Without an ID, Elasticsearch should generate one automatically, as per https://www.elastic.co/guide/en/elasticsearch/reference/1.6/docs-index_.html#_automatic_id_generation

Related

Storing a reference in another collection creates object but not string Meteor

I am uploading profile pictures for my users using collectionFS. I have two collections, one for userData and another for Images. I am trying to attach the image reference in the other collection. But I am getting the object rather than the _id string. Any pointers?
// Two collections: Biye for user data, Images (GridFS-backed) for uploaded pictures.
Biye = new Meteor.Collection ('biye');
Images = new FS.Collection("images", {
stores: [new FS.Store.GridFS("images")]
});
// .....................upload.............
Template.upload.events({
// Fires when the #exampleInput file input changes.
'change #exampleInput':function(event, template){
// Only the first selected file is used.
var file = $('#exampleInput').get(0).files[0];
// newFile is assigned without var, so it is not local to this handler.
newFile = new FS.File(file);
// ownerId receives whatever findOne() returns, i.e. the matched object itself, not its _id.
newFile.metadata = {'ownerId':Biye.findOne({ObjectID:this._id})};
// Insert the file; on success, log the _id of the inserted record.
Images.insert(newFile,function(err,result){
if(!err){
console.log(result._id);
}
});
}
})
newFile.metadata = {'ownerId':Biye.findOne({ObjectID:this._id})};
This line stores the whole object as ownerId, but I wanted just the _id string.
I was able to store the string using
newFile.metadata = {'ownerId':Biye.findOne({ObjectID:this._id})._id};
I just added ._id at the end of
newFile.metadata = {'ownerId':Biye.findOne({ObjectID:this._id})};

How do I add temporary properties on a mongoose object just for response, which is not stored in database

I would like to fill a couple of extra temporary properties with additional data and send them back in the response.
'use strict';
var mongoose = require('mongoose');
var express = require('express');
var app = express();
// The schema declares only two date fields; friends, state and score (assigned below) are not part of it.
var TournamentSchema = new mongoose.Schema({
createdAt: { type: Date, default: Date.now },
deadlineAt: { type: Date }
});
var Tournament = mongoose.model('Tournament', TournamentSchema);
// GET /: creates a new Tournament, tries to attach extra properties, then logs and sends it.
app.get('/', function(req, res) {
var tournament = new Tournament();
// Adding properties like this 'on-the-fly' doesn't seem to work
// How can I do this ?
// NOTE(review): the array below holds ONE string, 'Friend1, Friend2'; confirm whether two elements were intended.
tournament['friends'] = ['Friend1, Friend2'];
tournament.state = 'NOOB';
tournament.score = 5;
console.log(tournament);
res.send(tournament);
});
// Listen on port 3000 and log the port once the server is up.
var server = app.listen(3000, function() {
console.log('Listening on port %d', server.address().port);
});
But the properties won't get added to the Tournament object and are therefore not in the response.
Found the answer here: Unable to add properties to js object
I can't add properties to a Mongoose object directly; I have to convert it to a plain JavaScript object first, using the .toJSON() or .toObject() methods.
EDIT: And as @Zlatko mentions, you can also finalize your queries using the .lean() method.
mongooseModel.find().lean().exec()
... which also produces native js objects.

Inserting into MongoDB using Node js

Inserting into MongoDB using the mongojs module fails cryptically. I have two functions, setupMongoDB and pushRequestsToMongoDB (what they do is self-explanatory).
I get a request from my web-page and parse the data to JSON.
The console looks like this:
created worker: 22987
created worker: 22989
created worker: 22990
created worker: 22991
{ type: 'line',geometry: '`tvtBat~_Wnaoi#_kc~BzlxZbrvdA{fw[`mw}#' }
object
[Error: connection closed]
The code that produces the error looks like this:
// Thin wrapper around a mongojs connection that exposes a single insert helper.
var mongo = require('mongojs');
var collections = ['testData'];
var dbURL = 'localhost:3000/mapData';
var db = mongo.connect(dbURL, collections);

/**
 * Saves `obj` into the testData collection and logs the outcome.
 * On failure (an error, or no saved document) the error value is logged;
 * on success the _id of the saved document is logged.
 */
function insert(obj) {
  db.testData.save(obj, function (err, saved) {
    if (err || !saved) {
      console.log(err);
      return;
    }
    console.log('Data successfully inserted with _id ' + saved['_id']);
  });
}

exports.insert = insert;
This is how I use the function:
var express = require('express');
var app = express();
var mongo = require('./mongo_try');

// Body-parsing middleware; the handler below reads the result from req.body.
app.use(express.bodyParser());

// POST /map: log the received body and hand it to the Mongo wrapper.
app.post('/map', function(req, res) {
  var data = req.body;
  console.log(data);
  console.log(typeof data);
  mongo.insert(data);
  // insert() offers no completion callback, so acknowledge receipt right away;
  // without a response the client request would hang until it times out.
  res.status(202).end();
});
I'm very confused at what "conn.markers.save" intends to do. Is this a mongoose call? Mongodb node native doesn't have a "save" command. Do you mean to get the "markers" collection, and then insert the data? You won't need to stringify it.
Are you using this: https://github.com/mafintosh/mongojs ?
You shouldn't have to stringify that save command either. Change this line:
conn.markers.save(obj,function(err,data){
Or if the contents of "obj" are already a string, change it to:
conn.markers.save(JSON.parse(obj),function(err,data){

How to properly create a new Producer?

I'm using the model-driven tool CodeFluent Entities in order to deploy a model to a database engine.
I'm thinking about using local storage database engines (like IndexedDB or Web SQL) in order to store my data for a web application without a server.
I looked into the documentation, but it seems a little sparse to me... I think I understood the basic principles, such as the injection points Produce() and Terminate(), but what about the target directory of the actual production?
In my case, which is JavaScript source code files, how can I specify correctly (in a referenced manner) where to generate them? And does it have to be in an external project, or could I just fill a directory in another project (the .vsproj of my web app, for example)?
Could the documentation include a code sample covering these aspects, or can someone point me to an article fitting my needs?
The Template approach
Given your needs, I suggest you use a template instead of developing a custom Producer, for deployment reasons among others. Using the template producer (shipped with CodeFluent Entities), you can quickly and easily create complex scripts by taking advantage of the CodeFluent Entities meta model.
This producer is based on CodeFluent Entities' template engine and allows you to generate text files (JavaScript in your case) at production time.
As a reminder, a template is simply a mixture of text blocks and control logic that can generate an output file.
This producer takes care of all common operations: updating the project (.XXproj) to add your generated files, adding missing references, etc.
Below is an example that generates an IndexedDB script file based on a CodeFluent Entities model (for demonstration purposes only). Here's the template source file:
[%# reference name="C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.5\System.Core.dll" %]
[%# namespace name="System" %]
[%# namespace name="System.Linq" %]
[%# namespace name="CodeFluent.Model" %]
var context = {};
context.indexedDB = {};
context.indexedDB.db = null;
context.indexedDB.open = function () {
var version = 11;
var request = indexedDB.open([%=Producer.Project.DefaultNamespace%], version);
request.onupgradeneeded = function (e) {
var db = e.target.result;
e.target.transaction.onerror = context.indexedDB.onerror;
[%foreach(Entity entity in Producer.Project.Entities){
string properties = String.Join(", ", entity.Properties.Where(p => !p.IsPersistenceIdentity).Select(p => "\"" + p.Name + "\""));
%]
if (db.objectStoreNames.contains("[%=entity.Name%]")) {
db.deleteObjectStore("[%=entity.Name%]");
}
var store = db.createObjectStore("[%=entity.Name%]",
{ keyPath: "id", autoIncrement: true });
store.createIndex([%=properties %], { unique: false });[%}%]
};
request.onsuccess = function (e) {
context.indexedDB.db = e.target.result;
};
request.onerror = context.indexedDB.onerror;
};
[%foreach(Entity entity in Producer.Project.Entities){
string parameters = String.Join(", ", entity.Properties.Where(p => !p.IsPersistenceIdentity).Select(p => p.Name));%]
context.indexedDB.[%=entity.Name%] = {}
// Generated per entity: stores one record built from the arguments, which are
// the entity's non-identity properties, plus a timeStamp of the current time.
context.indexedDB.[%=entity.Name%].add = function ([%= parameters %]) {
var db = context.indexedDB.db;
var trans = db.transaction(["[%=entity.Name%]"], "readwrite");
var store = trans.objectStore("[%=entity.Name%]");
// The template loop below emits one "Name": Name pair per non-identity property.
var request = store.put({
[%
foreach (Property property in entity.Properties.Where(p => !p.IsPersistenceIdentity)) {%]
"[%=property.Name%]": [%=property.Name%], [%}%]
"timeStamp": new Date().getTime()
});
// NOTE(review): both handlers log e.value; confirm the event really carries a
// `value` property (request outcomes are usually read from e.target).
request.onsuccess = function (e) {
console.log(e.value);
};
request.onerror = function (e) {
console.log(e.value);
};
};
// Generated per entity: deletes the record stored under the given id and logs
// the resulting success or error event.
context.indexedDB.[%=entity.Name%].delete = function (id) {
var db = context.indexedDB.db;
var trans = db.transaction(["[%=entity.Name%]"], "readwrite");
var store = trans.objectStore("[%=entity.Name%]");
var request = store.delete(id);
request.onsuccess = function (e) {
console.log(e);
};
request.onerror = function (e) {
console.log(e);
};
};
context.indexedDB.[%=entity.Name%].loadAll = function () {
var db = context.indexedDB.db;
var trans = db.transaction(["[%=entity.Name%]"], "readwrite");
var store = trans.objectStore("[%=entity.Name%]");
var keyRange = IDBKeyRange.lowerBound(0);
var cursorRequest = store.openCursor(keyRange);
request.onsuccess = function (e) {
// not implemented
};
request.onerror = function (e) {
console.log(e);
};
};
[%}%]
// Entry point of the generated script: opens the database via context.indexedDB.open().
function init() {
context.indexedDB.open(); // initialize the IndexDB context.
}
// Run init when the DOMContentLoaded event fires.
window.addEventListener("DOMContentLoaded", init, false);
Then you need to configure your CodeFluent Entities project by adding the Template Producer and defining the template above as its source file.
If you consider the following model:
Just build it to generate the IndexedDB script file in the target project (a web application, for example), and you'll be able to use the generated API like this:
// add() takes one argument per non-identity property of the entity.
context.indexedDB.Contact.add("Peter", "Boby")
context.indexedDB.Product.add("Tablet")
context.indexedDB.Product.add("Computer")
// delete() takes the id of the record to remove.
context.indexedDB.Contact.delete(1)
// loadAll() opens a cursor over the store; result handling is not implemented in the template.
context.indexedDB.Product.loadAll()
The custom Producer approach
Nevertheless, if ever you need to target a technology or platform that isn't supported by CodeFluent Entities natively, you may create your own custom producer by implementing the IProducer interface :
// Contract a custom producer implements so the build engine can call it.
// The engine calls Initialize, then Produce, then Terminate.
public interface IProducer
{
// NOTE(review): presumably raised to report production progress; confirm against the CodeFluent API.
event Producer.OnProductionEventHandler Production;
// Called first; receives the CodeFluent Entities project and the current producer.
void Initialize(Project project, Producer producer);
// Called after Initialize; this is where the generation logic goes.
void Produce();
// Called last; place for any finalization logic.
void Terminate();
}
First of all, you need to understand that the CodeFluent Entities build engine calls each of your configured producers one by one to generate your code.
First, CodeFluent Entities calls the Initialize method of each producer. It takes as parameters an instance of the CodeFluent Entities project and the current producer.
Then it calls the Produce method, following the same process. This is the right place to implement your generation logic.
Finally, you can implement finalization logic in the Terminate method.
CodeFluent provides some base classes that implement the IProducer interface, such as BaseProducer, which is located in the CodeFluent.Producers.CodeDom assembly and provides behaviors like "add missing references" or "update Visual Studio project (.XXproj)".
In addition, here's a blog post that can help you to integrate a custom producer to the modeler.
The Sub-Producer approach
Another approach might be to develop a custom Sub-Producer but, in my opinion, it is not suited to your needs.

Node.js - how to use external library (VersionOne JS SDK)?

I'm trying to use VersionOne JS SDK in Node.js (https://github.com/versionone/VersionOne.SDK.JavaScript). I'm simply downloading whole library, placing it alongside with my js file:
// Load the SDK file placed alongside this script and pick V1Server off its exports.
var v1 = require('./v1sdk/v1sdk.js');
var V1Server = v1.V1Server;
// Print the module's exports and the V1Server value for inspection.
console.log(v1);
console.log(V1Server);
Unfortunately something seems wrong, the output I get after calling
node app.js
is:
{}
undefined
Can somebody point me what I'm doing wrong or check whether the sdk is valid.
Thanks!
You can see in the source where V1Server is defined, that it's a class with a constructor. So you need to use the new keyword and pass the arguments for your environment.
https://github.com/versionone/VersionOne.SDK.JavaScript/blob/master/client.coffee#L37
var server = new V1Server('cloud'); //and more if you need
Can you try the sample.js script that I just updated from here:
https://github.com/versionone/VersionOne.SDK.JavaScript/blob/master/sample.js
It pulls in the two modules like this:
var V1Meta = require('./v1meta').V1Meta;
var V1Server = require('./client').V1Server;

// Connection settings passed to the V1Server constructor.
var hostname = "www14.v1host.com";
var instance = "v1sdktesting";
var username = "api";
var password = "api";
var port = "443";
var protocol = "https";

var server = new V1Server(hostname, instance, username, password, port, protocol);
var v1 = new V1Meta(server);

// Query the Member whose IsSelf is 'true' and print the selected attributes.
v1.query({
  from: "Member",
  where: { IsSelf: 'true' },
  select: ['Email', 'Username', 'ID'],
  success: function (member) {
    ['Email', 'Username', 'ID'].forEach(function (attribute) {
      console.log(member[attribute]);
    });
  },
  // NOTE: this is not working correctly yet, not called...
  error: function (failure) {
    console.log(failure);
  }
});
You might have to get the latest and build the JS from CoffeeScript.
I think I was trying out "browserify" last year and that's how the "v1sdk.js" file got generated. But I'm not sure if that's the best approach if you're using node. It's probably better just to do it the way the sample.js file is doing it.
However, I also checked in a change to v1sdk.coffee which properly exports the two other modules, just as a convenience. With that, you can look at sample2.js. The only part that differs is this, which is closer to what you were trying to do in your example:
// Single entry point: V1Server and V1Meta are both read off the v1sdk module.
var v1sdk = require('./v1sdk');
// Connection settings passed to the V1Server constructor.
var hostname = "www14.v1host.com";
var instance = "v1sdktesting";
var username = "api";
var password = "api";
var port = "443";
var protocol = "https";
var server = new v1sdk.V1Server(hostname, instance, username, password, port, protocol);
// V1Meta is constructed around the server connection.
var v1 = new v1sdk.V1Meta(server);

Categories

Resources