Tensorflow.js Model only predicting same values - javascript
I have a dataset (which contains both the data and the label) with the size of [299,13], and the model keeps outputting / predicting the same value. This is a binary classification task. How would I make my model predict values which are not constantly the same?
Here is the code (with some dummy data):
// Dummy dataset: 26 rows × 13 columns. Columns 0–11 are the features and
// column 12 is the binary class label (0 or 1).
// NOTE(review): the question describes a [299, 13] dataset; this literal is
// only the small dummy sample included in the post.
var Dataset = tf.tensor([[1,0.491821360184978,9,314,0.504585169147173,542,1231,3213,1,0.267304071302649,3,0.615917680092409,0],
[0,0.72959029133292,3,758,0.402582737085955,400,1788,4599,0,0.532702887951197,4,0.18630897965037,1],
[1,0.198764110760428,5,787,0.65507860022684,887,192,4831,1,0.739456077544426,3,0.100068056951143,1],
[0,0.583574833590476,5,596,0.933996451580092,631,331,811,0,0.258445986493932,7,0.811276729811182,0],
[1,0.701499878184206,8,854,0.0326334179806069,845,470,4930,1,0.825469683527519,1,0.448086959665654,1],
[0,0.954482878414911,2,468,0.736300149681564,557,3110,739,0,0.325783042694677,5,0.43488580142501,1],
[1,0.384845877769,2,662,0.265402742189238,649,384,1158,1,0.484884260891815,2,0.915444292219105,0],
[1,0.379266474923531,9,551,0.275982850450116,1022,3329,1413,1,0.237295089390298,4,0.817104709627837,1],
[1,0.691365367558705,8,549,0.479627221800976,796,3381,495,1,0.37129382411555,9,0.332832739155564,1],
[0,0.433042848178662,5,529,0.545178403950882,842,4768,506,0,0.386370525896832,9,0.189942077251933,0],
[1,0.611272282663452,4,823,0.737901576655264,839,2724,1787,1,0.365032317656007,6,0.884073622694046,0],
[0,0.0084315409129881,5,352,0.76858549557176,476,685,4796,0,0.302944943656102,1,0.849655932794213,1],
[0,0.977380232874908,6,701,0.588833228576897,999,2897,3325,0,0.418024491281536,2,0.631872118440871,1],
[1,0.419601058571829,10,384,0.0157052616592944,1009,4438,113,1,0.909015627566542,1,0.0297684897733232,0],
[0,0.739471449044276,4,836,0.0430176780439737,1030,1456,3932,0,0.331426481315121,6,0.734008754824423,0],
[1,0.00209807072438295,4,352,0.499622407429238,418,1912,4452,1,0.727130871883893,8,0.157427964683612,0],
[1,0.956533819923862,10,681,0.196708599930969,829,4562,1718,1,0.233193195569506,7,0.60582783922237,0],
[1,0.504637155233183,8,809,0.608861975627751,717,130,4194,1,0.134197560919101,6,0.375188428842507,0],
[0,0.747363884375055,1,522,0.868234577182028,849,3529,1192,0,0.0322641640468155,5,0.185973206518818,0],
[0,0.244142898027225,10,402,0.0280582030746698,315,3576,3882,0,0.724916254371562,8,0.062229775169706,1],
[0,0.858414851618448,8,459,0.367325906336267,616,930,3892,0,0.177388425930446,10,0.859824526007041,1],
[1,0.921555604905976,2,863,0.821166873626313,528,1624,1289,1,0.366243396916411,5,0.453840754701258,1],
[1,0.171321120311715,1,524,0.177251413832862,468,1608,3123,1,0.192861821442111,8,0.122983286410146,0],
[0,0.539946042901786,6,692,0.817780349862711,392,1053,4891,0,0.409578972921785,3,0.0453862502541893,1],
[1,0.996848843212564,5,549,0.877740438211017,762,3046,843,1,0.888578696082088,8,0.877971306478434,1],
[0,0.218116987741582,3,655,0.240496962520226,407,1001,1474,0,0.976212355833712,2,0.936396547703282,1]])
// Split the dataset into features (columns 0–11) and the raw label (column 12).
const x = Dataset.slice([0, 0], [-1, 12]);

// Labels: cast to int32 class indices, flatten to rank 1, then one-hot
// encode to 2 classes so they match the 2-unit softmax output layer and
// the categoricalCrossentropy loss below.
let y = Dataset.slice([0, 12], [-1, 1]);
y = y.cast('int32').reshape([-1]).oneHot(2);
y.print();

// Binary classifier: 12 input features -> hidden layers -> 2-class softmax.
const model = tf.sequential({
  layers: [
    tf.layers.dense({ inputShape: [12], units: 12, activation: "relu6" }),
    tf.layers.dense({ units: 56, activation: "tanh" }),
    tf.layers.dense({ units: 28, activation: "tanh" }),
    tf.layers.dense({ units: 14, activation: "sigmoid" }),
    tf.layers.dense({ units: 58, activation: "tanh" }),
    tf.layers.dense({ units: 2, activation: "softmax" }),
  ],
});
model.summary();

model.compile({
  optimizer: tf.train.adam(),
  loss: 'categoricalCrossentropy',
  metrics: ['accuracy'],
});

// Train, then predict on the training set. The fit() promise now carries a
// rejection handler so training failures are not silently swallowed.
model.fit(x, y, { batchSize: 3, epochs: 10, shuffle: true })
  .then((h) => {
    console.log("Training Complete");
    const predictions = model.predict(x);
    predictions.print();
  })
  .catch((err) => {
    console.error("Training failed:", err);
  });
299 samples with 13 features. That might not be enough for the model to generalize. In your hidden layers you use tanh and sigmoid; I suggest using relu. Also, you are one-hot-encoding your labels to use softmax — that's understandable, but you might want to use sigmoid instead.
If you use sigmoid without one-hot encoding, then you will be able to set a threshold depending on your business problem.
tf.layers.dense({ units: 1, activation: "sigmoid" })
Let's say you set a 0.5 threshold for predictions, meaning that if your prediction is bigger than 0.5 then it will belong to the second class. But you can adjust it to, say, 0.4 to see what happens. You can decide on a good threshold by interpreting the AUC-ROC curve.
Another thing is about features, they are not scaled properly:
[1,0.00209807072438295,4,352,0.499622407429238,418,1912,4452,1,0.727130871883893,8,0.157427964683612,0]
If they are not scaled properly in a range, then model can give more importance to certain features than the others, or some unexpected behaviors can happen.
I had the same problem: the model trains, but it always predicts the same value. I don't know the exact principle, but I first trained the model with fake data and then trained it with the real data again, and this problem was solved.
I think the model's weights were initialized by the first training pass on the fake data.
I will add example code.
// Work-around: first fit the model on fake (all-zero) tensors so the model
// gets an initial training pass before real training.
// NOTE(review): this snippet appears to come from a React component —
// `newModel` looks like a ref (`newModel.current`) and `setLoss` like a
// state setter; neither is defined in this excerpt. Also note the
// inconsistency: the first call uses `newModel.current.fit` while the
// second uses `newModel.fit` — confirm which one is correct.
var fake_xs = tf.zeros([10, 7, 7, 256]);
var fake_ys = tf.zeros([10]);
newModel.current.fit(
fake_xs,
fake_ys, {
epochs: 5,
callbacks: {
onEpochEnd: async (epoch, logs) => {
// Report the loss after each warm-up epoch.
setLoss(logs.loss.toFixed(5));
console.log("LOSS: " + logs.loss.toFixed(5));
},
},
});
// Real training run on the actual dataset.
// NOTE(review): `await` requires an enclosing async function (or a
// top-level module), and this call is missing its closing `);` — the
// snippet is incomplete as shown.
const history = await newModel.fit(
datasetForTraining.xs,
datasetForTraining.ys,
{
epochs: epochNum,
batchSize: 16,
callbacks: {
onEpochEnd: async (epoch, logs) => {
// Report the loss after each epoch of real training.
setLoss(logs.loss.toFixed(5));
console.log("LOSS: " + logs.loss.toFixed(5));
},
},
}
Related
Reloading DeckGL HexagonLayer when data array changes/Triggering reload for DeckGL HexagonLayer
I'm using DeckGL with React to display some data on an OpenStreetMap. I'm planning on implementing some filters to be able to display different views on the data I have. My main problem is, that I can't figure out how to refresh the data representing layer after filtering the data array. I saw a bunch of people creating a DeckGL-Object in JavaScript, and then using this, to call deck.setProps() but I couldn't figure out how to render this DeckGL-Object by using react. This is my app.js: export default function App({showBorder = false, onTilesLoad = null}) { layers = [ /** * TileLayer ist ein Layer aus Open-Streetmap-Tiles (Anzeigen der Karte) */ new TileLayer({ data: [/*OSM TileServer*/], maxRequests: 20, pickable: true, onViewportLoad: onTilesLoad, autoHighlight: showBorder, highlightColor: [60, 60, 60, 40], minZoom: 0, maxZoom: 19, tileSize: 512 / devicePixelRatio, renderSubLayers: (props) => { const { bbox: {west, south, east, north} } = props.tile; return [ new BitmapLayer(props, { data: null, image: props.data, bounds: [west, south, east, north] }), showBorder && new PathLayer({ id: `${props.id}-border`, visible: props.visible, data: [ [ [west, north], [west, south], [east, south], [east, north], [west, north] ] ], getPath: (d) => d, getColor: [255, 0, 0], widthMinPixels: 4 }) ]; } }), new HexagonLayer({ id: 'hexagon-layer', data: /*JsonDataArray*/, pickable: true, extruded: true, radius: 2000, elevationRange: [25, 500], elevationScale: 200, autoHighlight: true, opacity: 0.2, colorRange: [ [255, 255, 204], [199, 233, 180], [127, 205, 187], [65, 182, 196], [44, 127, 184], [37, 52, 148] ], getElevationHeight: () => 500, getPosition: (d) => d.coordinates, }) ]; return ( <DeckGL layers={layers} views={new MapView({repeat: true})} initialViewState={INITIAL_VIEW_STATE} controller={true} /> ); } Obviously is there a little bit more to my app.js but I don't think the missing parts are important since I just wanna know how I can refresh a layer. 
I also have a index.html but I don't think it's content is really relevant either since it's only use is to call the App function to render the layers. I just can't find out what to do, to cause a reload of the HexagonLayer. Thanks for your help in advance.
A good approach is using DataFilterExtension. GPU-based data filtering, go this way if you care about performance. For the moment there is a limitation of the extension for HexagonLayer, but maybe using GPUGridLayer can help you in your visualization also. I.e: let's say you want to filter your qualitative data. filterRange needs numeric bounds (which defines whether an object should be rendered), so you can set your bounds as [1, 1] and check if some object matches with you current filter condition, if matches, getFilterValue gets 1, so that object will be rendered, otherwise, not rendered: const [filterCondition, setFilter] = useState(''); useEffect(() => { // dispatch some action to set the filter setFilter('cities'); }, []); new ScatterplotLayer({ ...otherProps, getFilterValue: object => object.properties.target === filterCondition ? 1 : 0, filterRange: [1, 1], extensions: [new DataFilterExtension({ filterSize: 1 })], updateTriggers: { // It's important to tell deck.gl when to update getFilterValue: filterCondition } }); Otherwise updating you data array should be enough. That means a CPU-based data filtering, if your data is not huge, it's okey. Thanks to reactivity should be enough with something like this: const [yourData, setData] = useState([]); useEffect(() => { // dispatch some action to set data setData([newData]); }, []); const layers = [ new HexagonLayer({ ...otherProps, data: yourData }); ]; return ( <DeckGL ...otherProps, layers={layers} /> ); P.D: deck.setProps() is recommended to use in a non-reactive environment
TensorflowJS in a 2-dimension simple problem
I am playing with TensorflowJS and I am surprised by the (bad) results I got. Here is the problem I am working on: you have a 2D square from the top left (0,0) to the bottom right (1,1). Each corner has a color in RGB as follow: top-left: black top-right: red bottom-right: green bottom-left: blue I want to infer the color on a point which is in the square. I have set a simple Tensorflow model. After a simple trainng, I test it on the bottom right... and instead of getting something close to the green, I got bad results. Could you please tell me where I am doing something wrong? Thanks async function test() { tf.setBackend('cpu'); const model = tf.sequential(); model.add(tf.layers.dense({units: 3, inputShape: [2] })); model.compile({loss: 'meanSquaredError', optimizer: 'sgd'}); const xs = tf.tensor([0,0, 1,0, 1,1, 0,1 ], [4, 2]); const ys = tf.tensor([ [ 0, 0, 0 ], // black [ 1, 0, 0 ], // red [ 0, 1, 0 ], // green [ 0, 0, 1 ], // blue ], [4, 3]); await model.fit(xs, ys, {epochs: 5000}); const input = tf.tensor([1,1], [1, 2]); console.log(model.predict(input).dataSync()); } My results: Float32Array(3) [0.25062745809555054, 0.7481716275215149, 0.2501324415206909]
The model is using a linear activation, which can output correct results only if the features and the labels are linearily dependant ( y = ax+b). A different activation needs to be used. Often a model needs to be fine tuned, meaning that a different set of parameters needs to be used until one finds a model with best accuracy - it is called fined tuning. Below is a model with a set of parameters that has a low accuracy. One needs to keep in mind that this is not "the" set of parameters. It is "a" set of parameters. See this answer about how to fine tuned a model. (async() => { const model = tf.sequential(); model.add(tf.layers.dense({units: 18, inputShape: [2]})); model.add(tf.layers.dense({units: 14, activation: 'relu6'})); model.add(tf.layers.dense({units: 3, activation: 'relu6'})); const xs = tf.tensor([0,0, 1,0, 1,1, 0,1 ], [4, 2]); const ys = tf.tensor([ [ 0, 0, 0 ], // black [ 1, 0, 0 ], // red [ 0, 1, 0 ], // green [ 0, 0, 1 ], // blue ], [4, 3]); const optimizer = tf.train.sgd(0.000001) model.compile({loss: 'meanSquaredError', optimizer: 'adam'}); await model.fit(xs, ys, { epochs: 500, callbacks: {onEpochEnd: (epoch, logs) => console.log(logs.loss)} }); const input = tf.tensor([1,1], [1, 2]); console.log(model.predict(input).dataSync()); })() <script src="https://cdn.jsdelivr.net/npm/#tensorflow/tfjs/dist/tf.min.js"> </script> Even if the above model has a low accuracy, resulting in better prediction, the problem solved here seems not to be a regression problem. If the goal is just to pick a color out of three, we rather have a classification problem. The difference would be to have a softmax activation layer on the last layer and the loss function would be a binaryCrossentropy (respectively categoricalCrossentropy) for 2 (respectively more than 2) units on the last layer. 
(async() => { const model = tf.sequential(); model.add(tf.layers.dense({units: 18, inputShape: [2]})); model.add(tf.layers.dense({units: 14, activation: 'relu6'})); model.add(tf.layers.dense({units: 4, activation: 'softmax'})); const xs = tf.tensor([0,0, 1,0, 1,1, 0,1 ], [4, 2]); const ys = tf.oneHot([0, 1, 2, 3], 4) // 0 for black // 1 for red // 2 for green // 3 for blue ys.print() model.compile({loss: 'categoricalCrossentropy', optimizer: 'sgd'}); await model.fit(xs, ys, { epochs: 100, callbacks: {onEpochEnd: (epoch, logs) => console.log(logs.loss)} }); const input = tf.tensor([1,1], [1, 2]); const output = model.predict(input) output.print() output.argMax(1).print(); // result will be 2 for green })() <script src="https://cdn.jsdelivr.net/npm/#tensorflow/tfjs/dist/tf.min.js"> </script>
How to control range of output in tensorflow js
I have this model: const hidden = tf.layers.dense({ units: 8, inputShape: [58, 8, 8], activation: 'sigmoid' }); const output = tf.layers.dense({ units: 1, activation: 'softmax' }); var model = tf.sequential({ layers: [ hidden, output ] }); Now I want the output to be in the range -1 and 1 how do I achieve that?
The activation function is what determines the output. 'sigmoid' is good for 0 to 1 outputs and 'tanh' is good for -1 to 1, however you choose to use 'softmax' which is normally used for multi-class classification. Regardless when you make your prediction you can remap the values to another array.
DynamicSlope not working in d3 funnel chart
I trying to d3 funnel chart with 0 values its not working properly.I enabled dynamicSlope option is true funnel chart not working. See my example: http://jsfiddle.net/3x265bLj/7/ I added six values l0,l1,l2....L6 and l3 value is 0.Once i enabled dynamicSlope option is true Funnel chart only showing L0,L1,L2 value only but need L3,L4,L5. const data = [ ['l0', 13], ['l1', 7], ['l2', 12], ['l3', 0], ['l4', 8], ['l5', 3] ]; const options = { chart: { width: 200, height: 450, bottomWidth: 1 / 2, bottomPinch: 1, inverted: false, horizontal: false, animate: 0, curve: { enabled: true, height: 20, shade: -0.4 } }, block: { dynamicSlope: true, dynamicHeight: true, highlight: true } }; const chart = new D3Funnel('#funnel'); chart.draw(data, options); Console Issues: Error: <path> attribute d: Expected number, "MNaN,562.5 LNaN,5…".
The Error: <path> attribute d: Expected number, "MNaN,562.5 LNaN,5…". Means The function try to convert data to height format, but you input 0 that means 0 height, the function reject you. what is the purpose of drawing 0 data? axample, if the function can do it, the data will not showing on the chart, because it has 0 height, am i correct? thare is 2 solution for your problem, first you change the data value to 0.01 before draw it var data = [ ['l0', 13], ['l1', 7], ['l2', 12], ['l3', 0], ['l4', 8], ['l5', 3] ]; var newdata = data.map(function (d,i){ var new_d = d if (new_d[1] == 0){ new_d[1] = 0.01 } return new_d }) chart.draw(newdata, options); second, you add the filter function before draw it var data = [ ['l0', 13], ['l1', 7], ['l2', 12], ['l3', 0], ['l4', 8], ['l5', 3] ]; var newdata = data.filter(function (d,i){ return d[1] !== 0 }) chart.draw(newdata, options);
FP: Trees - map, fold, each. How?
I have a working OOP code that recursively renders a composition of graphical elements to a canvas. There's quite a bit to dislike about it and I'm trying to see what a functional version will look like. Sure, one can write a specialised recursive pure function, but as the framework involves similar algorithms, I'd like to: Harness the power of function composition. See how FP - and its data piping paradigm (transforming data through pure functions) - lands itself to more complex structures than lists (trees/graphs) and less trivial algorithms (than say, finding all odd number by sequentially iterating the list). Inspired by Lazy.js, I've started coding and got this far: LazyTree.from( drawing ) .keepNodes( visible ) .keepChildrenOf( nonClipping ) .traverse( log ); But as for map and fold - I have many unanswered questions. Goal Here's a simplified version of the problem I'm trying to solve: Data A composition (hierarchy) of rectangles. The bounds of each are in relative coordinates (to its parent): const drawing = { name: 'Face', bounds: { x: 10, y: 10, w: 100, h: 100 }, children: [{ name: 'Left eye', bounds: { x: 10, y: 10, w: 20, h: 20 }, // Abs: (20, 20, 20, 20) children: [{ name: 'Left pupil', bounds: { x: 5, y: 5, w: 10, h: 10 } // Abs: (25, 25, 10, 10) }] },{ name: 'Right eye', bounds: { x: 70, y: 10, w: 20, h: 20 }, // Abs: (80, 20, 20, 20) children: [{ name: 'Right pupil', bounds: { x: 5, y: 5, w: 10, h: 10 } // Abs: (85, 25, 10, 10) }] }] }; Task - getAbsoluteBounds The task is to convert this composition to one that has absolute coordinates (as shown in the comments). Issues and thoughts Fold? The absolute coordinates of a child is its relative coordinates transposed by its parent absolute coordinates. So a fold with its accumulator are candidates to do this. But fold is associated with catamorphism and verbs like combine, and typically returns a single value. 
The transformation in question takes a tree and returns an identical structure but with different values - so it sounds more like a map, but one that needs an accumulator. As far as the accumulator goes, it is worth noting that all the children of a specific node should get the same accumulator. For the data above, both Left eye and Right eye should get the same absolute coordinates of Face (as opposed to the Right eye getting the returned accumulator of Left eye in depth-first-traversal). Another thing I'm not clear about is who should be in charge of constructing the output tree. Should it be the high-order functions (fold, map, or whatever), or should it be the aggregator? Stop conditions Related the the previous section, consider all rectangles to clip their children, and the following composition: const drawing = { name: 'Parent', bounds: { x: 10, y: 10, w: 10, h: 10 }, children: [{ name: 'Child', bounds: { x: 1000000, y: 1000000, w: 10, h: 10 }, children: [{ name: 'Grandchild', bounds: { x: 5, y: 5, w: 5, h: 5 } }] }] }; The Child bounds are out-of-bound with relation to its parent (Parent), so branch traversal should stop when traversing to Child (no point traversing to Grandchild). The question is: How can this be implemented with a fold function? One solution is to stop branch traversal when the accumulator returns an agreed valued (say undefined). But this is somewhat a departure from the fold API for lists. Pre and post visit The rendering algorithm involves: fill( shape ); renderChildren( shape ); stroke( shape ); I wonder how this can be achieved with something like traverse() or each(). Should these take 2 callbacks (pre, post)? Traversal strategies Tree traversal may be: Depth or Breadth first. Top-down or Bottom-up (for the latter, see a specialised example for transforming an AST, using reduce). With lists, we have functions like reverse(). Lazy.js allows adding a custom iterator that can then be chained. 
So it seems the FP way to handle traversal strategy is a transforming function. Is there anything else to it? Summary I've touched upon a few of the challenges in implementing a rendering algorithm for a tree structure using the data piping model. I question whether any other FP approaches would be more appropriate here. And perhaps the data piping model is not fit for this sort of problem. Or perhaps I should simply forget the APIs one sees in FP libraries (which deal nearly exclusively with lists) and create one that is appropriate for the task at hand (e.g., having a map function that also involves an accumulator). I couldn't find any FP library dedicated to trees, and information out there is typically limited to very simple problems. So hopefully, someone will reply with something along the lines of 'this is how it should be done'.
As far as I have understood the details you might do as follows. It will proceed traversing those items remaining within the parent's boundaries, converting their coordinates to absolute and rendering them just afterwards. However if the child's boundaries overlaps the parent's boundaries the child and it's descendants are skipped. No conversion to absolute coordinates and rendering are done for those. function render(bounds){ console.log("Rendered:", bounds); } function relToAbs(o, b = {x: 0, y:0, w:Infinity, h:Infinity}, go = true){ go = o.bounds.x < b.w && o.bounds.y < b.h ? (o.bounds.x += b.x, o.bounds.y += b.y, render(o.bounds), go) : !go; o.children && go && (o.children = o.children.map(p => relToAbs(p,o.bounds,go))); return o; } var drawing = { name: 'Face', bounds: { x: 10, y: 10, w: 100, h: 100 }, children: [{ name: 'Left eye', bounds: { x: 200, y: 10, w: 20, h: 20 }, // Abs: (20, 20, 20, 20) children: [{ name: 'Left pupil', bounds: { x: 5, y: 5, w: 10, h: 10 } // Abs: (25, 25, 10, 10) }] }, { name: 'Right eye', bounds: { x: 70, y: 10, w: 20, h: 20 }, // Abs: (80, 20, 20, 20) children: [{ name: 'Right pupil', bounds: { x: 5, y: 5, w: 10, h: 10 } // Abs: (85, 25, 10, 10) }] }] }; console.log(JSON.stringify(relToAbs(drawing),null,2));