The AWS Rekognition JavaScript API states that for the rekognition.compareFaces(params, ...) method, the SourceImage and TargetImage can take Bytes or S3Object. I want to use Bytes, which can be
"Bytes — (Buffer, Typed Array, Blob, String)"
Blob of image bytes up to 5 MBs.
When I pass the Base64-encoded string of the images, the JS SDK re-encodes it again (i.e. it gets double encoded), so the server responds with an error saying
{"__type":"InvalidImageFormatException","Message":"Invalid image
encoding"}
Did anyone manage to use the compareFaces JS SDK API with base64-encoded images (not S3Object)? Any JavaScript example using the Bytes param would help.
The technique from this AWS Rekognition JS SDK Invalid image encoding error thread worked.
Convert the base64 image encoding to an ArrayBuffer:
function getBinary(base64Image) {
var binaryImg = atob(base64Image);
var length = binaryImg.length;
var ab = new ArrayBuffer(length);
var ua = new Uint8Array(ab);
for (var i = 0; i < length; i++) {
ua[i] = binaryImg.charCodeAt(i);
}
return ab;
}
Pass into rekognition as Bytes parameter:
var data = canvas.toDataURL('image/jpeg');
var base64Image = data.replace(/^data:image\/(png|jpeg|jpg);base64,/, '');
var imageBytes = getBinary(base64Image);
var rekognitionRequest = {
CollectionId: collectionId,
Image: {
Bytes: imageBytes
}
};
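Since the original question was about compareFaces, here is a minimal sketch (my own addition, not from the linked thread) that passes bytes prepared the same way as SourceImage and TargetImage. It assumes a configured AWS.Rekognition client named rekognition and two base64 strings, sourceBase64 and targetBase64, with the data-URL prefix already stripped:
// Hedged sketch: both images go through getBinary() above before the call (AWS SDK for JavaScript v2).
var compareParams = {
  SourceImage: { Bytes: getBinary(sourceBase64) },
  TargetImage: { Bytes: getBinary(targetBase64) },
  SimilarityThreshold: 80
};
rekognition.compareFaces(compareParams, function (err, data) {
  if (err) console.error(err);
  else console.log(data.FaceMatches);
});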
Based on the answer supplied by @Sean, I wanted to add another way to get the bytes from a URL request using axios and pass them to rekognition.detectLabels() -- or to the various other detection methods of Amazon Rekognition.
I went ahead and created a promise wrapper for fs.readFile that should work with the async/await structure, plus some regex to determine whether a URL fetch or a file read is needed as a fallback.
I've also added a check for the "Gray" and "World Of Warcraft" labels. Not sure if anyone else experiences that, but lorempixel seems to throw those labels every once in a while. I've seen them show up on an all-black image before as well.
/* jshint esversion: 6, node:true, devel: true, undef: true, unused: true */
const AWS = require('aws-sdk'),
axios = require('axios'),
fs = require('fs'),
path = require('path');
// Get credentials from environmental variables.
const {S3_ACCESS_KEY, S3_SECRET_ACCESS_KEY, S3_REGION} = process.env;
// Set AWS credentials.
AWS.config.update({
accessKeyId: S3_ACCESS_KEY,
secretAccessKey: S3_SECRET_ACCESS_KEY,
region: S3_REGION
});
const rekognition = new AWS.Rekognition({
apiVersion: '2016-06-27'
});
startDetection();
// ----------------
async function startDetection() {
let found = {};
found = await detectFromPath(path.join(__dirname, 'test.jpg'));
console.log(found);
found = await detectFromPath('https://upload.wikimedia.org/wikipedia/commons/9/96/Bill_Nye%2C_Barack_Obama_and_Neil_deGrasse_Tyson_selfie_2014.jpg');
console.log(found);
found = await detectFromPath('http://placekitten.com/g/200/300');
console.log(found);
found = await detectFromPath('https://loremflickr.com/g/320/240/text');
console.log(found);
found = await detectFromPath('http://lorempixel.com/400/200/sports/');
console.log(found);
// Sometimes 'Grey' and 'World Of Warcraft' are the only labels...
if (found && found.labels.length === 2 && found.labels.some(i => i.Name === 'Gray') && found.labels.some(i => i.Name === 'World Of Warcraft')) {
console.log('⚠️', '\n\tMaybe this is a bad image...`Gray` and `World Of Warcraft`???\n');
}
}
// ----------------
/**
* @param {string} path URL or filepath on your local machine.
* @param {Number} maxLabels
* @param {Number} minConfidence
* @param {array} attributes
*/
async function detectFromPath(path, maxLabels, minConfidence, attributes) {
// Convert path to base64 Buffer data.
const bytes = (/^https?:\/\//gm.exec(path)) ?
await getBase64BufferFromURL(path) :
await getBase64BufferFromFile(path);
// Invalid data.
if (!bytes)
return {
path,
faces: [],
labels: [],
text: [],
celebs: [],
moderation: []
};
// Pass buffer to rekognition methods.
let labels = await detectLabelsFromBytes(bytes, maxLabels, minConfidence),
text = await detectTextFromBytes(bytes),
faces = await detectFacesFromBytes(bytes, attributes),
celebs = await recognizeCelebritiesFromBytes(bytes),
moderation = await detectModerationLabelsFromBytes(bytes, minConfidence);
// Filter out specific values.
labels = labels && labels.Labels ? labels.Labels : [];
faces = faces && faces.FaceDetails ? faces.FaceDetails : [];
text = text && text.TextDetections ? text.TextDetections.map(i => i.DetectedText) : [];
celebs = celebs && celebs.CelebrityFaces ? celebs.CelebrityFaces.map(i => ({
Name: i.Name,
MatchConfidence: i.MatchConfidence
})) : [];
moderation = moderation && moderation.ModerationLabels ? moderation.ModerationLabels.map(i => ({
Name: i.Name,
Confidence: i.Confidence
})) : [];
// Return collection.
return {
path,
faces,
labels,
text,
celebs,
moderation
};
}
/**
* https://nodejs.org/api/fs.html#fs_fs_readfile_path_options_callback
*
* @param {string} filename
*/
function getBase64BufferFromFile(filename) {
return (new Promise(function(resolve, reject) {
fs.readFile(filename, 'base64', (err, data) => {
if (err) return reject(err);
resolve(Buffer.from(data, 'base64'));
});
})).catch(error => {
console.log('[ERROR]', error);
});
}
/**
* https://github.com/axios/axios
*
* @param {string} url
*/
function getBase64BufferFromURL(url) {
return axios
.get(url, {
responseType: 'arraybuffer'
})
.then(response => Buffer.from(response.data)) // response.data is already an ArrayBuffer (responseType: 'arraybuffer')
.catch(error => {
console.log('[ERROR]', error);
});
}
/**
* https://docs.aws.amazon.com/rekognition/latest/dg/labels.html
* https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Rekognition.html#detectLabels-property
*
* @param {Buffer} bytes
* @param {Number} maxLabels
* @param {Number} minConfidence
*/
function detectLabelsFromBytes(bytes, maxLabels, minConfidence) {
return rekognition
.detectLabels({
Image: {
Bytes: bytes
},
MaxLabels: typeof maxLabels !== 'undefined' ? maxLabels : 1000,
MinConfidence: typeof minConfidence !== 'undefined' ? minConfidence : 50.0
})
.promise()
.catch(error => {
console.error('[ERROR]', error);
});
}
/**
* https://docs.aws.amazon.com/rekognition/latest/dg/text-detection.html
* https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Rekognition.html#detectText-property
*
* @param {Buffer} bytes
*/
function detectTextFromBytes(bytes) {
return rekognition
.detectText({
Image: {
Bytes: bytes
}
})
.promise()
.catch(error => {
console.error('[ERROR]', error);
});
}
/**
* https://docs.aws.amazon.com/rekognition/latest/dg/celebrities.html
* https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Rekognition.html#recognizeCelebrities-property
*
* @param {Buffer} bytes
*/
function recognizeCelebritiesFromBytes(bytes) {
return rekognition
.recognizeCelebrities({
Image: {
Bytes: bytes
}
})
.promise()
.catch(error => {
console.error('[ERROR]', error);
});
}
/**
* https://docs.aws.amazon.com/rekognition/latest/dg/moderation.html
* https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Rekognition.html#detectModerationLabels-property
*
* @param {Buffer} bytes
* @param {Number} minConfidence
*/
function detectModerationLabelsFromBytes(bytes, minConfidence) {
return rekognition
.detectModerationLabels({
Image: {
Bytes: bytes
},
MinConfidence: typeof minConfidence !== 'undefined' ? minConfidence : 60.0
})
.promise()
.catch(error => {
console.error('[ERROR]', error);
});
}
/**
* https://docs.aws.amazon.com/rekognition/latest/dg/faces.html
* https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Rekognition.html#detectFaces-property
*
* @param {Buffer} bytes
* @param {array} attributes Attributes can be "ALL" or "DEFAULT". "DEFAULT" includes: BoundingBox, Confidence, Landmarks, Pose, and Quality.
*/
function detectFacesFromBytes(bytes, attributes) {
return rekognition
.detectFaces({
Image: {
Bytes: bytes
},
Attributes: typeof attributes !== 'undefined' ? attributes : ['ALL']
})
.promise()
.catch(error => {
console.error('[ERROR]', error);
});
}
/**
* https://docs.aws.amazon.com/rekognition/latest/dg/API_CompareFaces.html
* https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Rekognition.html#compareFaces-property
*
* @param {Buffer} sourceBytes
* @param {Buffer} targetBytes
* @param {Number} similarityThreshold
*/
function compareFaces(sourceBytes, targetBytes, similarityThreshold) {
return rekognition
.compareFaces({
SourceImage: {
Bytes: sourceBytes
},
TargetImage: {
Bytes: targetBytes
},
SimilarityThreshold: typeof similarityThreshold !== 'undefined' ? similarityThreshold : 0.0
})
.promise()
.catch(error => {
console.error('[ERROR]', error);
});
}
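Example usage of the function above (a sketch; source.jpg and target.jpg are placeholder filenames), reusing the helpers defined earlier in this script:
(async () => {
  const source = await getBase64BufferFromFile(path.join(__dirname, 'source.jpg'));
  const target = await getBase64BufferFromFile(path.join(__dirname, 'target.jpg'));
  // Compare the two faces with an 80% similarity threshold.
  const result = await compareFaces(source, target, 80.0);
  console.log(result && result.FaceMatches);
})();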
Resources:
https://github.com/axios/axios
https://docs.aws.amazon.com/rekognition/latest/dg/labels.html
https://docs.aws.amazon.com/rekognition/latest/dg/text-detection.html
https://docs.aws.amazon.com/rekognition/latest/dg/celebrities.html
https://docs.aws.amazon.com/rekognition/latest/dg/moderation.html
https://docs.aws.amazon.com/rekognition/latest/dg/faces.html
https://docs.aws.amazon.com/rekognition/latest/dg/API_CompareFaces.html
https://docs.aws.amazon.com/rekognition/latest/dg/image-bytes-javascript.html
AWS JavaScript SDK Reference:
detectLabels
detectText
recognizeCelebrities
detectModerationLabels
detectFaces
compareFaces
Reference:
Download an image using Axios and convert it to base64
Upload a binary file to S3 using AWS SDK for Node.js
AWS Rekognition JS SDK Invalid image encoding error
Pipe a stream to s3.upload()
untarring files to S3 fails, not sure why
Using Promises with fs.readFile in a loop
How do I return the response from an asynchronous call?
NodeJS UnhandledPromiseRejectionWarning
How do I check whether an array contains a string in TypeScript?
Do you need to use path.join in node.js?
I was running into a similar issue when reading in a file in Node as a byte array buffer and sending it to Rekognition.
I solved it by instead reading in the base64 representation, then turning it into a buffer like this:
const aws = require('aws-sdk');
const fs = require('fs');
const rekognition = new aws.Rekognition({
apiVersion: '2016-06-27'
});
// pull base64 representation of image from file system (or somewhere else)
fs.readFile('./test.jpg', 'base64', (err, data) => {
// create a new buffer out of the string passed to us by fs.readFile()
const buffer = Buffer.from(data, 'base64');
// now that we have things in the right type, send it to rekognition
rekognition.detectLabels({
Image: {
Bytes: buffer
}
}).promise()
.then((res) => {
// print out the labels that rekognition sent back
console.log(res);
});
});
This might also be relevant to people getting the Expected params.Image.Bytes to be a string, Buffer, Stream, Blob, or typed array object message.
It seems that converting the string to a buffer works more consistently, but documentation on it is very hard to find.
For Node, you can use this to convert the params from the string (make sure you strip off the data:image/...;base64, prefix, i.e. everything up to and including the comma):
var params = {
CollectionId: collectionId,
Image: {
Bytes: Buffer.from(imageBytes, 'base64')
}
};
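For example, one quick way to strip that prefix (dataUrl here is an assumed data-URL string, e.g. the output of canvas.toDataURL()):
// Everything after the first comma is the raw base64 payload.
var imageBytes = dataUrl.split(',')[1];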
In normal (browser) JS, you can convert with atob and pass an ArrayBuffer using this code:
function getBinary(base64Image) {
var binaryImg = atob(base64Image);
var length = binaryImg.length;
var ab = new ArrayBuffer(length);
var ua = new Uint8Array(ab);
for (var i = 0; i < length; i++) {
ua[i] = binaryImg.charCodeAt(i);
}
return ab;
}
I had the same issue and here is how I solved it.
Amazon Rekognition only supports the JPEG and PNG image formats.
That means if you input an image encoded in any other format, such as WebP, you will always get this same error.
After converting the images that were not JPEG or PNG encoded to JPEG, the problem was solved.
Hope this helps you solve it too!
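As an illustration only (the sharp package is not part of the original answer, just one way to do the conversion in Node), re-encoding an arbitrary image buffer to JPEG before sending it to Rekognition could look like this:
const sharp = require('sharp'); // assumed dependency: npm install sharp

// Re-encode whatever format the input buffer is in (e.g. WebP) to JPEG, which Rekognition accepts.
async function toJpegBuffer(inputBuffer) {
  return sharp(inputBuffer).jpeg().toBuffer();
}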
Related
Problem Statement:
I am getting a text file where the byte array of a binary file is stored as comma-separated values on a single line.
For example: 82,19,176,106,0,0,0,4,247,35,186,20,87,143,18,120,44,76,100
The string is very long and everything is on a single line; I have no control over this because it depends on the binary file size.
I have to read this byte array and convert it back to the original binary file.
Implemented Logic:
using Node.js and FS
var instream = fs.createReadStream('stream1.txt',{ highWaterMark: 1 * 1024 , encoding: 'utf8' });
instream.on("data", function(line) {
lineCount++;
var splitArray = line.split(',');
var uintArray = new Uint8Array(splitArray);
chunks.push(uintArray);
console.log(lineCount);
});
instream.on("end", function() {
var fullUint8Array = concatenate(chunks);
fs.writeFile("abc.prt", Buffer.from(fullUint8Array), function (err) {
if (err) {
console.log(err);
} else {
console.log("Done");
}
});
});
I am not able to get the original binary file; it always comes out corrupted.
If I read the file in a single chunk and try the above solution, it works. But this cannot always be done, because converting a very big string array to a Uint8Array gives a memory error.
When I read the string in chunks instead, I am not able to reconstruct the binary file.
I cannot figure out what I am doing wrong. Technologies to be used: Node.js, JavaScript.
Update: added samples to the question.
This is a sample stream. (stream1.txt)
This is the original binary file which is needed as output after reading stream1.txt.
Link to the files
Code for concatenate
//For joining uInt8Arrays
function concatenate(arrays) {
let totalLength = 0;
for (const arr of arrays) {
totalLength += arr.length;
}
const result = new Uint8Array(totalLength);
let offset = 0;
for (const arr of arrays) {
result.set(arr, offset);
offset += arr.length;
}
return result;
}
I am not able to get the original binary file. It is always getting
corrupted.
No, it's not corrupted. The string was split by commas, its unencoded character values were put into a Uint8Array, and later the file was saved with that data.
This is more or less what's happening:
let line = "82,19,176,106,0,0,0,4,247,35,186,20,87,143,18,120,44,76,100";
let result = line.split(',').map(pr => String.fromCharCode(Number(pr))).join('');
console.log(result);
// Solution 1
let encoded = line.split('').map(npr => npr.charCodeAt(0));
result = encoded.map(pr => String.fromCharCode(pr)).join('');
console.log(result);
// Solution 2
const encoder = new TextEncoder();
const decoder = new TextDecoder();
encoded = encoder.encode(line);
result = decoder.decode(encoded);
console.log(result);
If you apply the code above, it might look like this:
const fs = require('fs');
let lineCount = 0;
let chunks = [];
const encoder = new TextEncoder();
function concatenate(chunks) {
return chunks.reduce((acc, chunk) => {
return new Uint8Array([...acc, ...chunk]);
}, new Uint8Array([]));
}
var instream = fs.createReadStream('stream1.txt',{ highWaterMark: 1 * 1024 , encoding: 'utf8' });
instream.on("data", function(line) {
lineCount++;
var splitArray = line.split(',');
var uintArray = encoder.encode(line);
chunks.push(uintArray);
});
instream.on("end", function() {
var fullUint8Array = concatenate(chunks);
fs.writeFile("abc.prt", Buffer.from(fullUint8Array, 'utf-8'), function (err) {
if (err) {
console.log(err);
} else {
console.log("Done");
}
});
});
If I am reading a file in single chunk and try the above solution it
will work. But always this cannot be done because if try to convert a
very big string array to uint8Array it gives memory error.
You can reduce the memory footprint by creating a write stream and writing the data to it immediately.
Example
const fs = require('fs');
let lineCount = 0;
let chunks = [];
const encoder = new TextEncoder();
var outputStream = fs.createWriteStream("abc.prt");
var inputStream = fs.createReadStream('stream1.txt',{ highWaterMark: 1 * 1024 , encoding: 'utf8' });
outputStream.on("open", function() {
inputStream.on("data", function(line) {
lineCount++;
var splitArray = line.split(',');
var uintArray = encoder.encode(line);
outputStream.write(uintArray);
});
inputStream.on("end", function() {
outputStream.close();
})
})
If you are reading a file in chunks, you need to adjust your splitting logic to cope with that. Your code probably produces corrupt results because an input string like 82,19,176,106,0,0 could be read as 82,19,17 + 6,106,0,0 or 82,19 + ,176,106, + 0,0.
Instead, you need to make sure that you always read whole byte values: if a value is not followed by a comma or the end of the file, you cannot process it yet. I'd recommend doing this with a Transform stream (see also this article about the technique):
import { createReadStream, createWriteStream } from 'fs';
import { pipeline, Transform } from 'stream';
const parseCommaSeparatedBytes = new Transform({
transform(chunk, encoding, callback) {
const prefix = this.leftover || '';
const string = prefix + chunk.toString();
// TODO: validate inputs to be numeric and in the byte range
const splitArray = string.split(',');
if (splitArray.length)
this.leftover = splitArray.pop();
this.push(new Uint8Array(splitArray));
callback();
},
flush(callback) {
const last = this.leftover || '';
if (last.length)
this.push(new Uint8Array([last]));
callback();
},
});
const instream = createReadStream('stream1.txt', {
highWaterMark: 1024,
encoding: 'utf8'
});
const outstream = createWriteStream('abc.prt');
pipeline(instream, parseCommaSeparatedBytes, outstream, function (err) {
if (err) {
console.error(err);
} else {
console.log("Done");
}
});
Currently my Node.js code running on my local machine takes a JSON Object, creates a zip file of the JSON data locally, and then uploads that newly created zip file to a destination web service. The JSON Object passed in as a parameter is the result of querying a db and aggregating the results into one large JSON Object.
The problem I am facing is that I am unsure how to avoid creating the zip file locally prior to making the HTTP POST request.
/**
* Makes an HTTPS POST call to a specific destination endpoint to submit a form.
* @param {string} hostname - The hostname of the destination endpoint.
* @param {string} path - The path on the destination endpoint.
* @param {Object} formData - the form of key-value pairs to submit.
* @param {Object} formHeaders - the headers that need to be attached to the http request.
* @param {Object} jsonData - a large JSON Object that needs to be compressed prior to sending.
* @param {Object} logger - a logging mechanism.
*/
function uploadDataToServer(hostname, path, formData, formHeaders, jsonData, logger){
var jsonData = {}; // large amount of JSON Data HERE.
var hostname = ""; // destination serverless
var path = ""; //destination path
let url = 'https://'+hostname+path;
return new Promise((resolve, reject) => {
var zip = new JSZip();
zip.file("aggResults.json", JSON.stringify(jsonData));
const zipFile = zip
.generateNodeStream({type:'nodebuffer',streamFiles:true})
.pipe(fs.createWriteStream('test.zip'))
.on('finish', function () {
const readStream = fs.createReadStream('test.zip');
console.log("test.zip written.");
request.post(url, {
headers:formHeaders,
formData: {
uploadFile:readStream
},
}, function (err, resp, body) {
if (err) {
reject(err);
} else {
resolve(body);
}
})
})
});
}
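One way to avoid the intermediate file, sketched under the assumption that the same JSZip, request, formHeaders and jsonData are available as above, is to pipe the stream returned by generateNodeStream straight into the multipart body. The request library's formData option accepts a readable stream wrapped in { value, options }:
// Sketch only: stream the generated zip directly into the POST body instead of writing test.zip to disk first.
function uploadDataToServer(url, formHeaders, jsonData) {
  return new Promise((resolve, reject) => {
    const zip = new JSZip();
    zip.file('aggResults.json', JSON.stringify(jsonData));
    request.post(url, {
      headers: formHeaders,
      formData: {
        uploadFile: {
          value: zip.generateNodeStream({ type: 'nodebuffer', streamFiles: true }),
          options: { filename: 'aggResults.zip', contentType: 'application/zip' }
        }
      }
    }, (err, resp, body) => err ? reject(err) : resolve(body));
  });
}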
I have a web app that needs to upload large files to Azure BLOB storage. My solution uses the HTML5 File API to slice the file into chunks, which are then put as blob blocks; the IDs of the blocks are stored in an array and the blocks are then committed as a blob.
The solution works fine in IE. On 64-bit Chrome I have successfully uploaded 4GB files but see very heavy memory usage (2GB+). On 32-bit Chrome the specific Chrome process will get to around 500-550MB and then crash.
I can't see any obvious memory leaks or things I can change to help garbage collection. I store the block IDs in an array, so obviously there will be some memory creep, but this shouldn't be massive. It's almost as if the File API is holding the whole file it slices in memory.
It's written as an Angular service called from a controller; I think just the service code is pertinent:
(function() {
'use strict';
angular
.module('app.core')
.factory('blobUploadService',
[
'$http', 'stringUtilities',
blobUploadService
]);
function blobUploadService($http, stringUtilities) {
var defaultBlockSize = 1024 * 1024; // Default to 1024KB
var stopWatch = {};
var state = {};
var initializeState = function(config) {
var blockSize = defaultBlockSize;
if (config.blockSize) blockSize = config.blockSize;
var maxBlockSize = blockSize;
var numberOfBlocks = 1;
var file = config.file;
var fileSize = file.size;
if (fileSize < blockSize) {
maxBlockSize = fileSize;
}
if (fileSize % maxBlockSize === 0) {
numberOfBlocks = fileSize / maxBlockSize;
} else {
numberOfBlocks = parseInt(fileSize / maxBlockSize, 10) + 1;
}
return {
maxBlockSize: maxBlockSize,
numberOfBlocks: numberOfBlocks,
totalBytesRemaining: fileSize,
currentFilePointer: 0,
blockIds: new Array(),
blockIdPrefix: 'block-',
bytesUploaded: 0,
submitUri: null,
file: file,
baseUrl: config.baseUrl,
sasToken: config.sasToken,
fileUrl: config.baseUrl + config.sasToken,
progress: config.progress,
complete: config.complete,
error: config.error,
cancelled: false
};
};
/* config: {
baseUrl: // baseUrl for blob file uri (i.e. http://<accountName>.blob.core.windows.net/<container>/<blobname>),
sasToken: // Shared access signature querystring key/value prefixed with ?,
file: // File object using the HTML5 File API,
progress: // progress callback function,
complete: // complete callback function,
error: // error callback function,
blockSize: // Use this to override the defaultBlockSize
} */
var upload = function(config) {
state = initializeState(config);
var reader = new FileReader();
reader.onloadend = function(evt) {
if (evt.target.readyState === FileReader.DONE && !state.cancelled) { // DONE === 2
var uri = state.fileUrl + '&comp=block&blockid=' + state.blockIds[state.blockIds.length - 1];
var requestData = new Uint8Array(evt.target.result);
$http.put(uri,
requestData,
{
headers: {
'x-ms-blob-type': 'BlockBlob',
'Content-Type': state.file.type
},
transformRequest: []
})
.success(function(data, status, headers, config) {
state.bytesUploaded += requestData.length;
var percentComplete = ((parseFloat(state.bytesUploaded) / parseFloat(state.file.size)) * 100
).toFixed(2);
if (state.progress) state.progress(percentComplete, data, status, headers, config);
uploadFileInBlocks(reader, state);
})
.error(function(data, status, headers, config) {
if (state.error) state.error(data, status, headers, config);
});
}
};
uploadFileInBlocks(reader, state);
return {
cancel: function() {
state.cancelled = true;
}
};
};
function cancel() {
stopWatch = {};
state.cancelled = true;
return true;
}
function startStopWatch(handle) {
if (stopWatch[handle] === undefined) {
stopWatch[handle] = {};
stopWatch[handle].start = Date.now();
}
}
function stopStopWatch(handle) {
stopWatch[handle].stop = Date.now();
var duration = stopWatch[handle].stop - stopWatch[handle].start;
delete stopWatch[handle];
return duration;
}
var commitBlockList = function(state) {
var uri = state.fileUrl + '&comp=blocklist';
var requestBody = '<?xml version="1.0" encoding="utf-8"?><BlockList>';
for (var i = 0; i < state.blockIds.length; i++) {
requestBody += '<Latest>' + state.blockIds[i] + '</Latest>';
}
requestBody += '</BlockList>';
$http.put(uri,
requestBody,
{
headers: {
'x-ms-blob-content-type': state.file.type
}
})
.success(function(data, status, headers, config) {
if (state.complete) state.complete(data, status, headers, config);
})
.error(function(data, status, headers, config) {
if (state.error) state.error(data, status, headers, config);
// called asynchronously if an error occurs
// or server returns response with an error status.
});
};
var uploadFileInBlocks = function(reader, state) {
if (!state.cancelled) {
if (state.totalBytesRemaining > 0) {
var fileContent = state.file.slice(state.currentFilePointer,
state.currentFilePointer + state.maxBlockSize);
var blockId = state.blockIdPrefix + stringUtilities.pad(state.blockIds.length, 6);
state.blockIds.push(btoa(blockId));
reader.readAsArrayBuffer(fileContent);
state.currentFilePointer += state.maxBlockSize;
state.totalBytesRemaining -= state.maxBlockSize;
if (state.totalBytesRemaining < state.maxBlockSize) {
state.maxBlockSize = state.totalBytesRemaining;
}
} else {
commitBlockList(state);
}
}
};
return {
upload: upload,
cancel: cancel,
startStopWatch: startStopWatch,
stopStopWatch: stopStopWatch
};
};
})();
Are there any ways I can move the scope of objects to help with Chrome's GC? I have seen other people mentioning similar issues, but understood Chromium had resolved some of them.
I should say my solution is heavily based on Gaurav Mantri's blog post here:
http://gauravmantri.com/2013/02/16/uploading-large-files-in-windows-azure-blob-storage-using-shared-access-signature-html-and-javascript/#comment-47480
I can't see any obvious memory leaks or things I can change to help
garbage collection. I store the block IDs in an array so obviously
there will be some memory creeep but this shouldn't be massive. It's
almost as if the File API is holding the whole file it slices into
memory.
You are correct. The new Blobs created by .slice() are being held in memory.
The solution is to call Blob.prototype.close() on the Blob reference when processing of the Blob or File object is complete.
Note also that the JavaScript at the Question creates a new instance of FileReader each time the upload function is called.
4.3.1. The slice method
The slice() method returns a new Blob object with bytes ranging
from the optional start parameter up to but not including the
optional end parameter, and with a type attribute that is the
value of the optional contentType parameter.
Blob instances exist for the life of document. Though Blob should be garbage collected once removed from Blob URL Store
9.6. Lifetime of Blob URLs
Note: User agents are free to garbage collect resources removed from
the Blob URL Store.
Each Blob must have an internal snapshot state, which must be
initially set to the state of the underlying storage, if any such
underlying storage exists, and must be preserved through
StructuredClone. Further normative definition of snapshot state can
be found for Files.
4.3.2. The close method
The close() method is said to close a Blob, and must act as
follows:
If the readability state of the context object is CLOSED, terminate this algorithm.
Otherwise, set the readability state of the context object to CLOSED.
If the context object has an entry in the Blob URL Store, remove the entry that corresponds to the context object.
If Blob object is passed to URL.createObjectURL(), call URL.revokeObjectURL() on Blob or File object, then call .close().
The revokeObjectURL(url) static method
Revokes the Blob URL provided in the string url by removing the corresponding entry from the Blob URL Store. This method must act
as follows:
1. If the url refers to a Blob that has a readability state of CLOSED OR if the value provided for the url argument is
not a Blob URL, OR if the value provided for the url argument does
not have an entry in the Blob URL Store, this method call does
nothing. User agents may display a message on the error console.
2. Otherwise, user agents must remove the entry from the Blob URL Store for url.
You can view the result of these calls by opening
chrome://blob-internals
reviewing details of before and after calls which create Blob and close Blob.
For example, from
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain
Type: data
Length: 3
to
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain
following call to .close(). Similarly from
blob:http://example.com/c2823f75-de26-46f9-a4e5-95f57b8230bd
Uuid: 29e430a6-f093-40c2-bc70-2b6838a713bc
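Putting the advice above into a small helper (a sketch only; Blob.prototype.close() was only ever proposed and is not implemented in current browsers, so the call is feature-detected, just as the snippet further below does with blob.hasOwnProperty("close")):
// Release a finished Blob slice and its object URL, where the browser supports it.
function releaseBlob(blob, url) {
  if (url) URL.revokeObjectURL(url);              // removes the entry from the Blob URL Store
  if (blob && typeof blob.close === 'function') { // close() only where implemented
    blob.close();
  }
}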
An alternative approach could be to send the file as an ArrayBuffer, or as chunks of ArrayBuffers, and then re-assemble the file at the server.
Or you can call the FileReader constructor, FileReader.prototype.readAsArrayBuffer(), and the load event of FileReader each only once.
At the load event of FileReader, pass the ArrayBuffer to a Uint8Array, then use ReadableStream, TypedArray.prototype.subarray(), .getReader() and .read() to get N chunks of the ArrayBuffer as TypedArrays at pull from the Uint8Array. When N chunks equaling the .byteLength of the ArrayBuffer have been processed, pass the array of Uint8Arrays to the Blob constructor to recombine the file parts into a single file in the browser; then send the Blob to the server.
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<input id="file" type="file">
<br>
<progress value="0"></progress>
<br>
<output for="file"><img alt="preview"></output>
<script type="text/javascript">
const [input, output, img, progress, fr, handleError, CHUNK] = [
document.querySelector("input[type='file']")
, document.querySelector("output[for='file']")
, document.querySelector("output img")
, document.querySelector("progress")
, new FileReader
, (err) => console.log(err)
, 1024 * 1024
];
progress.addEventListener("progress", e => {
progress.value = e.detail.value;
e.detail.promise();
});
let [chunks, NEXT, CURR, url, blob] = [Array(), 0, 0];
input.onchange = () => {
NEXT = CURR = progress.value = progress.max = chunks.length = 0;
if (url) {
URL.revokeObjectURL(url);
if (blob.hasOwnProperty("close")) {
blob.close();
}
}
if (input.files.length) {
console.log(input.files[0]);
progress.max = input.files[0].size;
progress.step = progress.max / CHUNK;
fr.readAsArrayBuffer(input.files[0]);
}
}
fr.onload = () => {
const VIEW = new Uint8Array(fr.result);
const LEN = VIEW.byteLength;
const {type, name:filename} = input.files[0];
const stream = new ReadableStream({
pull(controller) {
if (NEXT < LEN) {
controller
.enqueue(VIEW.subarray(NEXT, !NEXT ? CHUNK : CHUNK + NEXT));
NEXT += CHUNK;
} else {
controller.close();
}
},
cancel(reason) {
console.log(reason);
throw new Error(reason);
}
});
const [reader, processData] = [
stream.getReader()
, ({value, done}) => {
if (done) {
return reader.closed.then(() => chunks);
}
chunks.push(value);
return new Promise(resolve => {
progress.dispatchEvent(
new CustomEvent("progress", {
detail:{
value:CURR += value.byteLength,
promise:resolve
}
})
);
})
.then(() => reader.read().then(data => processData(data)))
.catch(e => reader.cancel(e))
}
];
reader.read()
.then(data => processData(data))
.then(data => {
blob = new Blob(data, {type});
console.log("complete", data, blob);
if (/image/.test(type)) {
url = URL.createObjectURL(blob);
img.onload = () => {
img.title = filename;
input.value = "";
}
img.src = url;
} else {
input.value = "";
}
})
.catch(e => handleError(e))
}
</script>
</body>
</html>
plnkr http://plnkr.co/edit/AEZ7iQce4QaJOKut71jk?p=preview
You can also use utilize fetch()
fetch(new Request("/path/to/server/", {method:"PUT", body:blob}))
To transmit body for a request request, run these
steps:
Let body be request’s body.
If body is null, then queue a fetch task on request to process request end-of-body for request and abort these steps.
Let read be the result of reading a chunk from body’s stream.
When read is fulfilled with an object whose done property is false and whose value property is a Uint8Array object, run these
substeps:
Let bytes be the byte sequence represented by the Uint8Array object.
Transmit bytes.
Increase body’s transmitted bytes by bytes’s length.
Run the above step again.
When read is fulfilled with an object whose done property is true, queue a fetch task on request to process request end-of-body
for request.
When read is fulfilled with a value that matches with neither of the above patterns, or read is rejected, terminate the ongoing
fetch with reason fatal.
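Put together, a minimal fetch() sketch for the re-assembled blob (the endpoint path is a placeholder):
fetch('/path/to/server/', { method: 'PUT', body: blob })
  .then(response => {
    if (!response.ok) throw new Error('Upload failed: ' + response.status);
    console.log('upload complete');
  })
  .catch(e => handleError(e));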
See also
Progress indicators for fetch?
Fetch with ReadableStream
I want to know how to save a file to local disk. For example, I want to save a file to the C: drive using Meteor. Help me to achieve this.
Thanks in advance.
You can pass the data to a server method and then use the file system module to write the file to local disk.
Client side code :
Template code:
<template name="example">
<input type=file />
</template>
Template event code:
Template.example.events({
'change input': function(ev) {
_.each(ev.srcElement.files, function(file) {
Meteor.saveFile(file, file.name);
});
}
});
Meteor.saveFile function code:
/**
* @blob (https://developer.mozilla.org/en-US/docs/DOM/Blob)
* @name the file's name
* @type the file's type: binary, text (https://developer.mozilla.org/en-US/docs/DOM/FileReader#Methods)
*
* TODO Support other encodings: https://developer.mozilla.org/en-US/docs/DOM/FileReader#Methods
* ArrayBuffer / DataURL (base64)
*/
Meteor.saveFile = function(blob, name, path, type, callback) {
var fileReader = new FileReader(),
method, encoding = 'binary', type = type || 'binary';
switch (type) {
case 'text':
// TODO Is this needed? If we're uploading content from file, yes, but if it's from an input/textarea I think not...
method = 'readAsText';
encoding = 'utf8';
break;
case 'binary':
method = 'readAsBinaryString';
encoding = 'binary';
break;
default:
method = 'readAsBinaryString';
encoding = 'binary';
break;
}
fileReader.onload = function(file) {
Meteor.call('saveFile', file.srcElement.result, name, path, encoding, callback);
}
fileReader[method](blob);
}
Server side code :
/**
* TODO support other encodings:
* http://stackoverflow.com/questions/7329128/how-to-write-binary-data-to-a-file-using-node-js
*/
Meteor.methods({
saveFile: function(blob, name, path, encoding) {
var path = cleanPath(path), fs = __meteor_bootstrap__.require('fs'),
name = cleanName(name || 'file'), encoding = encoding || 'binary',
chroot = Meteor.chroot || 'public';
// Clean up the path. Remove any initial and final '/' -we prefix them-,
// any sort of attempt to go to the parent directory '..' and any empty directories in
// between '/////' - which may happen after removing '..'
path = chroot + (path ? '/' + path + '/' : '/');
// TODO Add file existance checks, etc...
fs.writeFile(path + name, blob, encoding, function(err) {
if (err) {
throw (new Meteor.Error(500, 'Failed to save file.', err));
} else {
console.log('The file ' + name + ' (' + encoding + ') was saved to ' + path);
}
});
function cleanPath(str) {
if (str) {
return str.replace(/\.\./g,'').replace(/\/+/g,'').
replace(/^\/+/,'').replace(/\/+$/,'');
}
}
function cleanName(str) {
return str.replace(/\.\./g,'').replace(/\//g,'');
}
}
});
How can I decode the payload of a JWT using JavaScript, without a library, so that the token just returns a payload object that can be consumed by my front-end app?
Example token: xxxxxxxxx.XXXXXXXX.xxxxxxxx
And the result is the payload:
{exp: 10012016 name: john doe, scope:['admin']}
Note: this does not validate the signature, it just extracts the JSON payload from the token, which could have been tampered with.
Browser
Working unicode text JWT parser function:
function parseJwt (token) {
var base64Url = token.split('.')[1];
var base64 = base64Url.replace(/-/g, '+').replace(/_/g, '/');
var jsonPayload = decodeURIComponent(window.atob(base64).split('').map(function(c) {
return '%' + ('00' + c.charCodeAt(0).toString(16)).slice(-2);
}).join(''));
return JSON.parse(jsonPayload);
}
JWT uses base64url (RFC 4648 §5), so using only atob (which uses base64) isn't enough.
Node.js
function parseJwt (token) {
return JSON.parse(Buffer.from(token.split('.')[1], 'base64').toString());
}
Simple function with try/catch
const parseJwt = (token) => {
try {
return JSON.parse(atob(token.split('.')[1]));
} catch (e) {
return null;
}
};
Thanks!
You can use jwt-decode, so then you could write:
import jwt_decode from 'jwt-decode';
var token = 'eyJ0eXAiO.../// jwt token';
var decoded = jwt_decode(token);
console.log(decoded);
/*{exp: 10012016 name: john doe, scope:['admin']}*/
You can use the pure JavaScript atob() function to decode the token into a string:
atob(token.split('.')[1]);
or parse it directly into a JSON object:
JSON.parse(atob(token.split('.')[1]));
Read about the atob() and btoa() built-in JavaScript functions at Base64 encoding and decoding - Web APIs | MDN.
function parseJwt(token) {
var base64Payload = token.split('.')[1];
var payload = Buffer.from(base64Payload, 'base64');
return JSON.parse(payload.toString());
}
let payload= parseJwt("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c");
console.log("payload:- ", payload);
If using Node, you might have to use the buffer package:
npm install buffer
var Buffer = require('buffer/').Buffer
As "window" object is not present in nodejs environment,
we could use the following lines of code :
let base64Url = token.split('.')[1]; // token you get
let base64 = base64Url.replace(/-/g, '+').replace(/_/g, '/');
let decodedData = JSON.parse(Buffer.from(base64, 'base64').toString('binary'));
It's working for me perfectly. Hope it helps.
If you're using TypeScript or vanilla JavaScript, here's a zero-dependency, ready-to-copy-paste simple function for your project (building on @Rajan Maharjan's answer).
This answer is particularly good, not only because it does not depend on any npm module, but also because it does not depend on any Node.js built-in module (like Buffer) that some other solutions here are using and that would of course fail in the browser (unless polyfilled, but there's no reason to do that in the first place). Additionally, JSON.parse can fail at runtime, and this version (especially in TypeScript) will force handling of that. The JSDoc annotations will make future maintainers of your code thankful. :)
/**
* Returns a JS object representation of a JSON Web Token (JWT) from its common encoded
* string form.
*
* @template T the expected shape of the parsed token
* @param {string} token a JSON Web Token in base64-encoded, `.`-separated form
* @returns {(T | undefined)} an object representation of the token,
* or undefined if parsing failed
*/
export function getParsedJwt<T extends object = { [k: string]: string | number }>(
token: string,
): T | undefined {
try {
return JSON.parse(atob(token.split('.')[1]))
} catch {
return undefined
}
}
For completion, here's the vanilla javascript version too:
/**
* Returns a JS object representation of a JSON Web Token (JWT) from its common encoded
* string form.
*
* @param {string} token a JSON Web Token in base64-encoded, `.`-separated form
* @returns {(object | undefined)} an object representation of the token,
* or undefined if parsing failed
*/
export function getParsedJwt(token) {
try {
return JSON.parse(atob(token.split('.')[1]))
} catch (error) {
return undefined
}
}
If you use Node.js,
you can use the native Buffer module by doing:
const token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiYWRtaW4iOnRydWUsImp0aSI6ImU3YjQ0Mjc4LTZlZDYtNDJlZC05MTZmLWFjZDQzNzhkM2U0YSIsImlhdCI6MTU5NTg3NzUxOCwiZXhwIjoxNTk1ODgxMTE4fQ.WXyDlDMMSJAjOFF9oAU9JrRHg2wio-WolWAkAaY3kg4';
const tokenDecodablePart = token.split('.')[1];
const decoded = Buffer.from(tokenDecodablePart, 'base64').toString();
console.log(decoded)
And you're good to go :-)
@Peheje's answer will work, but you will have a problem with unicode.
To fix it I use the code from https://stackoverflow.com/a/30106551/5277071:
let b64DecodeUnicode = str =>
decodeURIComponent(
Array.prototype.map.call(atob(str), c =>
'%' + ('00' + c.charCodeAt(0).toString(16)).slice(-2)
).join(''))
let parseJwt = token =>
JSON.parse(
b64DecodeUnicode(
token.split('.')[1].replace(/-/g, '+').replace(/_/g, '/')
)
)
let form = document.getElementById("form")
form.addEventListener("submit", (e) => {
form.out.value = JSON.stringify(
parseJwt(form.jwt.value)
)
e.preventDefault();
})
textarea{width:300px; height:60px; display:block}
<form id="form" action="parse">
<textarea name="jwt">eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkrDtGhuIETDs8OoIiwiYWRtaW4iOnRydWV9.469tBeJmYLERjlKi9u6gylb-2NsjHLC_6kZNdtoOGsA</textarea>
<textarea name="out"></textarea>
<input type="submit" value="parse" />
</form>
I use this function to get the payload, header, exp (Expiration Time), and iat (Issued At), based on this answer:
function parseJwt(token) {
try {
// Get Token Header
const base64HeaderUrl = token.split('.')[0];
const base64Header = base64HeaderUrl.replace(/-/g, '+').replace(/_/g, '/');
const headerData = JSON.parse(window.atob(base64Header));
// Get Token payload and date's
const base64Url = token.split('.')[1];
const base64 = base64Url.replace(/-/g, '+').replace(/_/g, '/');
const dataJWT = JSON.parse(window.atob(base64));
dataJWT.header = headerData;
// TODO: add expiration at check ...
return dataJWT;
} catch (err) {
return false;
}
}
const jwtDecoded = parseJwt('YOUR_TOKEN') ;
if(jwtDecoded)
{
console.log(jwtDecoded)
}
If using node.js 16 or later, you can use the built-in base64url encoder/decoder.
let payload = JSON.parse(Buffer.from(token.split(".")[1], "base64url"));
I found this code at jwt.io and it works well.
//this is used to parse base64
function url_base64_decode(str) {
var output = str.replace(/-/g, '+').replace(/_/g, '/');
switch (output.length % 4) {
case 0:
break;
case 2:
output += '==';
break;
case 3:
output += '=';
break;
default:
throw 'Illegal base64url string!';
}
var result = window.atob(output); // polyfill: https://github.com/davidchambers/Base64.js
try{
return decodeURIComponent(escape(result));
} catch (err) {
return result;
}
}
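For instance (a sketch, not part of the original snippet), the payload can then be extracted like this:
// Decode the middle segment of the token and parse it as JSON.
function parseJwtPayload(token) {
  return JSON.parse(url_base64_decode(token.split('.')[1]));
}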
In some cases (certain development platforms),
the best answer (for now) runs into a problem of invalid base64 length.
So I needed a more stable way.
I hope it helps you.
You can define and use this one-liner function:
jwtDecode = b => JSON.parse(Buffer.from(b.split('.')[1], 'base64').toString('binary'));
Not all of the libraries listed on jwt.io support every language or feature. In Node.js you can use jwt.decode from the jsonwebtoken package (see the sketch below):
var decoded = jwt.decode(token);
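A slightly fuller sketch, assuming the jsonwebtoken package (npm install jsonwebtoken); note that jwt.decode() only parses the token, it does not verify the signature:
const jwt = require('jsonwebtoken');

const decoded = jwt.decode(token);                      // payload only
const complete = jwt.decode(token, { complete: true }); // { header, payload, signature }
console.log(decoded, complete.header);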
Answer based on GitHub - auth0/jwt-decode. I altered the input/output to include string splitting and to return an object { header, payload, signature } so you can just pass in the whole token.
var jwtDecode = function (jwt) {
function b64DecodeUnicode(str) {
return decodeURIComponent(atob(str).replace(/(.)/g, function (m, p) {
var code = p.charCodeAt(0).toString(16).toUpperCase();
if (code.length < 2) {
code = '0' + code;
}
return '%' + code;
}));
}
function decode(str) {
var output = str.replace(/-/g, "+").replace(/_/g, "/");
switch (output.length % 4) {
case 0:
break;
case 2:
output += "==";
break;
case 3:
output += "=";
break;
default:
throw "Illegal base64url string!";
}
try {
return b64DecodeUnicode(output);
} catch (err) {
return atob(output);
}
}
var jwtArray = jwt.split('.');
return {
header: decode(jwtArray[0]),
payload: decode(jwtArray[1]),
signature: decode(jwtArray[2])
};
};
Simple NodeJS Solution for Decoding a JSON Web Token (JWT)
function decodeTokenComponent(value) {
const buff = Buffer.from(value, 'base64')
const text = buff.toString('ascii')
return JSON.parse(text)
}
const token = 'xxxxxxxxx.XXXXXXXX.xxxxxxxx'
const [headerEncoded, payloadEncoded, signature] = token.split('.')
const [header, payload] = [headerEncoded, payloadEncoded].map(decodeTokenComponent)
console.log('header:', header)
console.log('payload:', payload)
console.log(`signature: ${signature}`)
In Node.js (TypeScript):
import { TextDecoder } from 'util';
function decode(jwt: string) {
const { 0: encodedHeader, 1: encodedPayload, 2: signature, length } = jwt.split('.');
if (length !== 3) {
throw new TypeError('Invalid JWT');
}
const decode = (input: string): JSON => { return JSON.parse(new TextDecoder().decode(new Uint8Array(Buffer.from(input, 'base64')))); };
return { header: decode(encodedHeader), payload: decode(encodedPayload), signature: signature };
}
With jose by panva on GitHub, you could use the minimal import { decode as base64Decode } from 'jose/util/base64url' and replace new Uint8Array(Buffer.from(input, 'base64')) with base64Decode(input). Code should then work in both browser and Node.js.
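Applied to the decode helper above, the swap described would look roughly like this (a sketch; the import path is the one given above and may differ in newer versions of jose):
import { decode as base64Decode } from 'jose/util/base64url';

// Same segment decoder as above, but base64url-aware and Buffer-free.
const decodeSegment = (input: string) =>
  JSON.parse(new TextDecoder().decode(base64Decode(input)));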
Here is a more feature-rich solution I just made after studying this question:
const parseJwt = (token) => {
try {
if (!token) {
throw new Error('parseJwt# Token is required.');
}
const base64Payload = token.split('.')[1];
let payload = new Uint8Array();
try {
payload = Buffer.from(base64Payload, 'base64');
} catch (err) {
throw new Error(`parseJwt# Malformed token: ${err}`);
}
return {
decodedToken: JSON.parse(payload),
};
} catch (err) {
console.log(`Bonus logging: ${err}`);
return {
error: 'Unable to decode token.',
};
}
};
Here's some usage samples:
const unhappy_path1 = parseJwt('sk4u7vgbis4ewku7gvtybrose4ui7gvtmalformedtoken');
console.log('unhappy_path1', unhappy_path1);
const unhappy_path2 = parseJwt('sk4u7vgbis4ewku7gvtybrose4ui7gvt.malformedtoken');
console.log('unhappy_path2', unhappy_path2);
const unhappy_path3 = parseJwt();
console.log('unhappy_path3', unhappy_path3);
const { error, decodedToken } = parseJwt('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c');
if (!decodedToken.exp) {
console.log('almost_happy_path: token has illegal claims (missing expires_at timestamp)', decodedToken);
// note: exp, iat, iss, jti, nbf, prv, sub
}
I wasn't able to make that runnable in StackOverflow code snippet tool, but here's approximately what you would see if you ran that code:
I made the parseJwt function always return an object (to some degree for static-typing reasons).
This allows you to utilize syntax such as:
const { decodedToken, error } = parseJwt(token);
Then you can test at run-time for specific types of errors and avoid any naming collision.
If anyone can think of any low effort, high value changes to this code, feel free to edit my answer for the benefit of next(person).
Based on answers here and here:
const dashRE = /-/g;
const lodashRE = /_/g;
module.exports = function jwtDecode(tokenStr) {
const base64Url = tokenStr.split('.')[1];
if (base64Url === undefined) return null;
const base64 = base64Url.replace(dashRE, '+').replace(lodashRE, '/');
const jsonStr = Buffer.from(base64, 'base64').toString();
return JSON.parse(jsonStr);
};
An ES-module-friendly simplified version of jwt-decode.js:
function b64DecodeUnicode(str) {
return decodeURIComponent(
atob(str).replace(/(.)/g, function (m, p) {
var code = p.charCodeAt(0).toString(16).toUpperCase();
if (code.length < 2) {
code = "0" + code;
}
return "%" + code;
})
);
}
function base64_url_decode(str) {
var output = str.replace(/-/g, "+").replace(/_/g, "/");
switch (output.length % 4) {
case 0:
break;
case 2:
output += "==";
break;
case 3:
output += "=";
break;
default:
throw "Illegal base64url string!";
}
try {
return b64DecodeUnicode(output);
} catch (err) {
return atob(output);
}
}
export function jwtDecode(token, options) {
options = options || {};
var pos = options.header === true ? 0 : 1;
try {
return JSON.parse(base64_url_decode(token.split(".")[pos]));
} catch (e) {
console.log(e.message);
}
}