Pdfjs cant extract language other than English properly - javascript

I'm trying to extract texts and images from some pdf files of bengali language with same structure using pdf.js. But the problem is pdf.js can't output bengali properly.
Most of the characters are shown as black square with question mark inside.
Also the signs used as vowels in bengali aren’t positioned properly.
For example [মোঃ রাজু মিয়া] outputs as [ �মাঃ রাজু িময়া]. As I'm extracting images and that part is working properly. It would be nice if i can also do the text part in pdf.js with custom font or something.
My sample code:
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.9.359/pdf.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.9.359/pdf.worker.min.js"></script>
<input type="file" id="file-pdf" accept=".pdf">
<script>
const onLoadFile = async event => {
try {
// turn array buffer into typed array
const typedArray = new Uint8Array(event.target.result);
const loadingPdfDocument = pdfjsLib.getDocument(typedArray);
const pdfDocumentInstance = await loadingPdfDocument.promise;
const totalNumPages = pdfDocumentInstance.numPages;
let pn = 1
let page = await pdfDocumentInstance.getPage(pn)
let textContent = await page.getTextContent()
var textItems = textContent.items;
console.log(textItems)
} catch (error) {
console.log(error);
}
};
document.getElementById('file-pdf').addEventListener('change', event => {
const file = event.target.files[0];
if (file.type !== 'application/pdf') {
alert(`File ${file.name} is not a PDF file type`);
return;
}
const fileReader = new FileReader();
fileReader.onload = onLoadFile;
fileReader.readAsArrayBuffer(file);
});
</script>

Related

PDFtron lastest versoin imported base 64 images are low quality

I have a project in laravel 8 with vue 2, i made an implementation of pdftron where i save the signatures user creates into a database as base64 images, so far all good, problem comes when i load those images from the database, although i can see them in the signature tool they are extremely low quality and i can no longer change color to the image, which doesn't happen in the demo sites, i think that when they get exported they become something that pdftron can no longer manipulate as a newly created signature, so i would like to know if someone else has this problem and a possible solution for it, i'll put the code i'm using and the images for you here
Above saved siganture, down new signature
let that = this;
const viewerElement = document.getElementById('webviewer');
WebViewer({
path: '/js/WebViewer/lib',
initialDoc: this.initialDoc,
extension: 'pdf',
}, viewerElement)
.then((instance) => {
const { documentViewer, annotationManager } = instance.Core;
const signatureTool = documentViewer.getTool('AnnotationCreateSignature');
documentViewer.addEventListener('documentLoaded', () => {
instance.UI.setLanguage(that.locale);
let signatures = JSON.parse(that.savedSignatures);
signatures = signatures.map(a => a.base64_signature);
signatureTool.importSignatures(["data:image/png;base64, " + signatures]); //base64 images array
document.getElementById('app').setAttribute('style', 'padding: 0');
document.getElementById('loader-container').classList.add('d-none');
document.getElementById('all-pages-content').removeAttribute('style')
document.getElementById('downloadButton').setAttribute('style', 'visibility: visible')
document.getElementById('pdf-ui').setAttribute('style', 'visibility: visible')
});
documentViewer.addEventListener('annotationsLoaded', async () => {
annotationManager.addEventListener('annotationDrawn', async (annotationList) => {
console.log('1')
annotationList.forEach(annotation => {
if (annotation.Subject === "Signature")
that.extractAnnotationSignature(annotation, documentViewer);
})
})
});
let saveSignedPdf = document.getElementById('downloadButton');
saveSignedPdf.addEventListener('click', async () => {
const doc = documentViewer.getDocument();
const xfdfString = await annotationManager.exportAnnotations();
const data = await doc.getFileData({
// saves the document with annotations in it
xfdfString
});
const arr = new Uint8Array(data);
const blob = new Blob([arr], {type: 'application/pdf'});
await that.processDocument(blob)
// Add code for handling Blob here
})
// instance.disableElements(['downloadButton', 'printButton']);
// instance.disableElements(['toolbarGroup-Insert']);
return instance
});
and here is the code i use to export the images taken from official docs
async extractAnnotationSignature(annotation, docViewer) {
let that = this;
// Create a new Canvas to draw the Annotation on
const canvas = document.createElement('canvas');
// Reference the annotation from the Document
const pageMatrix = docViewer.getDocument().getPageMatrix(annotation.PageNumber);
// Set the height & width of the canvas to match the annotation
canvas.height = annotation.Height;
canvas.width = annotation.Width;
const ctx = canvas.getContext('2d');
// Translate the Annotation to the top Top Left Corner of the Canvas ie (0, 0)
ctx.translate(-annotation.X, -annotation.Y);
// Draw the Annotation onto the Canvas
annotation.draw(ctx, pageMatrix);
// Convert the Canvas to a Blob Object for Upload
canvas.toBlob((blob) => {
let formData = new FormData();
formData.append('signature', blob);
formData.append('customer_id', that.customerId);
const config = {
headers: {
'content-type': 'multipart/form-data',
'X-CSRF-TOKEN': document.querySelector('meta[name="csrf-token"]').content,
}
}
axios.post(that.saveSignatureUrl, formData, config)
.then(function (response) {
if (!response.data.success) {
console.log("could not save signature for future use")
}
else {
console.log("saved signature for future use")
}
})
.catch(function (error) {
that.output = error;
console.log("could not reach backend")
});
});
}
I know you will see several flaws in this code, I'm a rookie so please bear with me, thank you for your time, and any help is appreciated
test base64 string generated with this code
iVBORw0KGgoAAAANSUhEUgAAAMgAAAB/CAYAAACql41TAAAS0UlEQVR4Xu1dC5BU1Zk+59yenlGUpz3dvAxgVkAeJUYcEzdoBaOLy3Qju8Nu0GgeCoJAYcwCZqbHm+kZCFJmV0yMsFsxtUnAMAnQ3UMsTCCmjEkQJSwPWZIViDow3S3ER9AZpu85+/UwYwSGmenu29333v5vVVe/7vnP/3//+e55/4czuggBQuCiCHDChhAgBC6OABGESgch8DEEAsGWCsnYC5zxWyMh70tEECoehEAnAnfocZ/LUL9VjNdFQ+U/SP1MBKHiQQh0IuAPtjSBHK9GQ95Hu0AhglDxIASAgL82vpIpNQ7NqtkfB4QIQsWj6BHwV5+oYkKsKtXKbmjUB5wighR9kSAAuhAIVJ8cqURyDzobcyN13l+cjwzVIFRWihqBVL+DKb4rUu8NdQcEEaSoi0dxG++viQUZVxWRkG/mxZAgghR3GSla6ytrWj6HZlWjkCXXhhuGvEkEKdqiQIafj8C8daqkpTm+VxmqPtrg29gTQlSDUPkpOgTQ71iHmfJkOOR9sDfjiSC9IUT/OwoBfzB2Nwxadp1Wfq2uc6wq6fkigvSGEP3vGARmVx//RFJoexWTs6Khob/ui2FEkL6gRPc4AoHUkC5X4nfh+vKGvhpEBOkrUnSfrRHw17Q8wjj/LJaS3JGOIUSQdNCie22JgL86Po0JFTU0NmWb7j2SjhFEkHTQontth0DVJqW17ovt5ZyvRu3xo3QNIIKkixjdbysEKmtiT3POFMixIBPFiSCZoEZpbIEAhnTnMcXmX+cqn9qXId3ujCKC2MLVpGS6CFQGT0zFiNUuwY2pW0PDXk03fdf9RJBMkaN0lkXgFv1Xrv7Ja3Zzrr4bDvn+KxtFiSDZoEdpLYkAFiJ+X3D+YV+WkvRmABGkN4Tof1shgKgkKxQXMyJ15TeboTgRxAwUSYYlEAjUxP9ZcfWES7o/s7lh0J/NUIoIYgaKJKPgCPiD8SlMyZekYJVNdb4dZilEBDELSZJTMAQq9eNXckPDfnLxeCTkWW+mIkQQM9EkWXlHoGr5qQFt7uQvUHtsidT7VpmtABHEbERJXl4RqAzGfon4h7sRCfGRXGRMBMkFqiQzLwig3/EzBF2IIVzPwlxlSATJFbIkN6cIBIKxZxRnJSBHaodgzi4iSM6gJcG5QgBrrFYzpq5BuJ7KXOXRJZcIkmuESb6pCARqY/OlZIuSLtdNz+lD3jNVeDfCiCC5Rpjkm4YAlq7fhqXrm4XB/37ryvK9pgnuQRARJB8oUx5ZI+DX459khnoJBHkgXOfdkrXAPgoggvQRKLqtsAgg4MILiKG7DTF01+RTEyJIPtGmvDJCAOR4EltmB6DmuCcjAVkkIoJkAR4lzT0CHbsCOVtQKt6uaNQnnMl9jufmQATJN+KUX58RmBmMfVow/iJeFVtDnox3BfY5QxrFygYqSptPBKp05T4j469KxR/vOlAzn/l35UU1SCFQpzx7RaCyNv59LtUH6JQv6vXmHN5ABMkhuCQ6MwRwsM0CxdSXovW+iswkmJeKCGIeliTJBAT81cevZ1x7GWFCPxUJlf/BBJFZiSCCZAUfJTYbAYxavQKZ6xHozdSNT5nqSQTJFDlKZzoCIMf3FOMudMrvN114hgKJIBkCR8nMRQChehZzLr54nea5MdMoiOZqdFYaESQXqJLMtBBIHaiJNVap6OsV23TfgbQS5/hmIkiOASbxPSNwhx73lRjq95LxZWhabbIaXkQQq3mkyPRBv+PnijHsKfc+akXTiSBW9EqR6IRts2tAjlEYsaqyqslEEKt6xuF64Ui0+zHXsZS1iRsjj3net6q5RBCresbBelVWx/6JC4ao6/xzVpgM7AlqIoiDC6IVTZulJ26RSblTCPX5rSaGCM2VrUSQXCFLci9AwK/HJmPb7E7GxQJEX2+0A0REEDt4yQE6ztZPjkgayZ2YDHw8XOdZZxeTiCB28ZSN9URw6Uu5IXZixCoSDflW2skUIoidvGVTXQPB+Dal1CHs7fi63UwggtjNYzbTtzIYj+AU5hOY65hvM9U71CWC2NFrNtEZ0UiaUMTezPSMciuYSQSxghccpoOuK7HHiDehcB014yDNQsJDBCkk+g7Mu+MIZmNClCn1eqH3k5sBLxHEDBRJRgcC89apkthb8SaQ43C43rfECbAQQZzgRQvYMGOtKi1JgBxcHYzU+ZZaQCVTVCCCmAJjcQu5U28pNwyxCUcw74nWeb/mJDSIIE7yZgFsqdTfHseSyUbB+WZ0yC25pyMbWIgg2aBX5GkDeuwzymCN6HM8hhNmn3AiHEQQJ3o1DzbNrG2pFBLk4Pw+zHP8KA9ZFiQLIkhBYLd3pv7q+L1MyKe4UlXh+qE/t7c1PWtPBHGyd3NgW6C25SGlxEOciapw6IpdOcjCUiKJIJZyh7WVqaxtqeeKz0TE9aqm+vI/WVtbc7QjgpiDo+OlVAZb/pMzPqZUK6tq1AeccrzBnQYSQYrF0xnaOUM/2d9ltG8EOU6hM/7FDMXYNhkRxLauy73i2Og0jie1jVjz/TzIsTz3OVovByKI9XxiCY3OhgPlGzHHsdKpcxx9AZoI0heUiuweRDu8G/s4nkHNMdcuwRVy5SIiSK6QzUKuf1niclkqx3DFRnMhPNiu6uFKlqPQXoEgz/2wt7sfCm8/JtlleC9FVhpjynX2XaQ+iyyy74e0l+KEp3fR7+juVNlWNLdGZyHfVkmJIAV0V2qRXzLJJiLSxzUo1BMwK30NmjRjodJlIMVRkOEI5hviirMElzKhOL8avz0vJTuN99Ngwl+TmtamktLgzJ1krg8M1W4YblUmMzGrXVN1grMpTMkHNUN7ozsZm1d6TmQi265piCB58Nwt+tGyy42ySYJrE6WUeOcTUAtMBPhlKPypcP8HUShfQ21wkAnX4YjuOZ4HtT7KYt48VdLijW9An4O7hWduo95tzZFPlSyTFxHEZFdU1rwzmonWyVyJSagVJqOZMhHNlatR+Pcz1UGG/ZhsOyBdyQNRfVi3T2mTVepR3Cz9xChliA2ouV5xyiYnM/EjgmSIpl9/Y5iU7vFCifEAcTxqgokoZJPRT3hfMQky8P2Ci33KkAciK8v343dUGta60Bm/CcTdoKR8Klo/dLW1tLOGNkSQi/jhFl25Bre/M7xdJEdxrkYxqfDOR6E2uApJxuOFdr46lHoprr3GpDyYNNT+51YNTVjDtT1rgXA8cxCOZyPuutfJq3Gz9YXjCFK1/NSADy5pH6IxMRgd2yFSqSFC8cvRAS5VzCjDAE+pErwUh9SX4aGO31gZ+gSlUrHLAYanY6SIKQ9+H4jPzSDAMTz7j2Fc6BhqiGNIc5QlxWuRld5YtuAXKn2gNoEFh3IZ4xLDuEN/VSg97JCvrQji/0bMq9xqNDfYKDRhhiOE/nCFdxTi4WgqDEfbfjie8B/iO9YK8ZMw7iS+pz6/h6XZbZLzNvQJWvHwb0Nhb1NMa+WCt2IItU0p9lfUEAmpiYSrzZ3Ysqr/STs4MF0dMQH4bQwdf1ZrP3PXllUj/phu+mK733IECVSfHCl5Ek0YDHcKPlpgLgCFdwwIgLF31YZCfARDoMcw5NnMJWvGE70ZQ6HNSU01y8Hlzc8t4W3F5sS+2JtqMvY3EhtQO5a4B5XPbfwax4OErt4QKBhBAtXNI5mmXYsa4Gwnlym8i3F4P43vh9DAP4ymzREYcJTx5NHSfpccaVwx+N3eDKL/L0QAtcZoYLoBNcfucF25I8Lx5MvPeSHI7fqbg91J9/XoB1yPJ9j1aPLgpdwwcg9eh/DbIRwgf0hLtv8vqn1HNm3y5dDz8/FXH5+GyfUf4/cn0Rl/rFB62DXfnBCkqmqT1jr25umY7Z0OYFKvsWgWvcKFtjv1LjX1yjbdm6od6MohAmfXVKkfYsDhLhw7sCGHWTlWtGkEmaX/ZaA0zvwrkPLDKdNRW7yMzzu4MnaEQ8NedCyCFjUsUNNSLTm7H8PPd0cbhv3GompaXq2sCRKojd2JWgHE4HPwehbBw36qCfeOrfqgdyxvvUMVRM2xDoMa47iRvDvcMPxNh5qZF7MyJkjnMb7/htriLSwifbb0jPaTxtXUic6L1y6SSecxZ8+gaXsiXOe9p5C6OCXvtAnS2a5dlppEQ82xJlrv2+kUMOxsh7829nms9XoGD6ynIyFfvZ1tsZLufSbIjMV/KnUPHPB0an5CcdmAGdhtVjKkmHVBQIUlmACtx3qwLyM27s+KGQuzbe8TQe4Mnvy0wZI4mZT/Gge/LzZbCZKXOQKp/gaaVNdimfyXw/qQ1zKXRCm7Q6BXgnRW3dsxT/GVaKj8BwSjNRCo1BPXYT3ZOiyZ2WPX8/+sgWTPWvRIkFn62zfIpPFLrHm6F52+LXYwqBh0DAQT92Gx4Xr4ZQH8Ypszx+3om4sS5I5H3rra5XLvwCRTkGoO67gWTarvoalboZiYHw1dsds6mjlTk4sSBI7YjI7fy+FQ+becabq9rJoVTHwKgyNPoam7v/SQZ35jI9Y005VzBLolSOdQ7hIMF96Qcw0og14RCNTGH8SQ+lqsal4YracmVa+AmXjDBQRBNL1LueE6LLn8UlOdb4eJeZGoNBFIhf9hpfIpzG+Mkbx9YVNoxP+kKYJuzxKBCwiCw99rUvsvMAH4lSxlU/IsEOgYPWQM5JCRSGjow1mIoqRZIHAOQap05W4z4s3MYNOxpXRfFnIpaRYI+GtiQaylWoKFhgsjDUMbsxBFSbNE4ByCoO+BJSRsUjFG8c4SR1OS/6MeG6MZ7DtYrWBwaSykhYamwJqVkHMIEgjGjhlSzmlqGJpaqk5XHhEA9vegaZsixyoEi16Vx6wpqx4Q+Igg6HugzyFmYynJTEIsfwhU6QfdbckhIAavwH77RRhWp70z+YO/15z+RpDa2IuI/bQGT69Ir6noBlMQ6DhiQPAnsff+BewVX4RNZpYLLmeKoTYW0kGQQLBlEiagwuh7jLGxLbZS3V+TCHIul0ouFkXrPKkAbnRZEIGzBKlt+ReMta8Oh3yjLKijo1SaWRP/O42r76CqaOXStSjcMIR2/FnYwx0ESe31KBnY/22cLnFTRKfh3Vz5C02qr6KfsRYPIz1S712Tq3xIrnkIfKyTHnsAIyiLtWRyGoXeMQ/glKRZ+tGBKnnpWgS7G6c0uSSq+35vbg4kLVcInDPMmzoHG3FsbxSamrtF98VzlWkxyQWmAYREXQugN4VDXuzhp8tOCFy4FisY+yaaAXMQrPn+qF5O4WKy8CYCW/wHZsQDSmmLo/WepixEUdICIdD9at7q+L1MqCexW40mrTJwzMzalumoif+9IzZYK38o8pjn/QzEUBILIHDR/SBdyx4w/HuJYHIFRrh2WUBfy6uAWmMliPFVxcVSGr61vLt6VbD3PenB2APYOJXaNPXdKZonqOuIqU7XBQhUBhM3cyYfR637R83FllIfzhmFpFeCpMyc/Y3EUEOTKzF2f7OS7NFog/eHzjDfHCsQdmcVgLwPD5KH0RH/b3OkkhQrINAngnQpmtqjgMUQ30Sz610QpSHaUNydeH9N/B8wNL4aHfF9mqYeplrDCkXaXB3SIkhX1me3gMqvY4HdH5Tk3y42ovhXY6ffabkaE34zpFQrmhp8PzHXLSTNKghkRJAu5TEzvBhDwg/h+14s1X4a+6Wft4phudKjM+ROPU6/evY9cXrFC/poHOlGl1MRyIogHzW9gvFFOPfvHsbFAJwF+GPVfmZD5Fsj/89JoPn1+DRuqFrJuIbzbYPFVms6yZfp2GIKQT5qegVbKtDsuAuHZd4Fwbs5F9uZENvtHBKzc6VzaqflNLwasOJ5fToA0732RsBUgnwcCuyrno3If7eh6XU7+io4UZZtx4Gc24VRtsvqJ8h2BOoeNPA2JY0vYE4DwRP4E5E6TwPt17B3Yc9E+5wR5ByyBONTkNHtkslbMRQ6Ff+dQMDll9HB3S3w7h3h3bN+Pm/PxIBs01Tp8ctamZzIDT4ZRJiEUamUfhWpQN14/6kv5lm3fn1hdMvWNkqfPQJ5Icj5auK888lKqKlC8KmoYVLB6aagYB7Elt/9qGn249zyA4aLHzDrHMMUCdrOqDFYXzZGCDFaMXkVmoJjMTx7NQKylYO0B/G+D92LfYKpV91aYlejPuFM9vCSBLsjUBCCnA9a1Salte9NTMLhnhPxX2p340TMSuMzH4KZ6Y4TcLE19RDI9LrG+Os8WXrsbDNN8arlf+n/YUmynLuYlxtyBJpBw5FmBNJeic9X4vMnILMf7j2KeLZHUHMdQTidoyDjYRDlMOJ/4TNdhED3CFiCIBdzzq3LXx9wiavfeKGJ8bhnrFTqk5iovApP/pH4Pggvgdd7eKWW5sdAApx6xZox9PwWlpi/ge2sf+aG8QZifOE/ugiB9BGwNEF6MidV6zTOoQDO6bucUqSDwP8DoZsT2sPJFDkAAAAASUVORK5CYII=
i also tried exportSIgnatures and i got the same base64 string for already saved signature that look blurry but for new ones i got this, which i don't quite understand what it is and how to use it for reconstructing the images
[[[{"x":181.15942028985506,"y":1.4492753623188406},{"x":178.2608695652174,"y":1.4492753623188406},{"x":168.1159420289855,"y":7.246376811594203},{"x":150.7246376811594,"y":23.18840579710145},{"x":128.9855072463768,"y":40.57971014492754},{"x":85.5072463768116,"y":66.66666666666667},{"x":56.52173913043478,"y":82.6086956521739},{"x":17.391304347826086,"y":98.55072463768116},{"x":2.898550724637681,"y":104.34782608695652},{"x":1.4492753623188406,"y":104.34782608695652},{"x":1.4492753623188406,"y":102.89855072463769},{"x":4.3478260869565215,"y":89.85507246376811},{"x":17.391304347826086,"y":65.21739130434783},{"x":24.63768115942029,"y":57.971014492753625},{"x":44.927536231884055,"y":43.47826086956522},{"x":50.72463768115942,"y":43.47826086956522},{"x":59.42028985507246,"y":43.47826086956522},{"x":73.91304347826087,"y":50.72463768115942},{"x":79.71014492753623,"y":57.971014492753625},{"x":95.65217391304348,"y":73.91304347826087},{"x":123.18840579710145,"y":97.10144927536231},{"x":149.2753623188406,"y":115.94202898550725},{"x":176.81159420289856,"y":128.9855072463768},{"x":182.6086956521739,"y":130.43478260869566},{"x":194.20289855072463,"y":130.43478260869566},{"x":198.55072463768116,"y":130.43478260869566}]]]
Hello David Gabriel Lopez Duarte,
We were able to reproduce this issue, it is likely due to our canvas isnt being adjusted to match the zoom level. This has been reported before and is in our backlog!

How to read the zip file contents in react application using jszip preferably

I have a requirement to read the zip file contents , This is how the zip folder will look like Manifest.zip , The files in the Manifest.zip are outline.png, publish.png, manifest.json. I need to read the manifest.json(key value pairs) on the fly in a react application.
Basically requirement is when user clicks on "maifest" link a pop up needs to shown which show manifest.json contents in the UI react components.
I tried using JSZip library, and the code is as below
var zip = new JSZip();
zip.loadAsync("Manifest.zip")
.then(function (zip) {
console.log(zip.files);
// Expected outline.png, publish.png, manifest.json
});
I get the error as shown below?
Don't know what the error is about, have you tried another zip file?
Anyway, here is my tsx code with reactjs hooks:
const [files, setFiles] = useState<File[]>(undefined);
const [fileInfo, setFileInfo] = useState<IFileInfo>(undefined);
useEffect(() => {
if (files) {
const f = files[0];
const dateBefore = new Date();
JSZip.loadAsync(f) // 1) read the Blob
.then((zip) => {
const contents = [];
zip.forEach((relativePath, zipEntry) => { // 2) print entries
contents.push(zipEntry.name);
});
const loadTime = moment(new Date()).diff(moment(dateBefore));
setFileInfo({
loadTime,
contents: contents.sort(),
error: null
});
}, (e) => {
const loadTime = moment(new Date()).diff(moment(dateBefore));
setFileInfo({
loadTime,
contents: [],
error: "Error reading " + f.name + ": " + e.message
});
});
}
}, [files]);
// below some code that adds a file to files with drag and drop interface
Take a look at this library. Seems stable and easy to use.
var AdmZip = require('adm-zip');
// reading archives
var zip = new AdmZip("./my_file.zip");
var zipEntries = zip.getEntries(); // an array of ZipEntry records
zipEntries.forEach(function(zipEntry) {
console.log(zipEntry.toString()); // outputs zip entries information
});

Transfer Learning Tensorflow.js size/shape error

I am trying to apply transfer learning by using a knnClassifier and the mobileNet image recognition model in Tensorflow.js I am, however, receiving the following error:
Size(28672) must match the product of shape 28,3072
I don't know how to tackle this issue, I've tried creating tensor3D, resizing using bilinear and nearest neighbor but to no avail. I was wondering if someone here could check this out.
Note that my idea here is to train images from certain folders and assign them to their class using the add example of the knnClassifier. I have a function that reads the image from a path, and an async function that trains the model and makes a prediction from an image.
................................................................................................
const tf = require('#tensorflow/tfjs');
//MobileNet : pre-trained model for TensorFlow.js
const mobilenet = require('#tensorflow-models/mobilenet');
//The module provides native TensorFlow execution
//in backend JavaScript applications under the Node.js runtime.
const tfnode = require('#tensorflow/tfjs-node');
const knnClassifier = require('./node_modules/#tensorflow-models/knn-classifier/dist/knn-classifier');
var glob = require('glob')
//The fs module provides an API for interacting with the file system.
const fs = require('fs');
const readImage = path => {
//reads the entire contents of a file.
//readFileSync() is synchronous and blocks execution until finished.
const imageBuffer = fs.readFileSync(path);
//Given the encoded bytes of an image,
//it returns a 3D or 4D tensor of the decoded image. Supports BMP, GIF, JPEG and PNG formats.
var tfimage = tfnode.node.decodeImage(imageBuffer);
// const t3d = tf.tensor3d(Array.from(tfimage.dataSync()),[tfimage.shape[0], tfimage.shape[1], 1])
const smalImg = tf.image.resizeNearestNeighbor(tfimage, [32, 32]);
const resized = tf.cast(smalImg, 'float32');
// t3d.reshape([32,32,3])
// var smalImg = tf.image.resizeBilinear(tfimage, [368, 432]);
// const resized = tf.cast(smalImg, 'float32');
return resized;
}
var mainDirectory = "./img_samples/";
const imageClassification = async path => {
const classifier = await knnClassifier.create();
const image = await readImage(path);
// Load the model.
const model = await mobilenet.load();
// Classify the image.
const predictions = await model.classify(image);
// print results on terminal
console.log('Classification Results:', predictions);
var folders = fs.readdirSync(mainDirectory);
var filesPerClass = [];
for(var i=0;i<folders.length;i++){
files = fs.readdirSync(mainDirectory+folders[i]);
var files_complete = [];
for(var j=0;j<files.length;j++){
files_complete.push(mainDirectory+folders[i]+"/"+files[j]);
}
filesPerClass.push(files_complete);
}
for(var i=0;i<filesPerClass.length;i++){
for(var j=0;j<filesPerClass[i].length;j++){
imageSample = readImage(filesPerClass[i][j]);
console.log(imageSample);
activation = await model.infer(imageSample, 'conv_preds'); //main directory
classifier.addExample(activation,i);
}
}
console.log(readImage('./hospitalTest.jpg'))
const predictionsTest = await classifier.predictClass(readImage('./hospitalTest.jpg'));
console.log('classficationTest:',predictionsTest);
}
if (process.argv.length !== 3) throw new Error('Incorrect arguments: node classify.js <IMAGE_FILE>');
imageClassification(process.argv[2]);
Since the knn classifier is trained using an output from a node of mobilenet, the prediction needs to be done likewise
outputMobilenet = await model.infer(readImage('./hospitalTest.jpg'), 'conv_preds')
predicted = await classifier.predictClass(outputMobilenet)

Javascript: Compare two selected files by file path

I'm looking for a way to check if two files/documents (PDF, JPG, PNG) are the same.
If a user selects one or more files, I convert the File Object to a javascript object. I'm keeping the size, type, filename and I create a blob so I can store the object in my redux store.
When a user selects another file I want to compare this file with the files that already has been added (so I can set the same blobURL).
I can check if two files has the same name, type and size but there is a change that all these properties match and the files aren't the same so I would like to check the file path. Unfortunately, that property isn't provided in the File Object. Is there a way to get this or another solution to make sure both files are (not) the same?
No there is no way to get the real path, but that doesn't matter.
All you have access to is a FakePath, in the form C:\fakepath\yourfilename.ext (from input.value), and sometimes a bit more if you gained access to a directory.
But anyway you probably don't want to check that two files came from the same place on the hard-disk, this has no importance whatsoever, since they could very well have been modified since first access.
What you can and probably want to do however, is to check if their content
are the same.
For this, you can compare their byte content:
inp1.onchange = inp2.onchange = e => {
const file1 = inp1.files[0];
const file2 = inp2.files[0];
if(!file1 || !file2) return;
compare(file1, file2)
.then(res => console.log('are same ? ', res));
};
function compare(file1, file2) {
// they don't have the same size, they are different
if(file1.size !== file2.size)
return Promise.resolve(false);
// load both as ArrayBuffers
return Promise.all([
readAsArrayBuffer(file1),
readAsArrayBuffer(file2)
]).then(([buf1, buf2]) => {
// create views over our ArrayBuffers
const arr1 = new Uint8Array(buf1);
const arr2 = new Uint8Array(buf2);
return !arr1.some((val, i) =>
arr2[i] !== val // search for diffs
);
});
}
function readAsArrayBuffer(file) {
// we could also have used a FileReader,
// but Response is conveniently already Promise based
return new Response(file).arrayBuffer();
}
<input type="file" id="inp1">
<input type="file" id="inp2">
Now, you say that you don't have access to the original Files anymore, and that you can only store serializable data. In this case, one less performant solution is to generate a hash from your Files.
This can be done on front-end, thanks to the SubtleCrypto API,
but this operation being quite slow for big files, you may want to do it systematically from server instead of doing it on front, and to only do it on front when the sizes are the same:
// a fake storage object like OP has
const store = [
{ /* an utf-8 text file whose content is `hello world`*/
name: "helloworld.txt",
size: 11,
hash: "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9" // generated from server
}
];
// the smae file as the one we fakely stored
const sameFile = new File(['hello world'], 'same-file.txt');
// a file the same size as the one we stored (needs deep check)
const sameSizeButDifferentContent = new File(['random text'], 'differentcontent.txt');
inp.onchange = e => tryToStore(inp.files[0]);
tryToStore(sameFile); // false
tryToStore(sameSizeButDifferentContent);
// hash: "a4e082f56a58e0855a6abbf2f4ebd08895ff85ea80e634e02b210def84b557dd"
function tryToStore(file) {
checkShouldStore(file)
.then(result => {
console.log('should store', file.name, result)
if(result)
store.push(result);
// this is just for demo, in your case you would do it on the server
if(!result.hash)
generateHash(file).then(h => result.hash = h);
});
}
async function checkShouldStore(file) {
const {name, size} = file;
const toStore = {name, size, file}; // create a wrapper object
// first check against the sizes (fast checking)
const sameSizes = store.filter(obj => obj.size === file.size);
// only if some files have the same size
if(sameSizes.length) {
// then we generate a hash directly
const hash = await generateHash(file);
if(sameSizes.some(obj => obj.hash === hash)) {
return false; // is already in our store
}
toStore.hash = hash; // save the hash so we don't have to generate it on server
}
return toStore;
}
async function generateHash(file) {
// read as ArrayBuffer
const buf = await new Response(file).arrayBuffer();
// generate SHA-256 hash using crypto API
const hash_buf = await crypto.subtle.digest("SHA-256", buf);
// convert to Hex
const hash_arr = [...new Uint8Array(hash_buf)]
.map(v => v.toString(16).padStart(2, "0"));
return hash_arr.join('');
}
<input type="file" id="inp">

How to read a file with JavaScript to WebAssembly?

How can I pass a File to be read within the WebAssembly memory context?
Reading a file in the browser with JavaScript is easy:
<input class="file-selector" type="file" id="files" name="files[]" />
I was able to bootstrap WebAssembly code written in Rust with the crate stdweb, add an event listener to the DOM element and fire up a FileReader:
let reader = FileReader::new();
let file_input_element: InputElement = document().query_selector(".file-selector").unwrap().unwrap().try_into().unwrap();
file_input_element.add_event_listener(enclose!( (reader, file_input_element) move |event: InputEvent| {
// mystery part
}));
In JavaScript, I would get the file from the element and pass it to the reader, however, the API of stdweb needs the following signature:
pub fn read_as_array_buffer<T: IBlob>(&self, blob: &T) -> Result<(), TODO>
I have no idea how to implement IBlob and I am sure that I am missing something obvious either with the stdweb API or in my understanding of WebAssembly/Rust. I was hoping that there is something less verbose than converting stuff to UTF-8.
It works when the FileReader itself is passed from JavaScript to WebAssembly. It also seems like a clean approach because the data has to be read by the JavaScript API anyway - no need to call JS from WASM.
index.html
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Read to wasm</title>
</head>
<body>
<input type="file" id="file-input"/>
<script src="reader.js"></script>
<script>
var fileReader = new FileReader();
fileReader.onloadend = e => Rust.reader
.then(reader=> {
window.alert(reader.print_result(fileReader));
});
var fileInputElement = document.getElementById("file-input");
fileInputElement.addEventListener("change", e => fileReader.readAsText(fileInputElement.files[0]));
</script>
</body>
</html>
main.rs
#![feature(proc_macro)]
#[macro_use]
extern crate stdweb;
use stdweb::js_export;
use stdweb::web::FileReader;
use stdweb::web::FileReaderResult;
#[js_export]
fn print_result(file_reader: FileReader) -> String {
match file_reader.result() {
Some(value) => match value {
FileReaderResult::String(value) => value,
_ => String::from("not a text"),
}
None => String::from("empty")
}
}
fn main() {
stdweb::initialize();
stdweb::event_loop();
}
The following code is what I use to interact with another javascript library to read a sql file all without using javascript. This is based on the wasm-bindgen library, and I believe may be helpful to newer folks stumbling onto this answer.
[wasm_bindgen]
pub fn load_accounts_from_file_with_balances(file_input : web_sys::HtmlInputElement) {
//Check the file list from the input
let filelist = file_input.files().expect("Failed to get filelist from File Input!");
//Do not allow blank inputs
if filelist.length() < 1 {
alert("Please select at least one file.");
return;
}
if filelist.get(0) == None {
alert("Please select a valid file");
return;
}
let file = filelist.get(0).expect("Failed to get File from filelist!");
let file_reader : web_sys::FileReader = match web_sys::FileReader::new() {
Ok(f) => f,
Err(e) => {
alert("There was an error creating a file reader");
log(&JsValue::as_string(&e).expect("error converting jsvalue to string."));
web_sys::FileReader::new().expect("")
}
};
let fr_c = file_reader.clone();
// create onLoadEnd callback
let onloadend_cb = Closure::wrap(Box::new(move |_e: web_sys::ProgressEvent| {
let array = js_sys::Uint8Array::new(&fr_c.result().unwrap());
let len = array.byte_length() as usize;
log(&format!("Blob received {}bytes: {:?}", len, array.to_vec()));
// here you can for example use the received image/png data
let db : Database = Database::new(array);
//Prepare a statement
let stmt : Statement = db.prepare(&sql_helper_utility::sql_load_accounts_with_balances());
stmt.getAsObject();
// Bind new values
stmt.bind();
while stmt.step() { //
let row = stmt.getAsObject();
log(&("Here is a row: ".to_owned() + &stringify(row).to_owned()));
}
}) as Box<dyn Fn(web_sys::ProgressEvent)>);
file_reader.set_onloadend(Some(onloadend_cb.as_ref().unchecked_ref()));
file_reader.read_as_array_buffer(&file).expect("blob not readable");
onloadend_cb.forget();
}
I managed to access the file object and pass it to the FileReaderin the following way:
let reader = FileReader::new();
let file_input_element: InputElement = document()
.query_selector(".file-selector")
.unwrap()
.unwrap()
.try_into()
.unwrap();
file_input_element.add_event_listener(
enclose!( (reader, file_input_element) move |event: InputEvent| {
let file = js!{return #{&file_input_element}.files[0]};
let real_file: stdweb::web::Blob = file.try_into().unwrap();
reader.read_as_text(&real_file);
}
This code compiles. However, the data never gets available via reader.result().

Categories

Resources