Load offline lang data in tesseract.js

Load offline lang data in tesseract.js - javascript

I am trying to load my own trained data to tesseract.js. As the file is placed locally, I tried to load everything offline. The code I used is shown below:
<script src="tesseract.js"></script>
<script>
//Set the worker, core and lang to local files
(function() {
    // Build the absolute URL of the directory that contains the current page.
    var segments = window.location.pathname.split( '/' );
    segments.pop(); // drop the trailing page name ("**.html")
    var baseUrl = window.location.origin + segments.join("/");
    console.log(baseUrl);

    // Point the worker and the core at files that sit next to the page.
    var localOptions = {
        workerPath: baseUrl + '/worker.js',
        //langPath: baseUrl + '/traineddata/',
        corePath: baseUrl + '/index.js'
    };
    window.Tesseract = Tesseract.create(localOptions);
})();
</script>
<script>
/**
 * Clear the #log element and run Tesseract on `file`, using the language
 * currently selected in #langsel. Progress packets and the final result
 * are logged and forwarded to progressUpdate().
 */
function recognizeFile(file){
    var logElement = document.querySelector("#log");
    logElement.innerHTML = '';

    var options = { lang: document.querySelector('#langsel').value };
    var job = Tesseract.recognize(file, options);

    var tracked = job.progress(function onProgress(packet){
        console.info(packet);
        progressUpdate(packet);
    });

    tracked.then(function onDone(data){
        console.log(data);
        progressUpdate({ status: 'done', data: data });
    });
}
</script>
The code above is working fine if the langPath is not set, but when I point the langPath to a local folder, Tesseract failed to load anything with the following error:
Failed loading language 'eng'
Tesseract couldn't load any languages!
...
AdaptedTemplates != NULL:Error:Assert failed:in file ../classify/adaptmatch.cpp, line 190
SCRIPT0: abort() at Error
at Na (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:36:24)
at ka (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:511:83)
at Module.de._abort (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:377:166)
at $L (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:55709)
at jpa (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:392:22274)
at lT (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:80568)
at mT (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:80698)
at BS (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:69009)
at bP (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:110094)
at jT (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:80280)
at RJ (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:19088)
at QJ (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:17789)
at zI (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:403:90852)
at tw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:49079)
at rw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:48155)
at lw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:39071)
at _v (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:22565)
at aw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:24925)
at cw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:27237)
at oj (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:386:24689)
at Og (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:386:10421)
at $.prototype.Recognize (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:558:379)
at Anonymous function (file:///C:/Users/user/Downloads/tesseract.js-master/dist/worker.js:8814:9)
at Anonymous function (file:///C:/Users/user/Downloads/tesseract.js-master/dist/worker.js:8786:9)
at xhr.onerror (file:///C:/Users/user/Downloads/tesseract.js-master/dist/worker.js:8429:9)
If this abort() is unexpected, build with -s ASSERTIONS=1 which can give more information.
index.js (8,1)
I have both eng.traineddata and eng.traineddata.gz in the /traineddata folder as apparently the ungzip process is skipped. Is there anything I neglected? Any help is appreciated.

I know this question is an old one, but recently I needed to use Tesseract.js in one of my projects. I needed to load the data files locally, so here is what I did.
Instead of creating a new worker. I modified the default worker options available. So I didn't use Tesseract.createWorker and directly set the path and used recognize instead.
// Language data: load the language files from this site's own directory.
Tesseract.workerOptions.langPath =
window.location.origin // take protocol://domain.com part
+ "/scripts/tesseract/dist/"; // location of data files
// Worker script override; the author notes this one was optional for their setup.
Tesseract.workerOptions.workerPath =
window.location.origin // take protocol://domain.com part
+ "/scripts/tesseract/dist/worker.js"; // location of worker.js
// Core script override; likewise optional according to the author.
Tesseract.workerOptions.corePath =
window.location.origin // take protocol://domain.com part
+ "/scripts/tesseract/dist/index.js"; // location of index.js
//example lang path would be protocol://domain.com/scripts/tesseract/dist/
By doing this, I left the worker and core paths untouched pointing to Default CDN.
PS: When using local worker.js and core.js paths I was getting uncaught error on postMessage() in worker.js. That's why I am using local path for langData only. I still don't know how to fix it or why it is happening. But, You can follow this issue here and here

I solved the problem by taking the corePath file from tesseract.js-core 0.1.0
// Replace the global Tesseract with an instance whose worker, language data
// and core are all loaded from /tesseract/ on the current origin.
window.Tesseract = Tesseract.create({
workerPath: window.location.origin + "/tesseract/worker.js", //tesseract.js-1.0.10
langPath: window.location.origin + "/tesseract/", // directory that holds the language data files
corePath: window.location.origin + "/tesseract/index.js", //tesseract.js-core-0.1.0
});
And language gz from https://github.com/naptha/tessdata/tree/gh-pages/3.02

Related

How do I fix my Blazor server app file download process?

I'm building Blazor server app .net5 , I'm using the following code for downloading files:
//After button click
else if(buttonName == "Download")
{
JSRuntime.InvokeVoidAsync("downloadFromUrl", new { Url = "api/files", FileName = "test.pdf" });
}
//this is the function for the download proccess
/**
 * Start a browser download by clicking a temporary anchor element.
 *
 * options.url      - address assigned to the anchor's href
 * options.fileName - suggested file name; null/undefined becomes ''
 */
function downloadFromUrl(options) {
    var link = document.createElement('a');
    link.href = options.url;

    var suggestedName = options.fileName;
    if (suggestedName === null || suggestedName === undefined) {
        suggestedName = '';
    }
    link.download = suggestedName;

    link.click();
    link.remove();
}
//# sourceMappingURL=helper.js.map
The above half-works, I do start a download but the file I get downloaded is corrupt, the size of
the file is much smaller compared to the original file, no errors I can post here, I don't
understand what could be wrong, any ideas ?

I'm not sure using InvokeVoidAsync and a fake anchor is ideal. Here is another approach.
Create a middleware. Register it in your Startup.cs, above UseStaticFiles().
In the Invoke of the middleware, retrieve the rawUrl
string rawUrl = context.Request.Path.ToString() + context.Request.QueryString.ToString();
If in this rawUrl, you recognise an URL for a file download, then process it and return. Otherwise await _next(context);.
The process (where I wrote "process it") will be:
// Load the payload that will be sent as the download.
byte[] bytes = System.IO.File.ReadAllBytes("..."); // or anything else, e.g. the bytes come from a DB
context.Response.ContentType = "application/pdf"; // should be adapted to the file type
// "attachment" makes the browser save the response instead of displaying it.
context.Response.Headers.Add("Content-Disposition", "attachment; filename=\"myFileName.pdf\"; size=" + bytes.Length.ToString());
// Await the write: without it the middleware can return before the body has
// been written, and the client may receive a truncated file.
await context.Response.Body.WriteAsync(bytes);
In the HTML source, you don't have to create a button with a click handler. Just place an anchor, with an HREF recognized by the middleware.
Middleware info : https://learn.microsoft.com/en-us/dotnet/architecture/blazor-for-web-forms-developers/middleware, see Custom middleware

Flask, serving Static File and or folder inside Javascript

I'm trying to insert the following static URL for a static folder inside a JavaScript snippet so that it can properly load a saved file, but I'm still getting an error.
Here is what happens.
the normal file location is http://localhost/static/uploads/filename.ext but with the following javascript, it fetch the location based on the views' url_prefix='/media' hence the url it fetches is http://localhost/media/static/uploads/filename.ext
here is the following code:
<script>
$(function(){
    // Send each file as soon as it is added.
    var submitImmediately = function (e, data) {
        data.submit();
    };

    // Show the uploaded image and remember its path in the hidden field.
    var showUploadedFile = function (response, status) {
        console.log(response.filename);
        // Relative path: it is resolved against the URL of the current page.
        var filePath = 'static/uploads/' + response.filename;
        $('#imgUpload').attr('src', filePath);
        $('#filePath').val(filePath);
        console.log('success');
    };

    var logFailure = function (error) {
        console.log(error);
    };

    $('#fileupload').fileupload({
        url: 'upload',
        dataType: 'json',
        add: submitImmediately,
        success: showUploadedFile,
        error: logFailure
    });
})
I m trying to replace,
var filePath = 'static/uploads/' + response.filename;
with
var filePath = {{ url_for('static', filename='/uploads/') }} + response.filename;
but with no success. The original filename settings leads to the Blueprint url_prefix, which i wanted to bypass.
Edit
Here is my Views
@media.route('/upload', methods=['GET', 'POST'])
def upload():
    """Store an uploaded file under a random name and report that name.

    On POST, the file from the ``file`` form field is saved into
    ``app.config['UPLOAD_FOLDER']`` as ``<uuid4><original extension>`` and a
    JSON object ``{"filename": <stored name>}`` is returned.
    """
    if request.method == 'POST':
        file = request.files['file']
        # Keep only the extension of the client-supplied name.
        extension = os.path.splitext(file.filename)[1]
        f_name = str(uuid.uuid4()) + extension
        file.save(os.path.join(app.config['UPLOAD_FOLDER'], f_name))
        return json.dumps({'filename':f_name})
    # NOTE(review): a GET request falls through and returns None - confirm
    # whether GET should be handled here or dropped from `methods`.

There are two paths to consider here and you need to pay close attention to which you're using where:
the absolute filepath on the server eg: /opt/myapp/media/upload/<filename>, and
the relative urlpath on the client eg: https://localhost/static/upload/<filename>
Your easiest solution may be to simply return the filename, without any directory prefix, and then prepend the appropriate directory for the context in which you use it.
So in the python you can still return 'somefile.jpg' with:
return json.dumps({'filename': f_name})
And in the javascript you can reference '/static/uploads/somefile.jpg' with:
var filepath = '/static/uploads/' + response.filename;

Well, I have fixed the issue, I have stripped the url_prefix parameters so i can call from the root url path. to avoid the previous issue.

document generation only works the first time

I'm using openxml in my HTML5 mobile app to generate word documents on the mobile device.
In general openxml works fine and is straightforward, but I'm struggling with an annoying problem.
The document generation only works the first time after I've started the app. This time I can open and view the document. Restart the app means:
- Redeploy from development machine
- Removing the app from the task pane (pushing aside; I assume the app is removed then?)
The second time I get the message the document is corrupted and I'm unable to view the file
UPDATE:
I can't reproduce this behaviour when I'm running the app connected to the remote debugger without having a breakpoint set. Doing it this way I always get a working document.
It doesn't make a difference whether I make any changes to the document or not. Simply opening and saving it reproduces the error.
After doing some research I've found that the structure of the docx.zip file is the same for the working and the corrupt file. They also have the same file length. But in the corrupt docx I've found some files that have a wrong/invalid CRC. See here an example when trying to get a corrupt file out of the zip. Other files are working as expected.
The properties for this file are->
(CRC in a working version is: 44D3906C)
Code for processing the doc-template:
/*
 * Process the template
 *
 * Opens the package passed in doc64, copies the third row of the document's
 * second table, appends the copy to that table, and writes the saved package
 * to "Templates/MyGenerated.docx" through the global `fs` object.
 *
 * doc64    - package data handed to openXml.OpenXmlPackage (presumably the
 *            base64-encoded DOCX template - confirm against the caller)
 * callback - accepted for interface compatibility; it is not invoked here
 */
function processTemplate(doc64, callback)
{
    "use strict";
    console.log("PROCESS TEMPLATE");

    // Only the aliases that are actually used are kept.
    var XElement = Ltxml.XElement;
    var W = openXml.W;

    var doc = new openXml.OpenXmlPackage(doc64);

    // Duplicate the template row (3rd row of the 2nd table) and append the
    // copy to the same table.
    var table = doc.mainDocumentPart().getXDocument().descendants(W.tbl).elementAt(1);
    var tpl_row = table.descendants(W.tr).elementAt(2);
    table.add(new XElement(tpl_row));
    // callback(doc);

    var mod_file = doc.saveToBlob();

    // Start writing document
    console.log("WRITE TEMPLATE DOCUMENT");
    fs.root.getFile("Templates/" + "MyGenerated.docx", {create: true, exclusive: false},
        function(fileEntry)
        {
            fileEntry.createWriter(
                function(fileWriter)
                {
                    fileWriter.onwriteend = function(e) {
                        console.log("TEMPLATE DOCUMENT WRITTEN:"+e.target.length);
                    };
                    fileWriter.onerror = function(e) {
                        console.log("ERROR writing DOCUMENT:" + e.code + ";" + e.message);
                    };
                    // The blob is read into an ArrayBuffer first and that buffer is written.
                    var blobreader = new FileReader();
                    blobreader.onloadend = function()
                    {
                        fileWriter.write(blobreader.result);
                    };
                    blobreader.readAsArrayBuffer(mod_file);
                },
                // Previously null: a failure to create the writer was silently dropped.
                function(err)
                {
                    console.log("ERROR creating writer for DOCUMENT:" + err.code);
                });
        },
        // Previously null: a failure to open/create the file was silently dropped.
        function(err)
        {
            console.log("ERROR opening DOCUMENT:" + err.code);
        });
}
Any ideas what I'm doing wrong?

Thanks for posting about the error. There were some issues with jszip.js that I encountered when I was developing the Open XML SDK for JavaScript.
At the following link, there is a sample javascript app that demonstrates generating a document.
Open XML SDK for JavaScript Demo
In that app you can save multiple DOCXs, one after another, and they are not corrupted.
In order to work on this issue, I need to be able to re-produce locally. Maybe you can take that little working web app and replace parts with your parts until it is generating invalid files?
Cheers, Eric
P.S. I am traveling and have intermittent access to internet. If you can continue the thread on OpenXmlDeveloper.org, then it will help me to answer quicker. :-)

What made it work for me, was changing the way of adding images (Parts) to the document. I was using the type "binary" for adding images to document. I changed this to "base64"
So I changed the source from:
mydoc.addPart( "/word/"+reltarget, openXml.contentTypes.png, "binary", fotodata ); // add Image Part to doc
to:
mydoc.addPart( "/word/"+reltarget, openXml.contentTypes.png, "base64", window.btoa(fotodata) ); // add Image Part to doc

Phonegap - How to access file in www-folder?

I saw multiple solutions how to access a file in the www folder but no solution works for me. I test the application under iOS with the iOS-simulator.
I want to access the file test.txt in the www folder.
My current solution looks like this:
var filePathURI = getPhoneGapPath() + "test.txt";
window.resolveLocalFileSystemURI(filePathURI, onResolveSuccess, onFail);
/**
 * Return the directory part of the current page's pathname, i.e. everything
 * up to and including the last '/'.
 */
function getPhoneGapPath() {
    'use strict';
    var pagePath = window.location.pathname;
    var directoryEnd = pagePath.lastIndexOf('/') + 1;
    return pagePath.slice(0, directoryEnd);
}
This solution does not work for me. I get an error with errorCode = 2 which obviously means FileError.SECURITY_ERR. However I try, with resolveLocalFileSystemURI I can not access to the file.
INFO: I tried following filePathURI:
/Users/UserName/Library/Application%20Support/iPhone%20Simulator/7.0/Applications/GUID/AppName.app/www/test.txt
file:///Users/UserName/Library/Application%20Support/iPhone%20Simulator/7.0/Applications/GUID/AppName.app/www/test.txt
Can anyone give me a working solution?

I would suggest utilizing the resolveLocalFileSystemURL method provided by PhoneGap's file plugin. You can then use the cordova.file.applicationDirectory property to access where your www folder is located.
Make sure you install the plugin: $ cordova plugin add org.apache.cordova.file
Then you could use an object such as the following to parse the files and do whatever is needed:
var FileManager = {
    /**
     * Execute this.entryHandler against all files and directories in phonegap's www folder
     */
    run: function () {
        // Wrap the handlers so they are still invoked as methods of this
        // object; passing them bare would lose `this` inside them.
        var self = this;
        window.resolveLocalFileSystemURL(
            cordova.file.applicationDirectory + 'www/',
            function (directoryEntry) { self.directoryFoundHandler(directoryEntry); },
            function (error) { self.errorHandler(error); }
        );
    },
    /**
     * The directory has been successfully read. Now read the entries.
     *
     * @param {DirectoryEntry} directoryEntry
     */
    directoryFoundHandler: function (directoryEntry) {
        var self = this;
        var directoryReader = directoryEntry.createReader();
        directoryReader.readEntries(
            function (entries) { self.entryHandler(entries); },
            function (error) { self.errorHandler(error); }
        );
    },
    /**
     * Files were successfully found. Parse them!
     *
     * @param {Array.<FileEntry>} entries
     */
    entryHandler: function (entries) {
        entries.forEach(function (entry) {
            // Deal with your files here
            if (entry.isDirectory) {
                // It's a directory might need to loop through again
            } else {
                // It's a file, do something
            }
        });
    },
    /**
     * Log a failure reported by the file API.
     *
     * @param {FileError} error
     */
    errorHandler: function (error) {
        console.log("ERROR", error);
    }
};

I load my language files with ajax like this...
$.get( "test.txt", function( data ) {
console.log( "Load was performed.", data );
});
I think for your solution you have to add read access to your app --> config.xml
<feature name="http://api.phonegap.com/1.0/file" />

Try this, part of my functions. You first need to get the file system and then get the root path. Modify it to fit your needs.
Then you can just do the following.
app_FileSystem for me is a global variable that gets assigned by GetAppFS
After getting the FS and the root path you can simply use an ajax call or a getJSON call with the appropriate dataType set. It works for me.
Also check the doc which is helpful:
http://docs.phonegap.com/en/3.3.0/cordova_file_file.md.html#LocalFileSystem
app_FileSystem.root.fullPath; // Get the app file system root full path
/**
 * Request the persistent file system. On success the file system is stored on
 * `this` and in the global app_FileSystem, then OnFSReady() is called; on
 * failure the error is logged and an alert is shown.
 */
function GetAppFS ()
{
    var self = this;
    self.state = ""; // last step reached, kept for debugging purposes
    self.fileSystem = {};

    // Success callback: remember the file system and signal readiness.
    var onFileSystem = function ( fileSystem )
    {
        self.state = "Received File System";
        self.fileSystem = fileSystem;
        app_FileSystem = fileSystem;
        OnFSReady ();
    };

    // Failure callback: log the error together with the last recorded state.
    var onFailure = function ( e )
    {
        console.log ("While " + self.state + ", encountered error: " + JSON.stringify(e));
        alert ("dev FS ERROR ");
    };

    window.requestFileSystem ( LocalFileSystem.PERSISTENT, 0, onFileSystem, onFailure );
}

As I just ran into the same problem but did not want to use jQuery, I thought I post my solution here as well.
But before that, an important remark: the files in the www folder of Cordova / PhoneGap are stored in the Android world as so-called assets, which means:
They are part of the .apk distribution file which is a zipped archive. Android reads the files directly from this .apk file and does not store these files separately in the local file system.
Therefore the files are read only and cannot be accessed with the Cordova File plugin.
If you take a deep dive in the corresponding Android sources of Cordova you can see, that Cordova filters all URIs with a 'file' scheme, whose path starts with '/android_asset/' and handles them specially using Android's asset access functions. (Would be interesting to hear from the iOS experts how Cordova handles it in their world.)
This means all in all that using a XMLHttpRequest is probably the only portable way to access www folder files if you need access to the file contents. (If you only need the path to the file for some system functions other methods may work as well.)
Here is the code, filename is the path within the www folder without a "www/" prefix:
/**
 * Read a file from the www folder via XMLHttpRequest.
 *
 * filename  - path within the www folder, without a "www/" prefix
 * onSuccess - called with a Uint8Array holding the file contents
 * onFailure - called without arguments when the response is empty or the
 *             request itself fails
 */
var readFileInWWWFolder = function(filename, onSuccess, onFailure){
    var request = new XMLHttpRequest();
    request.onload = function() {
        var arrayBuffer = request.response;
        if (arrayBuffer) {
            onSuccess(new Uint8Array(arrayBuffer));
        }
        else {
            onFailure();
        }
    };
    // onload does not fire when the request itself fails, so without this
    // handler the caller would never be told about the failure.
    request.onerror = function() {
        onFailure();
    };
    request.open("GET", filename, true);
    request.responseType = "arraybuffer";
    request.send();
};
This has been tested with Cordova 4.3.0 and Android 4.4.2 (Kitkat).

One trick that works is to fs.download each file from the www folder into Cordova’s persistent file system. See my original post.
First, in Terminal:
npm install cordova-promise-fs
cordova plugin add cordova-plugin-file --save
cordova plugin add cordova-plugin-file-transfer --save
Then, in your front-end:
import CordovaPromiseFS from 'cordova-promise-fs'
const fs = CordovaPromiseFS({
persistent: true,
storageSize: 200 * 1024 * 1024,
concurrency: 3
})
If you use React, the above has to be declared before the component Class is created, while the below code should be in its own function inside the component Class. See my GitHub comment for more details.
window.resolveLocalFileSystemURL(
  cordova.file.applicationDirectory + 'www/epubs/alice.epub',
  // Resolved: copy the bundled file into the persistent file system
  (bundledEntry) => {
    const localUrl = 'alice.epub' // the filename it is stored as in the device

    // Log the percentage once both counters are available
    const reportProgress = (progressEvent) => {
      const { loaded, total } = progressEvent
      if (loaded && total) {
        console.log('progress', Math.round((loaded / total) * 100))
      }
    }

    fs.download(bundledEntry.toURL(), localUrl, reportProgress)
      .then(() => fs.toInternalURL(localUrl))
      .then((localPath) => {
        this.setState({ epubPath: localPath })
      })
      .catch((error) => {
        console.log('some error happend', error)
      })
  },
  // Not resolved: just log the error
  (err) => {
    console.log(err)
  }
)

Creating File using extension firefox

I am trying to create a file in my extension directory and I have this code:
AddonManager.getAddonByID(" extension id here ", function(addon)
{
    // URI of hello.txt inside the extension.
    var uri = addon.getResourceURI("hello.txt");
    // XPCOM contract IDs start with "@".
    var file = Components.classes["@mozilla.org/file/local;1"]
        .createInstance(Components.interfaces.nsILocalFile);
    // Turn the URI text into a backslash-separated path by hand: replace
    // every '/' with '\' and cut off the first 8 characters.
    var stringUri = uri.asciiSpec;
    stringUri = stringUri.replace(/\//g, '\\');
    stringUri = stringUri.slice(8);
    alert(stringUri);
    try{
        file.initWithPath(stringUri);
    } catch(e) {
        alert(e);
    }
    alert(addon.hasResource("hello.txt"));
});
For some reason, the last alert always shows false and the file doesn't exist. What am I doing wrong?
I also put <em:unpack>true</em:unpack> in the install.rdf so that I can see my extension directory.

initWithPath accepts only local filesystem paths. Assuming uri is a file url, you can do the conversion like this
var path = uri.QueryInterface(Components.interfaces.nsIFileURL).file.path

Develop Reference

JavaScript is the programming language of the Web.

Load offline lang data in tesseract.js - javascript

Related

How do I fix my Blazor server app file download process?

Flask, serving Static File and or folder inside Javascript

document generation only works the first time

Phonegap - How to access file in www-folder?

Creating File using extension firefox

Categories

Resources