fix(ocr): fix OCR flow, replace tessdata_best with tessdata_fast

This commit is contained in:
Matyáš Caras 2023-10-05 17:47:20 +02:00
parent 5761f686dd
commit d5e3d8b76c
Signed by untrusted user who does not match committer: hernik
GPG key ID: 2A3175F98820C5C6
7 changed files with 129 additions and 58 deletions

View file

@ -20,4 +20,4 @@ Expense manager
along with this program. If not, see <https://www.gnu.org/licenses/>.
```
The base app includes the [tessdata_best](https://github.com/tesseract-ocr/tessdata_best) English trained data, ©️ [tessdata_best / Tesseract contributors](https://github.com/tesseract-ocr/tessdata_best/graphs/contributors), used under the [Apache 2.0 license](https://github.com/tesseract-ocr/tessdata_best/blob/main/LICENSE)
The base app includes the [tessdata_fast](https://github.com/tesseract-ocr/tessdata_fast) English trained data, ©️ [tessdata_fast / Tesseract contributors](https://github.com/tesseract-ocr/tessdata_fast/graphs/contributors), used under the [Apache 2.0 license](https://github.com/tesseract-ocr/tessdata_fast/blob/main/LICENSE)

View file

@ -1,4 +1,6 @@
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
<uses-permission android:name="android.permission.INTERNET" />
<application
android:label="Prašule"
android:name="${applicationName}"

Binary file not shown.

View file

@ -12,7 +12,7 @@ class TessdataApi {
);
static Future<List<String>> getAvailableData() async {
var res = await _client.get(
"https://git.mnau.xyz/api/v1/repos/hernik/tessdata_best/contents",
"https://git.mnau.xyz/api/v1/repos/hernik/tessdata_fast/contents",
options: Options(headers: {"Accept": "application/json"}));
if ((res.statusCode ?? 500) > 399) {
return Future.error("The server returned status code ${res.statusCode}");
@ -28,25 +28,36 @@ class TessdataApi {
/// Deletes the downloaded `<name>.traineddata` file, if it exists.
///
/// The tessdata directory is created first when it is missing; a missing
/// data file is silently ignored.
static Future<void> deleteData(String name) async {
  final tessdataPath = await FlutterTesseractOcr.getTessdataPath();
  final tessdataDir = Directory(tessdataPath);
  if (!tessdataDir.existsSync()) tessdataDir.createSync();

  final trainedData = File("${tessdataDir.path}/$name.traineddata");
  if (trainedData.existsSync()) {
    trainedData.deleteSync();
  }
}
static Future<List<String>> getDownloadedData() async =>
Directory(await FlutterTesseractOcr.getTessdataPath())
/// Lists the file names of all downloaded `.traineddata` files.
///
/// The tessdata directory is created when it does not exist yet, in which
/// case the returned list is empty. Each entry is the last `/`-separated
/// segment of the file path, e.g. `eng.traineddata`.
static Future<List<String>> getDownloadedData() async {
  final tessdataDir = Directory(await FlutterTesseractOcr.getTessdataPath());
  if (!tessdataDir.existsSync()) tessdataDir.createSync();

  final fileNames = <String>[];
  for (final entity in tessdataDir.listSync()) {
    if (!entity.path.endsWith(".traineddata")) continue;
    fileNames.add(entity.path.split("/").last);
  }
  return fileNames;
}
static Future<void> downloadData(String isoCode,
{void Function(int, int)? callback}) async {
var file = File(
"${(await FlutterTesseractOcr.getTessdataPath())}/$isoCode.traineddata");
var tessDir = Directory(await FlutterTesseractOcr.getTessdataPath());
if (!tessDir.existsSync()) {
tessDir.createSync();
}
var file = File("${tessDir.path}/$isoCode.traineddata");
if (file.existsSync()) return; // TODO: maybe ask to redownload?
var res = await _client.get(
"https://git.mnau.xyz/hernik/tessdata_best/raw/branch/main/$isoCode.traineddata",
"https://git.mnau.xyz/hernik/tessdata_fast/raw/branch/main/$isoCode.traineddata",
options: Options(responseType: ResponseType.bytes),
onReceiveProgress: callback);
if ((res.statusCode ?? 500) > 399) {

View file

@ -1,6 +1,7 @@
import 'package:flutter/material.dart';
import 'package:flutter_slidable/flutter_slidable.dart';
import 'package:flutter_speed_dial/flutter_speed_dial.dart';
import 'package:flutter_tesseract_ocr/flutter_tesseract_ocr.dart';
import 'package:grouped_list/grouped_list.dart';
import 'package:image_picker/image_picker.dart';
import 'package:intl/date_symbol_data_local.dart';
@ -14,6 +15,7 @@ import 'package:prasule/pw/platformbutton.dart';
import 'package:prasule/pw/platformdialog.dart';
import 'package:prasule/views/create_entry.dart';
import 'package:prasule/views/settings/settings.dart';
import 'package:prasule/views/settings/tessdata_list.dart';
import 'package:prasule/views/setup.dart';
class HomeView extends StatefulWidget {
@ -85,48 +87,8 @@ class _HomeViewState extends State<HomeView> {
SpeedDialChild(
child: const Icon(Icons.image),
label: "Add through saved image",
onTap: () async {
var availableLanguages = await TessdataApi.getDownloadedData();
if (mounted) {
var selectedLanguages =
List<bool>.filled(availableLanguages.length, false);
selectedLanguages[
availableLanguages.indexOf("eng.traineddata")] = true;
showDialog(
context: context,
builder: (c) => PlatformDialog(
title: "Select languages for OCR",
content: Column(
children: [
...List.generate(
availableLanguages.length,
(index) => Row(
children: [
Checkbox(
value: selectedLanguages[index],
onChanged: (value) {
if (value == null ||
(selectedLanguages
.where((element) => element)
.length <=
1 &&
!value)) return;
selectedLanguages[index] = value;
setState(() {}); // todo: builder
},
),
const SizedBox(
width: 10,
),
Text(availableLanguages[index].split(".").first)
],
),
)
],
),
),
);
}
onTap: () {
startOcr(ImageSource.gallery);
},
),
],
@ -286,6 +248,102 @@ class _HomeViewState extends State<HomeView> {
);
}
/// Runs the OCR flow for an image obtained from [imgSrc].
///
/// When no `.traineddata` file is downloaded, a snackbar with a "Download"
/// action leading to [TessdataListView] is shown instead. Otherwise a dialog
/// lets the user choose the OCR languages; confirming it opens the image
/// picker, passes the picked image to Tesseract and logs the recognized text.
Future<void> startOcr(ImageSource imgSrc) async {
  final availableLanguages = await TessdataApi.getDownloadedData();
  if (!mounted) return;

  if (availableLanguages.isEmpty) {
    ScaffoldMessenger.of(context).showSnackBar(
      SnackBar(
        content:
            const Text("You do not have any OCR language data downloaded"),
        action: SnackBarAction(
          label: "Download",
          onPressed: () {
            Navigator.of(context).push(
              MaterialPageRoute(
                builder: (c) => const TessdataListView(),
              ),
            );
          },
        ),
      ),
    );
    return;
  }

  final selectedLanguages =
      List<bool>.filled(availableLanguages.length, false);
  // Always start with exactly one language ticked. The checkbox handler
  // below refuses to untick the last one, so the selection can never be
  // empty when "Ok" is pressed. English is preferred when it is downloaded.
  final englishIndex = availableLanguages.indexOf("eng.traineddata");
  selectedLanguages[englishIndex == -1 ? 0 : englishIndex] = true;

  await showDialog<void>(
    context: context,
    // The dialog lives in its own route, so calling the page's setState
    // would not rebuild it; StatefulBuilder provides a setter that does.
    builder: (_) => StatefulBuilder(
      builder: (dialogContext, setDialogState) => PlatformDialog(
        actions: [
          TextButton(
            onPressed: () async {
              final picker = ImagePicker();
              final media = await picker.pickImage(source: imgSrc);
              if (media == null) {
                if (mounted) Navigator.of(context).pop();
                return;
              }
              // Build the Tesseract language string, e.g. "eng+ces".
              final selected = [
                for (var i = 0; i < availableLanguages.length; i++)
                  if (selectedLanguages[i])
                    availableLanguages[i].replaceAll(".traineddata", ""),
              ].join("+");
              logger.i(selected);
              final text = await FlutterTesseractOcr.extractText(
                media.path,
                language: selected,
                args: {
                  //"psm": "4",
                  "preserve_interword_spaces": "1",
                },
              );
              logger.i(text);
              if (mounted) Navigator.of(context).pop();
            },
            child: const Text("Ok"),
          ),
          TextButton(
            onPressed: () {
              Navigator.of(dialogContext).pop();
            },
            child: const Text("Cancel"),
          ),
        ],
        title: "Select languages for OCR",
        content: Column(
          children: [
            for (var index = 0; index < availableLanguages.length; index++)
              Row(
                children: [
                  Checkbox(
                    value: selectedLanguages[index],
                    onChanged: (value) {
                      if (value == null) return;
                      final selectedCount =
                          selectedLanguages.where((isOn) => isOn).length;
                      // Keep at least one language selected.
                      if (!value && selectedCount <= 1) return;
                      setDialogState(() {
                        selectedLanguages[index] = value;
                      });
                    },
                  ),
                  const SizedBox(
                    width: 10,
                  ),
                  Text(availableLanguages[index].split(".").first),
                ],
              ),
          ],
        ),
      ),
    ),
  );
}
Future<void> getLostData() async {
final ImagePicker picker = ImagePicker();
final LostDataResponse response = await picker.retrieveLostData();

View file

@ -133,6 +133,7 @@ class _TessdataListViewState extends State<TessdataListView> {
/// Used to find which `.traineddata` is already downloaded and which not
/// so we can show it to the user
void loadAllTessdata() async {
var tessDir = Directory(await FlutterTesseractOcr.getTessdataPath());
var d = await TessdataApi.getAvailableData();
var dataStatus = <Map<String, bool>>[];
for (var data in d) {
@ -140,8 +141,7 @@ class _TessdataListViewState extends State<TessdataListView> {
e[data] = false;
dataStatus.add(e);
}
var appDir =
Directory(await FlutterTesseractOcr.getTessdataPath()).listSync();
var appDir = tessDir.listSync();
for (var file in appDir) {
if (file is! File ||
!file.path.endsWith("traineddata") ||