fix(ocr): ocr, switch tessdata_best with fast
This commit is contained in:
parent
5761f686dd
commit
d5e3d8b76c
7 changed files with 129 additions and 58 deletions
|
@ -20,4 +20,4 @@ Expense manager
|
||||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
```
|
```
|
||||||
|
|
||||||
The base app includes the [tessdata_best](https://github.com/tesseract-ocr/tessdata_best) English trained data, ©️ [tessdata_best / Tesseract contributors](https://github.com/tesseract-ocr/tessdata_best/graphs/contributors), used under the [Apache 2.0 license](https://github.com/tesseract-ocr/tessdata_best/blob/main/LICENSE)
|
The base app includes the [tessdata_fast](https://github.com/tesseract-ocr/tessdata_fast) English trained data, ©️ [tessdata_fast / Tesseract contributors](https://github.com/tesseract-ocr/tessdata_fast/graphs/contributors), used under the [Apache 2.0 license](https://github.com/tesseract-ocr/tessdata_fast/blob/main/LICENSE)
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
|
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
|
||||||
|
<uses-permission android:name="android.permission.INTERNET" />
|
||||||
|
|
||||||
<application
|
<application
|
||||||
android:label="Prašule"
|
android:label="Prašule"
|
||||||
android:name="${applicationName}"
|
android:name="${applicationName}"
|
||||||
|
|
Binary file not shown.
|
@ -12,7 +12,7 @@ class TessdataApi {
|
||||||
);
|
);
|
||||||
static Future<List<String>> getAvailableData() async {
|
static Future<List<String>> getAvailableData() async {
|
||||||
var res = await _client.get(
|
var res = await _client.get(
|
||||||
"https://git.mnau.xyz/api/v1/repos/hernik/tessdata_best/contents",
|
"https://git.mnau.xyz/api/v1/repos/hernik/tessdata_fast/contents",
|
||||||
options: Options(headers: {"Accept": "application/json"}));
|
options: Options(headers: {"Accept": "application/json"}));
|
||||||
if ((res.statusCode ?? 500) > 399) {
|
if ((res.statusCode ?? 500) > 399) {
|
||||||
return Future.error("The server returned status code ${res.statusCode}");
|
return Future.error("The server returned status code ${res.statusCode}");
|
||||||
|
@ -28,25 +28,36 @@ class TessdataApi {
|
||||||
|
|
||||||
static Future<void> deleteData(String name) async {
|
static Future<void> deleteData(String name) async {
|
||||||
var dataDir = Directory(await FlutterTesseractOcr.getTessdataPath());
|
var dataDir = Directory(await FlutterTesseractOcr.getTessdataPath());
|
||||||
|
if (!dataDir.existsSync()) {
|
||||||
|
dataDir.createSync();
|
||||||
|
}
|
||||||
var dataFile = File("${dataDir.path}/$name.traineddata");
|
var dataFile = File("${dataDir.path}/$name.traineddata");
|
||||||
if (!dataFile.existsSync()) return;
|
if (!dataFile.existsSync()) return;
|
||||||
dataFile.deleteSync();
|
dataFile.deleteSync();
|
||||||
}
|
}
|
||||||
|
|
||||||
static Future<List<String>> getDownloadedData() async =>
|
static Future<List<String>> getDownloadedData() async {
|
||||||
Directory(await FlutterTesseractOcr.getTessdataPath())
|
var tessDir = Directory(await FlutterTesseractOcr.getTessdataPath());
|
||||||
|
if (!tessDir.existsSync()) {
|
||||||
|
tessDir.createSync();
|
||||||
|
}
|
||||||
|
return tessDir
|
||||||
.listSync()
|
.listSync()
|
||||||
.where((element) => element.path.endsWith(".traineddata"))
|
.where((element) => element.path.endsWith(".traineddata"))
|
||||||
.map<String>((e) => e.path.split("/").last)
|
.map<String>((e) => e.path.split("/").last)
|
||||||
.toList();
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
static Future<void> downloadData(String isoCode,
|
static Future<void> downloadData(String isoCode,
|
||||||
{void Function(int, int)? callback}) async {
|
{void Function(int, int)? callback}) async {
|
||||||
var file = File(
|
var tessDir = Directory(await FlutterTesseractOcr.getTessdataPath());
|
||||||
"${(await FlutterTesseractOcr.getTessdataPath())}/$isoCode.traineddata");
|
if (!tessDir.existsSync()) {
|
||||||
|
tessDir.createSync();
|
||||||
|
}
|
||||||
|
var file = File("${tessDir.path}/$isoCode.traineddata");
|
||||||
if (file.existsSync()) return; // TODO: maybe ask to redownload?
|
if (file.existsSync()) return; // TODO: maybe ask to redownload?
|
||||||
var res = await _client.get(
|
var res = await _client.get(
|
||||||
"https://git.mnau.xyz/hernik/tessdata_best/raw/branch/main/$isoCode.traineddata",
|
"https://git.mnau.xyz/hernik/tessdata_fast/raw/branch/main/$isoCode.traineddata",
|
||||||
options: Options(responseType: ResponseType.bytes),
|
options: Options(responseType: ResponseType.bytes),
|
||||||
onReceiveProgress: callback);
|
onReceiveProgress: callback);
|
||||||
if ((res.statusCode ?? 500) > 399) {
|
if ((res.statusCode ?? 500) > 399) {
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import 'package:flutter/material.dart';
|
import 'package:flutter/material.dart';
|
||||||
import 'package:flutter_slidable/flutter_slidable.dart';
|
import 'package:flutter_slidable/flutter_slidable.dart';
|
||||||
import 'package:flutter_speed_dial/flutter_speed_dial.dart';
|
import 'package:flutter_speed_dial/flutter_speed_dial.dart';
|
||||||
|
import 'package:flutter_tesseract_ocr/flutter_tesseract_ocr.dart';
|
||||||
import 'package:grouped_list/grouped_list.dart';
|
import 'package:grouped_list/grouped_list.dart';
|
||||||
import 'package:image_picker/image_picker.dart';
|
import 'package:image_picker/image_picker.dart';
|
||||||
import 'package:intl/date_symbol_data_local.dart';
|
import 'package:intl/date_symbol_data_local.dart';
|
||||||
|
@ -14,6 +15,7 @@ import 'package:prasule/pw/platformbutton.dart';
|
||||||
import 'package:prasule/pw/platformdialog.dart';
|
import 'package:prasule/pw/platformdialog.dart';
|
||||||
import 'package:prasule/views/create_entry.dart';
|
import 'package:prasule/views/create_entry.dart';
|
||||||
import 'package:prasule/views/settings/settings.dart';
|
import 'package:prasule/views/settings/settings.dart';
|
||||||
|
import 'package:prasule/views/settings/tessdata_list.dart';
|
||||||
import 'package:prasule/views/setup.dart';
|
import 'package:prasule/views/setup.dart';
|
||||||
|
|
||||||
class HomeView extends StatefulWidget {
|
class HomeView extends StatefulWidget {
|
||||||
|
@ -85,48 +87,8 @@ class _HomeViewState extends State<HomeView> {
|
||||||
SpeedDialChild(
|
SpeedDialChild(
|
||||||
child: const Icon(Icons.image),
|
child: const Icon(Icons.image),
|
||||||
label: "Add through saved image",
|
label: "Add through saved image",
|
||||||
onTap: () async {
|
onTap: () {
|
||||||
var availableLanguages = await TessdataApi.getDownloadedData();
|
startOcr(ImageSource.gallery);
|
||||||
if (mounted) {
|
|
||||||
var selectedLanguages =
|
|
||||||
List<bool>.filled(availableLanguages.length, false);
|
|
||||||
selectedLanguages[
|
|
||||||
availableLanguages.indexOf("eng.traineddata")] = true;
|
|
||||||
showDialog(
|
|
||||||
context: context,
|
|
||||||
builder: (c) => PlatformDialog(
|
|
||||||
title: "Select languages for OCR",
|
|
||||||
content: Column(
|
|
||||||
children: [
|
|
||||||
...List.generate(
|
|
||||||
availableLanguages.length,
|
|
||||||
(index) => Row(
|
|
||||||
children: [
|
|
||||||
Checkbox(
|
|
||||||
value: selectedLanguages[index],
|
|
||||||
onChanged: (value) {
|
|
||||||
if (value == null ||
|
|
||||||
(selectedLanguages
|
|
||||||
.where((element) => element)
|
|
||||||
.length <=
|
|
||||||
1 &&
|
|
||||||
!value)) return;
|
|
||||||
selectedLanguages[index] = value;
|
|
||||||
setState(() {}); // todo: builder
|
|
||||||
},
|
|
||||||
),
|
|
||||||
const SizedBox(
|
|
||||||
width: 10,
|
|
||||||
),
|
|
||||||
Text(availableLanguages[index].split(".").first)
|
|
||||||
],
|
|
||||||
),
|
|
||||||
)
|
|
||||||
],
|
|
||||||
),
|
|
||||||
),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
|
@ -286,6 +248,102 @@ class _HomeViewState extends State<HomeView> {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Future<void> startOcr(ImageSource imgSrc) async {
|
||||||
|
var availableLanguages = await TessdataApi.getDownloadedData();
|
||||||
|
if (availableLanguages.isEmpty) {
|
||||||
|
if (!mounted) return;
|
||||||
|
ScaffoldMessenger.of(context).showSnackBar(
|
||||||
|
SnackBar(
|
||||||
|
content:
|
||||||
|
const Text("You do not have any OCR language data downloaded"),
|
||||||
|
action: SnackBarAction(
|
||||||
|
label: "Download",
|
||||||
|
onPressed: () {
|
||||||
|
Navigator.of(context).push(
|
||||||
|
MaterialPageRoute(
|
||||||
|
builder: (c) => const TessdataListView(),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
},
|
||||||
|
),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!mounted) return;
|
||||||
|
var selectedLanguages = List<bool>.filled(availableLanguages.length, false);
|
||||||
|
if (selectedLanguages.length == 1) {
|
||||||
|
selectedLanguages[0] = true;
|
||||||
|
}
|
||||||
|
showDialog(
|
||||||
|
context: context,
|
||||||
|
builder: (c) => PlatformDialog(
|
||||||
|
actions: [
|
||||||
|
TextButton(
|
||||||
|
onPressed: () async {
|
||||||
|
final ImagePicker picker = ImagePicker();
|
||||||
|
final XFile? media = await picker.pickImage(source: imgSrc);
|
||||||
|
if (media == null) {
|
||||||
|
if (mounted) Navigator.of(context).pop();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// get selected languages
|
||||||
|
var selected = availableLanguages
|
||||||
|
.where((element) =>
|
||||||
|
selectedLanguages[availableLanguages.indexOf(element)])
|
||||||
|
.join("+")
|
||||||
|
.replaceAll(".traineddata", "");
|
||||||
|
logger.i(selected);
|
||||||
|
var string = await FlutterTesseractOcr.extractText(media.path,
|
||||||
|
language: selected,
|
||||||
|
args: {
|
||||||
|
//"psm": "4",
|
||||||
|
"preserve_interword_spaces": "1",
|
||||||
|
});
|
||||||
|
logger.i(string);
|
||||||
|
if (mounted) Navigator.of(context).pop();
|
||||||
|
return;
|
||||||
|
},
|
||||||
|
child: const Text("Ok")),
|
||||||
|
TextButton(
|
||||||
|
onPressed: () {
|
||||||
|
Navigator.of(c).pop();
|
||||||
|
},
|
||||||
|
child: const Text("Cancel")),
|
||||||
|
],
|
||||||
|
title: "Select languages for OCR",
|
||||||
|
content: Column(
|
||||||
|
children: [
|
||||||
|
...List.generate(
|
||||||
|
availableLanguages.length,
|
||||||
|
(index) => Row(
|
||||||
|
children: [
|
||||||
|
Checkbox(
|
||||||
|
value: selectedLanguages[index],
|
||||||
|
onChanged: (value) {
|
||||||
|
if (value == null ||
|
||||||
|
(selectedLanguages
|
||||||
|
.where((element) => element)
|
||||||
|
.length <=
|
||||||
|
1 &&
|
||||||
|
!value)) return;
|
||||||
|
selectedLanguages[index] = value;
|
||||||
|
setState(() {}); // todo: builder
|
||||||
|
},
|
||||||
|
),
|
||||||
|
const SizedBox(
|
||||||
|
width: 10,
|
||||||
|
),
|
||||||
|
Text(availableLanguages[index].split(".").first)
|
||||||
|
],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
Future<void> getLostData() async {
|
Future<void> getLostData() async {
|
||||||
final ImagePicker picker = ImagePicker();
|
final ImagePicker picker = ImagePicker();
|
||||||
final LostDataResponse response = await picker.retrieveLostData();
|
final LostDataResponse response = await picker.retrieveLostData();
|
||||||
|
|
|
@ -133,6 +133,7 @@ class _TessdataListViewState extends State<TessdataListView> {
|
||||||
/// Used to find which `.traineddata` is already downloaded and which not
|
/// Used to find which `.traineddata` is already downloaded and which not
|
||||||
/// so we can show it to the user
|
/// so we can show it to the user
|
||||||
void loadAllTessdata() async {
|
void loadAllTessdata() async {
|
||||||
|
var tessDir = Directory(await FlutterTesseractOcr.getTessdataPath());
|
||||||
var d = await TessdataApi.getAvailableData();
|
var d = await TessdataApi.getAvailableData();
|
||||||
var dataStatus = <Map<String, bool>>[];
|
var dataStatus = <Map<String, bool>>[];
|
||||||
for (var data in d) {
|
for (var data in d) {
|
||||||
|
@ -140,8 +141,7 @@ class _TessdataListViewState extends State<TessdataListView> {
|
||||||
e[data] = false;
|
e[data] = false;
|
||||||
dataStatus.add(e);
|
dataStatus.add(e);
|
||||||
}
|
}
|
||||||
var appDir =
|
var appDir = tessDir.listSync();
|
||||||
Directory(await FlutterTesseractOcr.getTessdataPath()).listSync();
|
|
||||||
for (var file in appDir) {
|
for (var file in appDir) {
|
||||||
if (file is! File ||
|
if (file is! File ||
|
||||||
!file.path.endsWith("traineddata") ||
|
!file.path.endsWith("traineddata") ||
|
||||||
|
|
Loading…
Reference in a new issue