fix(ocr): fix OCR and switch from tessdata_best to tessdata_fast
This commit is contained in:
parent
5761f686dd
commit
d5e3d8b76c
7 changed files with 129 additions and 58 deletions
|
@ -20,4 +20,4 @@ Expense manager
|
|||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
```
|
||||
|
||||
The base app includes the [tessdata_best](https://github.com/tesseract-ocr/tessdata_best) English trained data, ©️ [tessdata_best / Tesseract contributors](https://github.com/tesseract-ocr/tessdata_best/graphs/contributors), used under the [Apache 2.0 license](https://github.com/tesseract-ocr/tessdata_best/blob/main/LICENSE)
|
||||
The base app includes the [tessdata_fast](https://github.com/tesseract-ocr/tessdata_fast) English trained data, ©️ [tessdata_fast / Tesseract contributors](https://github.com/tesseract-ocr/tessdata_fast/graphs/contributors), used under the [Apache 2.0 license](https://github.com/tesseract-ocr/tessdata_fast/blob/main/LICENSE)
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
<uses-permission android:name="android.permission.INTERNET" />
|
||||
|
||||
<application
|
||||
android:label="Prašule"
|
||||
android:name="${applicationName}"
|
||||
|
|
Binary file not shown.
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"files": [
|
||||
"files":[
|
||||
"eng.traineddata"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -12,7 +12,7 @@ class TessdataApi {
|
|||
);
|
||||
static Future<List<String>> getAvailableData() async {
|
||||
var res = await _client.get(
|
||||
"https://git.mnau.xyz/api/v1/repos/hernik/tessdata_best/contents",
|
||||
"https://git.mnau.xyz/api/v1/repos/hernik/tessdata_fast/contents",
|
||||
options: Options(headers: {"Accept": "application/json"}));
|
||||
if ((res.statusCode ?? 500) > 399) {
|
||||
return Future.error("The server returned status code ${res.statusCode}");
|
||||
|
@ -28,25 +28,36 @@ class TessdataApi {
|
|||
|
||||
/// Removes the downloaded `<name>.traineddata` file from the Tesseract data
/// directory.
///
/// The data directory is created first when it does not exist yet. Nothing
/// else happens when the requested file is absent.
static Future<void> deleteData(String name) async {
  final tessdataPath = await FlutterTesseractOcr.getTessdataPath();
  final tessdataDir = Directory(tessdataPath);
  if (!tessdataDir.existsSync()) tessdataDir.createSync();

  final target = File("${tessdataDir.path}/$name.traineddata");
  if (target.existsSync()) target.deleteSync();
}
|
||||
|
||||
static Future<List<String>> getDownloadedData() async =>
|
||||
Directory(await FlutterTesseractOcr.getTessdataPath())
|
||||
/// Lists the file names of all downloaded Tesseract language data.
///
/// Creates the tessdata directory when it does not exist yet, so that
/// listing it on a fresh install does not fail.
///
/// Returns bare file names including the extension, e.g. `eng.traineddata`.
static Future<List<String>> getDownloadedData() async {
  var tessDir = Directory(await FlutterTesseractOcr.getTessdataPath());
  if (!tessDir.existsSync()) {
    tessDir.createSync();
  }
  return tessDir
      .listSync()
      // Only regular files count: a directory whose name happens to end in
      // `.traineddata` is not usable language data.
      .whereType<File>()
      .where((file) => file.path.endsWith(".traineddata"))
      // Take the last URI segment instead of splitting on "/", so the base
      // name is extracted correctly whatever the path separator is.
      .map<String>((file) => file.uri.pathSegments.last)
      .toList();
}
|
||||
|
||||
static Future<void> downloadData(String isoCode,
|
||||
{void Function(int, int)? callback}) async {
|
||||
var file = File(
|
||||
"${(await FlutterTesseractOcr.getTessdataPath())}/$isoCode.traineddata");
|
||||
var tessDir = Directory(await FlutterTesseractOcr.getTessdataPath());
|
||||
if (!tessDir.existsSync()) {
|
||||
tessDir.createSync();
|
||||
}
|
||||
var file = File("${tessDir.path}/$isoCode.traineddata");
|
||||
if (file.existsSync()) return; // TODO: maybe ask to redownload?
|
||||
var res = await _client.get(
|
||||
"https://git.mnau.xyz/hernik/tessdata_best/raw/branch/main/$isoCode.traineddata",
|
||||
"https://git.mnau.xyz/hernik/tessdata_fast/raw/branch/main/$isoCode.traineddata",
|
||||
options: Options(responseType: ResponseType.bytes),
|
||||
onReceiveProgress: callback);
|
||||
if ((res.statusCode ?? 500) > 399) {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import 'package:flutter/material.dart';
|
||||
import 'package:flutter_slidable/flutter_slidable.dart';
|
||||
import 'package:flutter_speed_dial/flutter_speed_dial.dart';
|
||||
import 'package:flutter_tesseract_ocr/flutter_tesseract_ocr.dart';
|
||||
import 'package:grouped_list/grouped_list.dart';
|
||||
import 'package:image_picker/image_picker.dart';
|
||||
import 'package:intl/date_symbol_data_local.dart';
|
||||
|
@ -14,6 +15,7 @@ import 'package:prasule/pw/platformbutton.dart';
|
|||
import 'package:prasule/pw/platformdialog.dart';
|
||||
import 'package:prasule/views/create_entry.dart';
|
||||
import 'package:prasule/views/settings/settings.dart';
|
||||
import 'package:prasule/views/settings/tessdata_list.dart';
|
||||
import 'package:prasule/views/setup.dart';
|
||||
|
||||
class HomeView extends StatefulWidget {
|
||||
|
@ -85,48 +87,8 @@ class _HomeViewState extends State<HomeView> {
|
|||
SpeedDialChild(
|
||||
child: const Icon(Icons.image),
|
||||
label: "Add through saved image",
|
||||
onTap: () async {
|
||||
var availableLanguages = await TessdataApi.getDownloadedData();
|
||||
if (mounted) {
|
||||
var selectedLanguages =
|
||||
List<bool>.filled(availableLanguages.length, false);
|
||||
selectedLanguages[
|
||||
availableLanguages.indexOf("eng.traineddata")] = true;
|
||||
showDialog(
|
||||
context: context,
|
||||
builder: (c) => PlatformDialog(
|
||||
title: "Select languages for OCR",
|
||||
content: Column(
|
||||
children: [
|
||||
...List.generate(
|
||||
availableLanguages.length,
|
||||
(index) => Row(
|
||||
children: [
|
||||
Checkbox(
|
||||
value: selectedLanguages[index],
|
||||
onChanged: (value) {
|
||||
if (value == null ||
|
||||
(selectedLanguages
|
||||
.where((element) => element)
|
||||
.length <=
|
||||
1 &&
|
||||
!value)) return;
|
||||
selectedLanguages[index] = value;
|
||||
setState(() {}); // todo: builder
|
||||
},
|
||||
),
|
||||
const SizedBox(
|
||||
width: 10,
|
||||
),
|
||||
Text(availableLanguages[index].split(".").first)
|
||||
],
|
||||
),
|
||||
)
|
||||
],
|
||||
),
|
||||
),
|
||||
);
|
||||
}
|
||||
onTap: () {
|
||||
startOcr(ImageSource.gallery);
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -286,6 +248,102 @@ class _HomeViewState extends State<HomeView> {
|
|||
);
|
||||
}
|
||||
|
||||
/// Runs OCR on an image the user picks from [imgSrc].
///
/// Flow:
///  1. Loads the downloaded language data; when there is none, shows a
///     snackbar offering to open [TessdataListView] and stops.
///  2. Shows a dialog in which the user ticks the languages to recognise.
///  3. On "Ok", lets the user pick an image, runs Tesseract with the ticked
///     languages and logs the recognised text.
Future<void> startOcr(ImageSource imgSrc) async {
  var availableLanguages = await TessdataApi.getDownloadedData();
  // Everything below needs a live context.
  if (!mounted) return;
  if (availableLanguages.isEmpty) {
    ScaffoldMessenger.of(context).showSnackBar(
      SnackBar(
        content:
            const Text("You do not have any OCR language data downloaded"),
        action: SnackBarAction(
          label: "Download",
          onPressed: () {
            // The snackbar may still be visible after this view is gone.
            if (!mounted) return;
            Navigator.of(context).push(
              MaterialPageRoute(
                builder: (c) => const TessdataListView(),
              ),
            );
          },
        ),
      ),
    );
    return;
  }

  var selectedLanguages = List<bool>.filled(availableLanguages.length, false);
  // Always start with exactly one language ticked. The checkbox handler
  // refuses to untick the last one, so this guarantees that Tesseract is
  // never called with an empty language string. Prefer English when present.
  final englishIndex = availableLanguages.indexOf("eng.traineddata");
  selectedLanguages[englishIndex < 0 ? 0 : englishIndex] = true;

  showDialog(
    context: context,
    builder: (c) => PlatformDialog(
      actions: [
        TextButton(
          onPressed: () async {
            // Resolve the dialog's navigator before any await so that no
            // BuildContext is used across an async gap.
            final navigator = Navigator.of(c);
            final ImagePicker picker = ImagePicker();
            final XFile? media = await picker.pickImage(source: imgSrc);
            if (media == null) {
              if (mounted) navigator.pop();
              return;
            }
            // Build the "lang1+lang2" string from the ticked languages.
            var selected = [
              for (var i = 0; i < availableLanguages.length; i++)
                if (selectedLanguages[i])
                  availableLanguages[i].replaceAll(".traineddata", ""),
            ].join("+");
            logger.i(selected);
            var string = await FlutterTesseractOcr.extractText(
              media.path,
              language: selected,
              args: {
                //"psm": "4",
                "preserve_interword_spaces": "1",
              },
            );
            logger.i(string);
            if (mounted) navigator.pop();
          },
          child: const Text("Ok"),
        ),
        TextButton(
          onPressed: () {
            Navigator.of(c).pop();
          },
          child: const Text("Cancel"),
        ),
      ],
      title: "Select languages for OCR",
      // The dialog lives in its own route, so the surrounding State's
      // setState would not rebuild it. StatefulBuilder gives the checkbox
      // list a local setState that does.
      content: StatefulBuilder(
        builder: (_, setDialogState) => Column(
          children: [
            for (var index = 0; index < availableLanguages.length; index++)
              Row(
                children: [
                  Checkbox(
                    value: selectedLanguages[index],
                    onChanged: (value) {
                      if (value == null) return;
                      final selectedCount =
                          selectedLanguages.where((e) => e).length;
                      // Keep at least one language selected.
                      if (!value && selectedCount <= 1) return;
                      setDialogState(() {
                        selectedLanguages[index] = value;
                      });
                    },
                  ),
                  const SizedBox(
                    width: 10,
                  ),
                  Text(availableLanguages[index].split(".").first),
                ],
              ),
          ],
        ),
      ),
    ),
  );
}
|
||||
|
||||
Future<void> getLostData() async {
|
||||
final ImagePicker picker = ImagePicker();
|
||||
final LostDataResponse response = await picker.retrieveLostData();
|
||||
|
|
|
@ -133,6 +133,7 @@ class _TessdataListViewState extends State<TessdataListView> {
|
|||
/// Used to find which `.traineddata` is already downloaded and which not
|
||||
/// so we can show it to the user
|
||||
void loadAllTessdata() async {
|
||||
var tessDir = Directory(await FlutterTesseractOcr.getTessdataPath());
|
||||
var d = await TessdataApi.getAvailableData();
|
||||
var dataStatus = <Map<String, bool>>[];
|
||||
for (var data in d) {
|
||||
|
@ -140,8 +141,7 @@ class _TessdataListViewState extends State<TessdataListView> {
|
|||
e[data] = false;
|
||||
dataStatus.add(e);
|
||||
}
|
||||
var appDir =
|
||||
Directory(await FlutterTesseractOcr.getTessdataPath()).listSync();
|
||||
var appDir = tessDir.listSync();
|
||||
for (var file in appDir) {
|
||||
if (file is! File ||
|
||||
!file.path.endsWith("traineddata") ||
|
||||
|
|
Loading…
Reference in a new issue