diff --git a/README.md b/README.md
index bd26e7b..778efe5 100644
--- a/README.md
+++ b/README.md
@@ -1,46 +1,88 @@
+[License](https://github.com/apparser-development/apparser/blob/master/LICENSE.md) [Unit tests](https://github.com/apparser-development/apparser/actions/workflows/unit_tests.yml)
+
+[Downloads](https://pepy.tech/projects/apparser)
[](https://apparser-development.github.io/apparser/)
-[](https://github.com/apparser-development/appwindows/actions/workflows/unit_tests.yml)
-[](https://github.com/apparser-development/apparser)
+[PyPI](https://pypi.org/project/apparser/)
+[GitHub](https://github.com/apparser-development/apparser)
[](https://github.com/apparser-development/apparser/issues)
# Apparser
-Apparser is a Python library designed for automating desktop applications and managing UI interfaces using artificial intelligence, such as OCR or object detection models.
-# Install
+Apparser is a Python library for automating desktop applications and interacting with UIs using AI-powered tools such as OCR and object detection models.
+
+# Installation
```bash
+# Base Apparser package
pip install apparser
+
+# Apparser with text recognition support
+pip install "apparser[ocr]"
+
+# Apparser with text-to-speech support
+pip install "apparser[speak]"
+
+# Apparser with object detection support
+pip install "apparser[cv]"
+
+# Apparser with all optional features
+pip install "apparser[all]"
```
# Examples
-
-1) Open terminal and write "Hello World!"
+1) Open CS2 and start a game
+#### Code
```python
from apparser import App
-from apparser.geometry import RelativelyPoint
-from apparser.instructions import Algorithm, MouseClickTo, WriteText, Sleep
+from apparser.instructions import OCRAlgorithm
+from apparser.instructions.ocr import WaitText, ClickOnText
+from apparser.text_readers import ScreensController, RapidOcrReader
+
+# Text labels that the OCR algorithm will look for on the screen.
+play_button = "play"
+deathmatch_button = "deathmatch"
+group_button = "hostage group"
+start_button = "go"
-algorithm = Algorithm([
- Sleep(1), # Wait for the application to open.
- MouseClickTo(RelativelyPoint(0.5, 0.5)), # Click to window center for start writing
- WriteText("Hello World") # Write text
-])
+# Create OCR-based algorithm.
+algorithm = OCRAlgorithm([
+ # Wait for the main menu and open the play screen.
+ WaitText(play_button),
+ ClickOnText(play_button),
+ # Select the deathmatch mode.
+ WaitText(deathmatch_button),
+ ClickOnText(deathmatch_button),
+ # Select the hostage group and start the match.
+ WaitText(group_button),
+ ClickOnText(group_button),
+ ClickOnText(start_button, min_similarity=0.5),
+], text_reader=ScreensController(RapidOcrReader()))
-app = App("notepad", window_title="Notepad")
+# Launch CS2
+app = App(['cmd', '/c', 'start', 'steam://rungameid/730'], timeout=20)
+# Run the prepared scenario against the application UI.
algorithm.perform(app.ui)
```
+#### Video
+
+
# Docs
-All documentation here
-Link to PyPi
+Full documentation is available [here](https://apparser-development.github.io/apparser/).
+Package page on [PyPI](https://pypi.org/project/apparser/).
+
+# Donation
+If you'd like to financially support the developers for their work:
+
+Donation link
# For Developers
-1) If something doesn't work - open issue.
-2) If you want something fixed - open issue.
-3) If you can help with the library - email.
+1) If something doesn't work, open an issue.
+2) If you want something fixed, open an issue.
+3) If you can help with the library, email us.
apparser.development@gmail.com
-Any help in development is welcome)!
+Contributions are welcome!
diff --git a/apparser/core/app.py b/apparser/core/app.py
index c853b3c..2fbc7db 100644
--- a/apparser/core/app.py
+++ b/apparser/core/app.py
@@ -1,4 +1,3 @@
-import os
import subprocess
import time
@@ -11,21 +10,24 @@
class App:
"""Manage an application process and its UI wrapper."""
- def __init__(self, path_to_exe: str,
+ def __init__(self, start_command: str | list[str],
window_title: str | None = None,
timeout: float = 1):
"""Initialize an application controller.
- :param path_to_exe: Path to the executable file.
- :type path_to_exe: str
+ :param start_command: Command used to start the app: an executable path or an argument list.
+ :type start_command: str | list[str]
:param window_title: Title of the window to attach to.
:type window_title: str
:param timeout: Delay before the window lookup starts.
:type timeout: float
:raises TypeError: If any argument has an invalid type.
"""
- if not isinstance(path_to_exe, str):
- raise TypeError('path_to_exe must be a string')
+ if isinstance(start_command, str):
+ start_command = [start_command]
+
+ if not isinstance(start_command, list):
+ raise TypeError('start_command must be a string or list[str]')
if window_title is not None and not isinstance(window_title, str):
raise TypeError('window_title must be a string')
@@ -35,7 +37,7 @@ def __init__(self, path_to_exe: str,
self.__window_finder = get_finder()
self.__process: subprocess.Popen | None = None
- self.__path = path_to_exe
+ self.__start_command = start_command
self.__timeout = timeout
self.__window_title_name: str = window_title
self.__ui: BaseUi | None = None
@@ -65,9 +67,9 @@ def start_app(self):
if self.__ui is not None:
return
window_processes = [i.get_process_id() for i in get_finder().get_all_windows()]
- self.__process = subprocess.Popen([self.__path])
+ self.__process = subprocess.Popen(self.__start_command)
time.sleep(self.__timeout)
- self.__find_window_by_process_id(os.getpid())
+ self.__find_window_by_process_id(self.__process.pid)
for i in get_finder().get_all_windows():
if self.__ui is not None:
return
@@ -76,7 +78,7 @@ def start_app(self):
if self.__ui is not None:
return
self.__find_window_by_title()
- if self.__ui is not None:
+ if self.__ui is None:
raise WindowDoesNotValidException()
def stop_app(self):
diff --git a/apparser/core/ui/coordinates.py b/apparser/core/ui/coordinates.py
index 6d569a7..d74a422 100644
--- a/apparser/core/ui/coordinates.py
+++ b/apparser/core/ui/coordinates.py
@@ -2,10 +2,9 @@
import numpy
from appwindows import Window
-from appwindows.geometry import Size
from apparser.core.ui.base import BaseUi
-from apparser.geometry import Point, RelativelyPoint
+from apparser.geometry import Point, RelativelyPoint, Size
class CoordinatesUi(BaseUi):
diff --git a/apparser/core/ui/desktop.py b/apparser/core/ui/desktop.py
index d7eca53..b076362 100644
--- a/apparser/core/ui/desktop.py
+++ b/apparser/core/ui/desktop.py
@@ -39,8 +39,8 @@ def _(self, coordinates: Point):
@point_to_global.register(RelativelyPoint)
def _(self, coordinates: RelativelyPoint):
monitor = get_monitors()[self.__display_id]
- x = round(coordinates.x * monitor.width)
- y = round(coordinates.y * monitor.height)
+ x = getattr(monitor, "x", 0) + round(coordinates.x * monitor.width)
+ y = getattr(monitor, "y", 0) + round(coordinates.y * monitor.height)
local_point = Point(x, y)
return self.point_to_global(local_point)
diff --git a/apparser/core/ui/window.py b/apparser/core/ui/window.py
index a5c9f5e..73638e1 100644
--- a/apparser/core/ui/window.py
+++ b/apparser/core/ui/window.py
@@ -3,10 +3,9 @@
import numpy
from appwindows import Window
-from appwindows.geometry import Point, Size
+from apparser.geometry import Point, Size, RelativelyPoint
from apparser.core.ui.base import BaseUi
-from apparser.geometry.relatively_point import RelativelyPoint
class WindowUi(BaseUi):
diff --git a/apparser/core/ui/window_by_display.py b/apparser/core/ui/window_by_display.py
index dad741d..38d12af 100644
--- a/apparser/core/ui/window_by_display.py
+++ b/apparser/core/ui/window_by_display.py
@@ -4,10 +4,9 @@
from PIL import ImageGrab
from appwindows import Window
-from appwindows.geometry import Point, Size
+from apparser.geometry import Point, Size, RelativelyPoint
from apparser.core.ui.base import BaseUi
-from apparser.geometry.relatively_point import RelativelyPoint
class WindowByDisplayUi(BaseUi):
diff --git a/apparser/cv/readers/yolo.py b/apparser/cv/readers/yolo.py
index 2be8a35..8fcb027 100644
--- a/apparser/cv/readers/yolo.py
+++ b/apparser/cv/readers/yolo.py
@@ -52,9 +52,11 @@ def read(self, ui: BaseUi) -> CvAllData:
x1, y1, x2, y2 = box.xyxy[0].tolist()
x = int(x1)
y = int(y1)
- width = int(x2 - x1)
- height = int(y2 - y1)
- box_ui = CoordinatesUi(ui, Point(x, y), Size(width, height))
+ x2, y2 = int(x2), int(y2)
+ # Subtract the truncated corner (x, y), not the float x1/y1, so sizes stay ints.
+ width = x2 - x
+ height = y2 - y
+ box_ui = CoordinatesUi(ui, Point(x, y), Point(x2, y2))
boxes.append(
CvBox(
class_name=cls_name,
diff --git a/apparser/cv/utils/changes_checker.py b/apparser/cv/utils/changes_checker.py
index 8f528a0..e704b08 100644
--- a/apparser/cv/utils/changes_checker.py
+++ b/apparser/cv/utils/changes_checker.py
@@ -25,7 +25,7 @@ def _is_resized(box: CvBox, old_box: CvBox) -> bool:
:return: True if width and height both changed.
:rtype: bool
"""
- return abs(box.width - old_box.width) > 0 and abs(box.height - old_box.height) > 0
+ return abs(box.width - old_box.width) > 0 or abs(box.height - old_box.height) > 0
class ChangesChecker:
diff --git a/apparser/exceptions/timeout.py b/apparser/exceptions/timeout.py
index 9d0570d..0b8bdc8 100644
--- a/apparser/exceptions/timeout.py
+++ b/apparser/exceptions/timeout.py
@@ -1,5 +1,14 @@
class TimeoutException(Exception):
+ """Represent a timeout during a waiting operation."""
+
def __init__(self, wait_time: float | int | None = None):
+ """Initialize a timeout exception.
+
+ :param wait_time: Time waited before the timeout occurred.
+ :type wait_time: float | int | None
+ :raises TypeError: If ``wait_time`` has an invalid type.
+ :raises ValueError: If ``wait_time`` is negative.
+ """
if wait_time is None:
super().__init__("Timeout error")
return
diff --git a/apparser/geometry/__init__.py b/apparser/geometry/__init__.py
index 2bcf958..0810e9f 100644
--- a/apparser/geometry/__init__.py
+++ b/apparser/geometry/__init__.py
@@ -1,4 +1,4 @@
-from appwindows.geometry import Point, Size
+from appwindows.geometry import Point, Size, QuadPoints
from apparser.geometry.relatively_point import RelativelyPoint
from apparser.geometry.distance import distance
@@ -6,4 +6,5 @@
__all__ = ["Point",
"Size",
"RelativelyPoint",
+ "QuadPoints",
"distance"]
diff --git a/apparser/instructions/default/press.py b/apparser/instructions/default/press.py
index 8807ba3..5235db4 100644
--- a/apparser/instructions/default/press.py
+++ b/apparser/instructions/default/press.py
@@ -1,8 +1,7 @@
import pyautogui
-from apparser.key_codes import BaseKeyCode
-
from apparser.instructions.base import BaseInstruction
+from apparser.key_codes import BaseKeyCode
class PressKey(BaseInstruction):
@@ -38,6 +37,12 @@ def __init__(self, keys: list[BaseKeyCode | str]):
:type keys: list[BaseKeyCode | str]
"""
self.__keys = keys
+ self.__validate()
+
+ def __validate(self):
+ # Reject the key list as soon as one entry is neither a BaseKeyCode nor a str.
+ if not all(isinstance(key, (BaseKeyCode, str)) for key in self.__keys):
+ raise TypeError('key_code must be BaseKeyCode or str')
@property
def id(self) -> int:
@@ -45,8 +50,6 @@ def id(self) -> int:
def perform(self, *args, **kwargs):
for key in self.__keys:
- if not (isinstance(key, BaseKeyCode) or isinstance(key, str)):
- raise TypeError('key_code must be BaseKeyCode or str')
pyautogui.keyDown(str(key))
for key in self.__keys:
diff --git a/apparser/instructions/ocr/move_to_text.py b/apparser/instructions/ocr/move_to_text.py
index cb5e069..a909fc2 100644
--- a/apparser/instructions/ocr/move_to_text.py
+++ b/apparser/instructions/ocr/move_to_text.py
@@ -3,7 +3,6 @@
from apparser.core import BaseUi
from apparser.exceptions import TextNotFoundException
from apparser.geometry import Point, RelativelyPoint
-
from apparser.text_readers import BaseTextReader, TextData
from apparser.instructions.ocr.base import OCRInstruction
@@ -59,8 +58,8 @@ def perform(self, ui: BaseUi, text_reader: BaseTextReader, *args, **kwargs):
needed_data, rating = self.find_text(self.__text_getter.local_answer)
if self.__min_similarity > rating:
raise TextNotFoundException(self.__min_similarity)
- y_cords = list(set([i.y for i in needed_data.coordinates]))
- x_cords = list(set([i.x for i in needed_data.coordinates]))
+ y_cords = [needed_data.coordinates.right_top.y, needed_data.coordinates.right_bottom.y]
+ x_cords = [needed_data.coordinates.left_top.x, needed_data.coordinates.right_top.x]
offset_point = self.__get_local_offset(ui)
x_center = round((x_cords[0] - x_cords[1]) / 2 + x_cords[1]) + offset_point.x
y_center = round((y_cords[0] - y_cords[1]) / 2 + y_cords[1]) + offset_point.y
diff --git a/apparser/instructions/ocr/plot_text.py b/apparser/instructions/ocr/plot_text.py
index b962645..dff78cb 100644
--- a/apparser/instructions/ocr/plot_text.py
+++ b/apparser/instructions/ocr/plot_text.py
@@ -33,16 +33,16 @@ def draw(self, bboxes: list[TextData]):
self.__paint_lines(data)
def __paint_lines(self, data: TextData):
- shape = [(data.coordinates[0].x, data.coordinates[0].y), (data.coordinates[2].x, data.coordinates[2].y)]
+ shape = [(data.coordinates.left_top.x, data.coordinates.left_top.y), (data.coordinates.right_bottom.x, data.coordinates.right_bottom.y)]
self.__draw.rectangle(shape, outline=self.__color, width=1)
def __paint_cords(self, data: TextData):
- y = data.coordinates[0].y + self.__text_move.y
+ y = data.coordinates.left_top.y + self.__text_move.y
if y < 0:
- y = data.coordinates[2].y - self.__text_move.y
- x = data.coordinates[0].x + self.__text_move.x
+ y = data.coordinates.right_bottom.y - self.__text_move.y
+ x = data.coordinates.left_top.x + self.__text_move.x
if y < 0:
- x = data.coordinates[2].x - self.__text_move.x
+ x = data.coordinates.right_bottom.x - self.__text_move.x
self.__draw.text((x, y), data.text, fill=self.__color)
diff --git a/apparser/instructions/ocr/print_all_text.py b/apparser/instructions/ocr/print_all_text.py
index 5bc57a0..9c78abb 100644
--- a/apparser/instructions/ocr/print_all_text.py
+++ b/apparser/instructions/ocr/print_all_text.py
@@ -27,7 +27,4 @@ def id(self) -> int:
def perform(self, ui: BaseUi, text_reader: BaseTextReader, *args, **kwargs):
self.__text_getter.perform(ui, text_reader)
for i in self.__text_getter.local_answer:
- points_stroke = ""
- for j in i.coordinates:
- points_stroke += str(j) + " "
- print(f'text: "{i.text}", coordinates: {points_stroke}')
+ print(f'text: "{i.text}", coordinates: {str(i.coordinates)}')
diff --git a/apparser/instructions/ocr/text_getter.py b/apparser/instructions/ocr/text_getter.py
index 9fe0d9e..8e2ab2c 100644
--- a/apparser/instructions/ocr/text_getter.py
+++ b/apparser/instructions/ocr/text_getter.py
@@ -2,7 +2,7 @@
from PIL import Image
from apparser.core import BaseUi
-from apparser.geometry import Point, RelativelyPoint
+from apparser.geometry import Point, RelativelyPoint, QuadPoints
from apparser.text_readers import BaseTextReader, TextData
@@ -38,9 +38,12 @@ def id(self) -> int:
return 2000
def __text_coordinates_to_local(self, text: TextData) -> TextData:
- new_coordinates = []
- for point in text.coordinates:
- new_coordinates.append(point + self.__left_top_point_global)
+ new_coordinates = QuadPoints(
+ text.coordinates.left_top + self.__left_top_point_global,
+ text.coordinates.right_top + self.__left_top_point_global,
+ text.coordinates.right_bottom + self.__left_top_point_global,
+ text.coordinates.left_bottom + self.__left_top_point_global,
+ )
return TextData(text.text, new_coordinates)
def __texts_coordinates_to_local(self, texts: list[TextData]) -> list[TextData]:
@@ -66,18 +69,18 @@ def perform(self, ui: BaseUi, text_reader: BaseTextReader, *args, **kwargs):
@property
def local_answer(self) -> list[TextData]:
- """Return the texts coordinates in local Ui object of the last perform.
+ """Return text coordinates in the local UI object of the last perform.
- :return: Texts coordinates in local Ui object.
+ :return: Text coordinates in the local UI object.
:rtype: list[TextData]
"""
return self.__local_answer
@property
def global_answer(self) -> list[TextData]:
- """Return the global texts coordinates of the last perform.
+ """Return global text coordinates of the last perform.
- :return: Global texts coordinates.
+ :return: Global text coordinates.
:rtype: list[TextData]
"""
return self.__global_answer
@@ -86,7 +89,7 @@ def global_answer(self) -> list[TextData]:
def screenshot(self) -> numpy.ndarray:
"""Return the screenshot of the last perform.
- :return: Ui screenshot
+ :return: UI screenshot.
:rtype: numpy.ndarray
"""
return self.__screenshot
diff --git a/apparser/instructions/ui/algorithms/ids.py b/apparser/instructions/ui/algorithms/ids.py
index 9e16676..df16dc3 100644
--- a/apparser/instructions/ui/algorithms/ids.py
+++ b/apparser/instructions/ui/algorithms/ids.py
@@ -2,7 +2,6 @@
from typing import Any
from apparser.core import BaseUi
-
from apparser.instructions import BaseInstruction
from apparser.instructions.debuggers import BaseDebugger, Debugger
from apparser.instructions.ui.algorithms.base import BaseAlgorithm
@@ -41,6 +40,12 @@ def __init__(self,
:raises TypeError: If ``debugger`` has an invalid type.
"""
+ if not isinstance(attributes, list):
+ raise TypeError("attributes must be list")
+
+ if not isinstance(instructions, list):
+ raise TypeError("instructions must be list")
+
if not isinstance(debugger, BaseDebugger) and not isinstance(debugger, bool):
raise TypeError(f"debugger must be a bool or BaseDebugger")
@@ -50,8 +55,6 @@ def __init__(self,
elif debugger == False:
debugger = None
- attributes.reverse()
-
self.__debugger = debugger
self.__instructions = instructions
self.__attributes = attributes
@@ -60,11 +63,11 @@ def __init__(self,
def id(self) -> int:
return 1501
- def __form_args(self, instruction: BaseInstruction) -> dict[str, Any]:
+ def __form_args(self, instruction: BaseInstruction, *additional_args) -> dict[str, Any]:
result = {}
function_signature = inspect.signature(instruction.perform)
for arg in function_signature.parameters.values():
- for a in self.__attributes:
+ for a in self.__attributes + list(additional_args):
if arg.annotation is type(a):
result[arg.name] = a
return result
@@ -83,7 +86,7 @@ def perform(self, ui: BaseUi, *args, **kwargs):
raise ValueError(f"instruction with id {instruction_id} not found")
instruction = instruction(*instruction_args)
- perform_kwargs = self.__form_args(instruction)
+ perform_kwargs = self.__form_args(instruction, ui)
if self.__debugger is not None:
self.__debugger.try_perform(instruction, **perform_kwargs)
diff --git a/apparser/instructions/ui/algorithms/names.py b/apparser/instructions/ui/algorithms/names.py
index 67f362f..f1fa940 100644
--- a/apparser/instructions/ui/algorithms/names.py
+++ b/apparser/instructions/ui/algorithms/names.py
@@ -40,6 +40,12 @@ def __init__(self,
:type debugger: BaseDebugger | bool
:raises TypeError: If ``debugger`` has an invalid type.
"""
+ if not isinstance(attributes, list):
+ raise TypeError("attributes must be list")
+
+ if not isinstance(instructions, list):
+ raise TypeError("instructions must be list")
+
if not isinstance(debugger, BaseDebugger) and not isinstance(debugger, bool):
raise TypeError(f"debugger must be a bool or BaseDebugger")
@@ -49,8 +55,6 @@ def __init__(self,
elif debugger == False:
debugger = None
- attributes.reverse()
-
self.__debugger = debugger
self.__instructions = instructions
self.__attributes = attributes
@@ -59,11 +63,11 @@ def __init__(self,
def id(self) -> int:
return 1502
- def __form_args(self, instruction: BaseInstruction) -> dict[str, Any]:
+ def __form_args(self, instruction: BaseInstruction, *additional_args) -> dict[str, Any]:
result = {}
function_signature = inspect.signature(instruction.perform)
for arg in function_signature.parameters.values():
- for a in self.__attributes:
+ for a in self.__attributes + list(additional_args):
if arg.annotation is type(a):
result[arg.name] = a
return result
@@ -83,7 +87,7 @@ def perform(self, ui: BaseUi, *args, **kwargs):
instruction = instruction_type(*instruction_args)
- perform_kwargs = self.__form_args(instruction)
+ perform_kwargs = self.__form_args(instruction, ui)
if self.__debugger is not None:
self.__debugger.try_perform(instruction, **perform_kwargs)
diff --git a/apparser/speakers/base.py b/apparser/speakers/base.py
index 7d72364..58c5962 100644
--- a/apparser/speakers/base.py
+++ b/apparser/speakers/base.py
@@ -12,7 +12,7 @@ def speak(self, text: str) -> tuple[numpy.ndarray, int]:
:param text: Text to synthesize.
:type text: str
- :return: Generated audio samples and bitrate.
+ :return: Generated audio samples and sample rate.
:rtype: tuple[numpy.ndarray, int]
"""
pass
diff --git a/apparser/speakers/chat_tts.py b/apparser/speakers/chat_tts.py
index 2379f17..3f096fd 100644
--- a/apparser/speakers/chat_tts.py
+++ b/apparser/speakers/chat_tts.py
@@ -6,6 +6,8 @@
class ChatTTSSpeaker(BaseSpeaker):
+ """Generate speech by using a ChatTTS backend."""
+
def __init__(
self,
speaker: str | None = None,
@@ -45,7 +47,7 @@ def __init__(
:type experimental: bool
:param enable_cache: Whether ChatTTS cache should be enabled.
:type enable_cache: bool
- :param sample_rate: Output bitrate for generated audio.
+ :param sample_rate: Output sample rate for generated audio.
:type sample_rate: int
"""
self.__chattts = importlib.import_module("ChatTTS")
@@ -98,7 +100,7 @@ def speak(self, text: str, **settings: object) -> tuple[numpy.ndarray, int]:
:type text: str
:param settings: Additional ChatTTS inference settings.
:type settings: dict[str, object]
- :return: Generated audio samples and bitrate.
+ :return: Generated audio samples and sample rate.
:rtype: tuple[numpy.ndarray, int]
"""
speaker = settings.pop("speaker", self.__speaker)
diff --git a/apparser/speakers/torch.py b/apparser/speakers/torch.py
index d83b922..56541da 100644
--- a/apparser/speakers/torch.py
+++ b/apparser/speakers/torch.py
@@ -30,7 +30,7 @@ def __init__(
:type speaker_model: str
:param speaker: Speaker name used for synthesis.
:type speaker: str
- :param sample_rate: Output bitrate.
+ :param sample_rate: Output sample rate.
:type sample_rate: int
:param device: Torch device used for inference.
:type device: str | object
@@ -75,7 +75,7 @@ def speak(self, text: str, **settings: object) -> tuple[numpy.ndarray, int]:
:type text: str
:param settings: Additional synthesis settings.
:type settings: dict[str, object]
- :return: Generated audio samples and bitrate.
+ :return: Generated audio samples and sample rate.
:rtype: tuple[numpy.ndarray, int]
"""
audio = self.__model.apply_tts(
diff --git a/apparser/text_readers/__init__.py b/apparser/text_readers/__init__.py
index 54e8e89..18f9027 100644
--- a/apparser/text_readers/__init__.py
+++ b/apparser/text_readers/__init__.py
@@ -1,14 +1,2 @@
-from apparser.text_readers.base import BaseTextReader
-from apparser.text_readers.screens_controller import ScreensController
-from apparser.text_readers.models.text_data import TextData
-from apparser.text_readers.easy_ocr import EasyOcrReader
-from apparser.text_readers.paddle import PaddleTextReader
-from apparser.text_readers.white_black_reader import WhiteBlackReader
-
-
-__all__ = ["EasyOcrReader",
- "ScreensController",
- "BaseTextReader",
- "WhiteBlackReader",
- "PaddleTextReader",
- "TextData"]
+from apparser.text_readers.readers import *
+from apparser.text_readers.models import *
diff --git a/apparser/text_readers/detectors/__init__.py b/apparser/text_readers/detectors/__init__.py
new file mode 100644
index 0000000..f854d9d
--- /dev/null
+++ b/apparser/text_readers/detectors/__init__.py
@@ -0,0 +1,7 @@
+from apparser.text_readers.detectors.base import BaseTextDetector
+from apparser.text_readers.detectors.easy_ocr import EasyOcrDetector
+
+__all__ = [
+ "BaseTextDetector",
+ "EasyOcrDetector"
+]
diff --git a/apparser/text_readers/detectors/base.py b/apparser/text_readers/detectors/base.py
new file mode 100644
index 0000000..684063b
--- /dev/null
+++ b/apparser/text_readers/detectors/base.py
@@ -0,0 +1,20 @@
+import abc
+
+import numpy
+
+from apparser.geometry import QuadPoints
+
+
+class BaseTextDetector(abc.ABC):
+ """Abstract interface that every text detection backend must implement."""
+
+ @abc.abstractmethod
+ def read_image(self, image: numpy.ndarray) -> list[QuadPoints]:
+ """Detect text regions in an image and return their coordinates.
+
+ :param image: Image data to process.
+ :type image: numpy.ndarray
+ :return: Coordinates of the detected text regions, one ``QuadPoints`` each.
+ :rtype: list[QuadPoints]
+ """
+ pass
diff --git a/apparser/text_readers/detectors/easy_ocr.py b/apparser/text_readers/detectors/easy_ocr.py
new file mode 100644
index 0000000..f0b6ebc
--- /dev/null
+++ b/apparser/text_readers/detectors/easy_ocr.py
@@ -0,0 +1,141 @@
+import importlib
+from typing import Any
+
+import numpy
+
+from apparser.geometry import Point, QuadPoints
+from apparser.text_readers.detectors.base import BaseTextDetector
+
+
+def _build_box_points(
+    left: int,
+    top: int,
+    right: int,
+    bottom: int,
+) -> QuadPoints:
+    """Build an axis-aligned quadrilateral from the four box edges.
+
+    Corners are passed as top-left, top-right, bottom-right, bottom-left.
+    """
+    corners = ((left, top), (right, top), (right, bottom), (left, bottom))
+    return QuadPoints(*(Point(x, y) for x, y in corners))
+
+
+def _parse_horizontal_box(box: Any) -> QuadPoints | None:
+    """Convert a flat box into a quad.
+
+    The first four values are read as ``left, right, top, bottom``.
+    Returns ``None`` unless ``box`` is one-dimensional with at least four values.
+    """
+    values = numpy.asarray(box)
+    if values.ndim == 1 and values.size >= 4:
+        x_min, x_max, y_min, y_max = (int(value) for value in values[:4])
+        return _build_box_points(x_min, y_min, x_max, y_max)
+    return None
+
+
+def _parse_free_box(box: Any) -> QuadPoints | None:
+    """Convert a free-form box into a quad made of its first four points.
+
+    Flat input of 8+ values (even count) is regrouped into ``(x, y)`` pairs.
+    """
+    pairs = numpy.asarray(box)
+    if pairs.ndim == 1 and pairs.size >= 8 and pairs.size % 2 == 0:
+        pairs = pairs.reshape(-1, 2)
+    if pairs.ndim == 2 and len(pairs) >= 4 and pairs.shape[-1] >= 2:
+        return QuadPoints(*(Point(int(pair[0]), int(pair[1])) for pair in pairs[:4]))
+    return None
+
+
+def _extend_horizontal_points(
+    returned: list[QuadPoints],
+    horizontal_groups: Any,
+) -> None:
+    """Append every quad parsed from ``horizontal_groups`` to ``returned``.
+
+    :param returned: List extended in place.
+    :param horizontal_groups: Boxes, or groups of boxes, to parse.
+    """
+    for entry in horizontal_groups:
+        quad = _parse_horizontal_box(entry)
+        # An entry that is not a box itself is treated as a group of boxes.
+        candidates = [quad] if quad is not None else map(_parse_horizontal_box, entry)
+        returned.extend(found for found in candidates if found is not None)
+
+
+def _extend_free_points(
+    returned: list[QuadPoints],
+    free_groups: Any,
+) -> None:
+    """Append every quad parsed from ``free_groups`` to ``returned``.
+
+    :param returned: List extended in place.
+    :param free_groups: Free-form boxes, or groups of such boxes, to parse.
+    """
+    for entry in free_groups:
+        quad = _parse_free_box(entry)
+        # An entry that is not a box itself is treated as a group of boxes.
+        candidates = [quad] if quad is not None else map(_parse_free_box, entry)
+        returned.extend(found for found in candidates if found is not None)
+
+
+def _parse_detect_result(predicted: Any) -> list[QuadPoints]:
+    """Collect quads from the horizontal and free-form parts of a detect result."""
+    quads: list[QuadPoints] = []
+    # Only the first two items are used; shorter results yield an empty list.
+    if len(predicted) >= 2:
+        horizontal, free = predicted[:2]
+        _extend_horizontal_points(quads, horizontal)
+        # Free-form quads are appended after all horizontal ones.
+        _extend_free_points(quads, free)
+    return quads
+
+
+def _build_default_settings(settings: dict[str, Any]) -> dict[str, Any]:
+    """Merge caller settings over detection-only reader defaults.
+
+    Defaults enable the detector and disable the recognizer; values in
+    ``settings`` take precedence.
+    """
+    return {"detector": True, "recognizer": False, **settings}
+
+
+class EasyOcrDetector(BaseTextDetector):
+    """Text detector that finds text regions with an EasyOCR reader."""
+
+    def __init__(
+        self,
+        lang_list: list[str] | None = None,
+        **settings: Any,
+    ) -> None:
+        """Create the underlying EasyOCR reader.
+
+        ``easyocr`` is imported when the detector is constructed.
+
+        :param lang_list: Languages for the reader; ``["en"]`` when omitted.
+        :type lang_list: list[str] | None
+        :param settings: Extra reader settings, applied over the defaults.
+        :type settings: dict[str, object]
+        """
+        languages = ["en"] if lang_list is None else lang_list
+        backend = importlib.import_module("easyocr")
+        # Caller settings override the detection-only defaults.
+        reader_settings = _build_default_settings(settings)
+        self.__reader = backend.Reader(languages, **reader_settings)
+
+    def read_image(
+        self,
+        image: numpy.ndarray,
+        **settings: Any,
+    ) -> list[QuadPoints]:
+        """Run EasyOCR detection on an image and return the found regions.
+
+        :param image: Image to search for text.
+        :type image: numpy.ndarray
+        :param settings: Extra keyword arguments forwarded to ``detect``.
+        :type settings: dict[str, object]
+        :return: Quadrilaterals of the detected text regions.
+        :rtype: list[QuadPoints]
+        """
+        predicted = self.__reader.detect(image, **settings)
+        return _parse_detect_result(predicted)
diff --git a/apparser/text_readers/models/__init__.py b/apparser/text_readers/models/__init__.py
index e69de29..1c40a79 100644
--- a/apparser/text_readers/models/__init__.py
+++ b/apparser/text_readers/models/__init__.py
@@ -0,0 +1,3 @@
+from apparser.text_readers.models.text_data import TextData
+
+__all__ = ["TextData"]
diff --git a/apparser/text_readers/models/text_data.py b/apparser/text_readers/models/text_data.py
index d51b0e4..bdd9be3 100644
--- a/apparser/text_readers/models/text_data.py
+++ b/apparser/text_readers/models/text_data.py
@@ -1,6 +1,6 @@
from dataclasses import dataclass
-from apparser.geometry import Point
+from apparser.geometry import QuadPoints
@dataclass(frozen=True)
@@ -8,4 +8,4 @@ class TextData:
"""Store detected text together with its polygon coordinates."""
text: str
- coordinates: list[Point]
+ coordinates: QuadPoints
\ No newline at end of file
diff --git a/apparser/text_readers/readers/__init__.py b/apparser/text_readers/readers/__init__.py
new file mode 100644
index 0000000..1f7e74c
--- /dev/null
+++ b/apparser/text_readers/readers/__init__.py
@@ -0,0 +1,15 @@
+from apparser.text_readers.readers.base import BaseTextReader
+from apparser.text_readers.readers.easy_ocr import EasyOcrReader
+from apparser.text_readers.readers.paddle import PaddleTextReader
+from apparser.text_readers.readers.screens_controller import ScreensController
+from apparser.text_readers.readers.white_black_reader import WhiteBlackReader
+from apparser.text_readers.readers.rapid_ocr import RapidOcrReader
+from apparser.text_readers.readers.compound import CompoundReader
+
+__all__ = ["EasyOcrReader",
+ "ScreensController",
+ "BaseTextReader",
+ "WhiteBlackReader",
+ "PaddleTextReader",
+ "CompoundReader",
+ "RapidOcrReader"]
diff --git a/apparser/text_readers/base.py b/apparser/text_readers/readers/base.py
similarity index 100%
rename from apparser/text_readers/base.py
rename to apparser/text_readers/readers/base.py
diff --git a/apparser/text_readers/readers/compound.py b/apparser/text_readers/readers/compound.py
new file mode 100644
index 0000000..9efdded
--- /dev/null
+++ b/apparser/text_readers/readers/compound.py
@@ -0,0 +1,68 @@
+from apparser.geometry import QuadPoints, distance, Point
+
+from apparser.text_readers.readers.base import BaseTextReader
+
+from apparser.text_readers.models import TextData
+from apparser.text_readers.detectors import BaseTextDetector
+from apparser.text_readers.scanners import BaseTextScanner
+
+
+import numpy
+from PIL import Image
+
+
+def _cut_by_coordinates(image: numpy.ndarray, coordinates: QuadPoints) -> numpy.ndarray:
+    """Warp the ``coordinates`` quadrilateral of ``image`` into an upright rectangle."""
+    pil_image = Image.fromarray(image)
+    if pil_image.mode not in ("RGB", "RGBA", "L"):
+        pil_image = pil_image.convert("RGB")
+    left_top, right_top = coordinates.left_top, coordinates.right_top
+    left_bottom, right_bottom = coordinates.left_bottom, coordinates.right_bottom
+    # Output size follows the longer of each pair of opposite edges (at least 1 px).
+    out_width = int(max(distance(left_top, right_top), distance(left_bottom, right_bottom)))
+    out_height = int(max(distance(left_top, left_bottom), distance(right_top, right_bottom)))
+    out_size = (max(out_width, 1), max(out_height, 1))
+    # Pillow's QUAD transform takes the source corners in the order
+    # upper-left, lower-left, lower-right, upper-right.
+    quad_data = (left_top.x, left_top.y, left_bottom.x, left_bottom.y,
+                 right_bottom.x, right_bottom.y, right_top.x, right_top.y)
+    transformed = pil_image.transform(out_size, Image.QUAD, quad_data,
+                                      resample=Image.BICUBIC)
+    return numpy.array(transformed)
+
+
+class CompoundReader(BaseTextReader):
+    """Text reader that pairs a region detector with a fragment scanner."""
+
+    def __init__(self, detector: BaseTextDetector, scanner: BaseTextScanner):
+        """Store the detector and scanner after checking their types.
+
+        :param detector: Finds the text regions of an image.
+        :type detector: BaseTextDetector
+        :param scanner: Reads the text of one cut-out region.
+        :type scanner: BaseTextScanner
+        :raises TypeError: If ``detector`` or ``scanner`` has the wrong type.
+        """
+        if not isinstance(detector, BaseTextDetector):
+            raise TypeError('detector must be an instance of BaseTextDetector')
+        if not isinstance(scanner, BaseTextScanner):
+            raise TypeError('scanner must be an instance of BaseTextScanner')
+        self.__detector = detector
+        self.__scanner = scanner
+
+    def read_image(self, image: numpy.ndarray) -> list[TextData]:
+        """Detect text regions and scan each one for its text.
+
+        Every region is cut out of ``image`` before it is handed to the scanner.
+
+        :param image: Image data to process.
+        :type image: numpy.ndarray
+        :return: One entry per detected region, in detector order.
+        :rtype: list[TextData]
+        """
+        scan = self.__scanner.read_image
+        # The detector runs once; each region is then cut and scanned in turn.
+        return [
+            TextData(text=scan(_cut_by_coordinates(image, region)), coordinates=region)
+            for region in self.__detector.read_image(image)
+        ]
diff --git a/apparser/text_readers/easy_ocr.py b/apparser/text_readers/readers/easy_ocr.py
similarity index 87%
rename from apparser/text_readers/easy_ocr.py
rename to apparser/text_readers/readers/easy_ocr.py
index 9eaa89a..0f8633f 100644
--- a/apparser/text_readers/easy_ocr.py
+++ b/apparser/text_readers/readers/easy_ocr.py
@@ -1,10 +1,10 @@
import importlib
import numpy
-from apparser.text_readers.base import BaseTextReader
+from apparser.text_readers.readers.base import BaseTextReader
from apparser.text_readers.models.text_data import TextData
-from apparser.geometry import Point
+from apparser.geometry import Point, QuadPoints
class EasyOcrReader(BaseTextReader):
@@ -36,7 +36,7 @@ def read_image(self, image: numpy.ndarray, **settings) -> list[TextData]:
returned = []
predicted = self.__reader.readtext(image, **settings)
for i in predicted:
- points = [Point(int(j[0]), int(j[1])) for j in i[0]]
+ points = QuadPoints(*[Point(int(j[0]), int(j[1])) for j in i[0]])
text_data = TextData(i[1], points)
returned.append(text_data)
return returned
diff --git a/apparser/text_readers/paddle.py b/apparser/text_readers/readers/paddle.py
similarity index 75%
rename from apparser/text_readers/paddle.py
rename to apparser/text_readers/readers/paddle.py
index a87eb23..566b49b 100644
--- a/apparser/text_readers/paddle.py
+++ b/apparser/text_readers/readers/paddle.py
@@ -2,8 +2,8 @@
from typing import Any
import numpy
-from apparser.geometry import Point
-from apparser.text_readers.base import BaseTextReader
+from apparser.geometry import Point, QuadPoints
+from apparser.text_readers.readers.base import BaseTextReader
from apparser.text_readers.models.text_data import TextData
@@ -12,16 +12,16 @@ def _build_box_points(
top: int,
right: int,
bottom: int,
-) -> list[Point]:
- return [
+) -> QuadPoints:
+ return QuadPoints(
Point(left, top),
Point(right, top),
Point(right, bottom),
Point(left, bottom),
- ]
+ )
-def _parse_points_geometry(geometry: Any) -> list[Point]:
+def _parse_points_geometry(geometry: Any) -> QuadPoints | None:
array = numpy.asarray(geometry)
if array.ndim == 1 and array.size == 4:
@@ -41,7 +41,7 @@ def _parse_points_geometry(geometry: Any) -> list[Point]:
y_coordinates = array[..., 1].reshape(-1)
if len(x_coordinates) == 0 or len(y_coordinates) == 0:
- return []
+ return None
return _build_box_points(
int(x_coordinates.min()),
@@ -50,7 +50,7 @@ def _parse_points_geometry(geometry: Any) -> list[Point]:
int(y_coordinates.max()),
)
- return []
+ return None
def _parse_predict_result(predicted: list[Any]) -> list[TextData]:
@@ -77,7 +77,7 @@ def _parse_predict_result(predicted: list[Any]) -> list[TextData]:
for index in range(min(len(texts), len(geometries))):
points = _parse_points_geometry(geometries[index])
- if len(points) < 4:
+ if points is None:
continue
returned.append(TextData(texts[index], points))
@@ -99,12 +99,23 @@ def _build_default_settings(
class PaddleTextReader(BaseTextReader):
+ """Read text from images by using PaddleOCR."""
+
def __init__(
self,
lang: str = "en",
enable_mkldnn: bool = False,
**settings: Any,
) -> None:
+ """Initialize a PaddleOCR-backed text reader.
+
+ :param lang: Language passed to PaddleOCR.
+ :type lang: str
+ :param enable_mkldnn: Whether MKL-DNN acceleration should be enabled.
+ :type enable_mkldnn: bool
+ :param settings: Additional PaddleOCR reader settings.
+ :type settings: dict[str, object]
+ """
self.__lang = lang
self.__enable_mkldnn = enable_mkldnn
self.__settings = _build_default_settings(
@@ -118,6 +129,15 @@ def read_image(
image: numpy.ndarray,
**settings: Any,
) -> list[TextData]:
+ """Read text data from an image.
+
+ :param image: Image data to process.
+ :type image: numpy.ndarray
+ :param settings: Additional PaddleOCR predict settings.
+ :type settings: dict[str, object]
+ :return: Detected text data.
+ :rtype: list[TextData]
+ """
predicted = self.__reader.predict(image, **settings)
return _parse_predict_result(predicted)
diff --git a/apparser/text_readers/readers/rapid_ocr.py b/apparser/text_readers/readers/rapid_ocr.py
new file mode 100644
index 0000000..c91ee4f
--- /dev/null
+++ b/apparser/text_readers/readers/rapid_ocr.py
@@ -0,0 +1,110 @@
+import importlib
+from typing import Any
+
+import numpy
+
+from apparser.geometry import Point, QuadPoints
+from apparser.text_readers.models import TextData
+from apparser.text_readers.readers.base import BaseTextReader
+
+
+def _build_box_points(geometry: Any) -> QuadPoints | None:
+    """Convert one RapidOCR box geometry into ``QuadPoints``.
+
+    Accepted layouts:
+
+    * four flat values, read as ``left, top, right, bottom`` and expanded
+      to the four corners of an axis-aligned box;
+    * a flat sequence of eight or more values (even count), read as
+      consecutive ``x, y`` pairs;
+    * a two-dimensional array with at least four rows and two columns.
+
+    For the point layouts only the first four points are used, in the order
+    given. Every coordinate is converted with ``int``.
+
+    :param geometry: Box geometry as produced by the OCR backend.
+    :type geometry: Any
+    :return: The quad, or None when the geometry cannot be interpreted
+        (unsupported shape, ragged input, non-numeric or non-finite values).
+    :rtype: QuadPoints | None
+    """
+    try:
+        array = numpy.asarray(geometry)
+    except (TypeError, ValueError):
+        # Ragged sequences cannot be turned into a regular array; treat them
+        # like any other unparseable geometry instead of aborting the read.
+        return None
+
+    # Only the numeric conversion is guarded, so an error raised while
+    # constructing Point / QuadPoints themselves is never hidden.
+    try:
+        if array.ndim == 1 and array.size == 4:
+            left, top, right, bottom = (int(value) for value in array)
+            corners = [
+                (left, top),
+                (right, top),
+                (right, bottom),
+                (left, bottom),
+            ]
+        else:
+            if array.ndim == 1 and array.size >= 8 and array.size % 2 == 0:
+                array = array.reshape(-1, 2)
+            if array.ndim != 2 or len(array) < 4 or array.shape[-1] < 2:
+                return None
+            corners = [(int(row[0]), int(row[1])) for row in array[:4]]
+    except (TypeError, ValueError, OverflowError):
+        # int() rejects strings / None (TypeError, ValueError), NaN
+        # (ValueError) and infinities (OverflowError).
+        return None
+
+    return QuadPoints(*[Point(x, y) for x, y in corners])
+
+
+def _parse_output_object(predicted: Any) -> list[TextData] | None:
+    """Parse a result object that exposes ``boxes`` / ``txts`` attributes.
+
+    :param predicted: Raw value returned by the OCR engine.
+    :type predicted: Any
+    :return: None when ``predicted`` has neither attribute (the caller then
+        tries another result format); an empty list when either attribute
+        is missing or None; otherwise the parsed text entries.
+    :rtype: list[TextData] | None
+    """
+    if not (hasattr(predicted, "boxes") or hasattr(predicted, "txts")):
+        return None
+
+    boxes = getattr(predicted, "boxes", None)
+    texts = getattr(predicted, "txts", None)
+    if boxes is not None and texts is not None:
+        return _parse_boxes_and_texts(boxes, texts)
+    return []
+
+
+def _parse_result_item(item: Any) -> TextData | None:
+    """Parse one ``(geometry, text, ...)`` entry of a list-style result.
+
+    :param item: Candidate entry; only lists and tuples with at least two
+        elements are accepted.
+    :type item: Any
+    :return: Text data built from the entry, or None when the entry has the
+        wrong shape or its geometry cannot be parsed.
+    :rtype: TextData | None
+    """
+    if isinstance(item, (list, tuple)) and len(item) >= 2:
+        points = _build_box_points(item[0])
+        if points is not None:
+            # The text element is stringified whatever type it arrives as.
+            return TextData(str(item[1]), points)
+    return None
+
+
+def _parse_result_list(predicted: Any) -> list[TextData]:
+    """Parse a list-style result, skipping entries that cannot be parsed.
+
+    :param predicted: Iterable of result entries, or None.
+    :type predicted: Any
+    :return: Parsed entries in input order; empty when ``predicted`` is None
+        or not iterable.
+    :rtype: list[TextData]
+    """
+    if predicted is None:
+        return []
+    try:
+        items = iter(predicted)
+    except TypeError:
+        # A non-iterable result is treated as "nothing recognised".
+        return []
+
+    parsed = (_parse_result_item(item) for item in items)
+    return [text_data for text_data in parsed if text_data is not None]
+
+
+def _parse_boxes_and_texts(boxes: Any, texts: Any) -> list[TextData]:
+    """Pair boxes with texts positionally and build text data from them.
+
+    Pairing stops at the shorter of the two iterables. A pair whose box
+    geometry cannot be parsed is dropped.
+
+    :param boxes: Iterable of box geometries.
+    :type boxes: Any
+    :param texts: Iterable of recognised texts, aligned with ``boxes``.
+    :type texts: Any
+    :return: Text data for every pair with a usable box.
+    :rtype: list[TextData]
+    """
+    candidates = (
+        (_build_box_points(box), text) for box, text in zip(boxes, texts)
+    )
+    return [
+        TextData(str(text), points)
+        for points, text in candidates
+        if points is not None
+    ]
+
+
+def _parse_predict_result(predicted: Any) -> list[TextData]:
+    """Normalise any supported RapidOCR return value into text data.
+
+    The attribute-based format (``boxes`` / ``txts``) is tried first. If it
+    does not apply, the value is parsed as a list of entries; a two-element
+    tuple is unwrapped to its first element beforehand.
+
+    :param predicted: Raw value returned by the OCR engine.
+    :type predicted: Any
+    :return: Parsed text entries.
+    :rtype: list[TextData]
+    """
+    from_object = _parse_output_object(predicted)
+    if from_object is None:
+        # NOTE(review): the tuple form presumably is a legacy
+        # ``(result, extra)`` return value - confirm against RapidOCR.
+        is_pair = isinstance(predicted, tuple) and len(predicted) == 2
+        return _parse_result_list(predicted[0] if is_pair else predicted)
+    return from_object
+
+
+class RapidOcrReader(BaseTextReader):
+    """Text reader backed by the RapidOCR engine."""
+
+    def __init__(self, **settings: Any) -> None:
+        """Create the underlying RapidOCR engine.
+
+        ``rapidocr`` is imported here rather than at module import time.
+
+        :param settings: Keyword arguments forwarded to ``rapidocr.RapidOCR``.
+        :type settings: dict[str, object]
+        """
+        engine_factory = importlib.import_module("rapidocr").RapidOCR
+        self.__reader = engine_factory(**settings)
+
+    def read_image(
+        self,
+        image: numpy.ndarray,
+        **settings: Any,
+    ) -> list[TextData]:
+        """Run RapidOCR on ``image`` and return the recognised text.
+
+        :param image: Image data to process.
+        :type image: numpy.ndarray
+        :param settings: Keyword arguments forwarded to the engine call.
+        :type settings: dict[str, object]
+        :return: Detected text data.
+        :rtype: list[TextData]
+        """
+        return _parse_predict_result(self.__reader(image, **settings))
diff --git a/apparser/text_readers/screens_controller.py b/apparser/text_readers/readers/screens_controller.py
similarity index 95%
rename from apparser/text_readers/screens_controller.py
rename to apparser/text_readers/readers/screens_controller.py
index 1262355..e8fe85f 100644
--- a/apparser/text_readers/screens_controller.py
+++ b/apparser/text_readers/readers/screens_controller.py
@@ -1,6 +1,6 @@
import numpy
-from apparser.text_readers.base import BaseTextReader
+from apparser.text_readers.readers.base import BaseTextReader
from apparser.text_readers.models.text_data import TextData
diff --git a/apparser/text_readers/white_black_reader.py b/apparser/text_readers/readers/white_black_reader.py
similarity index 93%
rename from apparser/text_readers/white_black_reader.py
rename to apparser/text_readers/readers/white_black_reader.py
index a16984a..05a9394 100644
--- a/apparser/text_readers/white_black_reader.py
+++ b/apparser/text_readers/readers/white_black_reader.py
@@ -1,6 +1,6 @@
import numpy
-from apparser.text_readers.base import BaseTextReader
+from apparser.text_readers.readers.base import BaseTextReader
from apparser.text_readers.models.text_data import TextData
from PIL import Image
diff --git a/apparser/text_readers/scanners/__init__.py b/apparser/text_readers/scanners/__init__.py
new file mode 100644
index 0000000..ce74c4f
--- /dev/null
+++ b/apparser/text_readers/scanners/__init__.py
@@ -0,0 +1,4 @@
+from apparser.text_readers.scanners.base import BaseTextScanner
+from apparser.text_readers.scanners.trocr import TrocrScanner
+
+__all__ = ["BaseTextScanner", "TrocrScanner"]
diff --git a/apparser/text_readers/scanners/base.py b/apparser/text_readers/scanners/base.py
new file mode 100644
index 0000000..d48451a
--- /dev/null
+++ b/apparser/text_readers/scanners/base.py
@@ -0,0 +1,18 @@
+import abc
+
+import numpy
+
+
+class BaseTextScanner(abc.ABC):
+    """Abstract interface implemented by every text scanner backend."""
+
+    @abc.abstractmethod
+    def read_image(self, image: numpy.ndarray) -> str:
+        """Return the text recognised in ``image``.
+
+        Concrete scanners must override this method.
+
+        :param image: Image data to process.
+        :type image: numpy.ndarray
+        :return: Detected text.
+        :rtype: str
+        """
diff --git a/apparser/text_readers/scanners/trocr.py b/apparser/text_readers/scanners/trocr.py
new file mode 100644
index 0000000..fadd290
--- /dev/null
+++ b/apparser/text_readers/scanners/trocr.py
@@ -0,0 +1,59 @@
+import importlib
+import numpy
+from PIL import Image
+
+from apparser.text_readers.scanners.base import BaseTextScanner
+
+
+class TrocrScanner(BaseTextScanner):
+    """Text scanner backed by a TrOCR vision encoder-decoder model."""
+
+    def __init__(self, model_name="microsoft/trocr-base-printed",
+                 processor_name=None, device=None):
+        """Load the TrOCR processor and model and move the model to a device.
+
+        ``transformers`` and ``torch`` are imported here rather than at
+        module import time.
+
+        :param model_name: Vision encoder-decoder model name.
+        :type model_name: str
+        :param processor_name: Processor model name. If None, ``model_name``
+            is used for the processor as well.
+        :type processor_name: str | None
+        :param device: Device used for inference. If None, ``"cuda"`` is
+            chosen when ``torch.cuda.is_available()`` and ``"cpu"`` otherwise.
+        :type device: str | None
+        """
+        transformers = importlib.import_module("transformers")
+        torch = importlib.import_module("torch")
+
+        self.__pil_image_module = Image
+
+        # The processor is loaded before the model, as in the original flow.
+        self.__processor = transformers.TrOCRProcessor.from_pretrained(
+            model_name if processor_name is None else processor_name
+        )
+        self.__model = transformers.VisionEncoderDecoderModel.from_pretrained(
+            model_name
+        )
+
+        if device is not None:
+            self.__device = device
+        elif torch.cuda.is_available():
+            self.__device = "cuda"
+        else:
+            self.__device = "cpu"
+        self.__model.to(self.__device)
+
+    def read_image(self, image: numpy.ndarray) -> str:
+        """Recognise the text shown in ``image``.
+
+        The array is converted to an RGB PIL image, encoded by the
+        processor, run through ``generate`` on the configured device and
+        decoded with special tokens skipped.
+
+        :param image: Image data to process.
+        :type image: numpy.ndarray
+        :return: Detected text (first decoded sequence).
+        :rtype: str
+        """
+        rgb_image = self.__pil_image_module.fromarray(image).convert("RGB")
+        encoded = self.__processor(images=rgb_image, return_tensors="pt")
+        token_ids = self.__model.generate(
+            encoded.pixel_values.to(self.__device)
+        )
+        decoded = self.__processor.batch_decode(
+            token_ids, skip_special_tokens=True
+        )
+        return decoded[0]
diff --git a/docs/api/geometry/index.rst b/docs/api/geometry/index.rst
index eeb5a34..f4818aa 100644
--- a/docs/api/geometry/index.rst
+++ b/docs/api/geometry/index.rst
@@ -7,5 +7,6 @@ geometry
Point
Size
+ QuadPoints
RelativelyPoint
distance
diff --git a/docs/api/text_readers/CompoundReader.rst b/docs/api/text_readers/CompoundReader.rst
new file mode 100644
index 0000000..8b53fdf
--- /dev/null
+++ b/docs/api/text_readers/CompoundReader.rst
@@ -0,0 +1,10 @@
+CompoundReader
+================
+
+.. currentmodule:: apparser.text_readers
+
+.. autoclass:: CompoundReader
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :member-order: bysource
diff --git a/docs/api/text_readers/RapidOcrReader.rst b/docs/api/text_readers/RapidOcrReader.rst
new file mode 100644
index 0000000..ded60ee
--- /dev/null
+++ b/docs/api/text_readers/RapidOcrReader.rst
@@ -0,0 +1,10 @@
+RapidOcrReader
+================
+
+.. currentmodule:: apparser.text_readers
+
+.. autoclass:: RapidOcrReader
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :member-order: bysource
diff --git a/docs/api/text_readers/detectors/BaseTextDetector.rst b/docs/api/text_readers/detectors/BaseTextDetector.rst
new file mode 100644
index 0000000..226c37e
--- /dev/null
+++ b/docs/api/text_readers/detectors/BaseTextDetector.rst
@@ -0,0 +1,10 @@
+BaseTextDetector
+====================
+
+.. currentmodule:: apparser.text_readers.detectors
+
+.. autoclass:: BaseTextDetector
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :member-order: bysource
diff --git a/docs/api/text_readers/detectors/EasyOcrDetector.rst b/docs/api/text_readers/detectors/EasyOcrDetector.rst
new file mode 100644
index 0000000..02b442c
--- /dev/null
+++ b/docs/api/text_readers/detectors/EasyOcrDetector.rst
@@ -0,0 +1,10 @@
+EasyOcrDetector
+=====================
+
+.. currentmodule:: apparser.text_readers.detectors
+
+.. autoclass:: EasyOcrDetector
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :member-order: bysource
diff --git a/docs/api/text_readers/detectors/index.rst b/docs/api/text_readers/detectors/index.rst
new file mode 100644
index 0000000..ba34567
--- /dev/null
+++ b/docs/api/text_readers/detectors/index.rst
@@ -0,0 +1,10 @@
+detectors
+=====================
+
+.. toctree::
+ :maxdepth: 1
+ :titlesonly:
+
+ BaseTextDetector
+ EasyOcrDetector
+
diff --git a/docs/api/text_readers/index.rst b/docs/api/text_readers/index.rst
index 187174e..fedce76 100644
--- a/docs/api/text_readers/index.rst
+++ b/docs/api/text_readers/index.rst
@@ -1,13 +1,29 @@
text_readers
=====================
+Modules
+--------
+
+.. toctree::
+ :maxdepth: 2
+ :titlesonly:
+
+ detectors/index
+ scanners/index
+
+Classes
+--------------
+
.. toctree::
:maxdepth: 1
:titlesonly:
BaseTextReader
EasyOcrReader
+ RapidOcrReader
PaddleTextReader
ScreensController
WhiteBlackReader
+ CompoundReader
TextData
+
diff --git a/docs/api/text_readers/scanners/BaseTextScanner.rst b/docs/api/text_readers/scanners/BaseTextScanner.rst
new file mode 100644
index 0000000..8f2f984
--- /dev/null
+++ b/docs/api/text_readers/scanners/BaseTextScanner.rst
@@ -0,0 +1,10 @@
+BaseTextScanner
+===================
+
+.. currentmodule:: apparser.text_readers.scanners
+
+.. autoclass:: BaseTextScanner
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :member-order: bysource
diff --git a/docs/api/text_readers/scanners/TrocrScanner.rst b/docs/api/text_readers/scanners/TrocrScanner.rst
new file mode 100644
index 0000000..cd0baf5
--- /dev/null
+++ b/docs/api/text_readers/scanners/TrocrScanner.rst
@@ -0,0 +1,10 @@
+TrocrScanner
+==============
+
+.. currentmodule:: apparser.text_readers.scanners
+
+.. autoclass:: TrocrScanner
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :member-order: bysource
diff --git a/docs/api/text_readers/scanners/index.rst b/docs/api/text_readers/scanners/index.rst
new file mode 100644
index 0000000..4558f28
--- /dev/null
+++ b/docs/api/text_readers/scanners/index.rst
@@ -0,0 +1,10 @@
+scanners
+=====================
+
+.. toctree::
+ :maxdepth: 1
+ :titlesonly:
+
+ BaseTextScanner
+ TrocrScanner
+
diff --git a/docs/examples/ocr.rst b/docs/examples/ocr.rst
index 1d4d71b..da23d16 100644
--- a/docs/examples/ocr.rst
+++ b/docs/examples/ocr.rst
@@ -46,13 +46,17 @@ Code
app = App("Notepad", window_title="Notepad")
+ configure_algorithm.perform(app.ui)
+
ui = WindowByDisplayUi(app.ui.window)
- while True:
+ try:
hello_world_algorithm.perform(ui)
new_tab_algorithm.perform(ui)
+ finally:
+ app.stop_app()
Video
--------
-.. image:: ../_static/ocr.gif
\ No newline at end of file
+.. image:: ../_static/ocr.gif
diff --git a/docs/index.rst b/docs/index.rst
index 16ea236..c6d746e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -7,16 +7,22 @@ Apparser
.. raw:: html
- Donation link +
-Contribution +Contributing =============== -1. If something doesn't work - open issue. -2. If you want something fixed - open issue. -3. If you can help with the library - email. +1. If something doesn't work, open an issue. +2. If you want something fixed, open an issue. +3. If you can help with the library, email us. apparser.development@gmail.com -Any help in development is welcome!) +Contributions are welcome! diff --git a/docs/info/about.rst b/docs/info/about.rst index 436b0d1..2826f6e 100644 --- a/docs/info/about.rst +++ b/docs/info/about.rst @@ -7,16 +7,22 @@ Apparser .. raw:: html -Apparser is a Python library designed for automating desktop applications and managing UI interfaces using artificial intelligence, such as OCR or object detection models. +Apparser is a Python library for automating desktop applications and interacting with UIs using AI-powered tools such as OCR and object detection models. -Link to `PyPi- Donation link +
-Contribution +Contributing -------------- -1. If something doesn't work - open issue. -2. If you want something fixed - open issue. -3. If you can help with the library - email. +1. If something doesn't work, open an issue. +2. If you want something fixed, open an issue. +3. If you can help with the library, email us. apparser.development@gmail.com -Any help in development is welcome!) +Contributions are welcome! diff --git a/docs/info/instructions_ids.rst b/docs/info/instructions_ids.rst index d06207e..bec6635 100644 --- a/docs/info/instructions_ids.rst +++ b/docs/info/instructions_ids.rst @@ -81,6 +81,18 @@ Instructions available through get_instruction_by_id() * - ``9`` - ``Sleep`` - Pause execution for a fixed amount of time + * - ``10`` + - ``PressKeyDown`` + - Press a single key down + * - ``11`` + - ``PressKeyUp`` + - Release a single key + * - ``12`` + - ``MouseUp`` + - Release a mouse button + * - ``13`` + - ``MouseDown`` + - Press a mouse button down * - ``1000`` - ``WindowToForeground`` - Bring the window to the foreground @@ -114,6 +126,9 @@ Instructions available through get_instruction_by_id() * - ``2004`` - ``PlotAllText`` - Draw detected text on top of a screenshot + * - ``2005`` + - ``WaitText`` + - Wait until matching text appears * - ``3000`` - ``PlayTextAudio`` - Synthesize text and play it as regular audio @@ -148,4 +163,4 @@ Classes with their own id but not resolved by get_instruction_by_id() - Execute instructions with a provided ``speaker`` * - ``1505`` - ``UniqueAlgorithm`` - - Inject dependencies into instructions by argument type \ No newline at end of file + - Inject dependencies into instructions by argument type diff --git a/example.gif b/example.gif new file mode 100644 index 0000000..fd9ec70 Binary files /dev/null and b/example.gif differ diff --git a/pyproject.toml b/pyproject.toml index 80b3efc..a0eb667 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ dependencies = [ "thefuzz >= 0.20.0" ] name = "apparser" 
-version = "1.0.0" +version = "1.1.0" authors = [ { name = "Terochkin A.S", email = "apparser.development@gmail.com" }, ] @@ -59,7 +59,9 @@ cv = [ ocr = [ "easyocr >= 1.7.2, < 2.0", "paddleocr >= 3.5.0", - "paddlepaddle >= 3.3.1" + "paddlepaddle >= 3.3.1", + "rapidocr>=3.0.0", + "onnxruntime>=1.20.1" ] speak = [ diff --git a/tests/apparser/core/test_app.py b/tests/apparser/core/test_app.py index c18dc7e..4f51e6a 100644 --- a/tests/apparser/core/test_app.py +++ b/tests/apparser/core/test_app.py @@ -10,12 +10,16 @@ class FakeProcess: - def __init__(self) -> None: + def __init__(self, pid = 0) -> None: self.kill_calls = 0 + self.__pid = pid def kill(self) -> None: self.kill_calls += 1 + @property + def pid(self) -> int: + return self.__pid class FakeWindowUi: def __init__(self, window: FakeWindow) -> None: diff --git a/tests/apparser/cv/utils/test_changes_checker.py b/tests/apparser/cv/utils/test_changes_checker.py index f205d25..84aa970 100644 --- a/tests/apparser/cv/utils/test_changes_checker.py +++ b/tests/apparser/cv/utils/test_changes_checker.py @@ -21,7 +21,8 @@ def test_is_resized_requires_both_dimensions_to_change() -> None: only_width = CvBox("button", 1, 1, 2, 5, 4, ui) assert _is_resized(changed, first) is True - assert _is_resized(only_width, first) is False + assert _is_resized(only_width, first) is True + assert _is_resized(first, first) is False def test_changes_checker_reports_detected_moved_resized_and_undetected() -> None: diff --git a/tests/apparser/instructions/default/test_press.py b/tests/apparser/instructions/default/test_press.py index 81d3425..f9b496a 100644 --- a/tests/apparser/instructions/default/test_press.py +++ b/tests/apparser/instructions/default/test_press.py @@ -33,9 +33,8 @@ def test_press_keys_combination_presses_and_releases_keys() -> None: def test_press_keys_combination_rejects_invalid_key_on_perform() -> None: - instruction = PressKeysCombination([object()]) with pytest.raises(TypeError): - instruction.perform() + 
PressKeysCombination([object()]) def test_press_key_down_sends_key_down() -> None: diff --git a/tests/apparser/instructions/ocr/test_move_to_text.py b/tests/apparser/instructions/ocr/test_move_to_text.py index 87b66e2..3032511 100644 --- a/tests/apparser/instructions/ocr/test_move_to_text.py +++ b/tests/apparser/instructions/ocr/test_move_to_text.py @@ -1,7 +1,7 @@ from __future__ import annotations import pytest -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints from apparser.exceptions import TextNotFoundException from apparser.instructions.ocr.move_to_text import MoveToText @@ -50,7 +50,7 @@ def test_move_to_text_rejects_low_similarity(monkeypatch: pytest.MonkeyPatch) -> def test_move_to_text_moves_to_text_center(monkeypatch: pytest.MonkeyPatch) -> None: getter = GetText() getter._GetText__local_answer = [ - TextData("hello", [Point(0, 0), Point(4, 0), Point(4, 4), Point(0, 4)]) + TextData("hello", QuadPoints(Point(0, 0), Point(4, 0), Point(4, 4), Point(0, 4))) ] monkeypatch.setattr(getter, "perform", lambda ui, text_reader: None) moved_to: list[Point] = [] diff --git a/tests/apparser/instructions/ocr/test_plot_text.py b/tests/apparser/instructions/ocr/test_plot_text.py index e440f4a..9cea6b6 100644 --- a/tests/apparser/instructions/ocr/test_plot_text.py +++ b/tests/apparser/instructions/ocr/test_plot_text.py @@ -3,7 +3,7 @@ from unittest.mock import Mock import numpy -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints from apparser.instructions.ocr.plot_text import PlotAllText, _Painter from apparser.instructions.ocr.text_getter import GetText @@ -14,7 +14,7 @@ def test_painter_draws_rectangle_and_text() -> None: draw = Mock() painter = _Painter(draw, (255, 255, 255, 255)) - data = TextData("hello", [Point(1, 2), Point(3, 2), Point(3, 4), Point(1, 4)]) + data = TextData("hello", QuadPoints(Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)), ) painter.draw([data]) @@ -25,7 +25,7 @@ def 
test_painter_draws_rectangle_and_text() -> None: def test_plot_all_text_draws_and_shows_image(monkeypatch: pytest.MonkeyPatch) -> None: getter = GetText() getter._GetText__global_answer = [ - TextData("hello", [Point(1, 2), Point(3, 2), Point(3, 4), Point(1, 4)]) + TextData("hello", QuadPoints(Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)), ) ] getter._GetText__screenshot = numpy.zeros((4, 4, 3), dtype=numpy.uint8) monkeypatch.setattr(getter, "perform", lambda ui, text_reader: None) diff --git a/tests/apparser/instructions/ocr/test_print_all_text.py b/tests/apparser/instructions/ocr/test_print_all_text.py index 06ecae1..cf9ec05 100644 --- a/tests/apparser/instructions/ocr/test_print_all_text.py +++ b/tests/apparser/instructions/ocr/test_print_all_text.py @@ -1,6 +1,6 @@ from __future__ import annotations -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints from apparser.instructions.ocr.print_all_text import PrintAllText from apparser.instructions.ocr.text_getter import GetText @@ -11,7 +11,7 @@ def test_print_all_text_prints_each_entry(monkeypatch: pytest.MonkeyPatch) -> None: getter = GetText() getter._GetText__local_answer = [ - TextData("hello", [Point(1, 2), Point(3, 4)]), + TextData("hello", QuadPoints(Point(1, 2), Point(3, 4), Point(3, 4), Point(3, 4))), ] printed: list[str] = [] monkeypatch.setattr(getter, "perform", lambda ui, text_reader: None) @@ -20,5 +20,5 @@ def test_print_all_text_prints_each_entry(monkeypatch: pytest.MonkeyPatch) -> No instruction.perform(FakeUi(), FakeTextReader()) - assert printed == ['text: "hello", coordinates: Point(x = 1, y = 2) Point(x = 3, y = 4) '] + assert printed == ['text: "hello", coordinates: QuadPoints(left_top = Point(x = 1, y = 2), right_top = Point(x = 3, y = 4), right_bottom = Point(x = 3, y = 4), left_bottom = Point(x = 3, y = 4))'] assert instruction.id == 2003 diff --git a/tests/apparser/instructions/ocr/test_text_getter.py 
b/tests/apparser/instructions/ocr/test_text_getter.py index a18e96a..d16ba0c 100644 --- a/tests/apparser/instructions/ocr/test_text_getter.py +++ b/tests/apparser/instructions/ocr/test_text_getter.py @@ -1,7 +1,7 @@ from __future__ import annotations import numpy -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints from apparser.geometry import RelativelyPoint from apparser.instructions.ocr.text_getter import GetText @@ -15,7 +15,7 @@ def test_text_getter_reads_and_converts_coordinates() -> None: result=[ TextData( "hello", - [Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)], + QuadPoints(Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)), ) ] ) @@ -25,14 +25,14 @@ def test_text_getter_reads_and_converts_coordinates() -> None: instruction.perform(ui, reader) assert reader.images[0].shape == (2, 3, 3) - assert instruction.global_answer[0].coordinates[0] == Point(0, 0) - assert instruction.local_answer[0].coordinates[0] == Point(1, 1) + assert instruction.global_answer[0].coordinates.left_top == Point(0, 0) + assert instruction.local_answer[0].coordinates.left_top == Point(1, 1) assert instruction.screenshot.shape == (2, 3, 3) def test_text_getter_respects_cached_result() -> None: reader = FakeTextReader( - result=[TextData("hello", [Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)])] + result=[TextData("hello", QuadPoints(Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)), )] ) instruction = GetText( RelativelyPoint(0, 0), diff --git a/tests/apparser/instructions/ocr/test_wait_text.py b/tests/apparser/instructions/ocr/test_wait_text.py index 0b5269c..f0081df 100644 --- a/tests/apparser/instructions/ocr/test_wait_text.py +++ b/tests/apparser/instructions/ocr/test_wait_text.py @@ -1,7 +1,7 @@ from __future__ import annotations import pytest -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints from apparser.exceptions import TimeoutException from apparser.instructions.ocr.text_getter 
import GetText @@ -15,7 +15,7 @@ def test_wait_text_returns_when_text_is_found() -> None: result=[ TextData( "hello", - [Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)], + QuadPoints(Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)), ), ], ) diff --git a/tests/apparser/text_readers/detectors/__init__.py b/tests/apparser/text_readers/detectors/__init__.py new file mode 100644 index 0000000..9d48db4 --- /dev/null +++ b/tests/apparser/text_readers/detectors/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/tests/apparser/text_readers/detectors/test_base.py b/tests/apparser/text_readers/detectors/test_base.py new file mode 100644 index 0000000..c7558ea --- /dev/null +++ b/tests/apparser/text_readers/detectors/test_base.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +import pytest + +from apparser.text_readers.detectors import BaseTextDetector + + +def test_base_text_detector_is_abstract() -> None: + with pytest.raises(TypeError): + BaseTextDetector() diff --git a/tests/apparser/text_readers/detectors/test_easy_ocr.py b/tests/apparser/text_readers/detectors/test_easy_ocr.py new file mode 100644 index 0000000..f2cf5ed --- /dev/null +++ b/tests/apparser/text_readers/detectors/test_easy_ocr.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import numpy +from appwindows.geometry import Point, QuadPoints + +from apparser.text_readers.detectors import EasyOcrDetector +from tests.utils import easyocr_stub + + +def test_easy_ocr_detector_uses_default_language() -> None: + EasyOcrDetector() + + instance = easyocr_stub.Reader.instances[0] + assert instance.lang_list == ["en"] + assert instance.settings == { + "detector": True, + "recognizer": False, + } + + +def test_easy_ocr_detector_maps_detected_boxes() -> None: + detector = EasyOcrDetector(["ru"], gpu=False) + instance = easyocr_stub.Reader.instances[0] + image = numpy.zeros((2, 2, 3), dtype=numpy.uint8) + instance.detected = ( + [ + [ + [1.1, 5.8, 2.2, 4.9], + ], + ], + [ + 
[ + [ + [6.1, 7.2], + [8.3, 9.4], + [10.5, 11.6], + [12.7, 13.8], + ], + ], + ], + ) + + result = detector.read_image(image, slope_ths=0.1) + + first_points = QuadPoints( + Point(1, 2), + Point(5, 2), + Point(5, 4), + Point(1, 4), + ) + second_points = QuadPoints( + Point(6, 7), + Point(8, 9), + Point(10, 11), + Point(12, 13), + ) + assert instance.settings == { + "detector": True, + "recognizer": False, + "gpu": False, + } + assert instance.detect_calls[0]["image"] is image + assert instance.detect_calls[0]["settings"] == {"slope_ths": 0.1} + assert result[0].left_top == first_points.left_top + assert result[0].right_top == first_points.right_top + assert result[0].right_bottom == first_points.right_bottom + assert result[0].left_bottom == first_points.left_bottom + assert result[1].left_top == second_points.left_top + assert result[1].right_top == second_points.right_top + assert result[1].right_bottom == second_points.right_bottom + assert result[1].left_bottom == second_points.left_bottom diff --git a/tests/apparser/text_readers/scanners/__init__.py b/tests/apparser/text_readers/scanners/__init__.py new file mode 100644 index 0000000..9d48db4 --- /dev/null +++ b/tests/apparser/text_readers/scanners/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/tests/apparser/text_readers/scanners/test_base.py b/tests/apparser/text_readers/scanners/test_base.py new file mode 100644 index 0000000..724ec1f --- /dev/null +++ b/tests/apparser/text_readers/scanners/test_base.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +import pytest + +from apparser.text_readers.scanners import BaseTextScanner + + +def test_base_text_scanner_is_abstract() -> None: + with pytest.raises(TypeError): + BaseTextScanner() diff --git a/tests/apparser/text_readers/scanners/test_trocr.py b/tests/apparser/text_readers/scanners/test_trocr.py new file mode 100644 index 0000000..8583ca5 --- /dev/null +++ b/tests/apparser/text_readers/scanners/test_trocr.py @@ -0,0 +1,135 
class PixelValuesStub:
    """Stand-in for the ``pixel_values`` object a processor call returns.

    It only remembers the device it was last moved to, so a test can
    assert on that value.
    """

    def __init__(self) -> None:
        # Stays ``None`` until ``to`` is called.
        self.device: str | None = None

    def to(self, device: str) -> "PixelValuesStub":
        """Record *device* and return ``self`` so the call can be chained."""
        self.device = device
        return self


class ProcessorResultStub:
    """Minimal processor output exposing only ``pixel_values``."""

    def __init__(self, pixel_values: PixelValuesStub) -> None:
        self.pixel_values = pixel_values


class ProcessorStub:
    """Recording test double used in place of ``TrOCRProcessor``.

    Every call and every ``batch_decode`` invocation is appended to a list
    on the instance, and every instance is appended to ``instances``.
    """

    # Instances in creation order. The list lives on the class, so it is
    # shared by all instances until something rebinds it.
    instances: list["ProcessorStub"] = []

    def __init__(self, name: str) -> None:
        self.name = name
        # One pixel-values object per processor; every call hands it back.
        self.pixel_values = PixelValuesStub()
        self.calls: list[dict[str, Any]] = []
        self.decode_calls: list[dict[str, Any]] = []
        self.__class__.instances.append(self)

    @classmethod
    def from_pretrained(cls, name: str) -> "ProcessorStub":
        """Build a stub for *name*; nothing is loaded."""
        return cls(name)

    def __call__(
        self,
        images: Any,
        return_tensors: str,
    ) -> ProcessorResultStub:
        """Record the arguments and return a result wrapping ``pixel_values``."""
        self.calls.append(
            {
                "images": images,
                "return_tensors": return_tensors,
            }
        )
        return ProcessorResultStub(self.pixel_values)

    def batch_decode(
        self,
        generated_ids: list[int],
        skip_special_tokens: bool,
    ) -> list[str]:
        """Record the arguments and return the fixed ``["recognized text"]``."""
        self.decode_calls.append(
            {
                "generated_ids": generated_ids,
                "skip_special_tokens": skip_special_tokens,
            }
        )
        return ["recognized text"]


class ModelStub:
    """Recording test double used in place of ``VisionEncoderDecoderModel``."""

    # Instances in creation order; shared on the class like
    # ``ProcessorStub.instances``.
    instances: list["ModelStub"] = []

    def __init__(self, name: str) -> None:
        self.name = name
        # Stays ``None`` until ``to`` is called.
        self.device: str | None = None
        self.generate_calls: list[PixelValuesStub] = []
        self.__class__.instances.append(self)

    @classmethod
    def from_pretrained(cls, name: str) -> "ModelStub":
        """Build a stub for *name*; nothing is loaded."""
        return cls(name)

    def to(self, device: str) -> "ModelStub":
        """Record *device* and return ``self``.

        ``torch.nn.Module.to`` returns the module, and ``PixelValuesStub.to``
        already returns ``self``. Returning ``self`` here as well keeps the
        stub usable by code that chains ``from_pretrained(...).to(device)``;
        callers that ignore the return value are unaffected.
        """
        self.device = device
        return self

    def generate(self, pixel_values: PixelValuesStub) -> list[int]:
        """Record *pixel_values* and return the fixed ids ``[1, 2, 3]``."""
        self.generate_calls.append(pixel_values)
        return [1, 2, 3]
class TransformersStub(ModuleType):
    """Fake ``transformers`` module exposing the stub processor and model.

    Constructing it rebinds ``ProcessorStub.instances`` and
    ``ModelStub.instances`` to fresh lists, so index ``0`` always refers to
    an object created after this stub was built.
    """

    def __init__(self) -> None:
        super().__init__("transformers")
        ProcessorStub.instances = []
        ModelStub.instances = []
        self.TrOCRProcessor = ProcessorStub
        self.VisionEncoderDecoderModel = ModelStub


class CudaStub:
    """Fake ``torch.cuda`` namespace with configurable availability.

    Args:
        available: Value reported by ``is_available``. Defaults to ``True``,
            which is what the stub always reported before the parameter
            existed.
    """

    def __init__(self, *, available: bool = True) -> None:
        self.available = available

    def is_available(self) -> bool:
        """Return the availability flag given at construction."""
        return self.available


class TorchModuleStub(ModuleType):
    """Fake ``torch`` module exposing only a ``cuda`` attribute.

    Args:
        cuda_available: Forwarded to ``CudaStub``. The default ``True`` keeps
            existing ``TorchModuleStub()`` callers on the CUDA path; pass
            ``False`` to build a stub that reports no CUDA device.
    """

    def __init__(self, *, cuda_available: bool = True) -> None:
        super().__init__("torch")
        self.cuda = CudaStub(available=cuda_available)


def test_trocr_scanner_reads_text(monkeypatch: pytest.MonkeyPatch) -> None:
    """With CUDA reported available, expect a "cuda" placement and decoded text."""
    transformers_stub = TransformersStub()
    torch_stub = TorchModuleStub()
    # Register the fakes under the real module names for the duration of the
    # test; monkeypatch restores ``sys.modules`` afterwards.
    monkeypatch.setitem(sys.modules, "transformers", transformers_stub)
    monkeypatch.setitem(sys.modules, "torch", torch_stub)
    scanner = TrocrScanner(
        model_name="model",
        processor_name="processor",
    )
    image = numpy.zeros((2, 2, 3), dtype=numpy.uint8)

    result = scanner.read_image(image)

    processor = ProcessorStub.instances[0]
    model = ModelStub.instances[0]
    assert processor.name == "processor"
    assert model.name == "model"
    assert model.device == "cuda"
    assert processor.pixel_values.device == "cuda"
    assert model.generate_calls == [processor.pixel_values]
    assert processor.decode_calls == [
        {
            "generated_ids": [1, 2, 3],
            "skip_special_tokens": True,
        }
    ]
    assert result == "recognized text"
class DetectorStub(BaseTextDetector):
    """Detector double that returns a preset result and records each image."""

    def __init__(self, result: list[QuadPoints]) -> None:
        self.result = result
        self.images: list[numpy.ndarray] = []

    def read_image(self, image: numpy.ndarray) -> list[QuadPoints]:
        """Record *image* and return the preset list of boxes."""
        self.images.append(image)
        return self.result


class ScannerStub(BaseTextScanner):
    """Scanner double that returns a preset string and records each image."""

    def __init__(self, result: str) -> None:
        self.result = result
        self.images: list[numpy.ndarray] = []

    def read_image(self, image: numpy.ndarray) -> str:
        """Record *image* and return the preset text."""
        self.images.append(image)
        return self.result


def assert_quad_points_equal(
    first: QuadPoints,
    second: QuadPoints,
) -> None:
    """Assert that two quads have equal corners, one corner at a time."""
    assert first.left_top == second.left_top
    assert first.right_top == second.right_top
    assert first.right_bottom == second.right_bottom
    assert first.left_bottom == second.left_bottom


@pytest.mark.parametrize(
    ("detector", "scanner"),
    [
        (object(), ScannerStub("text")),
        (DetectorStub([]), object()),
    ],
)
def test_compound_reader_rejects_invalid_backends(
    detector: Any,
    scanner: Any,
) -> None:
    """Expect ``TypeError`` when either backend is a plain ``object()``."""
    with pytest.raises(TypeError):
        CompoundReader(detector, scanner)


def test_compound_reader_detects_and_scans_text() -> None:
    """Expect one detected box to produce one result with the scanned text."""
    coordinates = QuadPoints(
        Point(1, 1),
        Point(3, 1),
        Point(3, 3),
        Point(1, 3),
    )
    detector = DetectorStub([coordinates])
    scanner = ScannerStub("text")
    reader = CompoundReader(detector, scanner)
    image = numpy.zeros((4, 4, 3), dtype=numpy.uint8)

    result = reader.read_image(image)

    # Pin the counts before indexing: checking only element [0] would let a
    # duplicated result or a repeated backend call pass unnoticed.
    assert len(detector.images) == 1
    assert len(scanner.images) == 1
    assert len(result) == 1
    assert detector.images[0] is image
    # The scanner is expected to receive a 2x2 region of the 4x4 input.
    assert scanner.images[0].shape == (2, 2, 3)
    assert result[0].text == "text"
    assert_quad_points_equal(result[0].coordinates, coordinates)
94c21a3..81e2e74 100644 --- a/tests/apparser/text_readers/test_easy_ocr.py +++ b/tests/apparser/text_readers/test_easy_ocr.py @@ -1,9 +1,9 @@ from __future__ import annotations import numpy -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints -from apparser.text_readers.easy_ocr import EasyOcrReader +from apparser.text_readers import EasyOcrReader from tests.utils import easyocr_stub @@ -30,9 +30,14 @@ def test_easy_ocr_reader_maps_prediction_result() -> None: assert instance.settings == {"gpu": False} assert instance.read_calls[0]["settings"] == {"detail": 1} assert result[0].text == "text" - assert result[0].coordinates == [ + + result_point = QuadPoints( Point(1, 2), Point(3, 4), Point(5, 6), Point(7, 8), - ] + ) + assert result[0].coordinates.left_top == result_point.left_top + assert result[0].coordinates.right_bottom == result_point.right_bottom + assert result[0].coordinates.right_top == result_point.right_top + assert result[0].coordinates.left_bottom == result_point.left_bottom \ No newline at end of file diff --git a/tests/apparser/text_readers/test_paddle_ocr.py b/tests/apparser/text_readers/test_paddle_ocr.py index 09c6f75..ff6e818 100644 --- a/tests/apparser/text_readers/test_paddle_ocr.py +++ b/tests/apparser/text_readers/test_paddle_ocr.py @@ -1,6 +1,6 @@ import numpy -from apparser.text_readers.paddle import ( +from apparser.text_readers import ( PaddleTextReader, ) from tests.utils import paddleocr_stub diff --git a/tests/apparser/text_readers/test_rapid_ocr.py b/tests/apparser/text_readers/test_rapid_ocr.py new file mode 100644 index 0000000..ba4d5d6 --- /dev/null +++ b/tests/apparser/text_readers/test_rapid_ocr.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import sys +from types import ModuleType +from typing import Any + +import numpy +import pytest +from appwindows.geometry import Point, QuadPoints + +from apparser.text_readers import RapidOcrReader + + +class RapidOcrEngineStub: + instances: 
class RapidOcrEngineStub:
    """Recording test double used in place of the ``RapidOCR`` engine."""

    # Engines in creation order; the list lives on the class.
    instances: list["RapidOcrEngineStub"] = []

    def __init__(self, **settings: Any) -> None:
        self.settings = settings
        # Tests overwrite this with whatever the engine should hand back.
        self.result: Any = []
        self.calls: list[dict[str, Any]] = []
        type(self).instances.append(self)

    def __call__(self, image: numpy.ndarray, **settings: Any) -> Any:
        """Record the call and return the preset ``result``."""
        call_record = {"image": image, "settings": settings}
        self.calls.append(call_record)
        return self.result


class RapidOcrStub(ModuleType):
    """Fake ``rapidocr`` module; building it starts a fresh instance list."""

    def __init__(self) -> None:
        super().__init__("rapidocr")
        RapidOcrEngineStub.instances = []
        self.RapidOCR = RapidOcrEngineStub


class RapidOcrOutputStub:
    """Engine output in object form, carrying ``boxes`` and ``txts``."""

    def __init__(self, boxes: list[Any], txts: list[str]) -> None:
        self.boxes = boxes
        self.txts = txts


def assert_quad_points_equal(
    first: QuadPoints,
    second: QuadPoints,
) -> None:
    """Assert that both quads agree on every corner."""
    for corner in ("left_top", "right_top", "right_bottom", "left_bottom"):
        assert getattr(first, corner) == getattr(second, corner)


def test_rapid_ocr_reader_maps_result_list(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect a list of ``(box, text)`` pairs to map to text and integer corners."""
    monkeypatch.setitem(sys.modules, "rapidocr", RapidOcrStub())
    reader = RapidOcrReader(device="cpu")
    engine = RapidOcrEngineStub.instances[0]
    box = [
        [1.1, 2.2],
        [3.3, 4.4],
        [5.5, 6.6],
        [7.7, 8.8],
    ]
    engine.result = [(box, "text")]
    image = numpy.zeros((2, 2, 3), dtype=numpy.uint8)

    result = reader.read_image(image, use_det=True)

    assert engine.settings == {"device": "cpu"}
    recorded_call = engine.calls[0]
    assert recorded_call["image"] is image
    assert recorded_call["settings"] == {"use_det": True}
    assert result[0].text == "text"
    corners = [Point(1, 2), Point(3, 4), Point(5, 6), Point(7, 8)]
    assert_quad_points_equal(result[0].coordinates, QuadPoints(*corners))


def test_rapid_ocr_reader_maps_output_object(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect an output object with a 4-number box to map to four corners."""
    monkeypatch.setitem(sys.modules, "rapidocr", RapidOcrStub())
    reader = RapidOcrReader()
    engine = RapidOcrEngineStub.instances[0]
    engine.result = RapidOcrOutputStub(boxes=[[1, 2, 3, 4]], txts=["text"])
    image = numpy.zeros((2, 2, 3), dtype=numpy.uint8)

    result = reader.read_image(image)

    assert result[0].text == "text"
    corners = [Point(1, 2), Point(3, 2), Point(3, 4), Point(1, 4)]
    assert_quad_points_equal(result[0].coordinates, QuadPoints(*corners))
a/tests/utils/fakes/instructions/fake_ocr_instruction.py +++ b/tests/utils/fakes/instructions/fake_ocr_instruction.py @@ -7,7 +7,7 @@ install_external_stubs() from apparser.core.ui.base import BaseUi -from apparser.text_readers.base import BaseTextReader +from apparser.text_readers import BaseTextReader class FakeOcrInstruction(FakeInstruction): diff --git a/tests/utils/stubs/text/easy_ocr_reader_stub.py b/tests/utils/stubs/text/easy_ocr_reader_stub.py index defe96b..fafb154 100644 --- a/tests/utils/stubs/text/easy_ocr_reader_stub.py +++ b/tests/utils/stubs/text/easy_ocr_reader_stub.py @@ -12,9 +12,15 @@ def __init__(self, lang_list: list[str], **settings: Any) -> None: self.lang_list = lang_list self.settings = settings self.predicted: list[Any] = [] + self.detected: Any = ([], []) self.read_calls: list[dict[str, Any]] = [] + self.detect_calls: list[dict[str, Any]] = [] self.__class__.instances.append(self) def readtext(self, image: numpy.ndarray, **settings: Any) -> list[Any]: self.read_calls.append({"image": image, "settings": settings}) return self.predicted + + def detect(self, image: numpy.ndarray, **settings: Any) -> Any: + self.detect_calls.append({"image": image, "settings": settings}) + return self.detected