diff --git a/README.md b/README.md index bd26e7b..778efe5 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,88 @@ +[![License - BSD 3-Clause](https://img.shields.io/pypi/l/apparser.svg)](https://github.com/apparser-development/apparser/blob/master/LICENSE.md) [![unit_tests](https://github.com/apparser-development/apparser/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/apparser-development/apparser/actions/workflows/unit_tests.yml) +
+[![PyPI Downloads](https://static.pepy.tech/personalized-badge/apparser?period=total&units=INTERNATIONAL_SYSTEM&left_color=GRAY&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/apparser) [![Documentation](https://img.shields.io/badge/docs-pages-green)](https://apparser-development.github.io/apparser/) -[![unit_tests](https://github.com/apparser-development/appwindows/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/apparser-development/appwindows/actions/workflows/unit_tests.yml)
-[![Github](https://img.shields.io/badge/github-repo-green)](https://github.com/apparser-development/apparser) +[![PyPI](https://img.shields.io/badge/PyPI-link-green)](https://pypi.org/project/apparser/) +[![GitHub](https://img.shields.io/badge/github-repo-green)](https://github.com/apparser-development/apparser) [![Issues](https://img.shields.io/badge/github-issues-green)](https://github.com/apparser-development/apparser/issues) # Apparser -Apparser is a Python library designed for automating desktop applications and managing UI interfaces using artificial intelligence, such as OCR or object detection models. -# Install +Apparser is a Python library for automating desktop applications and interacting with UIs using AI-powered tools such as OCR and object detection models. + +# Installation ```bash +# Base Apparser package pip install apparser + +# Apparser with text recognition support +pip install "apparser[ocr]" + +# Apparser with text-to-speech support +pip install "apparser[speak]" + +# Apparser with object detection support +pip install "apparser[cv]" + +# Apparser with all optional features +pip install "apparser[all]" ``` # Examples - -1) Open terminal and write "Hello World!" +1) Open CS2 and start a game +#### Code ```python from apparser import App -from apparser.geometry import RelativelyPoint -from apparser.instructions import Algorithm, MouseClickTo, WriteText, Sleep +from apparser.instructions import OCRAlgorithm +from apparser.instructions.ocr import WaitText, ClickOnText +from apparser.text_readers import ScreensController, RapidOcrReader + +# Text labels that the OCR algorithm will look for on the screen. +play_button = "play" +deathmatch_button = "deathmatch" +group_button = "hostage group" +start_button = "go" -algorithm = Algorithm([ - Sleep(1), # Wait for the application to open. - MouseClickTo(RelativelyPoint(0.5, 0.5)), # Click to window center for start writing - WriteText("Hello World") # Write text -]) +# Create OCR-based algorithm. 
+algorithm = OCRAlgorithm([ + # Wait for the main menu and open the play screen. + WaitText(play_button), + ClickOnText(play_button), + # Select the deathmatch mode. + WaitText(deathmatch_button), + ClickOnText(deathmatch_button), + # Select the hostage group and start the match. + WaitText(group_button), + ClickOnText(group_button), + ClickOnText(start_button, min_similarity=0.5), +], text_reader=ScreensController(RapidOcrReader())) -app = App("notepad", window_title="Notepad") +# Launch CS2 +app = App(['cmd', '/c', 'start', 'steam://rungameid/730'], timeout=20) +# Run the prepared scenario against the application UI. algorithm.perform(app.ui) ``` +#### Video + + # Docs -All documentation here
-Link to PyPi +Full documentation is available here
+Package page on PyPI + +# Donation +If you'd like to financially support the developers for their work: + +Donation link # For Developers -1) If something doesn't work - open issue. -2) If you want something fixed - open issue. -3) If you can help with the library - email. +1) If something doesn't work, open an issue. +2) If you want something fixed, open an issue. +3) If you can help with the library, email us. apparser.development@gmail.com -Any help in development is welcome)! +Contributions are welcome! diff --git a/apparser/core/app.py b/apparser/core/app.py index c853b3c..2fbc7db 100644 --- a/apparser/core/app.py +++ b/apparser/core/app.py @@ -1,4 +1,3 @@ -import os import subprocess import time @@ -11,21 +10,24 @@ class App: """Manage an application process and its UI wrapper.""" - def __init__(self, path_to_exe: str, + def __init__(self, start_command: str | list[str], window_title: str | None = None, timeout: float = 1): """Initialize an application controller. - :param path_to_exe: Path to the executable file. - :type path_to_exe: str + :param start_command: Command that launches the app: an executable path or an argument list for ``subprocess.Popen``. + :type start_command: str | list[str] :param window_title: Title of the window to attach to. :type window_title: str :param timeout: Delay before the window lookup starts. :type timeout: float :raises TypeError: If any argument has an invalid type. 
""" - if not isinstance(path_to_exe, str): - raise TypeError('path_to_exe must be a string') + if isinstance(start_command, str): + start_command = [start_command] + + if not isinstance(start_command, list): + raise TypeError('start_command must be a string or list[str]') if window_title is not None and not isinstance(window_title, str): raise TypeError('window_title must be a string') @@ -35,7 +37,7 @@ def __init__(self, path_to_exe: str, self.__window_finder = get_finder() self.__process: subprocess.Popen | None = None - self.__path = path_to_exe + self.__start_command = start_command self.__timeout = timeout self.__window_title_name: str = window_title self.__ui: BaseUi | None = None @@ -65,9 +67,9 @@ def start_app(self): if self.__ui is not None: return window_processes = [i.get_process_id() for i in get_finder().get_all_windows()] - self.__process = subprocess.Popen([self.__path]) + self.__process = subprocess.Popen(self.__start_command) time.sleep(self.__timeout) - self.__find_window_by_process_id(os.getpid()) + self.__find_window_by_process_id(self.__process.pid) for i in get_finder().get_all_windows(): if self.__ui is not None: return @@ -76,7 +78,7 @@ def start_app(self): if self.__ui is not None: return self.__find_window_by_title() - if self.__ui is not None: + if self.__ui is None: raise WindowDoesNotValidException() def stop_app(self): diff --git a/apparser/core/ui/coordinates.py b/apparser/core/ui/coordinates.py index 6d569a7..d74a422 100644 --- a/apparser/core/ui/coordinates.py +++ b/apparser/core/ui/coordinates.py @@ -2,10 +2,9 @@ import numpy from appwindows import Window -from appwindows.geometry import Size from apparser.core.ui.base import BaseUi -from apparser.geometry import Point, RelativelyPoint +from apparser.geometry import Point, RelativelyPoint, Size class CoordinatesUi(BaseUi): diff --git a/apparser/core/ui/desktop.py b/apparser/core/ui/desktop.py index d7eca53..b076362 100644 --- a/apparser/core/ui/desktop.py +++ 
b/apparser/core/ui/desktop.py @@ -39,8 +39,8 @@ def _(self, coordinates: Point): @point_to_global.register(RelativelyPoint) def _(self, coordinates: RelativelyPoint): monitor = get_monitors()[self.__display_id] - x = round(coordinates.x * monitor.width) - y = round(coordinates.y * monitor.height) + x = getattr(monitor, "x", 0) + round(coordinates.x * monitor.width) + y = getattr(monitor, "y", 0) + round(coordinates.y * monitor.height) local_point = Point(x, y) return self.point_to_global(local_point) diff --git a/apparser/core/ui/window.py b/apparser/core/ui/window.py index a5c9f5e..73638e1 100644 --- a/apparser/core/ui/window.py +++ b/apparser/core/ui/window.py @@ -3,10 +3,9 @@ import numpy from appwindows import Window -from appwindows.geometry import Point, Size +from apparser.geometry import Point, Size, RelativelyPoint from apparser.core.ui.base import BaseUi -from apparser.geometry.relatively_point import RelativelyPoint class WindowUi(BaseUi): diff --git a/apparser/core/ui/window_by_display.py b/apparser/core/ui/window_by_display.py index dad741d..38d12af 100644 --- a/apparser/core/ui/window_by_display.py +++ b/apparser/core/ui/window_by_display.py @@ -4,10 +4,9 @@ from PIL import ImageGrab from appwindows import Window -from appwindows.geometry import Point, Size +from apparser.geometry import Point, Size, RelativelyPoint from apparser.core.ui.base import BaseUi -from apparser.geometry.relatively_point import RelativelyPoint class WindowByDisplayUi(BaseUi): diff --git a/apparser/cv/readers/yolo.py b/apparser/cv/readers/yolo.py index 2be8a35..8fcb027 100644 --- a/apparser/cv/readers/yolo.py +++ b/apparser/cv/readers/yolo.py @@ -52,9 +52,11 @@ def read(self, ui: BaseUi) -> CvAllData: x1, y1, x2, y2 = box.xyxy[0].tolist() x = int(x1) y = int(y1) - width = int(x2 - x1) - height = int(y2 - y1) - box_ui = CoordinatesUi(ui, Point(x, y), Size(width, height)) + x2 = int(x2) + y2 = int(y2) + width = x2 - x1 + height = y2 - y1 + box_ui = CoordinatesUi(ui, Point(x, y), 
Point(x2, y2)) boxes.append( CvBox( class_name=cls_name, diff --git a/apparser/cv/utils/changes_checker.py b/apparser/cv/utils/changes_checker.py index 8f528a0..e704b08 100644 --- a/apparser/cv/utils/changes_checker.py +++ b/apparser/cv/utils/changes_checker.py @@ -25,7 +25,7 @@ def _is_resized(box: CvBox, old_box: CvBox) -> bool: :return: True if width and height both changed. :rtype: bool """ - return abs(box.width - old_box.width) > 0 and abs(box.height - old_box.height) > 0 + return abs(box.width - old_box.width) > 0 or abs(box.height - old_box.height) > 0 class ChangesChecker: diff --git a/apparser/exceptions/timeout.py b/apparser/exceptions/timeout.py index 9d0570d..0b8bdc8 100644 --- a/apparser/exceptions/timeout.py +++ b/apparser/exceptions/timeout.py @@ -1,5 +1,14 @@ class TimeoutException(Exception): + """Represent a timeout during a waiting operation.""" + def __init__(self, wait_time: float | int | None = None): + """Initialize a timeout exception. + + :param wait_time: Time waited before the timeout occurred. + :type wait_time: float | int | None + :raises TypeError: If ``wait_time`` has an invalid type. + :raises ValueError: If ``wait_time`` is negative. 
+ """ if wait_time is None: super().__init__("Timeout error") return diff --git a/apparser/geometry/__init__.py b/apparser/geometry/__init__.py index 2bcf958..0810e9f 100644 --- a/apparser/geometry/__init__.py +++ b/apparser/geometry/__init__.py @@ -1,4 +1,4 @@ -from appwindows.geometry import Point, Size +from appwindows.geometry import Point, Size, QuadPoints from apparser.geometry.relatively_point import RelativelyPoint from apparser.geometry.distance import distance @@ -6,4 +6,5 @@ __all__ = ["Point", "Size", "RelativelyPoint", + "QuadPoints", "distance"] diff --git a/apparser/instructions/default/press.py b/apparser/instructions/default/press.py index 8807ba3..5235db4 100644 --- a/apparser/instructions/default/press.py +++ b/apparser/instructions/default/press.py @@ -1,8 +1,7 @@ import pyautogui -from apparser.key_codes import BaseKeyCode - from apparser.instructions.base import BaseInstruction +from apparser.key_codes import BaseKeyCode class PressKey(BaseInstruction): @@ -38,6 +37,12 @@ def __init__(self, keys: list[BaseKeyCode | str]): :type keys: list[BaseKeyCode | str] """ self.__keys = keys + self.__validate() + + def __validate(self): + for key in self.__keys: + if not (isinstance(key, BaseKeyCode) or isinstance(key, str)): + raise TypeError('key_code must be BaseKeyCode or str') @property def id(self) -> int: @@ -45,8 +50,6 @@ def id(self) -> int: def perform(self, *args, **kwargs): for key in self.__keys: - if not (isinstance(key, BaseKeyCode) or isinstance(key, str)): - raise TypeError('key_code must be BaseKeyCode or str') pyautogui.keyDown(str(key)) for key in self.__keys: diff --git a/apparser/instructions/ocr/move_to_text.py b/apparser/instructions/ocr/move_to_text.py index cb5e069..a909fc2 100644 --- a/apparser/instructions/ocr/move_to_text.py +++ b/apparser/instructions/ocr/move_to_text.py @@ -3,7 +3,6 @@ from apparser.core import BaseUi from apparser.exceptions import TextNotFoundException from apparser.geometry import Point, RelativelyPoint - 
from apparser.text_readers import BaseTextReader, TextData from apparser.instructions.ocr.base import OCRInstruction @@ -59,8 +58,8 @@ def perform(self, ui: BaseUi, text_reader: BaseTextReader, *args, **kwargs): needed_data, rating = self.find_text(self.__text_getter.local_answer) if self.__min_similarity > rating: raise TextNotFoundException(self.__min_similarity) - y_cords = list(set([i.y for i in needed_data.coordinates])) - x_cords = list(set([i.x for i in needed_data.coordinates])) + y_cords = [needed_data.coordinates.right_top.y, needed_data.coordinates.right_bottom.y] + x_cords = [needed_data.coordinates.left_top.x, needed_data.coordinates.right_top.x] offset_point = self.__get_local_offset(ui) x_center = round((x_cords[0] - x_cords[1]) / 2 + x_cords[1]) + offset_point.x y_center = round((y_cords[0] - y_cords[1]) / 2 + y_cords[1]) + offset_point.y diff --git a/apparser/instructions/ocr/plot_text.py b/apparser/instructions/ocr/plot_text.py index b962645..dff78cb 100644 --- a/apparser/instructions/ocr/plot_text.py +++ b/apparser/instructions/ocr/plot_text.py @@ -33,16 +33,16 @@ def draw(self, bboxes: list[TextData]): self.__paint_lines(data) def __paint_lines(self, data: TextData): - shape = [(data.coordinates[0].x, data.coordinates[0].y), (data.coordinates[2].x, data.coordinates[2].y)] + shape = [(data.coordinates.left_top.x, data.coordinates.left_top.y), (data.coordinates.right_bottom.x, data.coordinates.right_bottom.y)] self.__draw.rectangle(shape, outline=self.__color, width=1) def __paint_cords(self, data: TextData): - y = data.coordinates[0].y + self.__text_move.y + y = data.coordinates.left_top.y + self.__text_move.y if y < 0: - y = data.coordinates[2].y - self.__text_move.y - x = data.coordinates[0].x + self.__text_move.x + y = data.coordinates.right_bottom.y - self.__text_move.y + x = data.coordinates.left_top.x + self.__text_move.x if y < 0: - x = data.coordinates[2].x - self.__text_move.x + x = data.coordinates.right_bottom.x - self.__text_move.x 
self.__draw.text((x, y), data.text, fill=self.__color) diff --git a/apparser/instructions/ocr/print_all_text.py b/apparser/instructions/ocr/print_all_text.py index 5bc57a0..9c78abb 100644 --- a/apparser/instructions/ocr/print_all_text.py +++ b/apparser/instructions/ocr/print_all_text.py @@ -27,7 +27,4 @@ def id(self) -> int: def perform(self, ui: BaseUi, text_reader: BaseTextReader, *args, **kwargs): self.__text_getter.perform(ui, text_reader) for i in self.__text_getter.local_answer: - points_stroke = "" - for j in i.coordinates: - points_stroke += str(j) + " " - print(f'text: "{i.text}", coordinates: {points_stroke}') + print(f'text: "{i.text}", coordinates: {str(i.coordinates)}') diff --git a/apparser/instructions/ocr/text_getter.py b/apparser/instructions/ocr/text_getter.py index 9fe0d9e..8e2ab2c 100644 --- a/apparser/instructions/ocr/text_getter.py +++ b/apparser/instructions/ocr/text_getter.py @@ -2,7 +2,7 @@ from PIL import Image from apparser.core import BaseUi -from apparser.geometry import Point, RelativelyPoint +from apparser.geometry import Point, RelativelyPoint, QuadPoints from apparser.text_readers import BaseTextReader, TextData @@ -38,9 +38,12 @@ def id(self) -> int: return 2000 def __text_coordinates_to_local(self, text: TextData) -> TextData: - new_coordinates = [] - for point in text.coordinates: - new_coordinates.append(point + self.__left_top_point_global) + new_coordinates = QuadPoints( + text.coordinates.left_top + self.__left_top_point_global, + text.coordinates.right_top + self.__left_top_point_global, + text.coordinates.right_bottom + self.__left_top_point_global, + text.coordinates.left_bottom + self.__left_top_point_global, + ) return TextData(text.text, new_coordinates) def __texts_coordinates_to_local(self, texts: list[TextData]) -> list[TextData]: @@ -66,18 +69,18 @@ def perform(self, ui: BaseUi, text_reader: BaseTextReader, *args, **kwargs): @property def local_answer(self) -> list[TextData]: - """Return the texts coordinates in 
local Ui object of the last perform. + """Return text coordinates in the local UI object of the last perform. - :return: Texts coordinates in local Ui object. + :return: Text coordinates in the local UI object. :rtype: list[TextData] """ return self.__local_answer @property def global_answer(self) -> list[TextData]: - """Return the global texts coordinates of the last perform. + """Return global text coordinates of the last perform. - :return: Global texts coordinates. + :return: Global text coordinates. :rtype: list[TextData] """ return self.__global_answer @@ -86,7 +89,7 @@ def global_answer(self) -> list[TextData]: def screenshot(self) -> numpy.ndarray: """Return the screenshot of the last perform. - :return: Ui screenshot + :return: UI screenshot. :rtype: numpy.ndarray """ return self.__screenshot diff --git a/apparser/instructions/ui/algorithms/ids.py b/apparser/instructions/ui/algorithms/ids.py index 9e16676..df16dc3 100644 --- a/apparser/instructions/ui/algorithms/ids.py +++ b/apparser/instructions/ui/algorithms/ids.py @@ -2,7 +2,6 @@ from typing import Any from apparser.core import BaseUi - from apparser.instructions import BaseInstruction from apparser.instructions.debuggers import BaseDebugger, Debugger from apparser.instructions.ui.algorithms.base import BaseAlgorithm @@ -41,6 +40,12 @@ def __init__(self, :raises TypeError: If ``debugger`` has an invalid type. 
""" + if not isinstance(attributes, list): + raise TypeError("attributes must be list") + + if not isinstance(instructions, list): + raise TypeError("instructions must be list") + if not isinstance(debugger, BaseDebugger) and not isinstance(debugger, bool): raise TypeError(f"debugger must be a bool or BaseDebugger") @@ -50,8 +55,6 @@ def __init__(self, elif debugger == False: debugger = None - attributes.reverse() - self.__debugger = debugger self.__instructions = instructions self.__attributes = attributes @@ -60,11 +63,11 @@ def __init__(self, def id(self) -> int: return 1501 - def __form_args(self, instruction: BaseInstruction) -> dict[str, Any]: + def __form_args(self, instruction: BaseInstruction, *additional_args) -> dict[str, Any]: result = {} function_signature = inspect.signature(instruction.perform) for arg in function_signature.parameters.values(): - for a in self.__attributes: + for a in self.__attributes + list(additional_args): if arg.annotation is type(a): result[arg.name] = a return result @@ -83,7 +86,7 @@ def perform(self, ui: BaseUi, *args, **kwargs): raise ValueError(f"instruction with id {instruction_id} not found") instruction = instruction(*instruction_args) - perform_kwargs = self.__form_args(instruction) + perform_kwargs = self.__form_args(instruction, ui) if self.__debugger is not None: self.__debugger.try_perform(instruction, **perform_kwargs) diff --git a/apparser/instructions/ui/algorithms/names.py b/apparser/instructions/ui/algorithms/names.py index 67f362f..f1fa940 100644 --- a/apparser/instructions/ui/algorithms/names.py +++ b/apparser/instructions/ui/algorithms/names.py @@ -40,6 +40,12 @@ def __init__(self, :type debugger: BaseDebugger | bool :raises TypeError: If ``debugger`` has an invalid type. 
""" + if not isinstance(attributes, list): + raise TypeError("attributes must be list") + + if not isinstance(instructions, list): + raise TypeError("instructions must be list") + if not isinstance(debugger, BaseDebugger) and not isinstance(debugger, bool): raise TypeError(f"debugger must be a bool or BaseDebugger") @@ -49,8 +55,6 @@ def __init__(self, elif debugger == False: debugger = None - attributes.reverse() - self.__debugger = debugger self.__instructions = instructions self.__attributes = attributes @@ -59,11 +63,11 @@ def __init__(self, def id(self) -> int: return 1502 - def __form_args(self, instruction: BaseInstruction) -> dict[str, Any]: + def __form_args(self, instruction: BaseInstruction, *additional_args) -> dict[str, Any]: result = {} function_signature = inspect.signature(instruction.perform) for arg in function_signature.parameters.values(): - for a in self.__attributes: + for a in self.__attributes + list(additional_args): if arg.annotation is type(a): result[arg.name] = a return result @@ -83,7 +87,7 @@ def perform(self, ui: BaseUi, *args, **kwargs): instruction = instruction_type(*instruction_args) - perform_kwargs = self.__form_args(instruction) + perform_kwargs = self.__form_args(instruction, ui) if self.__debugger is not None: self.__debugger.try_perform(instruction, **perform_kwargs) diff --git a/apparser/speakers/base.py b/apparser/speakers/base.py index 7d72364..58c5962 100644 --- a/apparser/speakers/base.py +++ b/apparser/speakers/base.py @@ -12,7 +12,7 @@ def speak(self, text: str) -> tuple[numpy.ndarray, int]: :param text: Text to synthesize. :type text: str - :return: Generated audio samples and bitrate. + :return: Generated audio samples and sample rate. 
:rtype: tuple[numpy.ndarray, int] """ pass diff --git a/apparser/speakers/chat_tts.py b/apparser/speakers/chat_tts.py index 2379f17..3f096fd 100644 --- a/apparser/speakers/chat_tts.py +++ b/apparser/speakers/chat_tts.py @@ -6,6 +6,8 @@ class ChatTTSSpeaker(BaseSpeaker): + """Generate speech by using a ChatTTS backend.""" + def __init__( self, speaker: str | None = None, @@ -45,7 +47,7 @@ def __init__( :type experimental: bool :param enable_cache: Whether ChatTTS cache should be enabled. :type enable_cache: bool - :param sample_rate: Output bitrate for generated audio. + :param sample_rate: Output sample rate for generated audio. :type sample_rate: int """ self.__chattts = importlib.import_module("ChatTTS") @@ -98,7 +100,7 @@ def speak(self, text: str, **settings: object) -> tuple[numpy.ndarray, int]: :type text: str :param settings: Additional ChatTTS inference settings. :type settings: dict[str, object] - :return: Generated audio samples and bitrate. + :return: Generated audio samples and sample rate. :rtype: tuple[numpy.ndarray, int] """ speaker = settings.pop("speaker", self.__speaker) diff --git a/apparser/speakers/torch.py b/apparser/speakers/torch.py index d83b922..56541da 100644 --- a/apparser/speakers/torch.py +++ b/apparser/speakers/torch.py @@ -30,7 +30,7 @@ def __init__( :type speaker_model: str :param speaker: Speaker name used for synthesis. :type speaker: str - :param sample_rate: Output bitrate. + :param sample_rate: Output sample rate. :type sample_rate: int :param device: Torch device used for inference. :type device: str | object @@ -75,7 +75,7 @@ def speak(self, text: str, **settings: object) -> tuple[numpy.ndarray, int]: :type text: str :param settings: Additional synthesis settings. :type settings: dict[str, object] - :return: Generated audio samples and bitrate. + :return: Generated audio samples and sample rate. 
:rtype: tuple[numpy.ndarray, int] """ audio = self.__model.apply_tts( diff --git a/apparser/text_readers/__init__.py b/apparser/text_readers/__init__.py index 54e8e89..18f9027 100644 --- a/apparser/text_readers/__init__.py +++ b/apparser/text_readers/__init__.py @@ -1,14 +1,2 @@ -from apparser.text_readers.base import BaseTextReader -from apparser.text_readers.screens_controller import ScreensController -from apparser.text_readers.models.text_data import TextData -from apparser.text_readers.easy_ocr import EasyOcrReader -from apparser.text_readers.paddle import PaddleTextReader -from apparser.text_readers.white_black_reader import WhiteBlackReader - - -__all__ = ["EasyOcrReader", - "ScreensController", - "BaseTextReader", - "WhiteBlackReader", - "PaddleTextReader", - "TextData"] +from apparser.text_readers.readers import * +from apparser.text_readers.models import * diff --git a/apparser/text_readers/detectors/__init__.py b/apparser/text_readers/detectors/__init__.py new file mode 100644 index 0000000..f854d9d --- /dev/null +++ b/apparser/text_readers/detectors/__init__.py @@ -0,0 +1,7 @@ +from apparser.text_readers.detectors.base import BaseTextDetector +from apparser.text_readers.detectors.easy_ocr import EasyOcrDetector + +__all__ = [ + "BaseTextDetector", + "EasyOcrDetector" +] diff --git a/apparser/text_readers/detectors/base.py b/apparser/text_readers/detectors/base.py new file mode 100644 index 0000000..684063b --- /dev/null +++ b/apparser/text_readers/detectors/base.py @@ -0,0 +1,20 @@ +import abc + +import numpy + +from apparser.geometry import QuadPoints + + +class BaseTextDetector(abc.ABC): + """Define the common interface for text detection backends.""" + + @abc.abstractmethod + def read_image(self, image: numpy.ndarray) -> list[QuadPoints]: + """Detect text coordinates in an image. + + :param image: Image data to process. + :type image: numpy.ndarray + :return: Detected text coordinates. 
+ :rtype: list[QuadPoints] + """ + pass diff --git a/apparser/text_readers/detectors/easy_ocr.py b/apparser/text_readers/detectors/easy_ocr.py new file mode 100644 index 0000000..f0b6ebc --- /dev/null +++ b/apparser/text_readers/detectors/easy_ocr.py @@ -0,0 +1,141 @@ +import importlib +from typing import Any + +import numpy + +from apparser.geometry import Point, QuadPoints +from apparser.text_readers.detectors.base import BaseTextDetector + + +def _build_box_points( + left: int, + top: int, + right: int, + bottom: int, +) -> QuadPoints: + return QuadPoints( + Point(left, top), + Point(right, top), + Point(right, bottom), + Point(left, bottom), + ) + + +def _parse_horizontal_box(box: Any) -> QuadPoints | None: + array = numpy.asarray(box) + if array.ndim != 1 or array.size < 4: + return None + left, right, top, bottom = array[:4] + return _build_box_points( + int(left), + int(top), + int(right), + int(bottom), + ) + + +def _parse_free_box(box: Any) -> QuadPoints | None: + array = numpy.asarray(box) + if array.ndim == 1 and array.size >= 8 and array.size % 2 == 0: + array = array.reshape(-1, 2) + if array.ndim != 2 or len(array) < 4 or array.shape[-1] < 2: + return None + points = [ + Point(int(coordinates[0]), int(coordinates[1])) + for coordinates in array[:4] + ] + return QuadPoints(*points) + + +def _extend_horizontal_points( + returned: list[QuadPoints], + horizontal_groups: Any, +) -> None: + for group in horizontal_groups: + points = _parse_horizontal_box(group) + if points is not None: + returned.append(points) + continue + + for box in group: + points = _parse_horizontal_box(box) + if points is not None: + returned.append(points) + + +def _extend_free_points( + returned: list[QuadPoints], + free_groups: Any, +) -> None: + for group in free_groups: + points = _parse_free_box(group) + if points is not None: + returned.append(points) + continue + + for box in group: + points = _parse_free_box(box) + if points is not None: + returned.append(points) + + +def 
_parse_detect_result(predicted: Any) -> list[QuadPoints]: + returned: list[QuadPoints] = [] + + if len(predicted) < 2: + return returned + + horizontal_groups, free_groups = predicted[:2] + _extend_horizontal_points(returned, horizontal_groups) + _extend_free_points(returned, free_groups) + return returned + + +def _build_default_settings(settings: dict[str, Any]) -> dict[str, Any]: + default_settings: dict[str, Any] = { + "detector": True, + "recognizer": False, + } + default_settings.update(settings) + return default_settings + + +class EasyOcrDetector(BaseTextDetector): + """Detect text regions in images by using EasyOCR.""" + + def __init__( + self, + lang_list: list[str] | None = None, + **settings: Any, + ) -> None: + """Initialize an EasyOCR-backed text detector. + + :param lang_list: Languages passed to the EasyOCR reader. + :type lang_list: list[str] | None + :param settings: Additional EasyOCR reader settings. + :type settings: dict[str, object] + """ + if lang_list is None: + lang_list = ["en"] + easyocr = importlib.import_module("easyocr") + self.__reader = easyocr.Reader( + lang_list, + **_build_default_settings(settings), + ) + + def read_image( + self, + image: numpy.ndarray, + **settings: Any, + ) -> list[QuadPoints]: + """Detect text coordinates in an image. + + :param image: Image data to process. + :type image: numpy.ndarray + :param settings: Additional EasyOCR detect settings. + :type settings: dict[str, object] + :return: Detected text coordinates. 
+ :rtype: list[QuadPoints] + """ + predicted = self.__reader.detect(image, **settings) + return _parse_detect_result(predicted) diff --git a/apparser/text_readers/models/__init__.py b/apparser/text_readers/models/__init__.py index e69de29..1c40a79 100644 --- a/apparser/text_readers/models/__init__.py +++ b/apparser/text_readers/models/__init__.py @@ -0,0 +1,3 @@ +from apparser.text_readers.models.text_data import TextData + +__all__ = ["TextData"] diff --git a/apparser/text_readers/models/text_data.py b/apparser/text_readers/models/text_data.py index d51b0e4..bdd9be3 100644 --- a/apparser/text_readers/models/text_data.py +++ b/apparser/text_readers/models/text_data.py @@ -1,6 +1,6 @@ from dataclasses import dataclass -from apparser.geometry import Point +from apparser.geometry import QuadPoints @dataclass(frozen=True) @@ -8,4 +8,4 @@ class TextData: """Store detected text together with its polygon coordinates.""" text: str - coordinates: list[Point] + coordinates: QuadPoints \ No newline at end of file diff --git a/apparser/text_readers/readers/__init__.py b/apparser/text_readers/readers/__init__.py new file mode 100644 index 0000000..1f7e74c --- /dev/null +++ b/apparser/text_readers/readers/__init__.py @@ -0,0 +1,15 @@ +from apparser.text_readers.readers.base import BaseTextReader +from apparser.text_readers.readers.easy_ocr import EasyOcrReader +from apparser.text_readers.readers.paddle import PaddleTextReader +from apparser.text_readers.readers.screens_controller import ScreensController +from apparser.text_readers.readers.white_black_reader import WhiteBlackReader +from apparser.text_readers.readers.rapid_ocr import RapidOcrReader +from apparser.text_readers.readers.compound import CompoundReader + +__all__ = ["EasyOcrReader", + "ScreensController", + "BaseTextReader", + "WhiteBlackReader", + "PaddleTextReader", + "CompoundReader", + "RapidOcrReader"] diff --git a/apparser/text_readers/base.py b/apparser/text_readers/readers/base.py similarity index 100% rename 
from apparser/text_readers/base.py rename to apparser/text_readers/readers/base.py diff --git a/apparser/text_readers/readers/compound.py b/apparser/text_readers/readers/compound.py new file mode 100644 index 0000000..9efdded --- /dev/null +++ b/apparser/text_readers/readers/compound.py @@ -0,0 +1,68 @@ +from apparser.geometry import QuadPoints, distance, Point + +from apparser.text_readers.readers.base import BaseTextReader + +from apparser.text_readers.models import TextData +from apparser.text_readers.detectors import BaseTextDetector +from apparser.text_readers.scanners import BaseTextScanner + + +import numpy +from PIL import Image + + +def _cut_by_coordinates(image: numpy.ndarray, coordinates: QuadPoints) -> numpy.ndarray: + pil_image = Image.fromarray(image) + if pil_image.mode not in ("RGB", "RGBA", "L"): + pil_image = pil_image.convert("RGB") + left_top = Point(coordinates.left_top.x, coordinates.left_top.y) + right_top = Point(coordinates.right_top.x, coordinates.right_top.y) + right_bottom = Point(coordinates.right_bottom.x, coordinates.right_bottom.y) + left_bottom = Point(coordinates.left_bottom.x, coordinates.left_bottom.y) + out_width = int(max(distance(left_top, right_top), distance(left_bottom, right_bottom))) + out_height = int(max(distance(left_top, left_bottom), distance(right_top, right_bottom))) + out_size = (max(out_width, 1), max(out_height, 1)) + quad_data = (left_top.x, left_top.y, + right_top.x, right_top.y, + right_bottom.x, right_bottom.y, + left_bottom.x, left_bottom.y) + transformed = pil_image.transform(out_size, Image.QUAD, quad_data, + resample=Image.BICUBIC) + return numpy.array(transformed) + + +class CompoundReader(BaseTextReader): + """Detect text regions and scan each detected image fragment.""" + + def __init__(self, detector: BaseTextDetector, scanner: BaseTextScanner ): + """Initialize a compound text reader. + + :param detector: Detector used to find text regions in an image. 
+ :type detector: BaseTextDetector + :param scanner: Scanner used to read text from detected image fragments. + :type scanner: BaseTextScanner + :raises TypeError: If any argument has an invalid type. + """ + if not isinstance(detector, BaseTextDetector): + raise TypeError('detector must be an instance of BaseTextDetector') + + if not isinstance(scanner, BaseTextScanner): + raise TypeError('scanner must be an instance of BaseTextScanner') + + self.__scanner = scanner + self.__detector = detector + + def read_image(self, image: numpy.ndarray) -> list[TextData]: + """Read text data. + + :param image: Image data to process. + :type image: numpy.ndarray + :return: Detected text data. + :rtype: list[TextData] + """ + result = [] + for coordinates in self.__detector.read_image(image): + cuted_image = _cut_by_coordinates(image, coordinates) + text = self.__scanner.read_image(cuted_image) + result.append(TextData(text=text, coordinates=coordinates)) + return result diff --git a/apparser/text_readers/easy_ocr.py b/apparser/text_readers/readers/easy_ocr.py similarity index 87% rename from apparser/text_readers/easy_ocr.py rename to apparser/text_readers/readers/easy_ocr.py index 9eaa89a..0f8633f 100644 --- a/apparser/text_readers/easy_ocr.py +++ b/apparser/text_readers/readers/easy_ocr.py @@ -1,10 +1,10 @@ import importlib import numpy -from apparser.text_readers.base import BaseTextReader +from apparser.text_readers.readers.base import BaseTextReader from apparser.text_readers.models.text_data import TextData -from apparser.geometry import Point +from apparser.geometry import Point, QuadPoints class EasyOcrReader(BaseTextReader): @@ -36,7 +36,7 @@ def read_image(self, image: numpy.ndarray, **settings) -> list[TextData]: returned = [] predicted = self.__reader.readtext(image, **settings) for i in predicted: - points = [Point(int(j[0]), int(j[1])) for j in i[0]] + points = QuadPoints(*[Point(int(j[0]), int(j[1])) for j in i[0]]) text_data = TextData(i[1], points) 
returned.append(text_data) return returned diff --git a/apparser/text_readers/paddle.py b/apparser/text_readers/readers/paddle.py similarity index 75% rename from apparser/text_readers/paddle.py rename to apparser/text_readers/readers/paddle.py index a87eb23..566b49b 100644 --- a/apparser/text_readers/paddle.py +++ b/apparser/text_readers/readers/paddle.py @@ -2,8 +2,8 @@ from typing import Any import numpy -from apparser.geometry import Point -from apparser.text_readers.base import BaseTextReader +from apparser.geometry import Point, QuadPoints +from apparser.text_readers.readers.base import BaseTextReader from apparser.text_readers.models.text_data import TextData @@ -12,16 +12,16 @@ def _build_box_points( top: int, right: int, bottom: int, -) -> list[Point]: - return [ +) -> QuadPoints: + return QuadPoints( Point(left, top), Point(right, top), Point(right, bottom), Point(left, bottom), - ] + ) -def _parse_points_geometry(geometry: Any) -> list[Point]: +def _parse_points_geometry(geometry: Any) -> QuadPoints | None: array = numpy.asarray(geometry) if array.ndim == 1 and array.size == 4: @@ -41,7 +41,7 @@ def _parse_points_geometry(geometry: Any) -> list[Point]: y_coordinates = array[..., 1].reshape(-1) if len(x_coordinates) == 0 or len(y_coordinates) == 0: - return [] + return None return _build_box_points( int(x_coordinates.min()), @@ -50,7 +50,7 @@ def _parse_points_geometry(geometry: Any) -> list[Point]: int(y_coordinates.max()), ) - return [] + return None def _parse_predict_result(predicted: list[Any]) -> list[TextData]: @@ -77,7 +77,7 @@ def _parse_predict_result(predicted: list[Any]) -> list[TextData]: for index in range(min(len(texts), len(geometries))): points = _parse_points_geometry(geometries[index]) - if len(points) < 4: + if points is None: continue returned.append(TextData(texts[index], points)) @@ -99,12 +99,23 @@ def _build_default_settings( class PaddleTextReader(BaseTextReader): + """Read text from images by using PaddleOCR.""" + def __init__( 
self, lang: str = "en", enable_mkldnn: bool = False, **settings: Any, ) -> None: + """Initialize a PaddleOCR-backed text reader. + + :param lang: Language passed to PaddleOCR. + :type lang: str + :param enable_mkldnn: Whether MKL-DNN acceleration should be enabled. + :type enable_mkldnn: bool + :param settings: Additional PaddleOCR reader settings. + :type settings: dict[str, object] + """ self.__lang = lang self.__enable_mkldnn = enable_mkldnn self.__settings = _build_default_settings( @@ -118,6 +129,15 @@ def read_image( image: numpy.ndarray, **settings: Any, ) -> list[TextData]: + """Read text data from an image. + + :param image: Image data to process. + :type image: numpy.ndarray + :param settings: Additional PaddleOCR predict settings. + :type settings: dict[str, object] + :return: Detected text data. + :rtype: list[TextData] + """ predicted = self.__reader.predict(image, **settings) return _parse_predict_result(predicted) diff --git a/apparser/text_readers/readers/rapid_ocr.py b/apparser/text_readers/readers/rapid_ocr.py new file mode 100644 index 0000000..c91ee4f --- /dev/null +++ b/apparser/text_readers/readers/rapid_ocr.py @@ -0,0 +1,110 @@ +import importlib +from typing import Any + +import numpy + +from apparser.geometry import Point, QuadPoints +from apparser.text_readers.models import TextData +from apparser.text_readers.readers.base import BaseTextReader + + +def _build_box_points(geometry: Any) -> QuadPoints | None: + array = numpy.asarray(geometry) + if array.ndim == 1 and array.size == 4: + left, top, right, bottom = array[:4] + return QuadPoints( + Point(int(left), int(top)), + Point(int(right), int(top)), + Point(int(right), int(bottom)), + Point(int(left), int(bottom)), + ) + if array.ndim == 1 and array.size >= 8 and array.size % 2 == 0: + array = array.reshape(-1, 2) + if array.ndim != 2 or len(array) < 4 or array.shape[-1] < 2: + return None + return QuadPoints( + *[Point(int(coordinates[0]), int(coordinates[1])) + for coordinates in 
array[:4]] + ) + + +def _parse_output_object(predicted: Any) -> list[TextData] | None: + if not hasattr(predicted, "boxes") and not hasattr(predicted, "txts"): + return None + boxes = getattr(predicted, "boxes", None) + texts = getattr(predicted, "txts", None) + if boxes is None or texts is None: + return [] + return _parse_boxes_and_texts(boxes, texts) + + +def _parse_result_item(item: Any) -> TextData | None: + if not isinstance(item, (list, tuple)) or len(item) < 2: + return None + points = _build_box_points(item[0]) + if points is None: + return None + return TextData(str(item[1]), points) + + +def _parse_result_list(predicted: Any) -> list[TextData]: + if predicted is None: + return [] + returned: list[TextData] = [] + try: + iterator = iter(predicted) + except TypeError: + return [] + for item in iterator: + text_data = _parse_result_item(item) + if text_data is not None: + returned.append(text_data) + return returned + + +def _parse_boxes_and_texts(boxes: Any, texts: Any) -> list[TextData]: + returned: list[TextData] = [] + for box, text in zip(boxes, texts): + points = _build_box_points(box) + if points is not None: + returned.append(TextData(str(text), points)) + return returned + + +def _parse_predict_result(predicted: Any) -> list[TextData]: + parsed = _parse_output_object(predicted) + if parsed is not None: + return parsed + if isinstance(predicted, tuple) and len(predicted) == 2: + predicted = predicted[0] + return _parse_result_list(predicted) + + +class RapidOcrReader(BaseTextReader): + """Read text from images by using RapidOCR.""" + + def __init__(self, **settings: Any) -> None: + """Initialize a RapidOCR-backed text reader. + + :param settings: Additional RapidOCR reader settings. + :type settings: dict[str, object] + """ + rapidocr = importlib.import_module("rapidocr") + self.__reader = rapidocr.RapidOCR(**settings) + + def read_image( + self, + image: numpy.ndarray, + **settings: Any, + ) -> list[TextData]: + """Read text data from an image. 
+ + :param image: Image data to process. + :type image: numpy.ndarray + :param settings: Additional RapidOCR read settings. + :type settings: dict[str, object] + :return: Detected text data. + :rtype: list[TextData] + """ + predicted = self.__reader(image, **settings) + return _parse_predict_result(predicted) diff --git a/apparser/text_readers/screens_controller.py b/apparser/text_readers/readers/screens_controller.py similarity index 95% rename from apparser/text_readers/screens_controller.py rename to apparser/text_readers/readers/screens_controller.py index 1262355..e8fe85f 100644 --- a/apparser/text_readers/screens_controller.py +++ b/apparser/text_readers/readers/screens_controller.py @@ -1,6 +1,6 @@ import numpy -from apparser.text_readers.base import BaseTextReader +from apparser.text_readers.readers.base import BaseTextReader from apparser.text_readers.models.text_data import TextData diff --git a/apparser/text_readers/white_black_reader.py b/apparser/text_readers/readers/white_black_reader.py similarity index 93% rename from apparser/text_readers/white_black_reader.py rename to apparser/text_readers/readers/white_black_reader.py index a16984a..05a9394 100644 --- a/apparser/text_readers/white_black_reader.py +++ b/apparser/text_readers/readers/white_black_reader.py @@ -1,6 +1,6 @@ import numpy -from apparser.text_readers.base import BaseTextReader +from apparser.text_readers.readers.base import BaseTextReader from apparser.text_readers.models.text_data import TextData from PIL import Image diff --git a/apparser/text_readers/scanners/__init__.py b/apparser/text_readers/scanners/__init__.py new file mode 100644 index 0000000..ce74c4f --- /dev/null +++ b/apparser/text_readers/scanners/__init__.py @@ -0,0 +1,4 @@ +from apparser.text_readers.scanners.base import BaseTextScanner +from apparser.text_readers.scanners.trocr import TrocrScanner + +__all__ = ["BaseTextScanner", "TrocrScanner"] diff --git a/apparser/text_readers/scanners/base.py 
b/apparser/text_readers/scanners/base.py new file mode 100644 index 0000000..d48451a --- /dev/null +++ b/apparser/text_readers/scanners/base.py @@ -0,0 +1,18 @@ +import abc + +import numpy + + +class BaseTextScanner(abc.ABC): + """Define the common interface for text scanner backends.""" + + @abc.abstractmethod + def read_image(self, image: numpy.ndarray) -> str: + """Read text from an image. + + :param image: Image data to process. + :type image: numpy.ndarray + :return: Detected text. + :rtype: str + """ + pass diff --git a/apparser/text_readers/scanners/trocr.py b/apparser/text_readers/scanners/trocr.py new file mode 100644 index 0000000..fadd290 --- /dev/null +++ b/apparser/text_readers/scanners/trocr.py @@ -0,0 +1,59 @@ +import importlib +import numpy +from PIL import Image + +from apparser.text_readers.scanners.base import BaseTextScanner + + +class TrocrScanner(BaseTextScanner): + """Read text from images by using TrOCR.""" + + def __init__(self, model_name="microsoft/trocr-base-printed", + processor_name=None, device=None): + """Initialize a TrOCR-backed text scanner. + + :param model_name: Vision encoder-decoder model name. + :type model_name: str + :param processor_name: Processor model name. If None, use ``model_name``. + :type processor_name: str | None + :param device: Device used for inference. If None, choose CUDA when available. + :type device: str | None + """ + transformers = importlib.import_module("transformers") + torch = importlib.import_module("torch") + + self.__pil_image_module = Image + + if processor_name is None: + processor_name = model_name + + self.__processor = transformers.TrOCRProcessor.from_pretrained( + processor_name + ) + self.__model = transformers.VisionEncoderDecoderModel.from_pretrained( + model_name + ) + + if device is None: + device = "cuda" if torch.cuda.is_available() else "cpu" + self.__device = device + self.__model.to(self.__device) + + def read_image(self, image: numpy.ndarray) -> str: + """Read text from an image. 
+ + :param image: Image data to process. + :type image: numpy.ndarray + :return: Detected text. + :rtype: str + """ + pil_image = self.__pil_image_module.fromarray(image).convert("RGB") + pixel_values = self.__processor( + images=pil_image, return_tensors="pt" + ).pixel_values + pixel_values = pixel_values.to(self.__device) + generated_ids = self.__model.generate(pixel_values) + generated_text = self.__processor.batch_decode( + generated_ids, skip_special_tokens=True + )[0] + return generated_text diff --git a/docs/api/geometry/index.rst b/docs/api/geometry/index.rst index eeb5a34..f4818aa 100644 --- a/docs/api/geometry/index.rst +++ b/docs/api/geometry/index.rst @@ -7,5 +7,6 @@ geometry Point Size + QuadPoints RelativelyPoint distance diff --git a/docs/api/text_readers/CompoundReader.rst b/docs/api/text_readers/CompoundReader.rst new file mode 100644 index 0000000..8b53fdf --- /dev/null +++ b/docs/api/text_readers/CompoundReader.rst @@ -0,0 +1,10 @@ +CompoundReader +================ + +.. currentmodule:: apparser.text_readers + +.. autoclass:: CompoundReader + :members: + :undoc-members: + :show-inheritance: + :member-order: bysource diff --git a/docs/api/text_readers/RapidOcrReader.rst b/docs/api/text_readers/RapidOcrReader.rst new file mode 100644 index 0000000..ded60ee --- /dev/null +++ b/docs/api/text_readers/RapidOcrReader.rst @@ -0,0 +1,10 @@ +RapidOcrReader +================ + +.. currentmodule:: apparser.text_readers + +.. autoclass:: RapidOcrReader + :members: + :undoc-members: + :show-inheritance: + :member-order: bysource diff --git a/docs/api/text_readers/detectors/BaseTextDetector.rst b/docs/api/text_readers/detectors/BaseTextDetector.rst new file mode 100644 index 0000000..226c37e --- /dev/null +++ b/docs/api/text_readers/detectors/BaseTextDetector.rst @@ -0,0 +1,10 @@ +BaseTextDetector +==================== + +.. currentmodule:: apparser.text_readers.detectors + +.. 
autoclass:: BaseTextDetector + :members: + :undoc-members: + :show-inheritance: + :member-order: bysource diff --git a/docs/api/text_readers/detectors/EasyOcrDetector.rst b/docs/api/text_readers/detectors/EasyOcrDetector.rst new file mode 100644 index 0000000..02b442c --- /dev/null +++ b/docs/api/text_readers/detectors/EasyOcrDetector.rst @@ -0,0 +1,10 @@ +EasyOcrDetector +===================== + +.. currentmodule:: apparser.text_readers.detectors + +.. autoclass:: EasyOcrDetector + :members: + :undoc-members: + :show-inheritance: + :member-order: bysource diff --git a/docs/api/text_readers/detectors/index.rst b/docs/api/text_readers/detectors/index.rst new file mode 100644 index 0000000..ba34567 --- /dev/null +++ b/docs/api/text_readers/detectors/index.rst @@ -0,0 +1,10 @@ +detectors +===================== + +.. toctree:: + :maxdepth: 1 + :titlesonly: + + BaseTextDetector + EasyOcrDetector + diff --git a/docs/api/text_readers/index.rst b/docs/api/text_readers/index.rst index 187174e..fedce76 100644 --- a/docs/api/text_readers/index.rst +++ b/docs/api/text_readers/index.rst @@ -1,13 +1,29 @@ text_readers ===================== +Modules +-------- + +.. toctree:: + :maxdepth: 2 + :titlesonly: + + detectors/index + scanners/index + +Classes +-------------- + .. toctree:: :maxdepth: 1 :titlesonly: BaseTextReader EasyOcrReader + RapidOcrReader PaddleTextReader ScreensController WhiteBlackReader + CompoundReader TextData + diff --git a/docs/api/text_readers/scanners/BaseTextScanner.rst b/docs/api/text_readers/scanners/BaseTextScanner.rst new file mode 100644 index 0000000..8f2f984 --- /dev/null +++ b/docs/api/text_readers/scanners/BaseTextScanner.rst @@ -0,0 +1,10 @@ +BaseTextScanner +=================== + +.. currentmodule:: apparser.text_readers.scanners + +.. 
autoclass:: BaseTextScanner + :members: + :undoc-members: + :show-inheritance: + :member-order: bysource diff --git a/docs/api/text_readers/scanners/TrocrScanner.rst b/docs/api/text_readers/scanners/TrocrScanner.rst new file mode 100644 index 0000000..cd0baf5 --- /dev/null +++ b/docs/api/text_readers/scanners/TrocrScanner.rst @@ -0,0 +1,10 @@ +TrocrScanner +============== + +.. currentmodule:: apparser.text_readers.scanners + +.. autoclass:: TrocrScanner + :members: + :undoc-members: + :show-inheritance: + :member-order: bysource diff --git a/docs/api/text_readers/scanners/index.rst b/docs/api/text_readers/scanners/index.rst new file mode 100644 index 0000000..4558f28 --- /dev/null +++ b/docs/api/text_readers/scanners/index.rst @@ -0,0 +1,10 @@ +scanners +===================== + +.. toctree:: + :maxdepth: 1 + :titlesonly: + + BaseTextScanner + TrocrScanner + diff --git a/docs/examples/ocr.rst b/docs/examples/ocr.rst index 1d4d71b..da23d16 100644 --- a/docs/examples/ocr.rst +++ b/docs/examples/ocr.rst @@ -46,13 +46,17 @@ Code app = App("Notepad", window_title="Notepad") + configure_algorithm.perform(app.ui) + ui = WindowByDisplayUi(app.ui.window) - while True: + try: hello_world_algorithm.perform(ui) new_tab_algorithm.perform(ui) + finally: + app.stop_app() Video -------- -.. image:: ../_static/ocr.gif \ No newline at end of file +.. image:: ../_static/ocr.gif diff --git a/docs/index.rst b/docs/index.rst index 16ea236..c6d746e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,16 +7,22 @@ Apparser .. raw:: html

- PyPi - Github + License + Unit tests +
+ PyPI downloads + Documentation +
+ PyPI + GitHub Issues

-Apparser is a Python library designed for automating desktop applications and managing UI interfaces using artificial intelligence, such as OCR or object detection models. +Apparser is a Python library for automating desktop applications and interacting with UIs using AI-powered tools such as OCR and object detection models. -Link to `PyPi `__ +Package page on `PyPI `__ -Link to `GitHub `__ +Repository on `GitHub `__ Donation =========== @@ -24,21 +30,21 @@ If you'd like to financially support the developers for their work: .. raw:: html -

- Donation link +

+ Donation link

-Contribution +Contributing =============== -1. If something doesn't work - open issue. -2. If you want something fixed - open issue. -3. If you can help with the library - email. +1. If something doesn't work, open an issue. +2. If you want something fixed, open an issue. +3. If you can help with the library, email us. apparser.development@gmail.com -Any help in development is welcome!) +Contributions are welcome! diff --git a/docs/info/about.rst b/docs/info/about.rst index 436b0d1..2826f6e 100644 --- a/docs/info/about.rst +++ b/docs/info/about.rst @@ -7,16 +7,22 @@ Apparser .. raw:: html

- PyPi - Github + License + Unit tests +
+ PyPI downloads + Documentation +
+ PyPI + GitHub Issues

-Apparser is a Python library designed for automating desktop applications and managing UI interfaces using artificial intelligence, such as OCR or object detection models. +Apparser is a Python library for automating desktop applications and interacting with UIs using AI-powered tools such as OCR and object detection models. -Link to `PyPi `__ +Package page on `PyPI `__ -Link to `GitHub `__ +Repository on `GitHub `__ Donation ---------- @@ -25,18 +31,18 @@ If you'd like to financially support the developers for their work: .. raw:: html -

- Donation link +

+ Donation link

-Contribution +Contributing -------------- -1. If something doesn't work - open issue. -2. If you want something fixed - open issue. -3. If you can help with the library - email. +1. If something doesn't work, open an issue. +2. If you want something fixed, open an issue. +3. If you can help with the library, email us. apparser.development@gmail.com -Any help in development is welcome!) +Contributions are welcome! diff --git a/docs/info/instructions_ids.rst b/docs/info/instructions_ids.rst index d06207e..bec6635 100644 --- a/docs/info/instructions_ids.rst +++ b/docs/info/instructions_ids.rst @@ -81,6 +81,18 @@ Instructions available through get_instruction_by_id() * - ``9`` - ``Sleep`` - Pause execution for a fixed amount of time + * - ``10`` + - ``PressKeyDown`` + - Press a single key down + * - ``11`` + - ``PressKeyUp`` + - Release a single key + * - ``12`` + - ``MouseUp`` + - Release a mouse button + * - ``13`` + - ``MouseDown`` + - Press a mouse button down * - ``1000`` - ``WindowToForeground`` - Bring the window to the foreground @@ -114,6 +126,9 @@ Instructions available through get_instruction_by_id() * - ``2004`` - ``PlotAllText`` - Draw detected text on top of a screenshot + * - ``2005`` + - ``WaitText`` + - Wait until matching text appears * - ``3000`` - ``PlayTextAudio`` - Synthesize text and play it as regular audio @@ -148,4 +163,4 @@ Classes with their own id but not resolved by get_instruction_by_id() - Execute instructions with a provided ``speaker`` * - ``1505`` - ``UniqueAlgorithm`` - - Inject dependencies into instructions by argument type \ No newline at end of file + - Inject dependencies into instructions by argument type diff --git a/example.gif b/example.gif new file mode 100644 index 0000000..fd9ec70 Binary files /dev/null and b/example.gif differ diff --git a/pyproject.toml b/pyproject.toml index 80b3efc..a0eb667 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ dependencies = [ "thefuzz >= 0.20.0" ] name = "apparser" 
-version = "1.0.0" +version = "1.1.0" authors = [ { name = "Terochkin A.S", email = "apparser.development@gmail.com" }, ] @@ -59,7 +59,9 @@ cv = [ ocr = [ "easyocr >= 1.7.2, < 2.0", "paddleocr >= 3.5.0", - "paddlepaddle >= 3.3.1" + "paddlepaddle >= 3.3.1", + "rapidocr>=3.0.0", + "onnxruntime>=1.20.1" ] speak = [ diff --git a/tests/apparser/core/test_app.py b/tests/apparser/core/test_app.py index c18dc7e..4f51e6a 100644 --- a/tests/apparser/core/test_app.py +++ b/tests/apparser/core/test_app.py @@ -10,12 +10,16 @@ class FakeProcess: - def __init__(self) -> None: + def __init__(self, pid = 0) -> None: self.kill_calls = 0 + self.__pid = pid def kill(self) -> None: self.kill_calls += 1 + @property + def pid(self) -> int: + return self.__pid class FakeWindowUi: def __init__(self, window: FakeWindow) -> None: diff --git a/tests/apparser/cv/utils/test_changes_checker.py b/tests/apparser/cv/utils/test_changes_checker.py index f205d25..84aa970 100644 --- a/tests/apparser/cv/utils/test_changes_checker.py +++ b/tests/apparser/cv/utils/test_changes_checker.py @@ -21,7 +21,8 @@ def test_is_resized_requires_both_dimensions_to_change() -> None: only_width = CvBox("button", 1, 1, 2, 5, 4, ui) assert _is_resized(changed, first) is True - assert _is_resized(only_width, first) is False + assert _is_resized(only_width, first) is True + assert _is_resized(first, first) is False def test_changes_checker_reports_detected_moved_resized_and_undetected() -> None: diff --git a/tests/apparser/instructions/default/test_press.py b/tests/apparser/instructions/default/test_press.py index 81d3425..f9b496a 100644 --- a/tests/apparser/instructions/default/test_press.py +++ b/tests/apparser/instructions/default/test_press.py @@ -33,9 +33,8 @@ def test_press_keys_combination_presses_and_releases_keys() -> None: def test_press_keys_combination_rejects_invalid_key_on_perform() -> None: - instruction = PressKeysCombination([object()]) with pytest.raises(TypeError): - instruction.perform() + 
PressKeysCombination([object()]) def test_press_key_down_sends_key_down() -> None: diff --git a/tests/apparser/instructions/ocr/test_move_to_text.py b/tests/apparser/instructions/ocr/test_move_to_text.py index 87b66e2..3032511 100644 --- a/tests/apparser/instructions/ocr/test_move_to_text.py +++ b/tests/apparser/instructions/ocr/test_move_to_text.py @@ -1,7 +1,7 @@ from __future__ import annotations import pytest -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints from apparser.exceptions import TextNotFoundException from apparser.instructions.ocr.move_to_text import MoveToText @@ -50,7 +50,7 @@ def test_move_to_text_rejects_low_similarity(monkeypatch: pytest.MonkeyPatch) -> def test_move_to_text_moves_to_text_center(monkeypatch: pytest.MonkeyPatch) -> None: getter = GetText() getter._GetText__local_answer = [ - TextData("hello", [Point(0, 0), Point(4, 0), Point(4, 4), Point(0, 4)]) + TextData("hello", QuadPoints(Point(0, 0), Point(4, 0), Point(4, 4), Point(0, 4))) ] monkeypatch.setattr(getter, "perform", lambda ui, text_reader: None) moved_to: list[Point] = [] diff --git a/tests/apparser/instructions/ocr/test_plot_text.py b/tests/apparser/instructions/ocr/test_plot_text.py index e440f4a..9cea6b6 100644 --- a/tests/apparser/instructions/ocr/test_plot_text.py +++ b/tests/apparser/instructions/ocr/test_plot_text.py @@ -3,7 +3,7 @@ from unittest.mock import Mock import numpy -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints from apparser.instructions.ocr.plot_text import PlotAllText, _Painter from apparser.instructions.ocr.text_getter import GetText @@ -14,7 +14,7 @@ def test_painter_draws_rectangle_and_text() -> None: draw = Mock() painter = _Painter(draw, (255, 255, 255, 255)) - data = TextData("hello", [Point(1, 2), Point(3, 2), Point(3, 4), Point(1, 4)]) + data = TextData("hello", QuadPoints(Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)), ) painter.draw([data]) @@ -25,7 +25,7 @@ def 
test_painter_draws_rectangle_and_text() -> None: def test_plot_all_text_draws_and_shows_image(monkeypatch: pytest.MonkeyPatch) -> None: getter = GetText() getter._GetText__global_answer = [ - TextData("hello", [Point(1, 2), Point(3, 2), Point(3, 4), Point(1, 4)]) + TextData("hello", QuadPoints(Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)), ) ] getter._GetText__screenshot = numpy.zeros((4, 4, 3), dtype=numpy.uint8) monkeypatch.setattr(getter, "perform", lambda ui, text_reader: None) diff --git a/tests/apparser/instructions/ocr/test_print_all_text.py b/tests/apparser/instructions/ocr/test_print_all_text.py index 06ecae1..cf9ec05 100644 --- a/tests/apparser/instructions/ocr/test_print_all_text.py +++ b/tests/apparser/instructions/ocr/test_print_all_text.py @@ -1,6 +1,6 @@ from __future__ import annotations -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints from apparser.instructions.ocr.print_all_text import PrintAllText from apparser.instructions.ocr.text_getter import GetText @@ -11,7 +11,7 @@ def test_print_all_text_prints_each_entry(monkeypatch: pytest.MonkeyPatch) -> None: getter = GetText() getter._GetText__local_answer = [ - TextData("hello", [Point(1, 2), Point(3, 4)]), + TextData("hello", QuadPoints(Point(1, 2), Point(3, 4), Point(3, 4), Point(3, 4))), ] printed: list[str] = [] monkeypatch.setattr(getter, "perform", lambda ui, text_reader: None) @@ -20,5 +20,5 @@ def test_print_all_text_prints_each_entry(monkeypatch: pytest.MonkeyPatch) -> No instruction.perform(FakeUi(), FakeTextReader()) - assert printed == ['text: "hello", coordinates: Point(x = 1, y = 2) Point(x = 3, y = 4) '] + assert printed == ['text: "hello", coordinates: QuadPoints(left_top = Point(x = 1, y = 2), right_top = Point(x = 3, y = 4), right_bottom = Point(x = 3, y = 4), left_bottom = Point(x = 3, y = 4))'] assert instruction.id == 2003 diff --git a/tests/apparser/instructions/ocr/test_text_getter.py 
b/tests/apparser/instructions/ocr/test_text_getter.py index a18e96a..d16ba0c 100644 --- a/tests/apparser/instructions/ocr/test_text_getter.py +++ b/tests/apparser/instructions/ocr/test_text_getter.py @@ -1,7 +1,7 @@ from __future__ import annotations import numpy -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints from apparser.geometry import RelativelyPoint from apparser.instructions.ocr.text_getter import GetText @@ -15,7 +15,7 @@ def test_text_getter_reads_and_converts_coordinates() -> None: result=[ TextData( "hello", - [Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)], + QuadPoints(Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)), ) ] ) @@ -25,14 +25,14 @@ def test_text_getter_reads_and_converts_coordinates() -> None: instruction.perform(ui, reader) assert reader.images[0].shape == (2, 3, 3) - assert instruction.global_answer[0].coordinates[0] == Point(0, 0) - assert instruction.local_answer[0].coordinates[0] == Point(1, 1) + assert instruction.global_answer[0].coordinates.left_top == Point(0, 0) + assert instruction.local_answer[0].coordinates.left_top == Point(1, 1) assert instruction.screenshot.shape == (2, 3, 3) def test_text_getter_respects_cached_result() -> None: reader = FakeTextReader( - result=[TextData("hello", [Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)])] + result=[TextData("hello", QuadPoints(Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)), )] ) instruction = GetText( RelativelyPoint(0, 0), diff --git a/tests/apparser/instructions/ocr/test_wait_text.py b/tests/apparser/instructions/ocr/test_wait_text.py index 0b5269c..f0081df 100644 --- a/tests/apparser/instructions/ocr/test_wait_text.py +++ b/tests/apparser/instructions/ocr/test_wait_text.py @@ -1,7 +1,7 @@ from __future__ import annotations import pytest -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints from apparser.exceptions import TimeoutException from apparser.instructions.ocr.text_getter 
import GetText @@ -15,7 +15,7 @@ def test_wait_text_returns_when_text_is_found() -> None: result=[ TextData( "hello", - [Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)], + QuadPoints(Point(0, 0), Point(1, 0), Point(1, 1), Point(0, 1)), ), ], ) diff --git a/tests/apparser/text_readers/detectors/__init__.py b/tests/apparser/text_readers/detectors/__init__.py new file mode 100644 index 0000000..9d48db4 --- /dev/null +++ b/tests/apparser/text_readers/detectors/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/tests/apparser/text_readers/detectors/test_base.py b/tests/apparser/text_readers/detectors/test_base.py new file mode 100644 index 0000000..c7558ea --- /dev/null +++ b/tests/apparser/text_readers/detectors/test_base.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +import pytest + +from apparser.text_readers.detectors import BaseTextDetector + + +def test_base_text_detector_is_abstract() -> None: + with pytest.raises(TypeError): + BaseTextDetector() diff --git a/tests/apparser/text_readers/detectors/test_easy_ocr.py b/tests/apparser/text_readers/detectors/test_easy_ocr.py new file mode 100644 index 0000000..f2cf5ed --- /dev/null +++ b/tests/apparser/text_readers/detectors/test_easy_ocr.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import numpy +from appwindows.geometry import Point, QuadPoints + +from apparser.text_readers.detectors import EasyOcrDetector +from tests.utils import easyocr_stub + + +def test_easy_ocr_detector_uses_default_language() -> None: + EasyOcrDetector() + + instance = easyocr_stub.Reader.instances[0] + assert instance.lang_list == ["en"] + assert instance.settings == { + "detector": True, + "recognizer": False, + } + + +def test_easy_ocr_detector_maps_detected_boxes() -> None: + detector = EasyOcrDetector(["ru"], gpu=False) + instance = easyocr_stub.Reader.instances[0] + image = numpy.zeros((2, 2, 3), dtype=numpy.uint8) + instance.detected = ( + [ + [ + [1.1, 5.8, 2.2, 4.9], + ], + ], + [ + 
[ + [ + [6.1, 7.2], + [8.3, 9.4], + [10.5, 11.6], + [12.7, 13.8], + ], + ], + ], + ) + + result = detector.read_image(image, slope_ths=0.1) + + first_points = QuadPoints( + Point(1, 2), + Point(5, 2), + Point(5, 4), + Point(1, 4), + ) + second_points = QuadPoints( + Point(6, 7), + Point(8, 9), + Point(10, 11), + Point(12, 13), + ) + assert instance.settings == { + "detector": True, + "recognizer": False, + "gpu": False, + } + assert instance.detect_calls[0]["image"] is image + assert instance.detect_calls[0]["settings"] == {"slope_ths": 0.1} + assert result[0].left_top == first_points.left_top + assert result[0].right_top == first_points.right_top + assert result[0].right_bottom == first_points.right_bottom + assert result[0].left_bottom == first_points.left_bottom + assert result[1].left_top == second_points.left_top + assert result[1].right_top == second_points.right_top + assert result[1].right_bottom == second_points.right_bottom + assert result[1].left_bottom == second_points.left_bottom diff --git a/tests/apparser/text_readers/scanners/__init__.py b/tests/apparser/text_readers/scanners/__init__.py new file mode 100644 index 0000000..9d48db4 --- /dev/null +++ b/tests/apparser/text_readers/scanners/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/tests/apparser/text_readers/scanners/test_base.py b/tests/apparser/text_readers/scanners/test_base.py new file mode 100644 index 0000000..724ec1f --- /dev/null +++ b/tests/apparser/text_readers/scanners/test_base.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +import pytest + +from apparser.text_readers.scanners import BaseTextScanner + + +def test_base_text_scanner_is_abstract() -> None: + with pytest.raises(TypeError): + BaseTextScanner() diff --git a/tests/apparser/text_readers/scanners/test_trocr.py b/tests/apparser/text_readers/scanners/test_trocr.py new file mode 100644 index 0000000..8583ca5 --- /dev/null +++ b/tests/apparser/text_readers/scanners/test_trocr.py @@ -0,0 +1,135 
@@ +from __future__ import annotations + +import sys +from types import ModuleType +from typing import Any + +import numpy +import pytest + +from apparser.text_readers.scanners import TrocrScanner + + +class PixelValuesStub: + def __init__(self) -> None: + self.device: str | None = None + + def to(self, device: str) -> "PixelValuesStub": + self.device = device + return self + + +class ProcessorResultStub: + def __init__(self, pixel_values: PixelValuesStub) -> None: + self.pixel_values = pixel_values + + +class ProcessorStub: + instances: list["ProcessorStub"] = [] + + def __init__(self, name: str) -> None: + self.name = name + self.pixel_values = PixelValuesStub() + self.calls: list[dict[str, Any]] = [] + self.decode_calls: list[dict[str, Any]] = [] + self.__class__.instances.append(self) + + @classmethod + def from_pretrained(cls, name: str) -> "ProcessorStub": + return cls(name) + + def __call__( + self, + images: Any, + return_tensors: str, + ) -> ProcessorResultStub: + self.calls.append( + { + "images": images, + "return_tensors": return_tensors, + } + ) + return ProcessorResultStub(self.pixel_values) + + def batch_decode( + self, + generated_ids: list[int], + skip_special_tokens: bool, + ) -> list[str]: + self.decode_calls.append( + { + "generated_ids": generated_ids, + "skip_special_tokens": skip_special_tokens, + } + ) + return ["recognized text"] + + +class ModelStub: + instances: list["ModelStub"] = [] + + def __init__(self, name: str) -> None: + self.name = name + self.device: str | None = None + self.generate_calls: list[PixelValuesStub] = [] + self.__class__.instances.append(self) + + @classmethod + def from_pretrained(cls, name: str) -> "ModelStub": + return cls(name) + + def to(self, device: str) -> None: + self.device = device + + def generate(self, pixel_values: PixelValuesStub) -> list[int]: + self.generate_calls.append(pixel_values) + return [1, 2, 3] + + +class TransformersStub(ModuleType): + def __init__(self) -> None: + 
super().__init__("transformers") + ProcessorStub.instances = [] + ModelStub.instances = [] + self.TrOCRProcessor = ProcessorStub + self.VisionEncoderDecoderModel = ModelStub + + +class CudaStub: + def is_available(self) -> bool: + return True + + +class TorchModuleStub(ModuleType): + def __init__(self) -> None: + super().__init__("torch") + self.cuda = CudaStub() + + +def test_trocr_scanner_reads_text(monkeypatch: pytest.MonkeyPatch) -> None: + transformers_stub = TransformersStub() + torch_stub = TorchModuleStub() + monkeypatch.setitem(sys.modules, "transformers", transformers_stub) + monkeypatch.setitem(sys.modules, "torch", torch_stub) + scanner = TrocrScanner( + model_name="model", + processor_name="processor", + ) + image = numpy.zeros((2, 2, 3), dtype=numpy.uint8) + + result = scanner.read_image(image) + + processor = ProcessorStub.instances[0] + model = ModelStub.instances[0] + assert processor.name == "processor" + assert model.name == "model" + assert model.device == "cuda" + assert processor.pixel_values.device == "cuda" + assert model.generate_calls == [processor.pixel_values] + assert processor.decode_calls == [ + { + "generated_ids": [1, 2, 3], + "skip_special_tokens": True, + } + ] + assert result == "recognized text" diff --git a/tests/apparser/text_readers/test_base.py b/tests/apparser/text_readers/test_base.py index e2c87b6..14343aa 100644 --- a/tests/apparser/text_readers/test_base.py +++ b/tests/apparser/text_readers/test_base.py @@ -2,7 +2,7 @@ import pytest -from apparser.text_readers.base import BaseTextReader +from apparser.text_readers import BaseTextReader def test_base_text_reader_is_abstract() -> None: diff --git a/tests/apparser/text_readers/test_compound.py b/tests/apparser/text_readers/test_compound.py new file mode 100644 index 0000000..c2ec385 --- /dev/null +++ b/tests/apparser/text_readers/test_compound.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +from typing import Any + +import numpy +import pytest +from 
appwindows.geometry import Point, QuadPoints + +from apparser.text_readers import CompoundReader +from apparser.text_readers.detectors import BaseTextDetector +from apparser.text_readers.scanners import BaseTextScanner + + +class DetectorStub(BaseTextDetector): + def __init__(self, result: list[QuadPoints]) -> None: + self.result = result + self.images: list[numpy.ndarray] = [] + + def read_image(self, image: numpy.ndarray) -> list[QuadPoints]: + self.images.append(image) + return self.result + + +class ScannerStub(BaseTextScanner): + def __init__(self, result: str) -> None: + self.result = result + self.images: list[numpy.ndarray] = [] + + def read_image(self, image: numpy.ndarray) -> str: + self.images.append(image) + return self.result + + +def assert_quad_points_equal( + first: QuadPoints, + second: QuadPoints, +) -> None: + assert first.left_top == second.left_top + assert first.right_top == second.right_top + assert first.right_bottom == second.right_bottom + assert first.left_bottom == second.left_bottom + + +@pytest.mark.parametrize( + ("detector", "scanner"), + [ + (object(), ScannerStub("text")), + (DetectorStub([]), object()), + ], +) +def test_compound_reader_rejects_invalid_backends( + detector: Any, + scanner: Any, +) -> None: + with pytest.raises(TypeError): + CompoundReader(detector, scanner) + + +def test_compound_reader_detects_and_scans_text() -> None: + coordinates = QuadPoints( + Point(1, 1), + Point(3, 1), + Point(3, 3), + Point(1, 3), + ) + detector = DetectorStub([coordinates]) + scanner = ScannerStub("text") + reader = CompoundReader(detector, scanner) + image = numpy.zeros((4, 4, 3), dtype=numpy.uint8) + + result = reader.read_image(image) + + assert detector.images[0] is image + assert scanner.images[0].shape == (2, 2, 3) + assert result[0].text == "text" + assert_quad_points_equal(result[0].coordinates, coordinates) diff --git a/tests/apparser/text_readers/test_easy_ocr.py b/tests/apparser/text_readers/test_easy_ocr.py index 
94c21a3..81e2e74 100644 --- a/tests/apparser/text_readers/test_easy_ocr.py +++ b/tests/apparser/text_readers/test_easy_ocr.py @@ -1,9 +1,9 @@ from __future__ import annotations import numpy -from appwindows.geometry import Point +from appwindows.geometry import Point, QuadPoints -from apparser.text_readers.easy_ocr import EasyOcrReader +from apparser.text_readers import EasyOcrReader from tests.utils import easyocr_stub @@ -30,9 +30,14 @@ def test_easy_ocr_reader_maps_prediction_result() -> None: assert instance.settings == {"gpu": False} assert instance.read_calls[0]["settings"] == {"detail": 1} assert result[0].text == "text" - assert result[0].coordinates == [ + + result_point = QuadPoints( Point(1, 2), Point(3, 4), Point(5, 6), Point(7, 8), - ] + ) + assert result[0].coordinates.left_top == result_point.left_top + assert result[0].coordinates.right_bottom == result_point.right_bottom + assert result[0].coordinates.right_top == result_point.right_top + assert result[0].coordinates.left_bottom == result_point.left_bottom \ No newline at end of file diff --git a/tests/apparser/text_readers/test_paddle_ocr.py b/tests/apparser/text_readers/test_paddle_ocr.py index 09c6f75..ff6e818 100644 --- a/tests/apparser/text_readers/test_paddle_ocr.py +++ b/tests/apparser/text_readers/test_paddle_ocr.py @@ -1,6 +1,6 @@ import numpy -from apparser.text_readers.paddle import ( +from apparser.text_readers import ( PaddleTextReader, ) from tests.utils import paddleocr_stub diff --git a/tests/apparser/text_readers/test_rapid_ocr.py b/tests/apparser/text_readers/test_rapid_ocr.py new file mode 100644 index 0000000..ba4d5d6 --- /dev/null +++ b/tests/apparser/text_readers/test_rapid_ocr.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import sys +from types import ModuleType +from typing import Any + +import numpy +import pytest +from appwindows.geometry import Point, QuadPoints + +from apparser.text_readers import RapidOcrReader + + +class RapidOcrEngineStub: + instances: 
list["RapidOcrEngineStub"] = [] + + def __init__(self, **settings: Any) -> None: + self.settings = settings + self.result: Any = [] + self.calls: list[dict[str, Any]] = [] + self.__class__.instances.append(self) + + def __call__(self, image: numpy.ndarray, **settings: Any) -> Any: + self.calls.append({"image": image, "settings": settings}) + return self.result + + +class RapidOcrStub(ModuleType): + def __init__(self) -> None: + super().__init__("rapidocr") + RapidOcrEngineStub.instances = [] + self.RapidOCR = RapidOcrEngineStub + + +class RapidOcrOutputStub: + def __init__(self, boxes: list[Any], txts: list[str]) -> None: + self.boxes = boxes + self.txts = txts + + +def assert_quad_points_equal( + first: QuadPoints, + second: QuadPoints, +) -> None: + assert first.left_top == second.left_top + assert first.right_top == second.right_top + assert first.right_bottom == second.right_bottom + assert first.left_bottom == second.left_bottom + + +def test_rapid_ocr_reader_maps_result_list( + monkeypatch: pytest.MonkeyPatch, +) -> None: + rapidocr_stub = RapidOcrStub() + monkeypatch.setitem(sys.modules, "rapidocr", rapidocr_stub) + reader = RapidOcrReader(device="cpu") + instance = RapidOcrEngineStub.instances[0] + image = numpy.zeros((2, 2, 3), dtype=numpy.uint8) + instance.result = [ + ( + [ + [1.1, 2.2], + [3.3, 4.4], + [5.5, 6.6], + [7.7, 8.8], + ], + "text", + ), + ] + + result = reader.read_image(image, use_det=True) + + expected_points = QuadPoints( + Point(1, 2), + Point(3, 4), + Point(5, 6), + Point(7, 8), + ) + assert instance.settings == {"device": "cpu"} + assert instance.calls[0]["image"] is image + assert instance.calls[0]["settings"] == {"use_det": True} + assert result[0].text == "text" + assert_quad_points_equal(result[0].coordinates, expected_points) + + +def test_rapid_ocr_reader_maps_output_object( + monkeypatch: pytest.MonkeyPatch, +) -> None: + rapidocr_stub = RapidOcrStub() + monkeypatch.setitem(sys.modules, "rapidocr", rapidocr_stub) + reader = 
RapidOcrReader() + instance = RapidOcrEngineStub.instances[0] + instance.result = RapidOcrOutputStub( + boxes=[ + [1, 2, 3, 4], + ], + txts=["text"], + ) + + result = reader.read_image(numpy.zeros((2, 2, 3), dtype=numpy.uint8)) + + expected_points = QuadPoints( + Point(1, 2), + Point(3, 2), + Point(3, 4), + Point(1, 4), + ) + assert result[0].text == "text" + assert_quad_points_equal(result[0].coordinates, expected_points) diff --git a/tests/apparser/text_readers/test_screens_controller.py b/tests/apparser/text_readers/test_screens_controller.py index 11f480d..0a0faed 100644 --- a/tests/apparser/text_readers/test_screens_controller.py +++ b/tests/apparser/text_readers/test_screens_controller.py @@ -2,7 +2,7 @@ import numpy -from apparser.text_readers.screens_controller import ScreensController +from apparser.text_readers import ScreensController from tests.utils import FakeTextReader diff --git a/tests/apparser/text_readers/test_white_black_reader.py b/tests/apparser/text_readers/test_white_black_reader.py index 0b24106..a1770d8 100644 --- a/tests/apparser/text_readers/test_white_black_reader.py +++ b/tests/apparser/text_readers/test_white_black_reader.py @@ -2,7 +2,7 @@ import numpy -from apparser.text_readers.white_black_reader import WhiteBlackReader +from apparser.text_readers import WhiteBlackReader from tests.utils import FakeTextReader diff --git a/tests/utils/fakes/backends/fake_text_reader.py b/tests/utils/fakes/backends/fake_text_reader.py index 252f640..fa9fdcc 100644 --- a/tests/utils/fakes/backends/fake_text_reader.py +++ b/tests/utils/fakes/backends/fake_text_reader.py @@ -9,7 +9,7 @@ install_external_stubs() -from apparser.text_readers.base import BaseTextReader +from apparser.text_readers import BaseTextReader class FakeTextReader(BaseTextReader): diff --git a/tests/utils/fakes/instructions/fake_ocr_instruction.py b/tests/utils/fakes/instructions/fake_ocr_instruction.py index 7193371..1535ccc 100644 --- 
a/tests/utils/fakes/instructions/fake_ocr_instruction.py +++ b/tests/utils/fakes/instructions/fake_ocr_instruction.py @@ -7,7 +7,7 @@ install_external_stubs() from apparser.core.ui.base import BaseUi -from apparser.text_readers.base import BaseTextReader +from apparser.text_readers import BaseTextReader class FakeOcrInstruction(FakeInstruction): diff --git a/tests/utils/stubs/text/easy_ocr_reader_stub.py b/tests/utils/stubs/text/easy_ocr_reader_stub.py index defe96b..fafb154 100644 --- a/tests/utils/stubs/text/easy_ocr_reader_stub.py +++ b/tests/utils/stubs/text/easy_ocr_reader_stub.py @@ -12,9 +12,15 @@ def __init__(self, lang_list: list[str], **settings: Any) -> None: self.lang_list = lang_list self.settings = settings self.predicted: list[Any] = [] + self.detected: Any = ([], []) self.read_calls: list[dict[str, Any]] = [] + self.detect_calls: list[dict[str, Any]] = [] self.__class__.instances.append(self) def readtext(self, image: numpy.ndarray, **settings: Any) -> list[Any]: self.read_calls.append({"image": image, "settings": settings}) return self.predicted + + def detect(self, image: numpy.ndarray, **settings: Any) -> Any: + self.detect_calls.append({"image": image, "settings": settings}) + return self.detected