refact: use ruff instead of autopep8,bandit,pylint for linting

2026-03-12 10:31:50 +01:00 · 2025-04-28 12:51:51 +02:00
parent f0b84ab335
commit 376ec76226
27 changed files with 437 additions and 605 deletions
--- a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py
+++ b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py
@@ -1,26 +1,24 @@
-"""
-SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
-SPDX-License-Identifier: AGPL-3.0-or-later
-SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
-"""
-import asyncio, enum, inspect, json, os, platform, secrets, shutil, time
+# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
+import asyncio, enum, inspect, json, os, platform, secrets, shutil  # isort: skip
 from collections.abc import Callable, Coroutine, Iterable
 from gettext import gettext as _
-from typing import cast, Any, Final
+from typing import Any, Final, cast

 try:
    from typing import Never  # type: ignore[attr-defined,unused-ignore] # mypy
 except ImportError:
    from typing import NoReturn as Never  # Python <3.11

-import nodriver, psutil
+import nodriver, psutil  # isort: skip
 from nodriver.core.browser import Browser
 from nodriver.core.config import Config
 from nodriver.core.element import Element
 from nodriver.core.tab import Tab as Page

 from . import loggers, net
-from .misc import ensure, T
+from .misc import T, ensure

 __all__ = [
    "Browser",
@@ -70,8 +68,8 @@ class WebScrapingMixin:

    def __init__(self) -> None:
        self.browser_config:Final[BrowserConfig] = BrowserConfig()
-        self.browser:Browser = None  # pyright: ignore
-        self.page:Page = None  # pyright: ignore
+        self.browser:Browser = None  # pyright: ignore[reportAttributeAccessIssue]
+        self.page:Page = None  # pyright: ignore[reportAttributeAccessIssue]

    async def create_browser_session(self) -> None:
        LOG.info("Creating Browser session...")
@@ -96,7 +94,7 @@ class WebScrapingMixin:
        if remote_port > 0:
            LOG.info("Using existing browser process at %s:%s", remote_host, remote_port)
            ensure(net.is_port_open(remote_host, remote_port),
-                f"Browser process not reachable at {remote_host}:{remote_port}. " +
+                f"Browser process not reachable at {remote_host}:{remote_port}. "
                f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml")
            cfg = Config(
                browser_executable_path = self.browser_config.binary_location  # actually not necessary but nodriver fails without
@@ -208,14 +206,14 @@ class WebScrapingMixin:
    def close_browser_session(self) -> None:
        if self.browser:
            LOG.debug("Closing Browser session...")
-            self.page = None  # pyright: ignore
-            browser_process = psutil.Process(self.browser._process_pid)  # pylint: disable=protected-access
+            self.page = None  # pyright: ignore[reportAttributeAccessIssue]
+            browser_process = psutil.Process(self.browser._process_pid)  # noqa: SLF001 Private member accessed
            browser_children:list[psutil.Process] = browser_process.children()
            self.browser.stop()
            for p in browser_children:
                if p.is_running():
                    p.kill()  # terminate orphaned browser processes
-            self.browser = None  # pyright: ignore
+            self.browser = None  # pyright: ignore[reportAttributeAccessIssue]

    def get_compatible_browser(self) -> str:
        match platform.system():
@@ -236,15 +234,15 @@ class WebScrapingMixin:

            case "Windows":
                browser_paths = [
-                    os.environ.get("ProgramFiles", "C:\\Program Files") + r'\Microsoft\Edge\Application\msedge.exe',
-                    os.environ.get("ProgramFiles(x86)", "C:\\Program Files (x86)") + r'\Microsoft\Edge\Application\msedge.exe',
+                    os.environ.get("PROGRAMFILES", "C:\\Program Files") + r'\Microsoft\Edge\Application\msedge.exe',
+                    os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r'\Microsoft\Edge\Application\msedge.exe',

-                    os.environ["ProgramFiles"] + r'\Chromium\Application\chrome.exe',
-                    os.environ["ProgramFiles(x86)"] + r'\Chromium\Application\chrome.exe',
+                    os.environ["PROGRAMFILES"] + r'\Chromium\Application\chrome.exe',
+                    os.environ["PROGRAMFILES(X86)"] + r'\Chromium\Application\chrome.exe',
                    os.environ["LOCALAPPDATA"] + r'\Chromium\Application\chrome.exe',

-                    os.environ["ProgramFiles"] + r'\Chrome\Application\chrome.exe',
-                    os.environ["ProgramFiles(x86)"] + r'\Chrome\Application\chrome.exe',
+                    os.environ["PROGRAMFILES"] + r'\Chrome\Application\chrome.exe',
+                    os.environ["PROGRAMFILES(X86)"] + r'\Chrome\Application\chrome.exe',
                    os.environ["LOCALAPPDATA"] + r'\Chrome\Application\chrome.exe',

                    shutil.which("msedge.exe"),
@@ -277,7 +275,7 @@ class WebScrapingMixin:
            ex:Exception | None = None
            try:
                result_raw = condition()
-                result:T = (await result_raw) if inspect.isawaitable(result_raw) else result_raw
+                result:T = cast(T, await result_raw if inspect.isawaitable(result_raw) else result_raw)
                if result:
                    return result
            except Exception as ex1:
@@ -359,11 +357,11 @@ class WebScrapingMixin:
        _prev_jscode:str = getattr(self.__class__.web_execute, "_prev_jscode", "")
        if not (jscode == _prev_jscode or (jscode.startswith("window.scrollTo") and _prev_jscode.startswith("window.scrollTo"))):
            LOG.debug("web_execute(`%s`) = `%s`", jscode, result)
-        self.__class__.web_execute._prev_jscode = jscode  # type: ignore[attr-defined]  # pylint: disable=protected-access
+        self.__class__.web_execute._prev_jscode = jscode  # type: ignore[attr-defined]  # noqa: SLF001 Private member accessed

        return result

-    async def web_find(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> Element:
+    async def web_find(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float = 5) -> Element:
        """
        Locates an HTML element by the given selector type and value.

@@ -408,7 +406,7 @@ class WebScrapingMixin:

        raise AssertionError(_("Unsupported selector type: %s") % selector_type)

-    async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> list[Element]:
+    async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float = 5) -> list[Element]:
        """
        Locates an HTML element by ID.

@@ -460,7 +458,7 @@ class WebScrapingMixin:
        await self.web_sleep()
        return input_field

-    async def web_open(self, url:str, *, timeout:int | float = 15000, reload_if_already_open:bool = False) -> None:
+    async def web_open(self, url:str, *, timeout:int | float = 15_000, reload_if_already_open:bool = False) -> None:
        """
        :param url: url to open in browser
        :param timeout: timespan in seconds within the page needs to be loaded
@@ -475,7 +473,7 @@ class WebScrapingMixin:
        await self.web_await(lambda: self.web_execute("document.readyState == 'complete'"), timeout = timeout,
                timeout_error_message = f"Page did not finish loading within {timeout} seconds.")

-    async def web_text(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> str:
+    async def web_text(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float = 5) -> str:
        return str(await (await self.web_find(selector_type, selector_value, parent = parent, timeout = timeout)).apply("""
            function (elem) {
                let sel = window.getSelection()
@@ -489,10 +487,11 @@ class WebScrapingMixin:
            }
        """))

-    async def web_sleep(self, min_ms:int = 1000, max_ms:int = 2500) -> None:
-        duration = max_ms <= min_ms and min_ms or secrets.randbelow(max_ms - min_ms) + min_ms
-        LOG.log(loggers.INFO if duration > 1500 else loggers.DEBUG, " ... pausing for %d ms ...", duration)
-        await self.page.sleep(duration / 1000)
+    async def web_sleep(self, min_ms:int = 1_000, max_ms:int = 2_500) -> None:
+        duration = min_ms if max_ms <= min_ms else secrets.randbelow(max_ms - min_ms) + min_ms  # `and/or` form hit randbelow(<=0) when min_ms == 0
+        LOG.log(loggers.INFO if duration > 1_500 else loggers.DEBUG,  # noqa: PLR2004 Magic value used in comparison
+                " ... pausing for %d ms ...", duration)
+        await self.page.sleep(duration / 1_000)

    async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200,
            headers:dict[str, str] | None = None) -> dict[str, Any]:
@@ -524,7 +523,7 @@ class WebScrapingMixin:
        return response
    # pylint: enable=dangerous-default-value

-    async def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False) -> None:
+    async def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10_000, *, scroll_back_top: bool = False) -> None:
        """
        Smoothly scrolls the current web page down.

@@ -537,13 +536,13 @@ class WebScrapingMixin:
        while current_y_pos < bottom_y_pos:  # scroll in steps until bottom reached
            current_y_pos += scroll_length
            await self.web_execute(f'window.scrollTo(0, {current_y_pos})')  # scroll one step
-            time.sleep(scroll_length / scroll_speed)
+            await asyncio.sleep(scroll_length / scroll_speed)

        if scroll_back_top:  # scroll back to top in same style
            while current_y_pos > 0:
                current_y_pos -= scroll_length
                await self.web_execute(f'window.scrollTo(0, {current_y_pos})')
-                time.sleep(scroll_length / scroll_speed / 2)  # double speed
+                await asyncio.sleep(scroll_length / scroll_speed / 2)  # double speed

    async def web_select(self, selector_type:By, selector_value:str, selected_value:Any, timeout:int | float = 5) -> Element:
        """