improve type hints

2026-03-12 02:31:45 +01:00 · 2023-09-17 14:26:35 +02:00
parent 70b187260f
commit 38a76572a4
5 changed files with 41 additions and 26 deletions
--- a/kleinanzeigen_bot/init.py
+++ b/kleinanzeigen_bot/init.py
@@ -13,6 +13,7 @@ from overrides import overrides
 from ruamel.yaml import YAML
 from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, TimeoutException, WebDriverException
 from selenium.webdriver.common.by import By
+from selenium.webdriver.remote.webelement import WebElement
 from selenium.webdriver.support import expected_conditions as EC

 from . import utils, resources, extract  # pylint: disable=W0406
@@ -409,6 +410,8 @@ class KleinanzeigenBot(SeleniumMixin):
        self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
        csrf_token_elem = self.web_find(By.XPATH, "//meta[@name='_csrf']")
        csrf_token = csrf_token_elem.get_attribute("content")
+        if csrf_token is None:
+            raise AssertionError("Expected CSRF Token not found in HTML content!")

        if self.delete_ads_by_title:
            published_ads = json.loads(self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")["content"])["ads"]
@@ -589,14 +592,14 @@ class KleinanzeigenBot(SeleniumMixin):

        # extract the ad id from the URL's query parameter
        current_url_query_params = urllib.parse.parse_qs(urllib.parse.urlparse(self.webdriver.current_url).query)
-        ad_id = int(current_url_query_params.get("adId", None)[0])
+        ad_id = int(current_url_query_params.get("adId", [])[0])
        ad_cfg_orig["id"] = ad_id

        LOG.info(" -> SUCCESS: ad published with ID %s", ad_id)

        utils.save_dict(ad_file, ad_cfg_orig)

-    def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]):
+    def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]) -> None:
        # click on something to trigger automatic category detection
        self.web_click(By.ID, "pstad-descrptn")

@@ -683,7 +686,7 @@ class KleinanzeigenBot(SeleniumMixin):
        except NoSuchElementException as ex:
            LOG.debug(ex, exc_info = True)

-    def __upload_images(self, ad_cfg: dict[str, Any]):
+    def __upload_images(self, ad_cfg: dict[str, Any]) -> None:
        LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
        image_upload = self.web_find(By.XPATH, "//input[@type='file']")

@@ -791,7 +794,7 @@ class KleinanzeigenBot(SeleniumMixin):
            n_images = 1

            # determine number of images (1 ... N)
-            next_button = None
+            next_button:WebElement
            try:  # check if multiple images given
                # edge case: 'Virtueller Rundgang' div could be found by same CSS class
                element_candidates = image_box.find_elements(By.CSS_SELECTOR, '.galleryimage--info')
@@ -810,6 +813,8 @@ class KleinanzeigenBot(SeleniumMixin):
            dl_counter = 0
            while img_nr <= n_images:  # scrolling + downloading
                current_img_url = img_element.get_attribute('src')  # URL of the image
+                if current_img_url is None:
+                    continue
                file_ending = current_img_url.split('.')[-1].lower()
                img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending
                if current_img_url.startswith('https'):  # verify https (for Bandit linter)
@@ -836,7 +841,7 @@ class KleinanzeigenBot(SeleniumMixin):

        return img_paths

-    def extract_ad_page_info(self, directory:str, id_:int) -> dict:
+    def extract_ad_page_info(self, directory:str, id_:int) -> dict[str, Any]:
        """
        Extracts all necessary information from an ad's page.

@@ -844,7 +849,7 @@ class KleinanzeigenBot(SeleniumMixin):
        :param id_: the ad ID, already extracted by a calling function
        :return: a dictionary with the keys as given in an ad YAML, and their respective values
        """
-        info = {'active': True}
+        info:dict[str, Any] = {'active': True}

        # extract basic info
        if 's-anzeige' in self.webdriver.current_url:
@@ -898,7 +903,7 @@ class KleinanzeigenBot(SeleniumMixin):

        return info

-    def download_ad_page(self, id_:int):
+    def download_ad_page(self, id_:int) -> None:
        """
        Downloads an ad to a specific location, specified by config and ad ID.
        NOTE: Requires that the driver session currently is on the ad page.
@@ -925,7 +930,7 @@ class KleinanzeigenBot(SeleniumMixin):
        ad_file_path = new_base_dir + '/' + f'ad_{id_}.yaml'
        utils.save_dict(ad_file_path, info)

-    def start_download_routine(self):
+    def start_download_routine(self) -> None:
        """
        Determines which download mode was chosen with the arguments, and calls the specified download routine.
        This downloads either all, only unsaved (new), or specific ads given by ID.
--- a/kleinanzeigen_bot/extract.py
+++ b/kleinanzeigen_bot/extract.py
@@ -4,11 +4,12 @@ SPDX-License-Identifier: AGPL-3.0-or-later
 """
 import json
 from decimal import DecimalException
+from typing import Any

-import selenium.webdriver.support.expected_conditions as EC
 from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.by import By
 from selenium.webdriver.remote.webdriver import WebDriver
+import selenium.webdriver.support.expected_conditions as EC

 from .selenium_mixin import SeleniumMixin
 from .utils import parse_decimal, pause
@@ -39,7 +40,7 @@ class AdExtractor(SeleniumMixin):

        return category

-    def extract_special_attributes_from_ad_page(self) -> dict:
+    def extract_special_attributes_from_ad_page(self) -> dict[str, Any]:
        """
        Extracts the special attributes from an ad page.

@@ -56,7 +57,7 @@ class AdExtractor(SeleniumMixin):
        special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')}
        return special_attributes

-    def extract_pricing_info_from_ad_page(self) -> (float | None, str):
+    def extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
        """
        Extracts the pricing information (price and pricing type) from an ad page.

@@ -85,7 +86,7 @@ class AdExtractor(SeleniumMixin):
        except NoSuchElementException:  # no 'commercial' ad, has no pricing box etc.
            return None, 'NOT_APPLICABLE'

-    def extract_shipping_info_from_ad_page(self) -> (str, float | None, list | None):
+    def extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
        """
        Extracts shipping information from an ad page.

@@ -102,9 +103,8 @@ class AdExtractor(SeleniumMixin):
                ship_type = 'SHIPPING'
            elif '€' in shipping_text:
                shipping_price_parts = shipping_text.split(' ')
-                shipping_price = float(parse_decimal(shipping_price_parts[-2]))
                ship_type = 'SHIPPING'
-                ship_costs = shipping_price
+                ship_costs = float(parse_decimal(shipping_price_parts[-2]))

                # extract shipping options
                # It is only possible to extract the cheapest shipping option,
@@ -140,13 +140,13 @@ class AdExtractor(SeleniumMixin):
        except NoSuchElementException:
            return None

-    def extract_contact_from_ad_page(self) -> dict:
+    def extract_contact_from_ad_page(self) -> dict[str, (str | None)]:
        """
        Processes the address part involving street (optional), zip code + city, and phone number (optional).

        :return: a dictionary containing the address parts with their corresponding values
        """
-        contact = {}
+        contact:dict[str, (str | None)] = {}
        address_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-locality')
        address_text = address_element.text.strip()
        # format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt
--- a/kleinanzeigen_bot/selenium_mixin.py
+++ b/kleinanzeigen_bot/selenium_mixin.py
@@ -4,7 +4,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later
 """
 import logging, os, shutil, time
 from collections.abc import Callable, Iterable
-from typing import Any, Final
+from typing import Any, Final, TypeVar

 from selenium import webdriver
 from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
@@ -16,6 +16,7 @@ from selenium.webdriver.edge.service import Service as EdgeService, DEFAULT_EXEC
 from selenium.webdriver.remote.webdriver import WebDriver
 from selenium.webdriver.remote.webelement import WebElement
 from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.expected_conditions import AnyDriver
 from selenium.webdriver.support.ui import Select, WebDriverWait
 import selenium_stealth
 import webdriver_manager.core
@@ -39,13 +40,16 @@ class BrowserConfig:
        self.profile_name:str = ""


+CHROMIUM_OPTIONS = TypeVar('CHROMIUM_OPTIONS', bound = ChromiumOptions)  # pylint: disable=invalid-name
+
+
 class SeleniumMixin:

    def __init__(self) -> None:
        self.browser_config:Final[BrowserConfig] = BrowserConfig()
        self.webdriver:WebDriver = None

-    def _init_browser_options(self, browser_options:ChromiumOptions) -> ChromiumOptions:
+    def _init_browser_options(self, browser_options:CHROMIUM_OPTIONS) -> CHROMIUM_OPTIONS:
        if self.browser_config.use_private_window:
            if isinstance(browser_options, webdriver.EdgeOptions):
                browser_options.add_argument("-inprivate")
@@ -123,6 +127,7 @@ class SeleniumMixin:
            webdriver_manager.core.driver.get_browser_version_from_os = lambda _: chrome_major_version

            # download and install matching chrome driver
+            webdriver_mgr: DriverManager
            if chrome_type == ChromeType.MSEDGE:
                webdriver_mgr = EdgeChromiumDriverManager(cache_valid_range = 14)
                webdriver_path = webdriver_mgr.install()
@@ -148,7 +153,7 @@ class SeleniumMixin:

        LOG.info("New WebDriver session is: %s %s", self.webdriver.session_id, self.webdriver.command_executor._url)  # pylint: disable=protected-access

-    def get_browser_version(self, executable_path: str) -> tuple[ChromeType, str]:
+    def get_browser_version(self, executable_path: str) -> tuple[ChromeType, str]:  # -> [ chrome_type, chrome_version ]
        match webdriver_manager.core.utils.os_name():
            case OSType.WIN:
                import win32api  # pylint: disable=import-outside-toplevel,import-error
@@ -187,7 +192,7 @@ class SeleniumMixin:
            webdriver_manager.core.utils.read_version_from_cmd(version_cmd, webdriver_manager.core.utils.PATTERN[ChromeType.GOOGLE])
        )

-    def find_compatible_browser(self) -> tuple[str, ChromeType, str] | None:
+    def find_compatible_browser(self) -> tuple[str, ChromeType, str] | None:  # -> [ browser_path, chrome_type, chrome_version ]
        match webdriver_manager.core.utils.os_name():
            case OSType.LINUX:
                browser_paths = [
@@ -233,7 +238,7 @@ class SeleniumMixin:
        LOG.warning("Installed browser could not be detected")
        return None

-    def web_await(self, condition: Callable[[WebDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T:
+    def web_await(self, condition: Callable[[AnyDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T:
        """
        Blocks/waits until the given condition is met.

@@ -305,6 +310,7 @@ class SeleniumMixin:
        input_field.clear()
        input_field.send_keys(text)
        pause()
+        return input_field

    def web_open(self, url:str, timeout:float = 15, reload_if_already_open:bool = False) -> None:
        """
@@ -349,7 +355,7 @@ class SeleniumMixin:
        return response
    # pylint: enable=dangerous-default-value

-    def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False):
+    def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False) -> None:
        """
        Smoothly scrolls the current web page down.

--- a/kleinanzeigen_bot/utils.py
+++ b/kleinanzeigen_bot/utils.py
@@ -169,10 +169,10 @@ def pluralize(word:str, count:int | Sized, prefix:bool = True) -> str:
    'fields'
    """
    if not hasattr(pluralize, "inflect"):
-        pluralize.inflect = inflect.engine()
+        pluralize.inflect = inflect.engine()  # type: ignore[attr-defined] # mypy
    if isinstance(count, Sized):
        count = len(count)
-    plural:str = pluralize.inflect.plural_noun(word, count)
+    plural:str = pluralize.inflect.plural_noun(word, count)  # type: ignore[attr-defined] # mypy
    if prefix:
        return f"{count} {plural}"
    return plural
@@ -200,7 +200,7 @@ def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any]
        return None

    with open(filepath, encoding = "utf-8") as file:
-        return json.load(file) if filepath.endswith(".json") else YAML().load(file)
+        return json.load(file) if filepath.endswith(".json") else YAML().load(file)  # type: ignore[no-any-return] # mypy


 def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "") -> dict[str, Any]:
@@ -214,7 +214,7 @@ def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "
        raise ValueError(f'Unsupported file type. The file name "{filename}" must end with *.json, *.yaml, or *.yml')

    content = get_resource_as_string(module, filename)
-    return json.loads(content) if filename.endswith(".json") else YAML().load(content)
+    return json.loads(content) if filename.endswith(".json") else YAML().load(content)  # type: ignore[no-any-return] # mypy


 def save_dict(filepath:str, content:dict[str, Any]) -> None:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,13 +104,17 @@ aggressive = 3
 # https://github.com/python/mypy
 #####################
 [tool.mypy]
+# https://mypy.readthedocs.io/en/stable/config_file.html
+#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
 python_version = "3.10"
 strict = true
+disallow_untyped_calls = false
 disallow_untyped_defs = true
 disallow_incomplete_defs = true
 ignore_missing_imports = true
 show_error_codes = true
 warn_unused_ignores = true
+verbosity = 0


 #####################