From 38a76572a4a775db35d628f816e379ab41326a96 Mon Sep 17 00:00:00 2001
From: sebthom <sebthom@users.noreply.github.com>
Date: Sun, 17 Sep 2023 14:26:35 +0200
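Subject: [PATCH] improve type hints

Add missing return annotations (`-> None`, `dict[str, Any]`,
`tuple[...]`) in kleinanzeigen_bot/__init__.py, extract.py and
selenium_mixin.py, and replace the parenthesized tuple return
annotations in extract.py with `tuple[...]`.

selenium_mixin.py:
- introduce the CHROMIUM_OPTIONS TypeVar (bound to ChromiumOptions) and
  use it for the parameter and return type of _init_browser_options
- type the web_await condition as Callable[[AnyDriver], T]

utils.py: add `# type: ignore[...]` comments for mypy.

pyproject.toml: set `disallow_untyped_calls = false` and
`verbosity = 0` in [tool.mypy].

This patch is not annotation-only; it also changes runtime behaviour:
- raise AssertionError when the '_csrf' meta element has no 'content'
- `continue` in the image download loop when 'src' is None
- use [] instead of None as the default for the 'adId' query parameter
- next_button is now only declared, no longer initialised to None
- the text-input helper in selenium_mixin.py now returns input_field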

---
 kleinanzeigen_bot/__init__.py       | 21 +++++++++++++--------
 kleinanzeigen_bot/extract.py        | 16 ++++++++--------
 kleinanzeigen_bot/selenium_mixin.py | 18 ++++++++++++------
 kleinanzeigen_bot/utils.py          |  8 ++++----
 pyproject.toml                      |  4 ++++
 5 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/kleinanzeigen_bot/__init__.py b/kleinanzeigen_bot/__init__.py
index 57fd51e..b5ebfe5 100644
--- a/kleinanzeigen_bot/__init__.py
+++ b/kleinanzeigen_bot/__init__.py
@@ -13,6 +13,7 @@ from overrides import overrides
 from ruamel.yaml import YAML
 from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, TimeoutException, WebDriverException
 from selenium.webdriver.common.by import By
+from selenium.webdriver.remote.webelement import WebElement
 from selenium.webdriver.support import expected_conditions as EC
 
 from . import utils, resources, extract  # pylint: disable=W0406
@@ -409,6 +410,8 @@ class KleinanzeigenBot(SeleniumMixin):
         self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
         csrf_token_elem = self.web_find(By.XPATH, "//meta[@name='_csrf']")
         csrf_token = csrf_token_elem.get_attribute("content")
+        if csrf_token is None:
+            raise AssertionError("Expected CSRF Token not found in HTML content!")
 
         if self.delete_ads_by_title:
             published_ads = json.loads(self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")["content"])["ads"]
@@ -589,14 +592,14 @@ class KleinanzeigenBot(SeleniumMixin):
 
         # extract the ad id from the URL's query parameter
         current_url_query_params = urllib.parse.parse_qs(urllib.parse.urlparse(self.webdriver.current_url).query)
-        ad_id = int(current_url_query_params.get("adId", None)[0])
+        ad_id = int(current_url_query_params.get("adId", [])[0])
         ad_cfg_orig["id"] = ad_id
 
         LOG.info(" -> SUCCESS: ad published with ID %s", ad_id)
 
         utils.save_dict(ad_file, ad_cfg_orig)
 
-    def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]):
+    def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]) -> None:
         # click on something to trigger automatic category detection
         self.web_click(By.ID, "pstad-descrptn")
 
@@ -683,7 +686,7 @@ class KleinanzeigenBot(SeleniumMixin):
         except NoSuchElementException as ex:
             LOG.debug(ex, exc_info = True)
 
-    def __upload_images(self, ad_cfg: dict[str, Any]):
+    def __upload_images(self, ad_cfg: dict[str, Any]) -> None:
         LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
         image_upload = self.web_find(By.XPATH, "//input[@type='file']")
 
@@ -791,7 +794,7 @@ class KleinanzeigenBot(SeleniumMixin):
             n_images = 1
 
             # determine number of images (1 ... N)
-            next_button = None
+            next_button:WebElement
             try:  # check if multiple images given
                 # edge case: 'Virtueller Rundgang' div could be found by same CSS class
                 element_candidates = image_box.find_elements(By.CSS_SELECTOR, '.galleryimage--info')
@@ -810,6 +813,8 @@ class KleinanzeigenBot(SeleniumMixin):
             dl_counter = 0
             while img_nr <= n_images:  # scrolling + downloading
                 current_img_url = img_element.get_attribute('src')  # URL of the image
+                if current_img_url is None:
+                    continue
                 file_ending = current_img_url.split('.')[-1].lower()
                 img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending
                 if current_img_url.startswith('https'):  # verify https (for Bandit linter)
@@ -836,7 +841,7 @@ class KleinanzeigenBot(SeleniumMixin):
 
         return img_paths
 
-    def extract_ad_page_info(self, directory:str, id_:int) -> dict:
+    def extract_ad_page_info(self, directory:str, id_:int) -> dict[str, Any]:
         """
         Extracts all necessary information from an ad´s page.
 
@@ -844,7 +849,7 @@ class KleinanzeigenBot(SeleniumMixin):
         :param id_: the ad ID, already extracted by a calling function
         :return: a dictionary with the keys as given in an ad YAML, and their respective values
         """
-        info = {'active': True}
+        info:dict[str, Any] = {'active': True}
 
         # extract basic info
         if 's-anzeige' in self.webdriver.current_url:
@@ -898,7 +903,7 @@ class KleinanzeigenBot(SeleniumMixin):
 
         return info
 
-    def download_ad_page(self, id_:int):
+    def download_ad_page(self, id_:int) -> None:
         """
         Downloads an ad to a specific location, specified by config and ad ID.
         NOTE: Requires that the driver session currently is on the ad page.
@@ -925,7 +930,7 @@ class KleinanzeigenBot(SeleniumMixin):
         ad_file_path = new_base_dir + '/' + f'ad_{id_}.yaml'
         utils.save_dict(ad_file_path, info)
 
-    def start_download_routine(self):
+    def start_download_routine(self) -> None:
         """
         Determines which download mode was chosen with the arguments, and calls the specified download routine.
         This downloads either all, only unsaved (new), or specific ads given by ID.
diff --git a/kleinanzeigen_bot/extract.py b/kleinanzeigen_bot/extract.py
index 32d6a43..1c40422 100644
--- a/kleinanzeigen_bot/extract.py
+++ b/kleinanzeigen_bot/extract.py
@@ -4,11 +4,12 @@ SPDX-License-Identifier: AGPL-3.0-or-later
 """
 import json
 from decimal import DecimalException
+from typing import Any
 
-import selenium.webdriver.support.expected_conditions as EC
 from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.by import By
 from selenium.webdriver.remote.webdriver import WebDriver
+import selenium.webdriver.support.expected_conditions as EC
 
 from .selenium_mixin import SeleniumMixin
 from .utils import parse_decimal, pause
@@ -39,7 +40,7 @@ class AdExtractor(SeleniumMixin):
 
         return category
 
-    def extract_special_attributes_from_ad_page(self) -> dict:
+    def extract_special_attributes_from_ad_page(self) -> dict[str, Any]:
         """
         Extracts the special attributes from an ad page.
 
@@ -56,7 +57,7 @@ class AdExtractor(SeleniumMixin):
         special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')}
         return special_attributes
 
-    def extract_pricing_info_from_ad_page(self) -> (float | None, str):
+    def extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
         """
         Extracts the pricing information (price and pricing type) from an ad page.
 
@@ -85,7 +86,7 @@ class AdExtractor(SeleniumMixin):
         except NoSuchElementException:  # no 'commercial' ad, has no pricing box etc.
             return None, 'NOT_APPLICABLE'
 
-    def extract_shipping_info_from_ad_page(self) -> (str, float | None, list | None):
+    def extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
         """
         Extracts shipping information from an ad page.
 
@@ -102,9 +103,8 @@ class AdExtractor(SeleniumMixin):
                 ship_type = 'SHIPPING'
             elif '€' in shipping_text:
                 shipping_price_parts = shipping_text.split(' ')
-                shipping_price = float(parse_decimal(shipping_price_parts[-2]))
                 ship_type = 'SHIPPING'
-                ship_costs = shipping_price
+                ship_costs = float(parse_decimal(shipping_price_parts[-2]))
 
                 # extract shipping options
                 # It is only possible the extract the cheapest shipping option,
@@ -140,13 +140,13 @@ class AdExtractor(SeleniumMixin):
         except NoSuchElementException:
             return None
 
-    def extract_contact_from_ad_page(self) -> dict:
+    def extract_contact_from_ad_page(self) -> dict[str, (str | None)]:
         """
         Processes the address part involving street (optional), zip code + city, and phone number (optional).
 
         :return: a dictionary containing the address parts with their corresponding values
         """
-        contact = {}
+        contact:dict[str, (str | None)] = {}
         address_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-locality')
         address_text = address_element.text.strip()
         # format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt
diff --git a/kleinanzeigen_bot/selenium_mixin.py b/kleinanzeigen_bot/selenium_mixin.py
index 3b0f904..ffc4240 100644
--- a/kleinanzeigen_bot/selenium_mixin.py
+++ b/kleinanzeigen_bot/selenium_mixin.py
@@ -4,7 +4,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later
 """
 import logging, os, shutil, time
 from collections.abc import Callable, Iterable
-from typing import Any, Final
+from typing import Any, Final, TypeVar
 
 from selenium import webdriver
 from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
@@ -16,6 +16,7 @@ from selenium.webdriver.edge.service import Service as EdgeService, DEFAULT_EXEC
 from selenium.webdriver.remote.webdriver import WebDriver
 from selenium.webdriver.remote.webelement import WebElement
 from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.expected_conditions import AnyDriver
 from selenium.webdriver.support.ui import Select, WebDriverWait
 import selenium_stealth
 import webdriver_manager.core
@@ -39,13 +40,16 @@ class BrowserConfig:
         self.profile_name:str = ""
 
 
+CHROMIUM_OPTIONS = TypeVar('CHROMIUM_OPTIONS', bound = ChromiumOptions)  # pylint: disable=invalid-name
+
+
 class SeleniumMixin:
 
     def __init__(self) -> None:
         self.browser_config:Final[BrowserConfig] = BrowserConfig()
         self.webdriver:WebDriver = None
 
-    def _init_browser_options(self, browser_options:ChromiumOptions) -> ChromiumOptions:
+    def _init_browser_options(self, browser_options:CHROMIUM_OPTIONS) -> CHROMIUM_OPTIONS:
         if self.browser_config.use_private_window:
             if isinstance(browser_options, webdriver.EdgeOptions):
                 browser_options.add_argument("-inprivate")
@@ -123,6 +127,7 @@ class SeleniumMixin:
             webdriver_manager.core.driver.get_browser_version_from_os = lambda _: chrome_major_version
 
             # download and install matching chrome driver
+            webdriver_mgr: DriverManager
             if chrome_type == ChromeType.MSEDGE:
                 webdriver_mgr = EdgeChromiumDriverManager(cache_valid_range = 14)
                 webdriver_path = webdriver_mgr.install()
@@ -148,7 +153,7 @@ class SeleniumMixin:
 
         LOG.info("New WebDriver session is: %s %s", self.webdriver.session_id, self.webdriver.command_executor._url)  # pylint: disable=protected-access
 
-    def get_browser_version(self, executable_path: str) -> tuple[ChromeType, str]:
+    def get_browser_version(self, executable_path: str) -> tuple[ChromeType, str]:  # returns (chrome_type, chrome_version)
         match webdriver_manager.core.utils.os_name():
             case OSType.WIN:
                 import win32api  # pylint: disable=import-outside-toplevel,import-error
@@ -187,7 +192,7 @@ class SeleniumMixin:
             webdriver_manager.core.utils.read_version_from_cmd(version_cmd, webdriver_manager.core.utils.PATTERN[ChromeType.GOOGLE])
         )
 
-    def find_compatible_browser(self) -> tuple[str, ChromeType, str] | None:
+    def find_compatible_browser(self) -> tuple[str, ChromeType, str] | None:  # returns (browser_path, chrome_type, chrome_version) or None
         match webdriver_manager.core.utils.os_name():
             case OSType.LINUX:
                 browser_paths = [
@@ -233,7 +238,7 @@ class SeleniumMixin:
         LOG.warning("Installed browser could not be detected")
         return None
 
-    def web_await(self, condition: Callable[[WebDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T:
+    def web_await(self, condition: Callable[[AnyDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T:
         """
         Blocks/waits until the given condition is met.
 
@@ -305,6 +310,7 @@ class SeleniumMixin:
         input_field.clear()
         input_field.send_keys(text)
         pause()
+        return input_field
 
     def web_open(self, url:str, timeout:float = 15, reload_if_already_open:bool = False) -> None:
         """
@@ -349,7 +355,7 @@ class SeleniumMixin:
         return response
     # pylint: enable=dangerous-default-value
 
-    def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False):
+    def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False) -> None:
         """
         Smoothly scrolls the current web page down.
 
diff --git a/kleinanzeigen_bot/utils.py b/kleinanzeigen_bot/utils.py
index 68f0ebf..d5c5a90 100644
--- a/kleinanzeigen_bot/utils.py
+++ b/kleinanzeigen_bot/utils.py
@@ -169,10 +169,10 @@ def pluralize(word:str, count:int | Sized, prefix:bool = True) -> str:
     'fields'
     """
     if not hasattr(pluralize, "inflect"):
-        pluralize.inflect = inflect.engine()
+        pluralize.inflect = inflect.engine()  # type: ignore[attr-defined] # mypy
     if isinstance(count, Sized):
         count = len(count)
-    plural:str = pluralize.inflect.plural_noun(word, count)
+    plural:str = pluralize.inflect.plural_noun(word, count)  # type: ignore[attr-defined] # mypy
     if prefix:
         return f"{count} {plural}"
     return plural
@@ -200,7 +200,7 @@ def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any]
         return None
 
     with open(filepath, encoding = "utf-8") as file:
-        return json.load(file) if filepath.endswith(".json") else YAML().load(file)
+        return json.load(file) if filepath.endswith(".json") else YAML().load(file)  # type: ignore[no-any-return] # mypy
 
 
 def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "") -> dict[str, Any]:
@@ -214,7 +214,7 @@ def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "
         raise ValueError(f'Unsupported file type. The file name "{filename}" must end with *.json, *.yaml, or *.yml')
 
     content = get_resource_as_string(module, filename)
-    return json.loads(content) if filename.endswith(".json") else YAML().load(content)
+    return json.loads(content) if filename.endswith(".json") else YAML().load(content)  # type: ignore[no-any-return] # mypy
 
 
 def save_dict(filepath:str, content:dict[str, Any]) -> None:
diff --git a/pyproject.toml b/pyproject.toml
index c894364..03244c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,13 +104,17 @@ aggressive = 3
 # https://github.com/python/mypy
 #####################
 [tool.mypy]
+# https://mypy.readthedocs.io/en/stable/config_file.html
+#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
 python_version = "3.10"
 strict = true
+disallow_untyped_calls = false
 disallow_untyped_defs = true
 disallow_incomplete_defs = true
 ignore_missing_imports = true
 show_error_codes = true
 warn_unused_ignores = true
+verbosity = 0
 
 
 #####################