From 38a76572a4a775db35d628f816e379ab41326a96 Mon Sep 17 00:00:00 2001 From: sebthom Date: Sun, 17 Sep 2023 14:26:35 +0200 Subject: [PATCH] improve type hints --- kleinanzeigen_bot/__init__.py | 21 +++++++++++++-------- kleinanzeigen_bot/extract.py | 16 ++++++++-------- kleinanzeigen_bot/selenium_mixin.py | 18 ++++++++++++------ kleinanzeigen_bot/utils.py | 8 ++++---- pyproject.toml | 4 ++++ 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/kleinanzeigen_bot/__init__.py b/kleinanzeigen_bot/__init__.py index 57fd51e..b5ebfe5 100644 --- a/kleinanzeigen_bot/__init__.py +++ b/kleinanzeigen_bot/__init__.py @@ -13,6 +13,7 @@ from overrides import overrides from ruamel.yaml import YAML from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, TimeoutException, WebDriverException from selenium.webdriver.common.by import By +from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support import expected_conditions as EC from . import utils, resources, extract # pylint: disable=W0406 @@ -409,6 +410,8 @@ class KleinanzeigenBot(SeleniumMixin): self.web_open(f"{self.root_url}/m-meine-anzeigen.html") csrf_token_elem = self.web_find(By.XPATH, "//meta[@name='_csrf']") csrf_token = csrf_token_elem.get_attribute("content") + if csrf_token is None: + raise AssertionError("Expected CSRF Token not found in HTML content!") if self.delete_ads_by_title: published_ads = json.loads(self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")["content"])["ads"] @@ -589,14 +592,14 @@ class KleinanzeigenBot(SeleniumMixin): # extract the ad id from the URL's query parameter current_url_query_params = urllib.parse.parse_qs(urllib.parse.urlparse(self.webdriver.current_url).query) - ad_id = int(current_url_query_params.get("adId", None)[0]) + ad_id = int(current_url_query_params.get("adId", [])[0]) ad_cfg_orig["id"] = ad_id LOG.info(" -> SUCCESS: ad published with ID %s", ad_id) utils.save_dict(ad_file, ad_cfg_orig) - def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]): + def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]) -> None: # click on something to trigger automatic category detection self.web_click(By.ID, "pstad-descrptn") @@ -683,7 +686,7 @@ class KleinanzeigenBot(SeleniumMixin): except NoSuchElementException as ex: LOG.debug(ex, exc_info = True) - def __upload_images(self, ad_cfg: dict[str, Any]): + def __upload_images(self, ad_cfg: dict[str, Any]) -> None: LOG.info(" -> found %s", pluralize("image", ad_cfg["images"])) image_upload = self.web_find(By.XPATH, "//input[@type='file']") @@ -791,7 +794,7 @@ class KleinanzeigenBot(SeleniumMixin): n_images = 1 # determine number of images (1 ... N) - next_button = None + next_button:WebElement try: # check if multiple images given # edge case: 'Virtueller Rundgang' div could be found by same CSS class element_candidates = image_box.find_elements(By.CSS_SELECTOR, '.galleryimage--info') @@ -810,6 +813,8 @@ class KleinanzeigenBot(SeleniumMixin): dl_counter = 0 while img_nr <= n_images: # scrolling + downloading current_img_url = img_element.get_attribute('src') # URL of the image + if current_img_url is None: + continue file_ending = current_img_url.split('.')[-1].lower() img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending if current_img_url.startswith('https'): # verify https (for Bandit linter) @@ -836,7 +841,7 @@ class KleinanzeigenBot(SeleniumMixin): return img_paths - def extract_ad_page_info(self, directory:str, id_:int) -> dict: + def extract_ad_page_info(self, directory:str, id_:int) -> dict[str, Any]: """ Extracts all necessary information from an ad´s page. @@ -844,7 +849,7 @@ class KleinanzeigenBot(SeleniumMixin): :param id_: the ad ID, already extracted by a calling function :return: a dictionary with the keys as given in an ad YAML, and their respective values """ - info = {'active': True} + info:dict[str, Any] = {'active': True} # extract basic info if 's-anzeige' in self.webdriver.current_url: @@ -898,7 +903,7 @@ class KleinanzeigenBot(SeleniumMixin): return info - def download_ad_page(self, id_:int): + def download_ad_page(self, id_:int) -> None: """ Downloads an ad to a specific location, specified by config and ad ID. NOTE: Requires that the driver session currently is on the ad page. @@ -925,7 +930,7 @@ class KleinanzeigenBot(SeleniumMixin): ad_file_path = new_base_dir + '/' + f'ad_{id_}.yaml' utils.save_dict(ad_file_path, info) - def start_download_routine(self): + def start_download_routine(self) -> None: """ Determines which download mode was chosen with the arguments, and calls the specified download routine. This downloads either all, only unsaved (new), or specific ads given by ID. diff --git a/kleinanzeigen_bot/extract.py b/kleinanzeigen_bot/extract.py index 32d6a43..1c40422 100644 --- a/kleinanzeigen_bot/extract.py +++ b/kleinanzeigen_bot/extract.py @@ -4,11 +4,12 @@ SPDX-License-Identifier: AGPL-3.0-or-later """ import json from decimal import DecimalException +from typing import Any -import selenium.webdriver.support.expected_conditions as EC from selenium.common.exceptions import NoSuchElementException from selenium.webdriver.common.by import By from selenium.webdriver.remote.webdriver import WebDriver +import selenium.webdriver.support.expected_conditions as EC from .selenium_mixin import SeleniumMixin from .utils import parse_decimal, pause @@ -39,7 +40,7 @@ class AdExtractor(SeleniumMixin): return category - def extract_special_attributes_from_ad_page(self) -> dict: + def extract_special_attributes_from_ad_page(self) -> dict[str, Any]: """ Extracts the special attributes from an ad page. @@ -56,7 +57,7 @@ class AdExtractor(SeleniumMixin): special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')} return special_attributes - def extract_pricing_info_from_ad_page(self) -> (float | None, str): + def extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]: """ Extracts the pricing information (price and pricing type) from an ad page. @@ -85,7 +86,7 @@ class AdExtractor(SeleniumMixin): except NoSuchElementException: # no 'commercial' ad, has no pricing box etc. return None, 'NOT_APPLICABLE' - def extract_shipping_info_from_ad_page(self) -> (str, float | None, list | None): + def extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]: """ Extracts shipping information from an ad page. @@ -102,9 +103,8 @@ class AdExtractor(SeleniumMixin): ship_type = 'SHIPPING' elif '€' in shipping_text: shipping_price_parts = shipping_text.split(' ') - shipping_price = float(parse_decimal(shipping_price_parts[-2])) ship_type = 'SHIPPING' - ship_costs = shipping_price + ship_costs = float(parse_decimal(shipping_price_parts[-2])) # extract shipping options # It is only possible the extract the cheapest shipping option, @@ -140,13 +140,13 @@ class AdExtractor(SeleniumMixin): except NoSuchElementException: return None - def extract_contact_from_ad_page(self) -> dict: + def extract_contact_from_ad_page(self) -> dict[str, (str | None)]: """ Processes the address part involving street (optional), zip code + city, and phone number (optional). :return: a dictionary containing the address parts with their corresponding values """ - contact = {} + contact:dict[str, (str | None)] = {} address_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-locality') address_text = address_element.text.strip() # format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt diff --git a/kleinanzeigen_bot/selenium_mixin.py b/kleinanzeigen_bot/selenium_mixin.py index 3b0f904..ffc4240 100644 --- a/kleinanzeigen_bot/selenium_mixin.py +++ b/kleinanzeigen_bot/selenium_mixin.py @@ -4,7 +4,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later """ import logging, os, shutil, time from collections.abc import Callable, Iterable -from typing import Any, Final +from typing import Any, Final, TypeVar from selenium import webdriver from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException @@ -16,6 +16,7 @@ from selenium.webdriver.edge.service import Service as EdgeService, DEFAULT_EXEC from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.expected_conditions import AnyDriver from selenium.webdriver.support.ui import Select, WebDriverWait import selenium_stealth import webdriver_manager.core @@ -39,13 +40,16 @@ class BrowserConfig: self.profile_name:str = "" +CHROMIUM_OPTIONS = TypeVar('CHROMIUM_OPTIONS', bound = ChromiumOptions) # pylint: disable=invalid-name + + class SeleniumMixin: def __init__(self) -> None: self.browser_config:Final[BrowserConfig] = BrowserConfig() self.webdriver:WebDriver = None - def _init_browser_options(self, browser_options:ChromiumOptions) -> ChromiumOptions: + def _init_browser_options(self, browser_options:CHROMIUM_OPTIONS) -> CHROMIUM_OPTIONS: if self.browser_config.use_private_window: if isinstance(browser_options, webdriver.EdgeOptions): browser_options.add_argument("-inprivate") @@ -123,6 +127,7 @@ class SeleniumMixin: webdriver_manager.core.driver.get_browser_version_from_os = lambda _: chrome_major_version # download and install matching chrome driver + webdriver_mgr: DriverManager if chrome_type == ChromeType.MSEDGE: webdriver_mgr = EdgeChromiumDriverManager(cache_valid_range = 14) webdriver_path = webdriver_mgr.install() @@ -148,7 +153,7 @@ class SeleniumMixin: LOG.info("New WebDriver session is: %s %s", self.webdriver.session_id, self.webdriver.command_executor._url) # pylint: disable=protected-access - def get_browser_version(self, executable_path: str) -> tuple[ChromeType, str]: + def get_browser_version(self, executable_path: str) -> tuple[ChromeType, str]: # -> [ chrome_type, chrome_version ] match webdriver_manager.core.utils.os_name(): case OSType.WIN: import win32api # pylint: disable=import-outside-toplevel,import-error @@ -187,7 +192,7 @@ class SeleniumMixin: webdriver_manager.core.utils.read_version_from_cmd(version_cmd, webdriver_manager.core.utils.PATTERN[ChromeType.GOOGLE]) ) - def find_compatible_browser(self) -> tuple[str, ChromeType, str] | None: + def find_compatible_browser(self) -> tuple[str, ChromeType, str] | None: # -> [ browser_path, chrome_type, chrome_version ] match webdriver_manager.core.utils.os_name(): case OSType.LINUX: browser_paths = [ @@ -233,7 +238,7 @@ class SeleniumMixin: LOG.warning("Installed browser could not be detected") return None - def web_await(self, condition: Callable[[WebDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T: + def web_await(self, condition: Callable[[AnyDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T: """ Blocks/waits until the given condition is met. @@ -305,6 +310,7 @@ class SeleniumMixin: input_field.clear() input_field.send_keys(text) pause() + return input_field def web_open(self, url:str, timeout:float = 15, reload_if_already_open:bool = False) -> None: """ @@ -349,7 +355,7 @@ class SeleniumMixin: return response # pylint: enable=dangerous-default-value - def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False): + def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False) -> None: """ Smoothly scrolls the current web page down. diff --git a/kleinanzeigen_bot/utils.py b/kleinanzeigen_bot/utils.py index 68f0ebf..d5c5a90 100644 --- a/kleinanzeigen_bot/utils.py +++ b/kleinanzeigen_bot/utils.py @@ -169,10 +169,10 @@ def pluralize(word:str, count:int | Sized, prefix:bool = True) -> str: 'fields' """ if not hasattr(pluralize, "inflect"): - pluralize.inflect = inflect.engine() + pluralize.inflect = inflect.engine() # type: ignore[attr-defined] # mypy if isinstance(count, Sized): count = len(count) - plural:str = pluralize.inflect.plural_noun(word, count) + plural:str = pluralize.inflect.plural_noun(word, count) # type: ignore[attr-defined] # mypy if prefix: return f"{count} {plural}" return plural @@ -200,7 +200,7 @@ def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any] return None with open(filepath, encoding = "utf-8") as file: - return json.load(file) if filepath.endswith(".json") else YAML().load(file) + return json.load(file) if filepath.endswith(".json") else YAML().load(file) # type: ignore[no-any-return] # mypy def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "") -> dict[str, Any]: @@ -214,7 +214,7 @@ def load_dict_from_module(module:ModuleType, filename:str, content_label:str = " raise ValueError(f'Unsupported file type. The file name "{filename}" must end with *.json, *.yaml, or *.yml') content = get_resource_as_string(module, filename) - return json.loads(content) if filename.endswith(".json") else YAML().load(content) + return json.loads(content) if filename.endswith(".json") else YAML().load(content) # type: ignore[no-any-return] # mypy def save_dict(filepath:str, content:dict[str, Any]) -> None: diff --git a/pyproject.toml b/pyproject.toml index c894364..03244c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,13 +104,17 @@ aggressive = 3 # https://github.com/python/mypy ##################### [tool.mypy] +# https://mypy.readthedocs.io/en/stable/config_file.html +#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs" python_version = "3.10" strict = true +disallow_untyped_calls = false disallow_untyped_defs = true disallow_incomplete_defs = true ignore_missing_imports = true show_error_codes = true warn_unused_ignores = true +verbosity = 0 #####################