improve type hints

This commit is contained in:
sebthom
2023-09-17 14:26:35 +02:00
parent 70b187260f
commit 38a76572a4
5 changed files with 41 additions and 26 deletions

View File

@@ -13,6 +13,7 @@ from overrides import overrides
from ruamel.yaml import YAML
from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from . import utils, resources, extract # pylint: disable=W0406
@@ -409,6 +410,8 @@ class KleinanzeigenBot(SeleniumMixin):
self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
csrf_token_elem = self.web_find(By.XPATH, "//meta[@name='_csrf']")
csrf_token = csrf_token_elem.get_attribute("content")
if csrf_token is None:
raise AssertionError("Expected CSRF Token not found in HTML content!")
if self.delete_ads_by_title:
published_ads = json.loads(self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")["content"])["ads"]
@@ -589,14 +592,14 @@ class KleinanzeigenBot(SeleniumMixin):
# extract the ad id from the URL's query parameter
current_url_query_params = urllib.parse.parse_qs(urllib.parse.urlparse(self.webdriver.current_url).query)
ad_id = int(current_url_query_params.get("adId", None)[0])
ad_id = int(current_url_query_params.get("adId", [])[0])
ad_cfg_orig["id"] = ad_id
LOG.info(" -> SUCCESS: ad published with ID %s", ad_id)
utils.save_dict(ad_file, ad_cfg_orig)
def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]):
def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]) -> None:
# click on something to trigger automatic category detection
self.web_click(By.ID, "pstad-descrptn")
@@ -683,7 +686,7 @@ class KleinanzeigenBot(SeleniumMixin):
except NoSuchElementException as ex:
LOG.debug(ex, exc_info = True)
def __upload_images(self, ad_cfg: dict[str, Any]):
def __upload_images(self, ad_cfg: dict[str, Any]) -> None:
LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
image_upload = self.web_find(By.XPATH, "//input[@type='file']")
@@ -791,7 +794,7 @@ class KleinanzeigenBot(SeleniumMixin):
n_images = 1
# determine number of images (1 ... N)
next_button = None
next_button:WebElement
try: # check if multiple images given
# edge case: 'Virtueller Rundgang' div could be found by same CSS class
element_candidates = image_box.find_elements(By.CSS_SELECTOR, '.galleryimage--info')
@@ -810,6 +813,8 @@ class KleinanzeigenBot(SeleniumMixin):
dl_counter = 0
while img_nr <= n_images: # scrolling + downloading
current_img_url = img_element.get_attribute('src') # URL of the image
if current_img_url is None:
continue
file_ending = current_img_url.split('.')[-1].lower()
img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending
if current_img_url.startswith('https'): # verify https (for Bandit linter)
@@ -836,7 +841,7 @@ class KleinanzeigenBot(SeleniumMixin):
return img_paths
def extract_ad_page_info(self, directory:str, id_:int) -> dict:
def extract_ad_page_info(self, directory:str, id_:int) -> dict[str, Any]:
"""
Extracts all necessary information from an ad's page.
@@ -844,7 +849,7 @@ class KleinanzeigenBot(SeleniumMixin):
:param id_: the ad ID, already extracted by a calling function
:return: a dictionary with the keys as given in an ad YAML, and their respective values
"""
info = {'active': True}
info:dict[str, Any] = {'active': True}
# extract basic info
if 's-anzeige' in self.webdriver.current_url:
@@ -898,7 +903,7 @@ class KleinanzeigenBot(SeleniumMixin):
return info
def download_ad_page(self, id_:int):
def download_ad_page(self, id_:int) -> None:
"""
Downloads an ad to a specific location, specified by config and ad ID.
NOTE: Requires that the driver session currently is on the ad page.
@@ -925,7 +930,7 @@ class KleinanzeigenBot(SeleniumMixin):
ad_file_path = new_base_dir + '/' + f'ad_{id_}.yaml'
utils.save_dict(ad_file_path, info)
def start_download_routine(self):
def start_download_routine(self) -> None:
"""
Determines which download mode was chosen with the arguments, and calls the specified download routine.
This downloads either all, only unsaved (new), or specific ads given by ID.

View File

@@ -4,11 +4,12 @@ SPDX-License-Identifier: AGPL-3.0-or-later
"""
import json
from decimal import DecimalException
from typing import Any
import selenium.webdriver.support.expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webdriver import WebDriver
import selenium.webdriver.support.expected_conditions as EC
from .selenium_mixin import SeleniumMixin
from .utils import parse_decimal, pause
@@ -39,7 +40,7 @@ class AdExtractor(SeleniumMixin):
return category
def extract_special_attributes_from_ad_page(self) -> dict:
def extract_special_attributes_from_ad_page(self) -> dict[str, Any]:
"""
Extracts the special attributes from an ad page.
@@ -56,7 +57,7 @@ class AdExtractor(SeleniumMixin):
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')}
return special_attributes
def extract_pricing_info_from_ad_page(self) -> (float | None, str):
def extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
"""
Extracts the pricing information (price and pricing type) from an ad page.
@@ -85,7 +86,7 @@ class AdExtractor(SeleniumMixin):
except NoSuchElementException: # no 'commercial' ad, has no pricing box etc.
return None, 'NOT_APPLICABLE'
def extract_shipping_info_from_ad_page(self) -> (str, float | None, list | None):
def extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
"""
Extracts shipping information from an ad page.
@@ -102,9 +103,8 @@ class AdExtractor(SeleniumMixin):
ship_type = 'SHIPPING'
elif '' in shipping_text:
shipping_price_parts = shipping_text.split(' ')
shipping_price = float(parse_decimal(shipping_price_parts[-2]))
ship_type = 'SHIPPING'
ship_costs = shipping_price
ship_costs = float(parse_decimal(shipping_price_parts[-2]))
# extract shipping options
# It is only possible to extract the cheapest shipping option,
@@ -140,13 +140,13 @@ class AdExtractor(SeleniumMixin):
except NoSuchElementException:
return None
def extract_contact_from_ad_page(self) -> dict:
def extract_contact_from_ad_page(self) -> dict[str, (str | None)]:
"""
Processes the address part involving street (optional), zip code + city, and phone number (optional).
:return: a dictionary containing the address parts with their corresponding values
"""
contact = {}
contact:dict[str, (str | None)] = {}
address_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-locality')
address_text = address_element.text.strip()
# format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt

View File

@@ -4,7 +4,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later
"""
import logging, os, shutil, time
from collections.abc import Callable, Iterable
from typing import Any, Final
from typing import Any, Final, TypeVar
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
@@ -16,6 +16,7 @@ from selenium.webdriver.edge.service import Service as EdgeService, DEFAULT_EXEC
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.expected_conditions import AnyDriver
from selenium.webdriver.support.ui import Select, WebDriverWait
import selenium_stealth
import webdriver_manager.core
@@ -39,13 +40,16 @@ class BrowserConfig:
self.profile_name:str = ""
CHROMIUM_OPTIONS = TypeVar('CHROMIUM_OPTIONS', bound = ChromiumOptions) # pylint: disable=invalid-name
class SeleniumMixin:
def __init__(self) -> None:
self.browser_config:Final[BrowserConfig] = BrowserConfig()
self.webdriver:WebDriver = None
def _init_browser_options(self, browser_options:ChromiumOptions) -> ChromiumOptions:
def _init_browser_options(self, browser_options:CHROMIUM_OPTIONS) -> CHROMIUM_OPTIONS:
if self.browser_config.use_private_window:
if isinstance(browser_options, webdriver.EdgeOptions):
browser_options.add_argument("-inprivate")
@@ -123,6 +127,7 @@ class SeleniumMixin:
webdriver_manager.core.driver.get_browser_version_from_os = lambda _: chrome_major_version
# download and install matching chrome driver
webdriver_mgr: DriverManager
if chrome_type == ChromeType.MSEDGE:
webdriver_mgr = EdgeChromiumDriverManager(cache_valid_range = 14)
webdriver_path = webdriver_mgr.install()
@@ -148,7 +153,7 @@ class SeleniumMixin:
LOG.info("New WebDriver session is: %s %s", self.webdriver.session_id, self.webdriver.command_executor._url) # pylint: disable=protected-access
def get_browser_version(self, executable_path: str) -> tuple[ChromeType, str]:
def get_browser_version(self, executable_path: str) -> tuple[ChromeType, str]: # -> [ chrome_type, chrome_version ]
match webdriver_manager.core.utils.os_name():
case OSType.WIN:
import win32api # pylint: disable=import-outside-toplevel,import-error
@@ -187,7 +192,7 @@ class SeleniumMixin:
webdriver_manager.core.utils.read_version_from_cmd(version_cmd, webdriver_manager.core.utils.PATTERN[ChromeType.GOOGLE])
)
def find_compatible_browser(self) -> tuple[str, ChromeType, str] | None:
def find_compatible_browser(self) -> tuple[str, ChromeType, str] | None: # -> [ browser_path, chrome_type, chrome_version ]
match webdriver_manager.core.utils.os_name():
case OSType.LINUX:
browser_paths = [
@@ -233,7 +238,7 @@ class SeleniumMixin:
LOG.warning("Installed browser could not be detected")
return None
def web_await(self, condition: Callable[[WebDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T:
def web_await(self, condition: Callable[[AnyDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T:
"""
Blocks/waits until the given condition is met.
@@ -305,6 +310,7 @@ class SeleniumMixin:
input_field.clear()
input_field.send_keys(text)
pause()
return input_field
def web_open(self, url:str, timeout:float = 15, reload_if_already_open:bool = False) -> None:
"""
@@ -349,7 +355,7 @@ class SeleniumMixin:
return response
# pylint: enable=dangerous-default-value
def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False):
def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False) -> None:
"""
Smoothly scrolls the current web page down.

View File

@@ -169,10 +169,10 @@ def pluralize(word:str, count:int | Sized, prefix:bool = True) -> str:
'fields'
"""
if not hasattr(pluralize, "inflect"):
pluralize.inflect = inflect.engine()
pluralize.inflect = inflect.engine() # type: ignore[attr-defined] # mypy
if isinstance(count, Sized):
count = len(count)
plural:str = pluralize.inflect.plural_noun(word, count)
plural:str = pluralize.inflect.plural_noun(word, count) # type: ignore[attr-defined] # mypy
if prefix:
return f"{count} {plural}"
return plural
@@ -200,7 +200,7 @@ def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any]
return None
with open(filepath, encoding = "utf-8") as file:
return json.load(file) if filepath.endswith(".json") else YAML().load(file)
return json.load(file) if filepath.endswith(".json") else YAML().load(file) # type: ignore[no-any-return] # mypy
def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "") -> dict[str, Any]:
@@ -214,7 +214,7 @@ def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "
raise ValueError(f'Unsupported file type. The file name "{filename}" must end with *.json, *.yaml, or *.yml')
content = get_resource_as_string(module, filename)
return json.loads(content) if filename.endswith(".json") else YAML().load(content)
return json.loads(content) if filename.endswith(".json") else YAML().load(content) # type: ignore[no-any-return] # mypy
def save_dict(filepath:str, content:dict[str, Any]) -> None:

View File

@@ -104,13 +104,17 @@ aggressive = 3
# https://github.com/python/mypy
#####################
[tool.mypy]
# https://mypy.readthedocs.io/en/stable/config_file.html
#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
python_version = "3.10"
strict = true
disallow_untyped_calls = false
disallow_untyped_defs = true
disallow_incomplete_defs = true
ignore_missing_imports = true
show_error_codes = true
warn_unused_ignores = true
verbosity = 0
#####################