improve type hints

This commit is contained in:
sebthom
2023-09-17 14:26:35 +02:00
parent 70b187260f
commit 38a76572a4
5 changed files with 41 additions and 26 deletions

View File

@@ -13,6 +13,7 @@ from overrides import overrides
from ruamel.yaml import YAML from ruamel.yaml import YAML
from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, TimeoutException, WebDriverException from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, TimeoutException, WebDriverException
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from . import utils, resources, extract # pylint: disable=W0406 from . import utils, resources, extract # pylint: disable=W0406
@@ -409,6 +410,8 @@ class KleinanzeigenBot(SeleniumMixin):
self.web_open(f"{self.root_url}/m-meine-anzeigen.html") self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
csrf_token_elem = self.web_find(By.XPATH, "//meta[@name='_csrf']") csrf_token_elem = self.web_find(By.XPATH, "//meta[@name='_csrf']")
csrf_token = csrf_token_elem.get_attribute("content") csrf_token = csrf_token_elem.get_attribute("content")
if csrf_token is None:
raise AssertionError("Expected CSRF Token not found in HTML content!")
if self.delete_ads_by_title: if self.delete_ads_by_title:
published_ads = json.loads(self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")["content"])["ads"] published_ads = json.loads(self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")["content"])["ads"]
@@ -589,14 +592,14 @@ class KleinanzeigenBot(SeleniumMixin):
# extract the ad id from the URL's query parameter # extract the ad id from the URL's query parameter
current_url_query_params = urllib.parse.parse_qs(urllib.parse.urlparse(self.webdriver.current_url).query) current_url_query_params = urllib.parse.parse_qs(urllib.parse.urlparse(self.webdriver.current_url).query)
ad_id = int(current_url_query_params.get("adId", None)[0]) ad_id = int(current_url_query_params.get("adId", [])[0])
ad_cfg_orig["id"] = ad_id ad_cfg_orig["id"] = ad_id
LOG.info(" -> SUCCESS: ad published with ID %s", ad_id) LOG.info(" -> SUCCESS: ad published with ID %s", ad_id)
utils.save_dict(ad_file, ad_cfg_orig) utils.save_dict(ad_file, ad_cfg_orig)
def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]): def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]) -> None:
# click on something to trigger automatic category detection # click on something to trigger automatic category detection
self.web_click(By.ID, "pstad-descrptn") self.web_click(By.ID, "pstad-descrptn")
@@ -683,7 +686,7 @@ class KleinanzeigenBot(SeleniumMixin):
except NoSuchElementException as ex: except NoSuchElementException as ex:
LOG.debug(ex, exc_info = True) LOG.debug(ex, exc_info = True)
def __upload_images(self, ad_cfg: dict[str, Any]): def __upload_images(self, ad_cfg: dict[str, Any]) -> None:
LOG.info(" -> found %s", pluralize("image", ad_cfg["images"])) LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
image_upload = self.web_find(By.XPATH, "//input[@type='file']") image_upload = self.web_find(By.XPATH, "//input[@type='file']")
@@ -791,7 +794,7 @@ class KleinanzeigenBot(SeleniumMixin):
n_images = 1 n_images = 1
# determine number of images (1 ... N) # determine number of images (1 ... N)
next_button = None next_button:WebElement
try: # check if multiple images given try: # check if multiple images given
# edge case: 'Virtueller Rundgang' div could be found by same CSS class # edge case: 'Virtueller Rundgang' div could be found by same CSS class
element_candidates = image_box.find_elements(By.CSS_SELECTOR, '.galleryimage--info') element_candidates = image_box.find_elements(By.CSS_SELECTOR, '.galleryimage--info')
@@ -810,6 +813,8 @@ class KleinanzeigenBot(SeleniumMixin):
dl_counter = 0 dl_counter = 0
while img_nr <= n_images: # scrolling + downloading while img_nr <= n_images: # scrolling + downloading
current_img_url = img_element.get_attribute('src') # URL of the image current_img_url = img_element.get_attribute('src') # URL of the image
if current_img_url is None:
continue
file_ending = current_img_url.split('.')[-1].lower() file_ending = current_img_url.split('.')[-1].lower()
img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending
if current_img_url.startswith('https'): # verify https (for Bandit linter) if current_img_url.startswith('https'): # verify https (for Bandit linter)
@@ -836,7 +841,7 @@ class KleinanzeigenBot(SeleniumMixin):
return img_paths return img_paths
def extract_ad_page_info(self, directory:str, id_:int) -> dict: def extract_ad_page_info(self, directory:str, id_:int) -> dict[str, Any]:
""" """
Extracts all necessary information from an ad's page. Extracts all necessary information from an ad's page.
@@ -844,7 +849,7 @@ class KleinanzeigenBot(SeleniumMixin):
:param id_: the ad ID, already extracted by a calling function :param id_: the ad ID, already extracted by a calling function
:return: a dictionary with the keys as given in an ad YAML, and their respective values :return: a dictionary with the keys as given in an ad YAML, and their respective values
""" """
info = {'active': True} info:dict[str, Any] = {'active': True}
# extract basic info # extract basic info
if 's-anzeige' in self.webdriver.current_url: if 's-anzeige' in self.webdriver.current_url:
@@ -898,7 +903,7 @@ class KleinanzeigenBot(SeleniumMixin):
return info return info
def download_ad_page(self, id_:int): def download_ad_page(self, id_:int) -> None:
""" """
Downloads an ad to a specific location, specified by config and ad ID. Downloads an ad to a specific location, specified by config and ad ID.
NOTE: Requires that the driver session currently is on the ad page. NOTE: Requires that the driver session currently is on the ad page.
@@ -925,7 +930,7 @@ class KleinanzeigenBot(SeleniumMixin):
ad_file_path = new_base_dir + '/' + f'ad_{id_}.yaml' ad_file_path = new_base_dir + '/' + f'ad_{id_}.yaml'
utils.save_dict(ad_file_path, info) utils.save_dict(ad_file_path, info)
def start_download_routine(self): def start_download_routine(self) -> None:
""" """
Determines which download mode was chosen with the arguments, and calls the specified download routine. Determines which download mode was chosen with the arguments, and calls the specified download routine.
This downloads either all, only unsaved (new), or specific ads given by ID. This downloads either all, only unsaved (new), or specific ads given by ID.

View File

@@ -4,11 +4,12 @@ SPDX-License-Identifier: AGPL-3.0-or-later
""" """
import json import json
from decimal import DecimalException from decimal import DecimalException
from typing import Any
import selenium.webdriver.support.expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.remote.webdriver import WebDriver
import selenium.webdriver.support.expected_conditions as EC
from .selenium_mixin import SeleniumMixin from .selenium_mixin import SeleniumMixin
from .utils import parse_decimal, pause from .utils import parse_decimal, pause
@@ -39,7 +40,7 @@ class AdExtractor(SeleniumMixin):
return category return category
def extract_special_attributes_from_ad_page(self) -> dict: def extract_special_attributes_from_ad_page(self) -> dict[str, Any]:
""" """
Extracts the special attributes from an ad page. Extracts the special attributes from an ad page.
@@ -56,7 +57,7 @@ class AdExtractor(SeleniumMixin):
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')} special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')}
return special_attributes return special_attributes
def extract_pricing_info_from_ad_page(self) -> (float | None, str): def extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
""" """
Extracts the pricing information (price and pricing type) from an ad page. Extracts the pricing information (price and pricing type) from an ad page.
@@ -85,7 +86,7 @@ class AdExtractor(SeleniumMixin):
except NoSuchElementException: # no 'commercial' ad, has no pricing box etc. except NoSuchElementException: # no 'commercial' ad, has no pricing box etc.
return None, 'NOT_APPLICABLE' return None, 'NOT_APPLICABLE'
def extract_shipping_info_from_ad_page(self) -> (str, float | None, list | None): def extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
""" """
Extracts shipping information from an ad page. Extracts shipping information from an ad page.
@@ -102,9 +103,8 @@ class AdExtractor(SeleniumMixin):
ship_type = 'SHIPPING' ship_type = 'SHIPPING'
elif '' in shipping_text: elif '' in shipping_text:
shipping_price_parts = shipping_text.split(' ') shipping_price_parts = shipping_text.split(' ')
shipping_price = float(parse_decimal(shipping_price_parts[-2]))
ship_type = 'SHIPPING' ship_type = 'SHIPPING'
ship_costs = shipping_price ship_costs = float(parse_decimal(shipping_price_parts[-2]))
# extract shipping options # extract shipping options
# It is only possible to extract the cheapest shipping option, # It is only possible to extract the cheapest shipping option,
@@ -140,13 +140,13 @@ class AdExtractor(SeleniumMixin):
except NoSuchElementException: except NoSuchElementException:
return None return None
def extract_contact_from_ad_page(self) -> dict: def extract_contact_from_ad_page(self) -> dict[str, (str | None)]:
""" """
Processes the address part involving street (optional), zip code + city, and phone number (optional). Processes the address part involving street (optional), zip code + city, and phone number (optional).
:return: a dictionary containing the address parts with their corresponding values :return: a dictionary containing the address parts with their corresponding values
""" """
contact = {} contact:dict[str, (str | None)] = {}
address_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-locality') address_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-locality')
address_text = address_element.text.strip() address_text = address_element.text.strip()
# format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt # format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt

View File

@@ -4,7 +4,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later
""" """
import logging, os, shutil, time import logging, os, shutil, time
from collections.abc import Callable, Iterable from collections.abc import Callable, Iterable
from typing import Any, Final from typing import Any, Final, TypeVar
from selenium import webdriver from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
@@ -16,6 +16,7 @@ from selenium.webdriver.edge.service import Service as EdgeService, DEFAULT_EXEC
from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.expected_conditions import AnyDriver
from selenium.webdriver.support.ui import Select, WebDriverWait from selenium.webdriver.support.ui import Select, WebDriverWait
import selenium_stealth import selenium_stealth
import webdriver_manager.core import webdriver_manager.core
@@ -39,13 +40,16 @@ class BrowserConfig:
self.profile_name:str = "" self.profile_name:str = ""
CHROMIUM_OPTIONS = TypeVar('CHROMIUM_OPTIONS', bound = ChromiumOptions) # pylint: disable=invalid-name
class SeleniumMixin: class SeleniumMixin:
def __init__(self) -> None: def __init__(self) -> None:
self.browser_config:Final[BrowserConfig] = BrowserConfig() self.browser_config:Final[BrowserConfig] = BrowserConfig()
self.webdriver:WebDriver = None self.webdriver:WebDriver = None
def _init_browser_options(self, browser_options:ChromiumOptions) -> ChromiumOptions: def _init_browser_options(self, browser_options:CHROMIUM_OPTIONS) -> CHROMIUM_OPTIONS:
if self.browser_config.use_private_window: if self.browser_config.use_private_window:
if isinstance(browser_options, webdriver.EdgeOptions): if isinstance(browser_options, webdriver.EdgeOptions):
browser_options.add_argument("-inprivate") browser_options.add_argument("-inprivate")
@@ -123,6 +127,7 @@ class SeleniumMixin:
webdriver_manager.core.driver.get_browser_version_from_os = lambda _: chrome_major_version webdriver_manager.core.driver.get_browser_version_from_os = lambda _: chrome_major_version
# download and install matching chrome driver # download and install matching chrome driver
webdriver_mgr: DriverManager
if chrome_type == ChromeType.MSEDGE: if chrome_type == ChromeType.MSEDGE:
webdriver_mgr = EdgeChromiumDriverManager(cache_valid_range = 14) webdriver_mgr = EdgeChromiumDriverManager(cache_valid_range = 14)
webdriver_path = webdriver_mgr.install() webdriver_path = webdriver_mgr.install()
@@ -148,7 +153,7 @@ class SeleniumMixin:
LOG.info("New WebDriver session is: %s %s", self.webdriver.session_id, self.webdriver.command_executor._url) # pylint: disable=protected-access LOG.info("New WebDriver session is: %s %s", self.webdriver.session_id, self.webdriver.command_executor._url) # pylint: disable=protected-access
def get_browser_version(self, executable_path: str) -> tuple[ChromeType, str]: def get_browser_version(self, executable_path: str) -> tuple[ChromeType, str]: # -> [ chrome_type, chrome_version ]
match webdriver_manager.core.utils.os_name(): match webdriver_manager.core.utils.os_name():
case OSType.WIN: case OSType.WIN:
import win32api # pylint: disable=import-outside-toplevel,import-error import win32api # pylint: disable=import-outside-toplevel,import-error
@@ -187,7 +192,7 @@ class SeleniumMixin:
webdriver_manager.core.utils.read_version_from_cmd(version_cmd, webdriver_manager.core.utils.PATTERN[ChromeType.GOOGLE]) webdriver_manager.core.utils.read_version_from_cmd(version_cmd, webdriver_manager.core.utils.PATTERN[ChromeType.GOOGLE])
) )
def find_compatible_browser(self) -> tuple[str, ChromeType, str] | None: def find_compatible_browser(self) -> tuple[str, ChromeType, str] | None: # -> [ browser_path, chrome_type, chrome_version ]
match webdriver_manager.core.utils.os_name(): match webdriver_manager.core.utils.os_name():
case OSType.LINUX: case OSType.LINUX:
browser_paths = [ browser_paths = [
@@ -233,7 +238,7 @@ class SeleniumMixin:
LOG.warning("Installed browser could not be detected") LOG.warning("Installed browser could not be detected")
return None return None
def web_await(self, condition: Callable[[WebDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T: def web_await(self, condition: Callable[[AnyDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T:
""" """
Blocks/waits until the given condition is met. Blocks/waits until the given condition is met.
@@ -305,6 +310,7 @@ class SeleniumMixin:
input_field.clear() input_field.clear()
input_field.send_keys(text) input_field.send_keys(text)
pause() pause()
return input_field
def web_open(self, url:str, timeout:float = 15, reload_if_already_open:bool = False) -> None: def web_open(self, url:str, timeout:float = 15, reload_if_already_open:bool = False) -> None:
""" """
@@ -349,7 +355,7 @@ class SeleniumMixin:
return response return response
# pylint: enable=dangerous-default-value # pylint: enable=dangerous-default-value
def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False): def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False) -> None:
""" """
Smoothly scrolls the current web page down. Smoothly scrolls the current web page down.

View File

@@ -169,10 +169,10 @@ def pluralize(word:str, count:int | Sized, prefix:bool = True) -> str:
'fields' 'fields'
""" """
if not hasattr(pluralize, "inflect"): if not hasattr(pluralize, "inflect"):
pluralize.inflect = inflect.engine() pluralize.inflect = inflect.engine() # type: ignore[attr-defined] # mypy
if isinstance(count, Sized): if isinstance(count, Sized):
count = len(count) count = len(count)
plural:str = pluralize.inflect.plural_noun(word, count) plural:str = pluralize.inflect.plural_noun(word, count) # type: ignore[attr-defined] # mypy
if prefix: if prefix:
return f"{count} {plural}" return f"{count} {plural}"
return plural return plural
@@ -200,7 +200,7 @@ def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any]
return None return None
with open(filepath, encoding = "utf-8") as file: with open(filepath, encoding = "utf-8") as file:
return json.load(file) if filepath.endswith(".json") else YAML().load(file) return json.load(file) if filepath.endswith(".json") else YAML().load(file) # type: ignore[no-any-return] # mypy
def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "") -> dict[str, Any]: def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "") -> dict[str, Any]:
@@ -214,7 +214,7 @@ def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "
raise ValueError(f'Unsupported file type. The file name "{filename}" must end with *.json, *.yaml, or *.yml') raise ValueError(f'Unsupported file type. The file name "{filename}" must end with *.json, *.yaml, or *.yml')
content = get_resource_as_string(module, filename) content = get_resource_as_string(module, filename)
return json.loads(content) if filename.endswith(".json") else YAML().load(content) return json.loads(content) if filename.endswith(".json") else YAML().load(content) # type: ignore[no-any-return] # mypy
def save_dict(filepath:str, content:dict[str, Any]) -> None: def save_dict(filepath:str, content:dict[str, Any]) -> None:

View File

@@ -104,13 +104,17 @@ aggressive = 3
# https://github.com/python/mypy # https://github.com/python/mypy
##################### #####################
[tool.mypy] [tool.mypy]
# https://mypy.readthedocs.io/en/stable/config_file.html
#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
python_version = "3.10" python_version = "3.10"
strict = true strict = true
disallow_untyped_calls = false
disallow_untyped_defs = true disallow_untyped_defs = true
disallow_incomplete_defs = true disallow_incomplete_defs = true
ignore_missing_imports = true ignore_missing_imports = true
show_error_codes = true show_error_codes = true
warn_unused_ignores = true warn_unused_ignores = true
verbosity = 0
##################### #####################