initial import

This commit is contained in:
sebthom
2022-01-11 13:45:20 +01:00
parent be5621d7ba
commit 1ceccc48dc
20 changed files with 2435 additions and 1 deletions

View File

@@ -0,0 +1,471 @@
"""
Copyright (C) 2022 Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import atexit, copy, getopt, glob, json, logging, os, signal, sys, textwrap, time, urllib
from datetime import datetime
from logging.handlers import RotatingFileHandler
from typing import Any, Dict, Final, Iterable
from ruamel.yaml import YAML
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from . import utils, resources
from .utils import apply_defaults, ensure, is_frozen, pause, pluralize, safe_get
from .selenium_mixin import SeleniumMixin
# Resolve the application version; fall back to a placeholder when the version
# module cannot be found (NOTE(review): presumably it is generated at build time - confirm).
try:
    from .version import version as VERSION
except ModuleNotFoundError:
    VERSION = "unknown"

# LOG_ROOT is the root logger the console/file handlers get attached to,
# LOG is the logger of this package.
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot")
LOG.setLevel(logging.INFO)
class KleinanzeigenBot(SeleniumMixin):
def __init__(self):
    """
    Initializes the bot with its default settings. Nothing is loaded or opened here.

    The config file and the log file default to the current working directory.
    """
    super().__init__()

    working_dir = os.getcwd()

    self.root_url = "https://www.ebay-kleinanzeigen.de"

    self.config:Dict[str, Any] = {}
    self.config_file_path = os.path.join(working_dir, "config.yaml")

    self.categories:Dict[str, str] = {}

    # the log file is named after the executable when frozen, else after this module
    self.file_log:logging.FileHandler = None
    log_file_basename = os.path.splitext(os.path.basename(sys.executable))[0] if is_frozen() else self.__module__
    self.log_file_path = os.path.join(working_dir, f"{log_file_basename}.log")

    self.command = "help"
def __del__(self):
    """Detaches the file log handler (if one was installed) from the root logger, then runs the parent's cleanup."""
    installed_handler = self.file_log
    if installed_handler:
        LOG_ROOT.removeHandler(installed_handler)
    super().__del__()
def run(self, args:Iterable[str]) -> None:
    """
    Parses the command line and executes the requested command.

    :param args: the raw command line arguments; the first element is skipped by the argument parser
    :raises SystemExit: with exit code 2 if the command is unknown
    """
    self.parse_args(args)
    command = self.command

    if command == "help":
        self.show_help()
    elif command == "version":
        print(VERSION)
    elif command == "verify":
        self.configure_file_logging()
        self.load_config()
        self.load_ads()
        LOG.info("############################################")
        LOG.info("No configuration errors found.")
        LOG.info("############################################")
    elif command == "publish":
        self.configure_file_logging()
        self.load_config()
        ads = self.load_ads()
        if len(ads) != 0:
            # a browser session is only needed when there is something to publish
            self.create_webdriver_session()
            self.login()
            self.publish_ads(ads)
        else:
            LOG.info("############################################")
            LOG.info("No ads to (re-)publish found.")
            LOG.info("############################################")
    else:
        LOG.error("Unknown command: %s", self.command)
        sys.exit(2)
# Prints the command line usage and the list of supported commands to stdout.
def show_help(self) -> None:
# when is_frozen() reports a frozen build the executable is shown as invoked,
# otherwise a "python -m <path>" invocation is shown
if is_frozen():
exe = sys.argv[0]
else:
# NOTE(review): this is the package directory relative to the current working directory;
# it is only a valid "-m" module name when run from the directory containing the package - confirm
exe = f"python -m {os.path.relpath(os.path.join(__file__, '..'))}"
print(textwrap.dedent(f"""\
Usage: {exe} COMMAND [-v|--verbose] [--config=<PATH>] [--logfile=<PATH>]
Commands:
publish - (re-)publishes ads
verify - verifies the configuration files
--
help - displays this help (default command)
version - displays the application version
"""))
def parse_args(self, args:Iterable[str]) -> None:
    """
    Evaluates the command line options and determines the command to execute.

    :param args: the raw command line arguments; the first element is skipped
    :raises SystemExit: with exit code 2 on invalid options or more than one command,
        with exit code 0 after displaying the help via -h/--help
    """
    long_options = ["help", "verbose", "logfile=", "config="]
    try:
        options, arguments = getopt.gnu_getopt(args[1:], "hv", long_options)
    except getopt.error as ex:
        LOG.error(ex.msg)
        LOG.error("Use --help to display available options")
        sys.exit(2)

    for option, value in options:
        if option in ("-h", "--help"):
            self.show_help()
            sys.exit(0)
        elif option == "--config":
            self.config_file_path = os.path.abspath(value)
        elif option == "--logfile":
            # an empty value disables file logging
            self.log_file_path = os.path.abspath(value) if value else None
        elif option in ("-v", "--verbose"):
            LOG.setLevel(logging.DEBUG)

    command_count = len(arguments)
    if command_count == 0:
        self.command = "help"
    elif command_count == 1:
        self.command = arguments[0]
    else:
        LOG.error("More than one command given: %s", arguments)
        sys.exit(2)
def configure_file_logging(self) -> None:
    """
    Attaches a rotating file handler (10 MiB per file, 10 backups, UTF-8) to the root logger.

    Does nothing if no log file path is set or a file handler was installed already.
    """
    if not self.log_file_path or self.file_log:
        return

    LOG.info("Logging to [%s]...", self.log_file_path)
    max_log_size = 10 * 1024 * 1024
    self.file_log = RotatingFileHandler(filename = self.log_file_path, maxBytes = max_log_size, backupCount = 10, encoding = "utf-8")
    self.file_log.setLevel(logging.DEBUG)
    log_format = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')
    self.file_log.setFormatter(log_format)
    LOG_ROOT.addHandler(self.file_log)
def load_ads(self, exclude_inactive = True, exclude_undue = True) -> Iterable[Dict[str, Any]]:
    """
    Finds, loads and validates the ad files matching the glob patterns configured under [ad_files].

    :param exclude_inactive: if True, ads whose [active] property is falsy are skipped
    :param exclude_undue: if True, ads that were last published within their [republication_interval] (in days) are skipped
    :return: a list of (ad_file, ad_cfg, ad_cfg_orig) tuples; ad_cfg is the effective configuration with
        defaults applied, ad_cfg_orig is the unmodified content of the ad file
    :raises AssertionError: if an ad file contains invalid or missing values
    """
    LOG.info("Searching for ad files...")

    ad_files = set()
    for file_pattern in self.config["ad_files"]:
        for ad_file in glob.glob(file_pattern, root_dir = os.getcwd(), recursive = True):
            ad_files.add(os.path.abspath(ad_file))
    LOG.info(" -> found %s", pluralize("ad file", ad_files))
    if not ad_files:
        return []

    descr_prefix = self.config["ad_defaults"]["description"]["prefix"] or ""
    descr_suffix = self.config["ad_defaults"]["description"]["suffix"] or ""

    ad_fields = utils.load_dict_from_module(resources, "ad_fields.yaml")
    ads = []
    for ad_file in sorted(ad_files):
        ad_cfg_orig = utils.load_dict(ad_file, "ad file")
        ad_cfg = copy.deepcopy(ad_cfg_orig)
        # the description default is a prefix/suffix mapping and must not replace the ad's own text
        apply_defaults(ad_cfg, self.config["ad_defaults"], ignore = lambda k, _: k == "description", override = lambda _, v: v == "")
        apply_defaults(ad_cfg, ad_fields)

        if exclude_inactive and not ad_cfg["active"]:
            LOG.info(" -> excluding inactive ad [%s]", ad_file)
            continue

        if exclude_undue:
            # must be reset for every ad: otherwise an ad without any timestamp either fails
            # with an unbound variable or silently reuses the timestamp of the previous ad
            last_updated_on = None
            if ad_cfg["updated_on"]:
                last_updated_on = datetime.fromisoformat(ad_cfg["updated_on"])
            elif ad_cfg["created_on"]:
                last_updated_on = datetime.fromisoformat(ad_cfg["created_on"])

            if last_updated_on:
                ad_age = datetime.utcnow() - last_updated_on
                if ad_age.days <= ad_cfg["republication_interval"]:
                    LOG.info(" -> skipping. last published %d days ago. republication is only required every %s days",
                        ad_age.days,
                        ad_cfg["republication_interval"]
                    )
                    continue

        ad_cfg["description"] = descr_prefix + (ad_cfg["description"] or "") + descr_suffix

        # pylint: disable=cell-var-from-loop
        def assert_one_of(path:str, allowed:Iterable):
            ensure(safe_get(ad_cfg, *path.split(".")) in allowed, f'-> property [{path}] must be one of: {allowed} @ [{ad_file}]')

        def assert_min_len(path:str, minlen:int):
            # a missing value counts as an empty one instead of crashing in len()
            ensure(len(safe_get(ad_cfg, *path.split(".")) or "") >= minlen, f'-> property [{path}] must be at least {minlen} characters long @ [{ad_file}]')

        def assert_has_value(path:str):
            ensure(safe_get(ad_cfg, *path.split(".")), f'-> property [{path}] not specified @ [{ad_file}]')
        # pylint: enable=cell-var-from-loop

        assert_one_of("type", ("OFFER", "WANTED"))
        assert_min_len("title", 10)
        assert_has_value("description")
        assert_has_value("price")
        assert_one_of("price_type", ("FIXED", "NEGOTIABLE", "GIVE_AWAY"))
        assert_one_of("shipping_type", ("PICKUP", "SHIPPING", "NOT_APPLICABLE"))
        assert_has_value("contact.name")
        assert_has_value("republication_interval")

        if ad_cfg["id"]:
            ad_cfg["id"] = int(ad_cfg["id"])

        if ad_cfg["category"]:
            # translate a category name into its ID path, unknown values are used as given
            ad_cfg["category"] = self.categories.get(ad_cfg["category"], ad_cfg["category"])

        if ad_cfg["images"]:
            # image patterns are resolved relative to the directory of the ad file
            ad_dir = os.path.dirname(ad_file)
            images = set()
            for image_pattern in ad_cfg["images"]:
                for image_file in glob.glob(image_pattern, root_dir = ad_dir, recursive = True):
                    _, image_file_ext = os.path.splitext(image_file)
                    ensure(image_file_ext.lower() in (".gif", ".jpg", ".jpeg", ".png"), f'Unsupported image file type [{image_file}]')
                    if os.path.isabs(image_file):
                        images.add(image_file)
                    else:
                        images.add(os.path.join(ad_dir, image_file))
            # report the directory that was actually searched
            ensure(images or not ad_cfg["images"], f'No images found for given file patterns {ad_cfg["images"]} at {ad_dir}')
            ad_cfg["images"] = sorted(images)

        ads.append((
            ad_file,
            ad_cfg,
            ad_cfg_orig
        ))

    LOG.info(" -> loaded %s", pluralize("ad", ads))
    return ads
def load_config(self) -> None:
    """
    Loads the config file, fills in the bundled defaults and the category table.

    A config file that does not exist yet is created from the default values.

    :raises AssertionError: if [login.username] or [login.password] is not specified
    """
    defaults = utils.load_dict_from_module(resources, "config_defaults.yaml")
    user_config = utils.load_dict(self.config_file_path, "config", must_exist = False)

    if user_config is None:
        LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path)
        utils.save_dict(self.config_file_path, defaults)
        user_config = {}

    self.config = apply_defaults(user_config, defaults)

    # bundled categories first, user-defined ones may add to or replace them
    self.categories = utils.load_dict_from_module(resources, "categories.yaml", "categories")
    custom_categories = self.config["categories"]
    if custom_categories:
        self.categories.update(custom_categories)
    LOG.info(" -> found %s", pluralize("category", self.categories))

    login_cfg = self.config["login"]
    ensure(login_cfg["username"], f'[login.username] not specified @ [{self.config_file_path}]')
    ensure(login_cfg["password"], f'[login.password] not specified @ [{self.config_file_path}]')

    browser_cfg = self.config["browser"]
    self.browser_arguments = browser_cfg["arguments"]
    self.browser_binary_location = browser_cfg["binary_location"]
def login(self) -> None:
    """
    Opens the login page, accepts the privacy banner and signs in with the configured credentials.

    :raises NoSuchElementException: if an expected page element does not show up in time
    """
    username = self.config["login"]["username"]
    LOG.info("Logging in as [%s]...", username)
    login_url = f'{self.root_url}/m-einloggen.html'
    self.web_open(login_url)

    # accept privacy banner
    self.web_click(By.ID, 'gdpr-banner-accept')

    self.web_input(By.ID, 'login-email', username)
    self.web_input(By.ID, 'login-password', self.config["login"]["password"])

    self.handle_captcha_if_present("login-recaptcha", "but DON'T click 'Einloggen'.")

    self.web_click(By.ID, 'login-submit')
    pause(800, 3000)
def handle_captcha_if_present(self, captcha_element_id:str, msg:str) -> None:
    """
    Waits up to 5 minutes for the user to solve a captcha, if one is shown.

    :param captcha_element_id: ID of the page element containing the captcha iframe
    :param msg: additional instruction appended to the warning shown to the user
    :raises NoSuchElementException: if the captcha was not solved in time
    """
    try:
        self.web_click(By.XPATH, f'//*[@id="{captcha_element_id}"]')
    except NoSuchElementException:
        # no captcha on this page
        return

    LOG.warning("############################################")
    LOG.warning("# Captcha present! Please solve and close the captcha, %s", msg)
    LOG.warning("############################################")
    self.webdriver.switch_to.frame(self.web_find(By.CSS_SELECTOR, f'#{captcha_element_id} iframe'))
    try:
        self.web_await(lambda _: self.webdriver.find_element(By.ID, 'recaptcha-anchor').get_attribute('aria-checked') == "true", timeout = 5 * 60)
    finally:
        # always leave the iframe again, otherwise later lookups would run inside the captcha frame
        self.webdriver.switch_to.default_content()
def delete_ad(self, ad_cfg: Dict[str, Any]) -> bool:
    """
    Deletes every published ad that has the same ID or the same title as the given ad.

    :param ad_cfg: the effective ad configuration; its [id] is reset to None afterwards
    :return: always True
    """
    LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"])

    self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
    # the CSRF token of the page is required by the delete request
    csrf_token = self.web_find(By.XPATH, '//meta[@name="_csrf"]').get_attribute("content")

    listing = self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")
    for published_ad in json.loads(listing["content"])["ads"]:
        published_ad_id = int(published_ad.get("id", -1))
        published_ad_title = published_ad.get("title", "")
        if ad_cfg["id"] != published_ad_id and ad_cfg["title"] != published_ad_title:
            continue

        LOG.info(" -> deleting %s '%s'...", published_ad_id, published_ad_title)
        self.web_request(
            url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}",
            method = "POST",
            headers = {'x-csrf-token': csrf_token}
        )
        pause(1500, 3000)

    ad_cfg["id"] = None
    return True
def publish_ads(self, ad_cfgs:Iterable[Dict[str, Any]]) -> None:
    """
    Publishes the given ads one after another, pausing between them.

    :param ad_cfgs: (ad_file, ad_cfg, ad_cfg_orig) tuples as returned by load_ads()
    """
    count = 0
    for count, ad in enumerate(ad_cfgs, start = 1):
        ad_file, ad_cfg, ad_cfg_orig = ad
        LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file)
        self.publish_ad(ad_file, ad_cfg, ad_cfg_orig)
        pause(3000, 5000)

    LOG.info("############################################")
    LOG.info("(Re-)published %s", pluralize("ad", count))
    LOG.info("############################################")
def publish_ad(self, ad_file, ad_cfg: Dict[str, Any], ad_cfg_orig: Dict[str, Any]) -> None:
    """
    Deletes an already published instance of the ad (if any), fills in and submits the ad form,
    then stores the new ad ID and the publication timestamps in the ad file.

    :param ad_file: path of the ad file, it is rewritten after successful publication
    :param ad_cfg: the effective ad configuration (defaults applied)
    :param ad_cfg_orig: the ad configuration as loaded from the ad file; gets updated with
        [id], [created_on] and [updated_on] and is saved back to ad_file
    :raises AssertionError: if no category can be determined, an image upload does not finish
        within 60 seconds, or the confirmation URL contains no ad ID
    :raises NoSuchElementException: if an expected page element does not show up in time
    """
    # "import urllib" alone does not guarantee that the parse submodule is loaded
    import urllib.parse  # pylint: disable=import-outside-toplevel

    self.delete_ad(ad_cfg)

    LOG.info("Publishing ad '%s'...", ad_cfg["title"])

    if LOG.isEnabledFor(logging.DEBUG):
        LOG.debug(" -> effective ad meta:")
        YAML().dump(ad_cfg, sys.stdout)

    self.web_open(f'{self.root_url}/p-anzeige-aufgeben-schritt2.html')

    if ad_cfg["type"] == "WANTED":
        self.web_click(By.ID, 'adType2')

    #############################
    # set title
    #############################
    self.web_input(By.ID, 'postad-title', ad_cfg["title"])

    #############################
    # set category
    #############################
    # trigger and wait for automatic category detection
    self.web_click(By.ID, 'pstad-price')
    try:
        self.web_find(By.XPATH, "//*[@id='postad-category-path'][text()]")
        is_category_auto_selected = True
    except NoSuchElementException:
        # only "element not found" means "not detected"; other errors must surface
        is_category_auto_selected = False

    if ad_cfg["category"]:
        self.web_click(By.ID, 'pstad-lnk-chngeCtgry')
        self.web_find(By.ID, 'postad-step1-sbmt')

        category_url = f'{self.root_url}/p-kategorie-aendern.html#?path={ad_cfg["category"]}'
        self.web_open(category_url)
        self.web_click(By.XPATH, "//*[@id='postad-step1-sbmt']/button")
    else:
        ensure(is_category_auto_selected, f'No category specified in [{ad_file}] and automatic category detection failed')

    #############################
    # set price
    #############################
    self.web_select(By.XPATH, "//select[@id='priceType']", ad_cfg["price_type"])
    if ad_cfg["price_type"] != 'GIVE_AWAY':
        self.web_input(By.ID, 'pstad-price', ad_cfg["price"])

    #############################
    # set description
    #############################
    # json.dumps() yields a properly escaped JavaScript string literal, so backticks,
    # backslashes, quotes and "${...}" sequences in the description are passed on unchanged
    self.web_execute("document.querySelector('#pstad-descrptn').value = " + json.dumps(ad_cfg["description"]))

    #############################
    # set contact zipcode
    #############################
    if ad_cfg["contact"]["zipcode"]:
        self.web_input(By.ID, 'pstad-zip', ad_cfg["contact"]["zipcode"])

    #############################
    # set contact street
    #############################
    if ad_cfg["contact"]["street"]:
        self.web_input(By.ID, 'pstad-street', ad_cfg["contact"]["street"])

    #############################
    # set contact name
    #############################
    if ad_cfg["contact"]["name"]:
        self.web_input(By.ID, 'postad-contactname', ad_cfg["contact"]["name"])

    #############################
    # set contact phone
    #############################
    if ad_cfg["contact"]["phone"]:
        self.web_input(By.ID, 'postad-phonenumber', ad_cfg["contact"]["phone"])

    #############################
    # upload images
    #############################
    LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
    image_upload = self.web_find(By.XPATH, "//input[@type='file']")

    def count_uploaded_images():
        return len(self.webdriver.find_elements(By.CLASS_NAME, "imagebox-new-thumbnail"))

    for image in ad_cfg["images"]:
        LOG.info(" -> uploading image [%s]", image)
        previous_uploaded_images_count = count_uploaded_images()
        image_upload.send_keys(image)
        start_at = time.time()
        # an upload is complete once an additional thumbnail shows up
        while previous_uploaded_images_count == count_uploaded_images() and time.time() - start_at < 60:
            print(".", end = '', flush = True)
            time.sleep(1)
        print(flush = True)

        ensure(previous_uploaded_images_count < count_uploaded_images(), f"Couldn't upload image [{image}] within 60 seconds")
        LOG.debug(" => uploaded image within %i seconds", time.time() - start_at)

    #############################
    # submit
    #############################
    self.web_click(By.ID, 'pstad-submit')
    self.web_await(EC.url_contains("p-anzeige-aufgeben-bestaetigung.html?adId="), 20)

    ad_cfg_orig["updated_on"] = datetime.utcnow().isoformat()
    # ad_cfg_orig is the raw file content without defaults, so these keys may be absent
    if not ad_cfg_orig.get("created_on") and not ad_cfg_orig.get("id"):
        ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"]

    # extract the ad id from the URL's query parameter
    current_url = self.webdriver.current_url
    ad_ids = urllib.parse.parse_qs(urllib.parse.urlparse(current_url).query).get('adId')
    ensure(ad_ids, f'No ad ID found in confirmation URL [{current_url}]')
    ad_id = int(ad_ids[0])
    ad_cfg_orig["id"] = ad_id

    LOG.info(" -> SUCCESS: ad published with ID %s", ad_id)

    utils.save_dict(ad_file, ad_cfg_orig)
#############################
# main entry point
#############################
# Application entry point: prints the ASCII-art banner unless "version" is among the arguments,
# sets up console logging, registers the SIGINT, uncaught-exception and exit hooks from utils,
# and runs a new KleinanzeigenBot with the given arguments.
# :param args: the raw command line arguments, passed on unchanged to KleinanzeigenBot.run()
def main(args:Iterable[str]):
if "version" not in args:
print(textwrap.dedent(r"""
_ _ _ _ _ _
| | _| | ___(_)_ __ __ _ _ __ _______(_) __ _ ___ _ __ | |__ ___ | |_
| |/ / |/ _ \ | '_ \ / _` | '_ \|_ / _ \ |/ _` |/ _ \ '_ \ ____| '_ \ / _ \| __|
| <| | __/ | | | | (_| | | | |/ / __/ | (_| | __/ | | |____| |_) | (_) | |_
|_|\_\_|\___|_|_| |_|\__,_|_| |_/___\___|_|\__, |\___|_| |_| |_.__/ \___/ \__|
|___/
https://github.com/kleinanzeigen-bot
"""), flush = True)
utils.configure_console_logging()
signal.signal(signal.SIGINT, utils.on_sigint) # capture CTRL+C
# uncaught exceptions are reported through the logging setup instead of the default hook
sys.excepthook = utils.on_exception
# utils.on_exit flushes the log handlers at interpreter exit
atexit.register(utils.on_exit)
KleinanzeigenBot().run(args)
# Running this module file directly is rejected: an error is logged and the process exits with status 1.
if __name__ == '__main__':
# console logging is set up first so that the error message below becomes visible
utils.configure_console_logging()
LOG.error("Direct execution not supported. Use 'python -m kleinanzeigen_bot'")
sys.exit(1)

View File

@@ -0,0 +1,8 @@
"""
Copyright (C) 2022 Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import sys
import kleinanzeigen_bot
# Hand the complete argument vector (including the program name) to the package's main() function.
# NOTE(review): presumably this is the package's __main__ module used by "python -m kleinanzeigen_bot" - confirm
kleinanzeigen_bot.main(sys.argv)

View File

View File

@@ -0,0 +1,18 @@
active:
type:
title:
description:
category:
price:
price_type:
shipping_type:
images: []
contact:
name:
street:
zipcode:
phone:
republication_interval:
id:
created_on:
updated_on:

View File

@@ -0,0 +1,17 @@
# Elektronik
Notebooks: 161/27
PCs: 161/228
PC-Zubehör: 161/225/sonstiges
Software: 161/225/software
Telefone: 161/173/telefone
# Freizeit
Sammeln: 185/234/sonstige
# Mode & Beauty
Gesundheit: 153/224/gesundheit
# Sonstiges
Tauschen: 272/273
Verleihen: 272/274
Verschenken: 272/192

View File

@@ -0,0 +1,35 @@
ad_files:
- "**/ad_*.json"
- "**/ad_*.yml"
- "**/ad_*.yaml"
ad_defaults:
active: true
type: OFFER
description:
prefix:
suffix:
price_type: NEGOTIABLE
shipping_type: SHIPPING
contact:
name:
street:
zipcode:
phone:
republication_interval: 7
categories: []
browser:
# https://peter.sh/experiments/chromium-command-line-switches/
arguments:
# https://stackoverflow.com/a/50725918/5116073
- --disable-dev-shm-usage
- --no-sandbox
# --headless
# --start-maximized
binary_location:
login:
username:
password:

View File

@@ -0,0 +1,261 @@
"""
Copyright (C) 2022 Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import logging, os, shutil, sys, tempfile
from typing import Any, Callable, Dict, Final, Iterable, Tuple
from importlib.resources import read_text as get_resource_as_string
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService, DEFAULT_EXECUTEABLE_PATH as DEFAULT_CHROMEDRIVER_PATH
from selenium.webdriver.chromium.webdriver import ChromiumDriver
from selenium.webdriver.edge.service import Service as EdgeService, DEFAULT_EXECUTEABLE_PATH as DEFAULT_EDGEDRIVER_PATH
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait
import selenium_stealth
import webdriver_manager.utils as ChromeDriverManagerUtils
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from webdriver_manager.utils import ChromeType
from .utils import ensure, is_frozen, pause
# Logger of this module; by its dotted name a child of the "kleinanzeigen_bot" logger.
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.selenium_mixin")
class SeleniumMixin:
def __init__(self):
    """Initializes the browser settings with empty defaults; no WebDriver session is created here."""
    # set by create_webdriver_session()
    self.webdriver:WebDriver = None
    # optional path of the browser executable to use
    self.browser_binary_location:str = None
    # additional command line arguments passed to the browser
    self.browser_arguments:Iterable[str] = []
def __del__(self):
if getattr(self, 'cacertfile', None):
os.remove(self.cacertfile)
def create_webdriver_session(self) -> None:
    """
    Starts a Chrome, Chromium or Edge WebDriver session and applies the selenium-stealth tweaks to it.

    A driver executable located via the default chromedriver/edgedriver names is used as is;
    otherwise the installed browser is detected and a matching driver is installed via webdriver_manager.

    :raises AssertionError: if no driver executable is available and the browser version cannot be determined
    """
    LOG.info("Creating WebDriver session...")

    def init_browser_options(browser_options):
        browser_options.add_argument("--disable-crash-reporter")
        browser_options.add_argument("--no-first-run")
        browser_options.add_argument("--no-service-autorun")
        for chrome_option in self.browser_arguments:
            LOG.info(" -> Custom chrome argument: %s", chrome_option)
            browser_options.add_argument(chrome_option)
        browser_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        browser_options.add_experimental_option('useAutomationExtension', False)
        browser_options.add_experimental_option("prefs", {
            "credentials_enable_service": False,
            "profile.password_manager_enabled": False,
            "devtools.preferences.currentDockState": "\"bottom\""
        })

        if self.browser_binary_location:
            browser_options.binary_location = self.browser_binary_location
            LOG.info(" -> Chrome binary location: %s", self.browser_binary_location)
        return browser_options

    # if run via py2exe fix resource lookup
    if is_frozen():
        import pathlib  # pylint: disable=import-outside-toplevel

        # provide the bundled CA certificates as a real file unless a valid bundle is configured already
        ca_bundle = os.getenv("REQUESTS_CA_BUNDLE", None)
        if not ca_bundle or not os.path.exists(ca_bundle):
            with tempfile.NamedTemporaryFile(delete = False) as tmp:
                LOG.debug("Writing cacert file to [%s]...", tmp.name)
                tmp.write(get_resource_as_string("certifi", "cacert.pem").encode('utf-8'))
            # remembered so that __del__ can remove the file again
            self.cacertfile = tmp.name
            os.environ['REQUESTS_CA_BUNDLE'] = self.cacertfile

        read_text_orig = pathlib.Path.read_text

        # selenium_stealth files are served from the package resources instead of the file system
        def read_text_new(self, encoding = None, errors = None):
            path = str(self)
            if "selenium_stealth" in path:
                return get_resource_as_string("selenium_stealth", self.name)
            return read_text_orig(self, encoding, errors)
        pathlib.Path.read_text = read_text_new

    # check if a chrome driver is present already
    if shutil.which(DEFAULT_CHROMEDRIVER_PATH):
        self.webdriver = webdriver.Chrome(options = init_browser_options(webdriver.ChromeOptions()))
    elif shutil.which(DEFAULT_EDGEDRIVER_PATH):
        self.webdriver = webdriver.ChromiumEdge(options = init_browser_options(webdriver.EdgeOptions()))
    else:
        # determine browser major version
        if self.browser_binary_location:
            chrome_type, chrome_version = self.get_browser_version(self.browser_binary_location)
        else:
            chrome_type, chrome_version = self.get_browser_version_from_os()
        # fail with a clear message instead of an AttributeError when detection yields no version
        ensure(chrome_version, "Could not determine the browser version. Install Chromium, Google Chrome or Microsoft Edge, or check [browser.binary_location]")
        chrome_major_version = chrome_version.split(".", 1)[0]

        # download and install matching chrome driver
        if chrome_type == ChromeType.MSEDGE:
            webdriver_mgr = EdgeChromiumDriverManager(cache_valid_range = 14)
            webdriver_mgr.driver.browser_version = chrome_major_version
            webdriver_path = webdriver_mgr.install()
            self.webdriver = webdriver.ChromiumEdge(service = EdgeService(webdriver_path), options = init_browser_options(webdriver.EdgeOptions()))
        else:
            webdriver_mgr = ChromeDriverManager(chrome_type = chrome_type, cache_valid_range = 14)
            webdriver_mgr.driver.browser_version = chrome_major_version
            webdriver_path = webdriver_mgr.install()
            self.webdriver = webdriver.Chrome(service = ChromeService(webdriver_path), options = init_browser_options(webdriver.ChromeOptions()))

    # workaround to support Edge, see https://github.com/diprajpatra/selenium-stealth/pull/25
    selenium_stealth.Driver = ChromiumDriver

    selenium_stealth.stealth(self.webdriver, # https://github.com/diprajpatra/selenium-stealth#args
        languages = ("de-DE", "de", "en-US", "en"),
        vendor = "Google Inc.",
        platform = "Win32",
        webgl_vendor = "Intel Inc.",
        renderer = "Intel Iris OpenGL Engine",
        fix_hairline = True,
    )

    LOG.info("New WebDriver session is: %s %s", self.webdriver.session_id, self.webdriver.command_executor._url)  # pylint: disable=protected-access
def get_browser_version(self, executable_path: str) -> Tuple[ChromeType, str]:
    """
    Determines the type and version of the browser at the given path.

    On Windows the information is read from the executable's version resource,
    elsewhere the version is queried via a command line call and the type is
    guessed from the file name. Anything not recognized is treated as Google Chrome.

    :param executable_path: path of the browser executable
    :return: tuple of browser type and version string
    """
    if sys.platform == "win32":
        import win32api  # pylint: disable=import-outside-toplevel,import-error
        # pylint: disable=no-member
        lang, codepage = win32api.GetFileVersionInfo(executable_path, "\\VarFileInfo\\Translation")[0]
        string_file_info = f"\\StringFileInfo\\{lang:04X}{codepage:04X}"
        product_name = win32api.GetFileVersionInfo(executable_path, f"{string_file_info}\\ProductName")
        product_version = win32api.GetFileVersionInfo(executable_path, f"{string_file_info}\\ProductVersion")
        # pylint: enable=no-member
        if product_name == "Chromium":
            browser_type = ChromeType.CHROMIUM
        elif product_name == "Microsoft Edge":
            browser_type = ChromeType.MSEDGE
        else:  # "Google Chrome"
            browser_type = ChromeType.GOOGLE
        return (browser_type, product_version)

    if sys.platform.startswith("linux"):
        cmd = ChromeDriverManagerUtils.linux_browser_apps_to_cmd(executable_path)
    else:
        cmd = executable_path + " --version"

    version = ChromeDriverManagerUtils.read_version_from_cmd(cmd, r'\d+\.\d+\.\d+')

    filename = os.path.basename(executable_path).lower()
    for marker, browser_type in (("chromium", ChromeType.CHROMIUM), ("edge", ChromeType.MSEDGE)):
        if marker in filename:
            return (browser_type, version)
    return (ChromeType.GOOGLE, version)
def get_browser_version_from_os(self) -> Tuple[ChromeType, str]:
    """
    Looks for an installed browser, trying Chromium, Google Chrome and Microsoft Edge in that order.

    :return: tuple of browser type and version of the first browser found, (None, None) if there is none
    """
    candidates = (
        (ChromeType.CHROMIUM, "Chromium not found"),
        (ChromeType.GOOGLE, "Google Chrome not found"),
        (ChromeType.MSEDGE, "Microsoft Edge not found"),
    )
    for browser_type, not_found_msg in candidates:
        version = ChromeDriverManagerUtils.get_browser_version_from_os(browser_type)
        if version != "UNKNOWN":
            return (browser_type, version)
        LOG.debug(not_found_msg)

    return (None, None)
def web_await(self, condition: Callable[[WebDriver], WebElement], timeout:int = 5) -> WebElement:
    """
    Blocks until the given condition is met and returns the condition's result.

    :param timeout: timeout in seconds
    :raises NoSuchElementException: if element could not be found within time
    """
    waiter = WebDriverWait(self.webdriver, timeout)
    try:
        result = waiter.until(condition)
    except TimeoutException as ex:
        # callers only have to deal with a single exception type
        raise NoSuchElementException from ex
    return result
def web_click(self, selector_type:By, selector_value:str, timeout:int = 5) -> WebElement:
    """
    Waits for the element to become clickable, clicks it and pauses briefly.

    :param timeout: timeout in seconds
    :return: the clicked element
    :raises NoSuchElementException: if element could not be found within time
    """
    is_clickable = EC.element_to_be_clickable((selector_type, selector_value))
    target = self.web_await(is_clickable, timeout)
    target.click()
    pause()
    return target
def web_execute(self, javascript:str) -> Any:
    """
    Runs the given JavaScript in the current browser window.

    :return: The command's JSON response
    """
    driver = self.webdriver
    return driver.execute_script(javascript)
def web_find(self, selector_type:By, selector_value:str, timeout:int = 5) -> WebElement:
    """
    Waits for the element to be present on the page and returns it.

    :param timeout: timeout in seconds
    :raises NoSuchElementException: if element could not be found within time
    """
    is_present = EC.presence_of_element_located((selector_type, selector_value))
    return self.web_await(is_present, timeout)
def web_input(self, selector_type:By, selector_value:str, text:str, timeout:int = 5) -> WebElement:
    """
    Replaces the content of an input field with the given text and pauses briefly.

    :param text: the text to type into the field
    :param timeout: timeout in seconds
    :return: the input field
    :raises NoSuchElementException: if element could not be found within time
    """
    input_field = self.web_find(selector_type, selector_value, timeout)
    input_field.clear()
    input_field.send_keys(text)
    pause()
    # the signature promises the element, like web_click() and web_select() do
    return input_field
def web_open(self, url, timeout = 10, reload_if_already_open = False) -> None:
    """
    Navigates to the given URL and waits until the document is completely loaded.

    :param timeout: timeout in seconds for the page to finish loading
    :param reload_if_already_open: if False, nothing happens when the browser already shows the URL
    """
    LOG.debug(" -> Opening [%s]...", url)
    if not reload_if_already_open:
        if url == self.webdriver.current_url:
            LOG.debug(" => skipping, [%s] is already open", url)
            return

    self.webdriver.get(url)

    def page_loaded(_driver):
        return self.web_execute("return document.readyState") == "complete"

    WebDriverWait(self.webdriver, timeout).until(page_loaded)
def web_request(self, url:str, method:str = "GET", valid_response_codes:Iterable[int] = (200,), headers:Dict[str, str] = None) -> Dict[str, Any]:
    """
    Performs an HTTP request from within the browser via fetch().

    :param url: the URL to request
    :param method: the HTTP method, it is upper-cased before use
    :param valid_response_codes: the HTTP status codes regarded as success
    :param headers: optional request headers
    :return: a dict with the keys statusCode, statusMessage, headers and content
    :raises AssertionError: if the response status is not one of valid_response_codes
    """
    method = method.upper()
    LOG.debug(" -> HTTP %s [%s]...", method, url)
    # url, method and headers are handed over as script arguments instead of being pasted into
    # the script text, so quotes or other special characters in them cannot break the script
    response = self.webdriver.execute_async_script("""
        var url = arguments[0];
        var method = arguments[1];
        var requestHeaders = arguments[2];
        var callback = arguments[arguments.length - 1];
        fetch(url, {
            method: method,
            redirect: "follow",
            headers: requestHeaders
        })
        .then(response => response.text().then(responseText => {
            headers = {};
            response.headers.forEach((v, k) => headers[k] = v);
            callback({
                "statusCode": response.status,
                "statusMessage": response.statusText,
                "headers": headers,
                "content": responseText
            })
        }))
    """, url, method, headers or {})

    ensure(
        response["statusCode"] in valid_response_codes,
        f'Invalid response "{response["statusCode"]} {response["statusMessage"]}" received for HTTP {method} to {url}'
    )
    return response
def web_select(self, selector_type:By, selector_value:str, selected_value:Any, timeout:int = 5) -> WebElement:
    """
    Waits for the select element to become clickable, selects the option with the given value and pauses briefly.

    :param selected_value: the value attribute of the option to select
    :param timeout: timeout in seconds
    :return: the select element
    :raises NoSuchElementException: if element could not be found within time
    """
    is_clickable = EC.element_to_be_clickable((selector_type, selector_value))
    dropdown = self.web_await(is_clickable, timeout)
    Select(dropdown).select_by_value(selected_value)
    pause()
    return dropdown

183
kleinanzeigen_bot/utils.py Normal file
View File

@@ -0,0 +1,183 @@
"""
Copyright (C) 2022 Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import copy, json, logging, os, secrets, sys, traceback, time
from importlib.resources import read_text as get_resource_as_string
from types import ModuleType
from typing import Any, Dict, Final, Iterable, Optional, Union
import coloredlogs, inflect
from ruamel.yaml import YAML
# The root logger: the console handlers are attached to it and flushed on exit.
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
# Logger of this module; by its dotted name a child of the "kleinanzeigen_bot" logger.
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.utils")
def ensure(condition:bool, error_message:str) -> None:
    """
    Does nothing if the condition is truthy, fails with the given message otherwise.

    :raises AssertionError: if condition is False
    """
    if condition:
        return
    raise AssertionError(error_message)
def is_frozen() -> bool:
    """
    Returns the value of the [frozen] attribute of the sys module, or False if there is none.

    >>> is_frozen()
    False
    """
    try:
        return sys.frozen
    except AttributeError:
        return False
def apply_defaults(target:Dict[Any, Any], defaults:Dict[Any, Any], ignore = lambda _k, _v: False, override = lambda _k, _v: False) -> Dict[Any, Any]:
    """
    Recursively copies default values into the target dict (in place).

    :param target: the dict to complete, it is modified and returned
    :param defaults: the default values, they are deep-copied on use
    :param ignore: predicate (key, default_value); if True the default of a missing key is not applied
    :param override: predicate (key, current_value); if True an existing value is replaced by the default
    :return: the target dict

    >>> apply_defaults({}, {"foo": "bar"})
    {'foo': 'bar'}
    >>> apply_defaults({"foo": "foo"}, {"foo": "bar"})
    {'foo': 'foo'}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"})
    {'foo': ''}
    >>> apply_defaults({}, {"foo": "bar"}, ignore = lambda k, _: k == "foo")
    {}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': 'bar'}
    >>> apply_defaults({"foo": None}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': None}
    >>> apply_defaults({"a": {"foo": ""}}, {"a": {"foo": "bar"}}, override = lambda _, v: v == "")
    {'a': {'foo': 'bar'}}
    """
    for key, default_value in defaults.items():
        if key in target:
            if isinstance(target[key], dict) and isinstance(default_value, dict):
                # both predicates must apply to nested dicts as well
                apply_defaults(target[key], default_value, ignore = ignore, override = override)
            elif override(key, target[key]):
                target[key] = copy.deepcopy(default_value)
        elif not ignore(key, default_value):
            target[key] = copy.deepcopy(default_value)
    return target
def safe_get(a_map:Dict[Any, Any], *keys:str) -> Any:
    """
    Looks up a value in nested dicts without failing on missing levels.

    :param a_map: the outermost dict, may be None
    :param keys: the keys to follow, one per nesting level
    :return: the value found, or None if any level is missing or not subscriptable

    >>> safe_get({"foo": {}}, "foo", "bar") is None
    True
    >>> safe_get({"foo": {"bar": "some_value"}}, "foo", "bar")
    'some_value'
    >>> safe_get({}, "foo") is None
    True
    >>> safe_get(None, "foo") is None
    True
    """
    # no special case for an empty/None root: the lookup itself fails and yields None,
    # so a falsy root is no longer returned as if it were the requested value
    for key in keys:
        try:
            a_map = a_map[key]
        except (KeyError, TypeError, IndexError):
            return None
    return a_map
def configure_console_logging() -> None:
    """
    Attaches two colored console handlers to the root logger:
    records up to level INFO go to stdout, records of level WARNING and above go to stderr.
    """
    log_format = '[%(levelname)s] %(message)s'

    # this handler is meant for stdout; it used to be bound to stderr as well
    stdout_log = logging.StreamHandler(sys.stdout)
    stdout_log.setLevel(logging.DEBUG)
    stdout_log.setFormatter(coloredlogs.ColoredFormatter(log_format))
    # a plain callable is a valid logging filter; keeps WARNING+ away from stdout
    stdout_log.addFilter(lambda rec: rec.levelno <= logging.INFO)
    LOG_ROOT.addHandler(stdout_log)

    stderr_log = logging.StreamHandler(sys.stderr)
    stderr_log.setLevel(logging.WARNING)
    stderr_log.setFormatter(coloredlogs.ColoredFormatter(log_format))
    LOG_ROOT.addHandler(stderr_log)
def on_exception(ex_type, ex_value, ex_traceback) -> None:
    """
    Exception hook that reports uncaught exceptions via the logger.

    KeyboardInterrupt is passed on to the default hook. A full traceback is logged in debug mode
    and for AttributeError, ImportError and NameError; otherwise only a short message is logged.
    """
    if issubclass(ex_type, KeyboardInterrupt):
        sys.__excepthook__(ex_type, ex_value, ex_traceback)
        return

    wants_traceback = LOG.isEnabledFor(logging.DEBUG) or isinstance(ex_value, (AttributeError, ImportError, NameError))
    if wants_traceback:
        stack_lines = traceback.format_exception(ex_type, ex_value, ex_traceback)
        LOG.error("".join(stack_lines))
        return

    if isinstance(ex_value, AssertionError):
        LOG.error(ex_value)
        return

    LOG.error("%s: %s", ex_type.__name__, ex_value)
def on_exit() -> None:
    """Flushes all handlers of the root logger."""
    for log_handler in LOG_ROOT.handlers:
        log_handler.flush()
def on_sigint(_sig:int, _frame) -> None:
    """
    Signal handler that logs a warning and ends the process with exit code 0.

    :raises SystemExit: always
    """
    LOG.warning('Aborted on user request.')
    raise SystemExit(0)
def pause(min_ms:int = 200, max_ms:int = None) -> None:
    """
    Sleeps for a random duration between min_ms (inclusive) and max_ms (exclusive).

    :param min_ms: minimum duration in milliseconds
    :param max_ms: upper bound in milliseconds, defaults to 2000; if it is not greater than min_ms
        the pause lasts exactly min_ms
    """
    if max_ms is None:
        max_ms = 2000

    # secrets.randbelow() only accepts a positive bound, e.g. pause(3000) used to fail here
    spread = max_ms - min_ms
    duration = min_ms + secrets.randbelow(spread) if spread > 0 else min_ms
    # time.sleep() rejects negative values
    duration = max(duration, 0)

    LOG.log(logging.INFO if duration > 1500 else logging.DEBUG, " ... pausing for %d ms ...", duration)
    time.sleep(duration / 1000)
def pluralize(word:str, count:Union[int, Iterable], prefix = True):
    """
    Returns the given noun in the grammatical number matching the count.

    :param count: a number, or an iterable whose length is used as the number
    :param prefix: if True the result is preceded by the count

    >>> pluralize("field", 1)
    '1 field'
    >>> pluralize("field", 2)
    '2 fields'
    >>> pluralize("field", 2, prefix = False)
    'fields'
    """
    # the inflect engine is created on first use and kept as a function attribute
    try:
        engine = pluralize.inflect
    except AttributeError:
        engine = pluralize.inflect = inflect.engine()

    if isinstance(count, Iterable):
        count = len(count)

    plural = engine.plural_noun(word, count)
    return f'{count} {plural}' if prefix else plural
def load_dict(filepath:str, content_label:str = "", must_exist = True) -> Optional[Dict[str, Any]]:
    """
    Loads a JSON or YAML file.

    :param content_label: optional description of the content, only used in the log message
    :param must_exist: if False, None is returned for a file that does not exist
    :raises ValueError: if the file name does not end with .json, .yaml or .yml
    :raises FileNotFoundError: if the file does not exist and must_exist is True
    """
    filepath = os.path.abspath(filepath)
    label_prefix = content_label + " from " if content_label else ""
    LOG.info("Loading %s[%s]...", label_prefix, filepath)

    file_ext = os.path.splitext(filepath)[1]
    if file_ext not in (".json", ".yaml", ".yml"):
        raise ValueError(f'Unsupported file type. The file name "{filepath}" must end with *.json, *.yaml, or *.yml')

    if not os.path.exists(filepath):
        if not must_exist:
            return None
        raise FileNotFoundError(filepath)

    with open(filepath, encoding = "utf-8") as file:
        if filepath.endswith(".json"):
            return json.load(file)
        return YAML().load(file)
def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "", must_exist = True) -> Optional[Dict[str, Any]]:
    """
    Loads a JSON or YAML resource that is bundled with the given module.

    :param content_label: optional description of the content, only used in the log message
    :param must_exist: if False, None is returned for a resource that does not exist
    :raises ValueError: if the file name does not end with .json, .yaml or .yml
    :raises FileNotFoundError: if the resource does not exist and must_exist is True
    """
    label_prefix = content_label + " from " if content_label else ""
    LOG.debug("Loading %s[%s.%s]...", label_prefix, module.__name__, filename)

    file_ext = os.path.splitext(filename)[1]
    if file_ext not in (".json", ".yaml", ".yml"):
        raise ValueError(f'Unsupported file type. The file name "{filename}" must end with *.json, *.yaml, or *.yml')

    try:
        content = get_resource_as_string(module, filename)
    except FileNotFoundError as ex:
        if not must_exist:
            return None
        raise ex

    if filename.endswith(".json"):
        return json.loads(content)
    return YAML().load(content)
def save_dict(filepath:str, content:Dict[str, Any]) -> None:
    """
    Writes the given dict to a file: as JSON if the file name ends with .json, as YAML otherwise.

    :param filepath: the target file, it is overwritten if it exists
    """
    filepath = os.path.abspath(filepath)
    LOG.info("Saving [%s]...", filepath)
    as_json = filepath.endswith(".json")
    with open(filepath, "w", encoding = "utf-8") as file:
        if as_json:
            serialized = json.dumps(content, indent = 2, ensure_ascii = False)
            file.write(serialized)
            return

        yaml_writer = YAML()
        yaml_writer.indent(mapping = 2, sequence = 4, offset = 2)
        yaml_writer.allow_duplicate_keys = False
        yaml_writer.explicit_start = False
        yaml_writer.dump(content, file)