mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
initial import
This commit is contained in:
471
kleinanzeigen_bot/__init__.py
Normal file
471
kleinanzeigen_bot/__init__.py
Normal file
@@ -0,0 +1,471 @@
|
||||
"""
|
||||
Copyright (C) 2022 Sebastian Thomschke and contributors
|
||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
import atexit, copy, getopt, glob, json, logging, os, signal, sys, textwrap, time, urllib
|
||||
from datetime import datetime
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from typing import Any, Dict, Final, Iterable
|
||||
|
||||
from ruamel.yaml import YAML
|
||||
from selenium.common.exceptions import NoSuchElementException
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
|
||||
from . import utils, resources
|
||||
from .utils import apply_defaults, ensure, is_frozen, pause, pluralize, safe_get
|
||||
from .selenium_mixin import SeleniumMixin
|
||||
|
||||
# root logger: configure_file_logging() attaches the log file handler here
LOG_ROOT:Final[logging.Logger] = logging.getLogger()

# logger of this package; -v/--verbose raises it to DEBUG in parse_args()
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot")
LOG.setLevel(logging.INFO)

# fall back to a placeholder when the version module is not available
try:
    from .version import version as VERSION
except ModuleNotFoundError:
    VERSION = "unknown"
|
||||
|
||||
|
||||
class KleinanzeigenBot(SeleniumMixin):
|
||||
|
||||
def __init__(self):
    """
    Initializes the bot with default paths and empty configuration.
    Nothing is loaded from disk here, see load_config().
    """
    super().__init__()

    self.root_url = "https://www.ebay-kleinanzeigen.de"

    # both are populated by load_config()
    self.config:Dict[str, Any] = {}
    self.categories:Dict[str, str] = {}

    working_dir = os.getcwd()
    self.config_file_path = os.path.join(working_dir, "config.yaml")

    # the handler is only created by configure_file_logging()
    self.file_log:logging.FileHandler = None
    # a frozen build names the log file after the executable, otherwise after this module
    log_file_basename = os.path.splitext(os.path.basename(sys.executable))[0] if is_frozen() else self.__module__
    self.log_file_path = os.path.join(working_dir, f"{log_file_basename}.log")

    # command that runs when none is given on the command line
    self.command = "help"
|
||||
|
||||
def __del__(self):
    """
    Detaches and closes the log file handler, then runs the cleanup of the parent class.
    """
    # getattr: __del__ also runs for instances whose __init__ did not complete
    file_log = getattr(self, "file_log", None)
    if file_log:
        LOG_ROOT.removeHandler(file_log)
        # removeHandler() only detaches the handler, the log file stays open until close()
        file_log.close()
        self.file_log = None
    super().__del__()
|
||||
|
||||
def run(self, args:Iterable[str]) -> None:
    """
    Parses the command line and executes the selected command.

    :param args: the complete argument vector, the first element is skipped by parse_args()
    """
    separator = "############################################"

    self.parse_args(args)
    command = self.command

    if command == "help":
        self.show_help()
        return

    if command == "version":
        print(VERSION)
        return

    if command == "verify":
        self.configure_file_logging()
        self.load_config()
        self.load_ads()
        LOG.info(separator)
        LOG.info("No configuration errors found.")
        LOG.info(separator)
        return

    if command == "publish":
        self.configure_file_logging()
        self.load_config()
        ads = self.load_ads()
        if len(ads) == 0:
            LOG.info(separator)
            LOG.info("No ads to (re-)publish found.")
            LOG.info(separator)
            return
        # a browser session is only needed when there is something to publish
        self.create_webdriver_session()
        self.login()
        self.publish_ads(ads)
        return

    LOG.error("Unknown command: %s", command)
    sys.exit(2)
|
||||
|
||||
def show_help(self) -> None:
    """
    Prints the command line usage to stdout.
    """
    # a frozen build is invoked via its executable, otherwise via "python -m <package dir>"
    exe = sys.argv[0] if is_frozen() else f"python -m {os.path.relpath(os.path.join(__file__, '..'))}"

    usage = textwrap.dedent(f"""\
        Usage: {exe} COMMAND [-v|--verbose] [--config=<PATH>] [--logfile=<PATH>]

        Commands:
          publish - (re-)publishes ads
          verify  - verifies the configuration files
          --
          help    - displays this help (default command)
          version - displays the application version
        """)
    print(usage)
|
||||
|
||||
def parse_args(self, args:Iterable[str]) -> None:
    """
    Evaluates the command line and stores the result in self.command,
    self.config_file_path and self.log_file_path.

    Exits with code 2 on invalid options or more than one command,
    and with code 0 after printing the help for -h/--help.

    :param args: the complete argument vector; the first element is skipped
    """
    try:
        options, arguments = getopt.gnu_getopt(args[1:], "hv", ["help", "verbose", "logfile=", "config="])
    except getopt.error as ex:
        LOG.error(ex.msg)
        LOG.error("Use --help to display available options")
        sys.exit(2)

    for option, value in options:
        if option in ("-h", "--help"):
            self.show_help()
            sys.exit(0)
        elif option == "--config":
            self.config_file_path = os.path.abspath(value)
        elif option == "--logfile":
            # an empty value ("--logfile=") turns file logging off
            self.log_file_path = os.path.abspath(value) if value else None
        elif option in ("-v", "--verbose"):
            LOG.setLevel(logging.DEBUG)

    if len(arguments) > 1:
        LOG.error("More than one command given: %s", arguments)
        sys.exit(2)

    self.command = arguments[0] if arguments else "help"
|
||||
|
||||
def configure_file_logging(self) -> None:
    """
    Attaches a rotating log file handler to the root logger.

    Does nothing if file logging is disabled (no log file path) or already configured.
    """
    if not self.log_file_path or self.file_log:
        return

    LOG.info("Logging to [%s]...", self.log_file_path)

    # rotate after 10 MiB, keep 10 old files
    self.file_log = RotatingFileHandler(
        filename = self.log_file_path,
        maxBytes = 10 * 1024 * 1024,
        backupCount = 10,
        encoding = "utf-8"
    )
    handler = self.file_log
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] %(message)s'))
    LOG_ROOT.addHandler(handler)
|
||||
|
||||
def load_ads(self, exclude_inactive = True, exclude_undue = True) -> Iterable[Dict[str, Any]]:
    """
    Locates the ad files matching the [ad_files] glob patterns of the config, applies the
    configured defaults and validates the result.

    :param exclude_inactive: skip ads whose [active] property is falsy
    :param exclude_undue: skip ads that were last published at most [republication_interval] days ago
    :return: a list of (ad_file, ad_cfg, ad_cfg_orig) tuples; ad_cfg is the effective ad
        configuration, ad_cfg_orig the content of the ad file as loaded
    """
    LOG.info("Searching for ad files...")

    ad_files = set()
    for file_pattern in self.config["ad_files"]:
        for ad_file in glob.glob(file_pattern, root_dir = os.getcwd(), recursive = True):
            ad_files.add(os.path.abspath(ad_file))
    LOG.info(" -> found %s", pluralize("ad file", ad_files))
    if not ad_files:
        return []

    descr_prefix = self.config["ad_defaults"]["description"]["prefix"] or ""
    descr_suffix = self.config["ad_defaults"]["description"]["suffix"] or ""

    ad_fields = utils.load_dict_from_module(resources, "ad_fields.yaml")
    ads = []
    for ad_file in sorted(ad_files):

        ad_cfg_orig = utils.load_dict(ad_file, "ad file")
        # work on a copy so that the original can be written back to the ad file later
        ad_cfg = copy.deepcopy(ad_cfg_orig)
        # the description default consists of prefix/suffix and is merged separately below
        apply_defaults(ad_cfg, self.config["ad_defaults"], ignore = lambda k, _: k == "description", override = lambda _, v: v == "")
        apply_defaults(ad_cfg, ad_fields)

        if exclude_inactive and not ad_cfg["active"]:
            LOG.info(" -> excluding inactive ad [%s]", ad_file)
            continue

        if exclude_undue:
            # must be reset for every ad: otherwise an ad without any timestamp would either
            # fail with an unbound variable or be judged by the previous ad's timestamp
            last_updated_on = None
            if ad_cfg["updated_on"]:
                last_updated_on = datetime.fromisoformat(ad_cfg["updated_on"])
            elif ad_cfg["created_on"]:
                last_updated_on = datetime.fromisoformat(ad_cfg["created_on"])

            if last_updated_on:
                ad_age = datetime.utcnow() - last_updated_on
                if ad_age.days <= ad_cfg["republication_interval"]:
                    LOG.info(" -> skipping. last published %d days ago. republication is only required every %s days",
                        ad_age.days,
                        ad_cfg["republication_interval"]
                    )
                    continue

        ad_cfg["description"] = descr_prefix + (ad_cfg["description"] or "") + descr_suffix

        # pylint: disable=cell-var-from-loop
        def assert_one_of(path:str, allowed:Iterable):
            ensure(safe_get(ad_cfg, *path.split(".")) in allowed, f'-> property [{path}] must be one of: {allowed} @ [{ad_file}]')

        def assert_min_len(path:str, minlen:int):
            ensure(len(safe_get(ad_cfg, *path.split("."))) >= minlen, f'-> property [{path}] must be at least {minlen} characters long @ [{ad_file}]')

        def assert_has_value(path:str):
            ensure(safe_get(ad_cfg, *path.split(".")), f'-> property [{path}] not specified @ [{ad_file}]')
        # pylint: enable=cell-var-from-loop

        assert_one_of("type", ("OFFER", "WANTED"))
        assert_min_len("title", 10)
        assert_has_value("description")
        assert_has_value("price")
        assert_one_of("price_type", ("FIXED", "NEGOTIABLE", "GIVE_AWAY"))
        assert_one_of("shipping_type", ("PICKUP", "SHIPPING", "NOT_APPLICABLE"))
        assert_has_value("contact.name")
        assert_has_value("republication_interval")

        if ad_cfg["id"]:
            ad_cfg["id"] = int(ad_cfg["id"])

        if ad_cfg["category"]:
            # translate a category name into its path; unknown values are used as given
            ad_cfg["category"] = self.categories.get(ad_cfg["category"], ad_cfg["category"])

        if ad_cfg["images"]:
            # image patterns are resolved relative to the directory of the ad file
            ad_dir = os.path.dirname(ad_file)
            images = set()
            for image_pattern in ad_cfg["images"]:
                for image_file in glob.glob(image_pattern, root_dir = ad_dir, recursive = True):
                    _, image_file_ext = os.path.splitext(image_file)
                    ensure(image_file_ext.lower() in (".gif", ".jpg", ".jpeg", ".png"), f'Unsupported image file type [{image_file}]')
                    if os.path.isabs(image_file):
                        images.add(image_file)
                    else:
                        images.add(os.path.join(ad_dir, image_file))
            # report the directory that was actually searched, not the working directory
            ensure(images, f'No images found for given file patterns {ad_cfg["images"]} at {ad_dir}')
            ad_cfg["images"] = sorted(images)

        ads.append((
            ad_file,
            ad_cfg,
            ad_cfg_orig
        ))

    LOG.info(" -> loaded %s", pluralize("ad", ads))
    return ads
|
||||
|
||||
def load_config(self) -> None:
    """
    Loads the config file and merges it with the bundled defaults.

    A missing config file is created from the defaults. Afterwards the categories are
    loaded, the login credentials are checked and the browser settings are taken over.
    """
    defaults = utils.load_dict_from_module(resources, "config_defaults.yaml")
    loaded = utils.load_dict(self.config_file_path, "config", must_exist = False)

    if loaded is None:
        LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path)
        utils.save_dict(self.config_file_path, defaults)
        loaded = {}

    self.config = apply_defaults(loaded, defaults)

    # bundled categories first, user-defined ones are added on top
    self.categories = utils.load_dict_from_module(resources, "categories.yaml", "categories")
    custom_categories = self.config["categories"]
    if custom_categories:
        self.categories.update(custom_categories)
    LOG.info(" -> found %s", pluralize("category", self.categories))

    login_cfg = self.config["login"]
    for key in ("username", "password"):
        ensure(login_cfg[key], f'[login.{key}] not specified @ [{self.config_file_path}]')

    browser_cfg = self.config["browser"]
    self.browser_arguments = browser_cfg["arguments"]
    self.browser_binary_location = browser_cfg["binary_location"]
|
||||
|
||||
def login(self) -> None:
    """
    Opens the login page and signs in with the configured credentials.
    """
    credentials = self.config["login"]
    LOG.info("Logging in as [%s]...", credentials["username"])
    self.web_open(f'{self.root_url}/m-einloggen.html')

    # accept privacy banner
    self.web_click(By.ID, 'gdpr-banner-accept')

    for field_id, field_value in (('login-email', credentials["username"]), ('login-password', credentials["password"])):
        self.web_input(By.ID, field_id, field_value)

    # the user solves the captcha, the form is submitted by the bot afterwards
    self.handle_captcha_if_present("login-recaptcha", "but DON'T click 'Einloggen'.")

    self.web_click(By.ID, 'login-submit')

    pause(800, 3000)
|
||||
|
||||
def handle_captcha_if_present(self, captcha_element_id:str, msg:str) -> None:
    """
    Clicks the captcha element if it exists and then waits up to 5 minutes
    for the user to solve the captcha.

    :param captcha_element_id: id of the element containing the captcha iframe
    :param msg: additional instruction for the user, appended to the log message
    """
    try:
        self.web_click(By.XPATH, f'//*[@id="{captcha_element_id}"]')
    except NoSuchElementException:
        # no captcha on this page
        return

    separator = "############################################"
    LOG.warning(separator)
    LOG.warning("# Captcha present! Please solve and close the captcha, %s", msg)
    LOG.warning(separator)

    def is_captcha_solved(_) -> bool:
        return self.webdriver.find_element(By.ID, 'recaptcha-anchor').get_attribute('aria-checked') == "true"

    # the checkbox lives inside the captcha's iframe
    captcha_frame = self.web_find(By.CSS_SELECTOR, f'#{captcha_element_id} iframe')
    self.webdriver.switch_to.frame(captcha_frame)
    self.web_await(is_captcha_solved, timeout = 5 * 60)
    self.webdriver.switch_to.default_content()
|
||||
|
||||
def delete_ad(self, ad_cfg: Dict[str, Any]) -> bool:
    """
    Deletes all published ads that have the same id or the same title as the given ad
    and resets the id of the given ad.

    :param ad_cfg: the effective ad configuration; its [id] is set to None
    :return: always True
    """
    LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"])

    self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
    # the token is sent along with the delete requests below
    csrf_token = self.web_find(By.XPATH, '//meta[@name="_csrf"]').get_attribute("content")

    ads_response = self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")
    for published_ad in json.loads(ads_response["content"])["ads"]:
        published_ad_id = int(published_ad.get("id", -1))
        published_ad_title = published_ad.get("title", "")
        if ad_cfg["id"] != published_ad_id and ad_cfg["title"] != published_ad_title:
            continue

        LOG.info(" -> deleting %s '%s'...", published_ad_id, published_ad_title)
        self.web_request(
            url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}",
            method = "POST",
            headers = {'x-csrf-token': csrf_token}
        )
        pause(1500, 3000)

    ad_cfg["id"] = None
    return True
|
||||
|
||||
def publish_ads(self, ad_cfgs:Iterable[Dict[str, Any]]) -> None:
    """
    Publishes the given ads one after the other and logs a summary.

    :param ad_cfgs: (ad_file, ad_cfg, ad_cfg_orig) tuples as returned by load_ads()
    """
    separator = "############################################"

    # stays 0 when there is nothing to publish
    count = 0
    for count, (ad_file, ad_cfg, ad_cfg_orig) in enumerate(ad_cfgs, start = 1):
        LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file)
        self.publish_ad(ad_file, ad_cfg, ad_cfg_orig)
        pause(3000, 5000)

    LOG.info(separator)
    LOG.info("(Re-)published %s", pluralize("ad", count))
    LOG.info(separator)
|
||||
|
||||
def publish_ad(self, ad_file, ad_cfg: Dict[str, Any], ad_cfg_orig: Dict[str, Any]) -> None:
    """
    Deletes an already published version of the ad, fills in and submits the ad form
    and writes the new ad id and the timestamps back to the ad file.

    :param ad_file: path of the ad file; used in messages and rewritten after publishing
    :param ad_cfg: the effective ad configuration
    :param ad_cfg_orig: the ad configuration as loaded from the ad file; updated and saved
    :raises NoSuchElementException: if an expected page element does not show up within time
    """
    # a plain "import urllib" does not guarantee that the urllib.parse submodule is loaded
    from urllib.parse import parse_qs, urlparse  # pylint: disable=import-outside-toplevel

    self.delete_ad(ad_cfg)

    LOG.info("Publishing ad '%s'...", ad_cfg["title"])

    if LOG.isEnabledFor(logging.DEBUG):
        LOG.debug(" -> effective ad meta:")
        YAML().dump(ad_cfg, sys.stdout)

    self.web_open(f'{self.root_url}/p-anzeige-aufgeben-schritt2.html')

    if ad_cfg["type"] == "WANTED":
        self.web_click(By.ID, 'adType2')

    #############################
    # set title
    #############################
    self.web_input(By.ID, 'postad-title', ad_cfg["title"])

    #############################
    # set category
    #############################
    # trigger and wait for automatic category detection
    self.web_click(By.ID, 'pstad-price')
    try:
        self.web_find(By.XPATH, "//*[@id='postad-category-path'][text()]")
        is_category_auto_selected = True
    except NoSuchElementException:
        # only "element did not show up" means that no category was detected,
        # any other error must not be swallowed
        is_category_auto_selected = False

    if ad_cfg["category"]:
        self.web_click(By.ID, 'pstad-lnk-chngeCtgry')
        self.web_find(By.ID, 'postad-step1-sbmt')

        category_url = f'{self.root_url}/p-kategorie-aendern.html#?path={ad_cfg["category"]}'
        self.web_open(category_url)
        self.web_click(By.XPATH, "//*[@id='postad-step1-sbmt']/button")
    else:
        ensure(is_category_auto_selected, f'No category specified in [{ad_file}] and automatic category detection failed')

    #############################
    # set price
    #############################
    self.web_select(By.XPATH, "//select[@id='priceType']", ad_cfg["price_type"])
    if ad_cfg["price_type"] != 'GIVE_AWAY':
        self.web_input(By.ID, 'pstad-price', ad_cfg["price"])

    #############################
    # set description
    #############################
    # json.dumps() yields a properly escaped JavaScript string literal, so backslashes,
    # quotes, backticks, "${" and line breaks in the description arrive unchanged
    self.web_execute("document.querySelector('#pstad-descrptn').value = " + json.dumps(ad_cfg["description"]))

    #############################
    # set contact zipcode
    #############################
    if ad_cfg["contact"]["zipcode"]:
        self.web_input(By.ID, 'pstad-zip', ad_cfg["contact"]["zipcode"])

    #############################
    # set contact street
    #############################
    if ad_cfg["contact"]["street"]:
        self.web_input(By.ID, 'pstad-street', ad_cfg["contact"]["street"])

    #############################
    # set contact name
    #############################
    if ad_cfg["contact"]["name"]:
        self.web_input(By.ID, 'postad-contactname', ad_cfg["contact"]["name"])

    #############################
    # set contact phone
    #############################
    if ad_cfg["contact"]["phone"]:
        self.web_input(By.ID, 'postad-phonenumber', ad_cfg["contact"]["phone"])

    #############################
    # upload images
    #############################
    LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
    image_upload = self.web_find(By.XPATH, "//input[@type='file']")

    def count_uploaded_images():
        return len(self.webdriver.find_elements(By.CLASS_NAME, "imagebox-new-thumbnail"))

    for image in ad_cfg["images"]:
        LOG.info(" -> uploading image [%s]", image)
        previous_uploaded_images_count = count_uploaded_images()
        image_upload.send_keys(image)
        start_at = time.time()
        # an upload counts as done once an additional thumbnail is shown
        while previous_uploaded_images_count == count_uploaded_images() and time.time() - start_at < 60:
            print(".", end = '', flush = True)
            time.sleep(1)
        print(flush = True)

        ensure(previous_uploaded_images_count < count_uploaded_images(), f"Couldn't upload image [{image}] within 60 seconds")
        LOG.debug(" => uploaded image within %i seconds", time.time() - start_at)

    #############################
    # submit
    #############################
    self.web_click(By.ID, 'pstad-submit')
    self.web_await(EC.url_contains("p-anzeige-aufgeben-bestaetigung.html?adId="), 20)

    ad_cfg_orig["updated_on"] = datetime.utcnow().isoformat()
    # ad_cfg_orig is the ad file as loaded, so optional keys may be absent: use get()
    if not ad_cfg_orig.get("created_on") and not ad_cfg_orig.get("id"):
        ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"]

    # extract the ad id from the URL's query parameter
    confirmation_url = self.webdriver.current_url
    ad_id_values = parse_qs(urlparse(confirmation_url).query).get('adId')
    ensure(ad_id_values, f'Could not determine the id of the published ad from URL [{confirmation_url}]')
    ad_id = int(ad_id_values[0])
    ad_cfg_orig["id"] = ad_id

    LOG.info(" -> SUCCESS: ad published with ID %s", ad_id)

    utils.save_dict(ad_file, ad_cfg_orig)
|
||||
|
||||
|
||||
#############################
|
||||
# main entry point
|
||||
#############################
|
||||
def main(args:Iterable[str]):
    """
    Entry point of the application: prints the banner, sets up console logging
    and the signal/exception/exit hooks and runs the bot.

    :param args: the complete argument vector as in sys.argv
    """
    # keep the output of the "version" command free of the banner
    show_banner = "version" not in args
    if show_banner:
        banner = textwrap.dedent(r"""
         _    _      _                           _                       _           _
        | | _| | ___(_)_ __   __ _ _ __  _______(_) __ _  ___ _ __      | |__   ___ | |_
        | |/ / |/ _ \ | '_ \ / _` | '_ \|_  / _ \ |/ _` |/ _ \ '_ \ ____| '_ \ / _ \| __|
        |   <| |  __/ | | | | (_| | | | |/ /  __/ | (_| |  __/ | | |____| |_) | (_) | |_
        |_|\_\_|\___|_|_| |_|\__,_|_| |_/___\___|_|\__, |\___|_| |_|    |_.__/ \___/ \__|
                                                   |___/
                                                     https://github.com/kleinanzeigen-bot
        """)
        print(banner, flush = True)

    utils.configure_console_logging()

    signal.signal(signal.SIGINT, utils.on_sigint) # capture CTRL+C
    sys.excepthook = utils.on_exception
    atexit.register(utils.on_exit)

    bot = KleinanzeigenBot()
    bot.run(args)
|
||||
|
||||
|
||||
# this module has to be started via the package, see the error message below
if __name__ == '__main__':
    utils.configure_console_logging()
    LOG.error("Direct execution not supported. Use 'python -m kleinanzeigen_bot'")
    sys.exit(1)
|
||||
8
kleinanzeigen_bot/__main__.py
Normal file
8
kleinanzeigen_bot/__main__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""
|
||||
Copyright (C) 2022 Sebastian Thomschke and contributors
|
||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
import sys
|
||||
import kleinanzeigen_bot
|
||||
|
||||
# hand the unmodified command line over to the package's main() function
kleinanzeigen_bot.main(sys.argv)
|
||||
0
kleinanzeigen_bot/resources/__init__.py
Normal file
0
kleinanzeigen_bot/resources/__init__.py
Normal file
18
kleinanzeigen_bot/resources/ad_fields.yaml
Normal file
18
kleinanzeigen_bot/resources/ad_fields.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
active:
|
||||
type:
|
||||
title:
|
||||
description:
|
||||
category:
|
||||
price:
|
||||
price_type:
|
||||
shipping_type:
|
||||
images: []
|
||||
contact:
|
||||
name:
|
||||
street:
|
||||
zipcode:
|
||||
phone:
|
||||
republication_interval:
|
||||
id:
|
||||
created_on:
|
||||
updated_on:
|
||||
17
kleinanzeigen_bot/resources/categories.yaml
Normal file
17
kleinanzeigen_bot/resources/categories.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
# Elektronik
|
||||
Notebooks: 161/27
|
||||
PCs: 161/228
|
||||
PC-Zubehör: 161/225/sonstiges
|
||||
Software: 161/225/software
|
||||
Telefone: 161/173/telefone
|
||||
|
||||
# Freizeit
|
||||
Sammeln: 185/234/sonstige
|
||||
|
||||
# Mode & Beauty
|
||||
Gesundheit: 153/224/gesundheit
|
||||
|
||||
# Sonstiges
|
||||
Tauschen: 272/273
|
||||
Verleihen: 272/274
|
||||
Verschenken: 272/192
|
||||
35
kleinanzeigen_bot/resources/config_defaults.yaml
Normal file
35
kleinanzeigen_bot/resources/config_defaults.yaml
Normal file
@@ -0,0 +1,35 @@
|
||||
ad_files:
|
||||
- "**/ad_*.json"
|
||||
- "**/ad_*.yml"
|
||||
- "**/ad_*.yaml"
|
||||
|
||||
ad_defaults:
|
||||
active: true
|
||||
type: OFFER
|
||||
description:
|
||||
prefix:
|
||||
suffix:
|
||||
price_type: NEGOTIABLE
|
||||
shipping_type: SHIPPING
|
||||
contact:
|
||||
name:
|
||||
street:
|
||||
zipcode:
|
||||
phone:
|
||||
republication_interval: 7
|
||||
|
||||
categories: []
|
||||
|
||||
browser:
|
||||
# https://peter.sh/experiments/chromium-command-line-switches/
|
||||
arguments:
|
||||
# https://stackoverflow.com/a/50725918/5116073
|
||||
- --disable-dev-shm-usage
|
||||
- --no-sandbox
|
||||
# --headless
|
||||
# --start-maximized
|
||||
binary_location:
|
||||
|
||||
login:
|
||||
username:
|
||||
password:
|
||||
261
kleinanzeigen_bot/selenium_mixin.py
Normal file
261
kleinanzeigen_bot/selenium_mixin.py
Normal file
@@ -0,0 +1,261 @@
|
||||
"""
|
||||
Copyright (C) 2022 Sebastian Thomschke and contributors
|
||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
import logging, os, shutil, sys, tempfile
|
||||
from typing import Any, Callable, Dict, Final, Iterable, Tuple
|
||||
from importlib.resources import read_text as get_resource_as_string
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import NoSuchElementException, TimeoutException
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.service import Service as ChromeService, DEFAULT_EXECUTEABLE_PATH as DEFAULT_CHROMEDRIVER_PATH
|
||||
from selenium.webdriver.chromium.webdriver import ChromiumDriver
|
||||
from selenium.webdriver.edge.service import Service as EdgeService, DEFAULT_EXECUTEABLE_PATH as DEFAULT_EDGEDRIVER_PATH
|
||||
from selenium.webdriver.remote.webdriver import WebDriver
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.support.ui import Select, WebDriverWait
|
||||
import selenium_stealth
|
||||
import webdriver_manager.utils as ChromeDriverManagerUtils
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from webdriver_manager.microsoft import EdgeChromiumDriverManager
|
||||
from webdriver_manager.utils import ChromeType
|
||||
|
||||
from .utils import ensure, is_frozen, pause
|
||||
|
||||
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.selenium_mixin")
|
||||
|
||||
|
||||
class SeleniumMixin:
|
||||
|
||||
def __init__(self):
    """
    Initializes the browser settings with empty defaults; no browser is started here.
    """
    # set by create_webdriver_session()
    self.webdriver:WebDriver = None
    # path of the browser executable to use; detected automatically if not set
    self.browser_binary_location:str = None
    # additional command line arguments passed to the browser
    self.browser_arguments:Iterable[str] = []
|
||||
|
||||
def __del__(self):
    """
    Removes the temporary CA certificate file, if create_webdriver_session() created one.
    """
    cacertfile = getattr(self, 'cacertfile', None)
    if not cacertfile:
        return
    os.remove(cacertfile)
|
||||
|
||||
def create_webdriver_session(self) -> None:
    """
    Starts a Chromium based browser (Chrome, Chromium or Edge) and stores the
    WebDriver in self.webdriver.

    A web driver executable found on the PATH is used as is. Otherwise the browser
    version is determined and a matching driver is installed via webdriver_manager.
    """
    LOG.info("Creating WebDriver session...")

    def init_browser_options(browser_options):
        browser_options.add_argument("--disable-crash-reporter")
        browser_options.add_argument("--no-first-run")
        browser_options.add_argument("--no-service-autorun")
        for chrome_option in self.browser_arguments:
            LOG.info(" -> Custom chrome argument: %s", chrome_option)
            browser_options.add_argument(chrome_option)

        browser_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        browser_options.add_experimental_option('useAutomationExtension', False)
        browser_options.add_experimental_option("prefs", {
            "credentials_enable_service": False,
            "profile.password_manager_enabled": False,
            "devtools.preferences.currentDockState": "\"bottom\""
        })

        if self.browser_binary_location:
            browser_options.binary_location = self.browser_binary_location
            LOG.info(" -> Chrome binary location: %s", self.browser_binary_location)

        return browser_options

    # if run via py2exe fix resource lookup
    if is_frozen():
        import pathlib # pylint: disable=import-outside-toplevel

        ca_bundle = os.getenv("REQUESTS_CA_BUNDLE", None)
        if not ca_bundle or not os.path.exists(ca_bundle):
            # the temporary file is deleted again in __del__
            with tempfile.NamedTemporaryFile(delete = False) as tmp:
                LOG.debug("Writing cacert file to [%s]...", tmp.name)
                tmp.write(get_resource_as_string("certifi", "cacert.pem").encode('utf-8'))
                self.cacertfile = tmp.name
                os.environ['REQUESTS_CA_BUNDLE'] = self.cacertfile

        read_text_orig = pathlib.Path.read_text

        def read_text_new(path_obj, encoding = None, errors = None):
            # files of selenium_stealth are read via the package resource API instead
            if "selenium_stealth" in str(path_obj):
                return get_resource_as_string("selenium_stealth", path_obj.name)
            return read_text_orig(path_obj, encoding, errors)

        pathlib.Path.read_text = read_text_new

    # check if a chrome driver is present already
    if shutil.which(DEFAULT_CHROMEDRIVER_PATH):
        self.webdriver = webdriver.Chrome(options = init_browser_options(webdriver.ChromeOptions()))
    elif shutil.which(DEFAULT_EDGEDRIVER_PATH):
        self.webdriver = webdriver.ChromiumEdge(options = init_browser_options(webdriver.EdgeOptions()))
    else:
        # determine browser major version
        if self.browser_binary_location:
            chrome_type, chrome_version = self.get_browser_version(self.browser_binary_location)
        else:
            chrome_type, chrome_version = self.get_browser_version_from_os()
        # get_browser_version_from_os() returns (None, None) if no browser was found;
        # fail with a clear message instead of an AttributeError on None.split()
        ensure(chrome_version, "Could not determine the version of the browser to use. Is Chromium, Google Chrome or Microsoft Edge installed?")
        chrome_major_version = chrome_version.split(".", 1)[0]

        # download and install matching chrome driver
        if chrome_type == ChromeType.MSEDGE:
            webdriver_mgr = EdgeChromiumDriverManager(cache_valid_range = 14)
            webdriver_mgr.driver.browser_version = chrome_major_version
            webdriver_path = webdriver_mgr.install()
            self.webdriver = webdriver.ChromiumEdge(service = EdgeService(webdriver_path), options = init_browser_options(webdriver.EdgeOptions()))
        else:
            webdriver_mgr = ChromeDriverManager(chrome_type = chrome_type, cache_valid_range = 14)
            webdriver_mgr.driver.browser_version = chrome_major_version
            webdriver_path = webdriver_mgr.install()
            self.webdriver = webdriver.Chrome(service = ChromeService(webdriver_path), options = init_browser_options(webdriver.ChromeOptions()))

    # workaround to support Edge, see https://github.com/diprajpatra/selenium-stealth/pull/25
    selenium_stealth.Driver = ChromiumDriver

    selenium_stealth.stealth(self.webdriver, # https://github.com/diprajpatra/selenium-stealth#args
        languages = ("de-DE", "de", "en-US", "en"),
        vendor = "Google Inc.",
        platform = "Win32",
        webgl_vendor = "Intel Inc.",
        renderer = "Intel Iris OpenGL Engine",
        fix_hairline = True,
    )

    LOG.info("New WebDriver session is: %s %s", self.webdriver.session_id, self.webdriver.command_executor._url) # pylint: disable=protected-access
|
||||
|
||||
def get_browser_version(self, executable_path: str) -> Tuple[ChromeType, str]:
    """
    Determines type and version of the browser at the given location.

    :param executable_path: path of the browser executable
    :return: tuple of browser type and version; Google Chrome is assumed
        if the browser is neither recognized as Chromium nor as Edge
    """
    if sys.platform == "win32":
        import win32api # pylint: disable=import-outside-toplevel,import-error
        # pylint: disable=no-member
        # the version resource is looked up via the first language/codepage pair
        lang, codepage = win32api.GetFileVersionInfo(executable_path, "\\VarFileInfo\\Translation")[0]
        product_name = win32api.GetFileVersionInfo(executable_path, f"\\StringFileInfo\\{lang:04X}{codepage:04X}\\ProductName")
        product_version = win32api.GetFileVersionInfo(executable_path, f"\\StringFileInfo\\{lang:04X}{codepage:04X}\\ProductVersion")
        # pylint: enable=no-member
        if product_name == "Chromium":
            return (ChromeType.CHROMIUM, product_version)
        if product_name == "Microsoft Edge":
            return (ChromeType.MSEDGE, product_version)
        # "Google Chrome"
        return (ChromeType.GOOGLE, product_version)

    cmd = (
        ChromeDriverManagerUtils.linux_browser_apps_to_cmd(executable_path)
        if sys.platform.startswith("linux")
        else executable_path + " --version"
    )
    version = ChromeDriverManagerUtils.read_version_from_cmd(cmd, r'\d+\.\d+\.\d+')

    # outside of Windows the browser type is derived from the file name
    filename = os.path.basename(executable_path).lower()
    if "chromium" in filename:
        browser_type = ChromeType.CHROMIUM
    elif "edge" in filename:
        browser_type = ChromeType.MSEDGE
    else:
        browser_type = ChromeType.GOOGLE
    return (browser_type, version)
|
||||
|
||||
def get_browser_version_from_os(self) -> Tuple[ChromeType, str]:
    """
    Looks for an installed browser, trying Chromium, Google Chrome and Microsoft Edge in this order.

    :return: tuple of browser type and version of the first browser found,
        or (None, None) if none was found
    """
    candidates = (
        (ChromeType.CHROMIUM, "Chromium not found"),
        (ChromeType.GOOGLE, "Google Chrome not found"),
        (ChromeType.MSEDGE, "Microsoft Edge not found"),
    )
    for browser_type, not_found_msg in candidates:
        version = ChromeDriverManagerUtils.get_browser_version_from_os(browser_type)
        if version != "UNKNOWN":
            return (browser_type, version)
        LOG.debug(not_found_msg)

    return (None, None)
|
||||
|
||||
def web_await(self, condition: Callable[[WebDriver], WebElement], timeout:int = 5) -> WebElement:
    """
    Waits until the given condition returns a truthy value and returns that value.

    :param condition: callable that receives the web driver
    :param timeout: timeout in seconds
    :raises NoSuchElementException: if the condition was not met within time
    """
    wait = WebDriverWait(self.webdriver, timeout)
    try:
        result = wait.until(condition)
    except TimeoutException as ex:
        # callers only have to deal with a single exception type
        raise NoSuchElementException from ex
    return result
|
||||
|
||||
def web_click(self, selector_type:By, selector_value:str, timeout:int = 5) -> WebElement:
    """
    Waits for the element to become clickable, clicks it and pauses.

    :param timeout: timeout in seconds
    :return: the clicked element
    :raises NoSuchElementException: if element could not be found within time
    """
    is_clickable = EC.element_to_be_clickable((selector_type, selector_value))
    target = self.web_await(is_clickable, timeout)
    target.click()
    pause()
    return target
|
||||
|
||||
def web_execute(self, javascript:str) -> Any:
    """
    Runs the given JavaScript code in the current browser window.

    :param javascript: the script to execute
    :return: The command's JSON response
    """
    driver = self.webdriver
    result = driver.execute_script(javascript)
    return result
|
||||
|
||||
def web_find(self, selector_type:By, selector_value:str, timeout:int = 5) -> WebElement:
    """
    Waits until the element is present in the page and returns it.

    :param timeout: timeout in seconds
    :raises NoSuchElementException: if element could not be found within time
    """
    locator = (selector_type, selector_value)
    is_present = EC.presence_of_element_located(locator)
    return self.web_await(is_present, timeout)
|
||||
|
||||
def web_input(self, selector_type:By, selector_value:str, text:str, timeout:int = 5) -> WebElement:
    """
    Replaces the content of an input field with the given text and pauses.

    :param text: the text to enter
    :param timeout: timeout in seconds
    :return: the input field
    :raises NoSuchElementException: if element could not be found within time
    """
    input_field = self.web_find(selector_type, selector_value, timeout)
    input_field.clear()
    input_field.send_keys(text)
    pause()
    # the signature promises the element, like web_click() and web_find()
    return input_field
|
||||
|
||||
def web_open(self, url, timeout = 10, reload_if_already_open = False) -> None:
    """
    Navigates the web driver to the given URL and waits until the document is fully loaded.

    :param url: the address to open
    :param timeout: seconds to wait for document.readyState to become "complete"
    :param reload_if_already_open: if False, nothing is loaded when the driver's current URL equals url
    """
    LOG.debug(" -> Opening [%s]...", url)
    if reload_if_already_open or url != self.webdriver.current_url:
        self.webdriver.get(url)
        page_wait = WebDriverWait(self.webdriver, timeout)
        page_wait.until(lambda _: self.web_execute("return document.readyState") == "complete")
    else:
        LOG.debug(" => skipping, [%s] is already open", url)
|
||||
|
||||
# pylint: disable=dangerous-default-value
|
||||
def web_request(self, url:str, method:str = "GET", valid_response_codes:Iterable[int] = [200], headers:Dict[str, str] = None) -> Dict[str, Any]:
    """
    Performs an HTTP request from within the browser page using the JavaScript fetch API.

    :param url: the URL to request
    :param method: the HTTP method; upper-cased before use
    :param valid_response_codes: accepted HTTP status codes; the default list is only read, never modified
    :param headers: optional request headers
    :return: dict with the keys "statusCode", "statusMessage", "headers" and "content"
    :raises AssertionError: if the response status code is not one of valid_response_codes
    """
    method = method.upper()
    LOG.debug(" -> HTTP %s [%s]...", method, url)
    # url, method and headers are passed as script arguments instead of being spliced into
    # the JavaScript source, so quotes, backslashes or None values cannot break the script.
    # The driver appends the completion callback as the last argument.
    response = self.webdriver.execute_async_script("""
        var callback = arguments[arguments.length - 1];
        fetch(arguments[0], {
            method: arguments[1],
            redirect: "follow",
            headers: arguments[2]
        })
        .then(response => response.text().then(responseText => {
            var responseHeaders = {};
            response.headers.forEach((v, k) => responseHeaders[k] = v);
            callback({
                "statusCode": response.status,
                "statusMessage": response.statusText,
                "headers": responseHeaders,
                "content": responseText
            })
        }))
    """, url, method, headers or {})
    ensure(
        response["statusCode"] in valid_response_codes,
        # statusMessage is now interpolated; before, the literal text 'response["statusMessage"]' was printed
        f'Invalid response "{response["statusCode"]} {response["statusMessage"]}" received for HTTP {method} to {url}'
    )
    return response
|
||||
# pylint: enable=dangerous-default-value
|
||||
|
||||
def web_select(self, selector_type:By, selector_value:str, selected_value:Any, timeout:int = 5) -> WebElement:
    """
    Waits until the element matching the selector is clickable, selects the option
    with the given value in it and pauses.

    :param selector_type: the locator strategy
    :param selector_value: the locator value
    :param selected_value: the value of the option to select
    :param timeout: timeout in seconds
    :return: the element the selection was made in
    :raises NoSuchElementException: if element could not be found within time
    """
    locator = (selector_type, selector_value)
    dropdown = self.web_await(EC.element_to_be_clickable(locator), timeout)
    Select(dropdown).select_by_value(selected_value)
    pause()
    return dropdown
|
||||
183
kleinanzeigen_bot/utils.py
Normal file
183
kleinanzeigen_bot/utils.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""
|
||||
Copyright (C) 2022 Sebastian Thomschke and contributors
|
||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
import copy, json, logging, os, secrets, sys, traceback, time
|
||||
from importlib.resources import read_text as get_resource_as_string
|
||||
from types import ModuleType
|
||||
from typing import Any, Dict, Final, Iterable, Optional, Union
|
||||
|
||||
import coloredlogs, inflect
|
||||
from ruamel.yaml import YAML
|
||||
|
||||
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
|
||||
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.utils")
|
||||
|
||||
|
||||
def ensure(condition:bool, error_message:str) -> None:
    """
    Guard helper: returns silently when the condition is truthy, fails otherwise.

    :param condition: the value that must be truthy
    :param error_message: the message carried by the raised error
    :raises AssertionError: if condition is False
    """
    if condition:
        return
    raise AssertionError(error_message)
|
||||
|
||||
|
||||
def is_frozen() -> bool:
    """
    Returns the value of the `frozen` attribute of the `sys` module, or False if it is not set.

    >>> is_frozen()
    False
    """
    frozen_flag = getattr(sys, "frozen", False)
    return frozen_flag
|
||||
|
||||
|
||||
def apply_defaults(target:Dict[Any, Any], defaults:Dict[Any, Any], ignore = lambda _k, _v: False, override = lambda _k, _v: False) -> Dict[Any, Any]:
    """
    Recursively copies missing entries from `defaults` into `target` (modifying it in place).

    :param target: the dict to complete; it is modified and returned
    :param defaults: the dict providing default values; values are deep-copied
    :param ignore: predicate (key, default_value); if it returns True a missing key is not added
    :param override: predicate (key, current_value); if it returns True an existing non-dict value is replaced by the default
    :return: the target dict

    >>> apply_defaults({}, {"foo": "bar"})
    {'foo': 'bar'}
    >>> apply_defaults({"foo": "foo"}, {"foo": "bar"})
    {'foo': 'foo'}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"})
    {'foo': ''}
    >>> apply_defaults({}, {"foo": "bar"}, ignore = lambda k, _: k == "foo")
    {}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': 'bar'}
    >>> apply_defaults({"foo": None}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': None}
    >>> apply_defaults({"a": {"foo": ""}}, {"a": {"foo": "bar"}}, override = lambda _, v: v == "")
    {'a': {'foo': 'bar'}}
    """
    for key, default_value in defaults.items():
        if key not in target:
            if not ignore(key, default_value):
                target[key] = copy.deepcopy(default_value)
            continue

        current_value = target[key]
        if isinstance(current_value, dict) and isinstance(default_value, dict):
            # pass both predicates down; previously `override` was dropped for nested dicts
            apply_defaults(current_value, default_value, ignore = ignore, override = override)
        elif override(key, current_value):
            target[key] = copy.deepcopy(default_value)
    return target
|
||||
|
||||
|
||||
def safe_get(a_map:Dict[Any, Any], *keys:str) -> Any:
    """
    Follows the given keys through nested maps and returns the value found at the end.

    :param a_map: the outermost map; may be None or empty
    :param keys: the keys to look up, one nesting level per key
    :return: the value at the end of the key path, None if any step of the path cannot
        be resolved, or `a_map` itself if no keys are given

    >>> safe_get({"foo": {}}, "foo", "bar") is None
    True
    >>> safe_get({"foo": {"bar": "some_value"}}, "foo", "bar")
    'some_value'
    >>> safe_get({}, "foo") is None
    True
    >>> safe_get(None, "foo") is None
    True
    """
    current = a_map
    for key in keys:
        try:
            current = current[key]
        except (KeyError, TypeError):
            # missing key, or the current value (e.g. None) cannot be indexed;
            # this now also covers an empty/None outer map, which used to be returned as-is
            return None
    return current
|
||||
|
||||
|
||||
def configure_console_logging() -> None:
    """
    Attaches two console handlers to the root logger: records up to and including
    level INFO are written to stdout, records of level WARNING and above to stderr.
    """
    # was bound to sys.stderr, which sent every record to stderr despite the handler's purpose
    stdout_log = logging.StreamHandler(sys.stdout)
    stdout_log.setLevel(logging.DEBUG)
    stdout_log.setFormatter(coloredlogs.ColoredFormatter('[%(levelname)s] %(message)s'))
    # a plain callable is a valid logging filter; it keeps WARNING and above off stdout
    # because those records are emitted by stderr_log below
    stdout_log.addFilter(lambda rec: rec.levelno <= logging.INFO)
    LOG_ROOT.addHandler(stdout_log)

    stderr_log = logging.StreamHandler(sys.stderr)
    stderr_log.setLevel(logging.WARNING)
    stderr_log.setFormatter(coloredlogs.ColoredFormatter('[%(levelname)s] %(message)s'))
    LOG_ROOT.addHandler(stderr_log)
|
||||
|
||||
|
||||
def on_exception(ex_type, ex_value, ex_traceback) -> None:
    """
    Exception hook that reports uncaught exceptions through the module logger.

    KeyboardInterrupt is delegated to the interpreter's original hook. A full traceback
    is logged if DEBUG logging is enabled or the exception is an AttributeError,
    ImportError or NameError. An AssertionError is logged as-is; any other exception
    is logged as "<type name>: <value>".

    :param ex_type: the exception class
    :param ex_value: the exception instance
    :param ex_traceback: the traceback object
    """
    if issubclass(ex_type, KeyboardInterrupt):
        sys.__excepthook__(ex_type, ex_value, ex_traceback)
        return

    wants_traceback = LOG.isEnabledFor(logging.DEBUG) or isinstance(ex_value, (AttributeError, ImportError, NameError))
    if wants_traceback:
        formatted_lines = traceback.format_exception(ex_type, ex_value, ex_traceback)
        LOG.error("".join(formatted_lines))
        return

    if isinstance(ex_value, AssertionError):
        LOG.error(ex_value)
        return

    LOG.error("%s: %s", ex_type.__name__, ex_value)
|
||||
|
||||
|
||||
def on_exit() -> None:
    """
    Flushes every handler registered on the root logger.
    """
    for log_handler in LOG_ROOT.handlers:
        log_handler.flush()
|
||||
|
||||
|
||||
def on_sigint(_sig:int, _frame) -> None:
    """
    Signal handler: logs a warning and terminates the process with exit code 0.

    :param _sig: the signal number (unused)
    :param _frame: the current stack frame (unused)
    """
    LOG.warning('Aborted on user request.')
    raise SystemExit(0)
|
||||
|
||||
|
||||
def pause(min_ms:int = 200, max_ms:int = None) -> None:
    """
    Sleeps for a random duration between min_ms (inclusive) and max_ms (exclusive).

    :param min_ms: lower bound of the pause in milliseconds
    :param max_ms: upper bound of the pause in milliseconds; defaults to 2000 if None.
        If it is not greater than min_ms, the pause lasts exactly min_ms.
    """
    if max_ms is None:
        max_ms = 2000
    if max_ms <= min_ms:
        # secrets.randbelow() raises ValueError for a non-positive bound, e.g. on pause(2000)
        duration = min_ms
    else:
        duration = secrets.randbelow(max_ms - min_ms) + min_ms
    LOG.log(logging.INFO if duration > 1500 else logging.DEBUG, " ... pausing for %d ms ...", duration)
    time.sleep(duration / 1000)
|
||||
|
||||
|
||||
def pluralize(word:str, count:Union[int, Iterable], prefix = True):
    """
    Builds the plural form of a noun matching the given count.

    :param word: the noun in singular form
    :param count: a number, or an iterable whose length is used as the number
    :param prefix: if True the count is prepended to the result

    >>> pluralize("field", 1)
    '1 field'
    >>> pluralize("field", 2)
    '2 fields'
    >>> pluralize("field", 2, prefix = False)
    'fields'
    """
    # the inflect engine is created on first use and kept as an attribute of this function
    if not hasattr(pluralize, "inflect"):
        pluralize.inflect = inflect.engine()

    quantity = len(count) if isinstance(count, Iterable) else count
    plural_form = pluralize.inflect.plural_noun(word, quantity)
    return f'{quantity} {plural_form}' if prefix else plural_form
|
||||
|
||||
|
||||
def load_dict(filepath:str, content_label:str = "", must_exist = True) -> Optional[Dict[str, Any]]:
    """
    Reads a JSON or YAML file and returns the parsed content.

    :param filepath: path of the file; must end with .json, .yaml or .yml
    :param content_label: optional description of the content, only used in the log message
    :param must_exist: if False, a missing file results in None instead of an error
    :return: the parsed content, or None if the file is missing and must_exist is False
    :raises ValueError: if the file extension is not supported
    :raises FileNotFoundError: if the file is missing and must_exist is True
    """
    filepath = os.path.abspath(filepath)
    label_prefix = content_label + " from " if content_label else ""
    LOG.info("Loading %s[%s]...", label_prefix, filepath)

    extension = os.path.splitext(filepath)[1]
    if extension not in (".json", ".yaml", ".yml"):
        raise ValueError(f'Unsupported file type. The file name "{filepath}" must end with *.json, *.yaml, or *.yml')

    if not os.path.exists(filepath):
        if must_exist:
            raise FileNotFoundError(filepath)
        return None

    with open(filepath, encoding = "utf-8") as file:
        if filepath.endswith(".json"):
            return json.load(file)
        return YAML().load(file)
|
||||
|
||||
|
||||
def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "", must_exist = True) -> Optional[Dict[str, Any]]:
    """
    Reads a JSON or YAML resource located in the given module and returns the parsed content.

    :param module: the module containing the resource
    :param filename: name of the resource; must end with .json, .yaml or .yml
    :param content_label: optional description of the content, only used in the log message
    :param must_exist: if False, a missing resource results in None instead of an error
    :return: the parsed content, or None if the resource is missing and must_exist is False
    :raises ValueError: if the file extension is not supported
    :raises FileNotFoundError: if the resource is missing and must_exist is True
    """
    label_prefix = content_label + " from " if content_label else ""
    LOG.debug("Loading %s[%s.%s]...", label_prefix, module.__name__, filename)

    extension = os.path.splitext(filename)[1]
    if extension not in (".json", ".yaml", ".yml"):
        raise ValueError(f'Unsupported file type. The file name "{filename}" must end with *.json, *.yaml, or *.yml')

    try:
        raw_text = get_resource_as_string(module, filename)
    except FileNotFoundError as ex:
        if not must_exist:
            return None
        raise ex

    if filename.endswith(".json"):
        return json.loads(raw_text)
    return YAML().load(raw_text)
|
||||
|
||||
|
||||
def save_dict(filepath:str, content:Dict[str, Any]) -> None:
    """
    Writes the given dict to a file, as JSON if the path ends with .json and as YAML otherwise.

    :param filepath: path of the file to write
    :param content: the data to serialize
    """
    filepath = os.path.abspath(filepath)
    LOG.info("Saving [%s]...", filepath)
    with open(filepath, "w", encoding = "utf-8") as file:
        if filepath.endswith(".json"):
            serialized = json.dumps(content, indent = 2, ensure_ascii = False)
            file.write(serialized)
            return

        yaml_writer = YAML()
        yaml_writer.indent(mapping = 2, sequence = 4, offset = 2)
        yaml_writer.allow_duplicate_keys = False
        yaml_writer.explicit_start = False
        yaml_writer.dump(content, file)
|
||||
Reference in New Issue
Block a user