feat: allow auto-restart on captcha (#481)

Co-authored-by: sebthom <sebthom@users.noreply.github.com>
This commit is contained in:
Airwave1981
2025-04-26 14:40:47 +02:00
committed by GitHub
parent 4891c142a9
commit d87ae6e740
6 changed files with 67 additions and 6 deletions

View File

@@ -265,6 +265,14 @@ publishing:
delete_old_ads: "AFTER_PUBLISH" # one of: AFTER_PUBLISH, BEFORE_PUBLISH, NEVER
delete_old_ads_by_title: true # only works if delete_old_ads is set to BEFORE_PUBLISH
# captcha handling (optional)
# By default the bot waits for you to solve a captcha manually. To have it abort the run, pause for a defined period and then restart automatically instead, enable the captcha section:
captcha:
auto_restart: true # If true, the bot aborts when a Captcha appears and retries publishing later
# If false (default), the Captcha must be solved manually to continue
restart_delay: 1h 30m # Time to wait before retrying after a Captcha was encountered (default: 6h)
# browser configuration
browser:
# https://peter.sh/experiments/chromium-command-line-switches/

View File

@@ -20,6 +20,7 @@ from . import extract, resources
from ._version import __version__
from .ads import calculate_content_hash, get_description_affixes
from .utils import dicts, error_handlers, loggers, misc
from .utils.exceptions import CaptchaEncountered
from .utils.files import abspath
from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale
from .utils.misc import ainput, ensure, is_frozen, parse_datetime, parse_decimal
@@ -773,7 +774,16 @@ class KleinanzeigenBot(WebScrapingMixin):
# wait for captcha
#############################
try:
await self.web_find(By.CSS_SELECTOR, "iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']", timeout = 2)
await self.web_find(
By.CSS_SELECTOR,
"iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']",
timeout = 2)
if self.config.get("captcha", {}).get("auto_restart", False):
LOG.warning("Captcha recognized - auto-restart enabled, abort run...")
raise CaptchaEncountered(misc.parse_duration(self.config.get("captcha", {}).get("restart_delay", "6h")))
# Fallback: the captcha must be solved manually
LOG.warning("############################################")
LOG.warning("# Captcha present! Please solve the captcha.")
LOG.warning("############################################")
@@ -1128,7 +1138,7 @@ class KleinanzeigenBot(WebScrapingMixin):
else dicts.safe_get(ad_cfg, "description", "prefix")
if dicts.safe_get(ad_cfg, "description", "prefix") is not None
# 3. Global prefix from config
else get_description_affixes(self.config, prefix=True)
else get_description_affixes(self.config, prefix = True)
or "" # Default to empty string if all sources are None
)
@@ -1140,7 +1150,7 @@ class KleinanzeigenBot(WebScrapingMixin):
else dicts.safe_get(ad_cfg, "description", "suffix")
if dicts.safe_get(ad_cfg, "description", "suffix") is not None
# 3. Global suffix from config
else get_description_affixes(self.config, prefix=False)
else get_description_affixes(self.config, prefix = False)
or "" # Default to empty string if all sources are None
)

View File

@@ -3,7 +3,23 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import sys
import kleinanzeigen_bot
import sys, time
from gettext import gettext as _
kleinanzeigen_bot.main(sys.argv)
import kleinanzeigen_bot
from kleinanzeigen_bot.utils.exceptions import CaptchaEncountered
from kleinanzeigen_bot.utils.misc import format_timedelta
# --------------------------------------------------------------------------- #
# Main loop: run bot → if captcha → sleep → restart
# --------------------------------------------------------------------------- #
# Supervisor loop: each iteration performs one complete bot run. A run that
# is aborted by a captcha is followed by a pause and then a fresh run; a run
# that returns normally terminates the process.
while True:
    try:
        kleinanzeigen_bot.main(sys.argv)  # runs & returns when finished
    except CaptchaEncountered as captcha:
        pause = captcha.restart_delay
        print(_("[INFO] Captcha detected. Sleeping %s before restart...") % format_timedelta(pause))
        time.sleep(pause.total_seconds())
        # fall through to the next iteration, which starts a fresh run
    else:
        # not using `break` to prevent process closing issues
        sys.exit(0)

View File

@@ -12,6 +12,11 @@ getopt.py:
short_has_arg:
"option -%s not recognized": "Option -%s unbekannt"
#################################################
kleinanzeigen_bot/__main__.py:
#################################################
module:
"[INFO] Captcha detected. Sleeping %s before restart...": "[INFO] Captcha erkannt. Warte %s bis zum Neustart..."
#################################################
kleinanzeigen_bot/__init__.py:
@@ -78,6 +83,7 @@ kleinanzeigen_bot/__init__.py:
" -> SUCCESS: ad published with ID %s": " -> ERFOLG: Anzeige mit ID %s veröffentlicht"
" -> effective ad meta:": " -> effektive Anzeigen-Metadaten:"
"Could not set city from location": "Stadt konnte nicht aus dem Standort gesetzt werden"
"Captcha recognized - auto-restart enabled, abort run...": "Captcha erkannt - Auto-Neustart aktiviert, Durchlauf wird beendet..."
__set_condition:
"Unable to close condition dialog!": "Kann den Dialog für Artikelzustand nicht schließen!"

View File

@@ -0,0 +1,18 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
from datetime import timedelta
class KleinanzeigenBotError(RuntimeError):
    """Base class for all custom bot-related exceptions."""


class CaptchaEncountered(KleinanzeigenBotError):
    """Raised when a Captcha was detected and auto-restart is enabled.

    Attributes:
        restart_delay: The time span carried along with the exception so the
            code catching it knows how long to wait before retrying.
    """

    def __init__(self, restart_delay: timedelta):
        """Create the exception.

        Args:
            restart_delay: How long to wait before the next attempt.
        """
        # Hand a message to the base class so that str(exc), log output and
        # tracebacks are not empty if the exception is logged or escapes uncaught.
        super().__init__(f"Captcha encountered, restart requested in {restart_delay}")
        self.restart_delay = restart_delay

View File

@@ -28,6 +28,9 @@ LOG_ROOT:Final[logging.Logger] = logging.getLogger()
def configure_console_logging() -> None:
# if a StreamHandler already exists, do not append it again
if any(isinstance(h, logging.StreamHandler) for h in LOG_ROOT.handlers):
return
class CustomFormatter(logging.Formatter):
LEVEL_COLORS = {