From 7b0774874e169b1854ca7f1eb5fe220b22d7cfc6 Mon Sep 17 00:00:00 2001 From: sebthom Date: Sat, 26 Apr 2025 21:07:34 +0200 Subject: [PATCH] fix: harden extract_ad_id_from_ad_url --- src/kleinanzeigen_bot/extract.py | 7 +++++-- src/kleinanzeigen_bot/resources/translations.de.yaml | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py index 2b586bf..c1908c7 100644 --- a/src/kleinanzeigen_bot/extract.py +++ b/src/kleinanzeigen_bot/extract.py @@ -122,9 +122,12 @@ class AdExtractor(WebScrapingMixin): id_part = num_part.split('-')[0] try: + path = url.split('?', 1)[0] # Remove query string if present + last_segment = path.rstrip('/').split('/')[-1] # Get last path component + id_part = last_segment.split('-')[0] # Extract part before first hyphen return int(id_part) - except ValueError: - LOG.warning('The ad ID could not be extracted from the given URL %s', url) + except (IndexError, ValueError) as ex: + LOG.warning("Failed to extract ad ID from URL '%s': %s", url, ex) return -1 async def extract_own_ads_urls(self) -> list[str]: diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index 6e06a2c..469bd5a 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -155,7 +155,7 @@ kleinanzeigen_bot/extract.py: "No image area found. Continuing without downloading images.": "Keine Bildbereiche gefunden. Fahre ohne Bilder-Download fort." extract_ad_id_from_ad_url: - "The ad ID could not be extracted from the given URL %s": "Die Anzeigen-ID konnte nicht aus der angegebenen URL %s extrahiert werden" + "Failed to extract ad ID from URL '%s': %s": "Fehler beim Extrahieren der Anzeigen-ID aus der URL '%s': %s" extract_own_ads_urls: "Ad list container #my-manageitems-adlist not found. Maybe no ads present?": "Anzeigenlistencontainer #my-manageitems-adlist nicht gefunden. Vielleicht sind keine Anzeigen vorhanden?"