mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
fix: harden extract_ad_id_from_ad_url
This commit is contained in:
@@ -122,9 +122,12 @@ class AdExtractor(WebScrapingMixin):
|
||||
id_part = num_part.split('-')[0]
|
||||
|
||||
try:
|
||||
path = url.split('?', 1)[0] # Remove query string if present
|
||||
last_segment = path.rstrip('/').split('/')[-1] # Get last path component
|
||||
id_part = last_segment.split('-')[0] # Extract part before first hyphen
|
||||
return int(id_part)
|
||||
except ValueError:
|
||||
LOG.warning('The ad ID could not be extracted from the given URL %s', url)
|
||||
except (IndexError, ValueError) as ex:
|
||||
LOG.warning("Failed to extract ad ID from URL '%s': %s", url, ex)
|
||||
return -1
|
||||
|
||||
async def extract_own_ads_urls(self) -> list[str]:
|
||||
|
||||
@@ -155,7 +155,7 @@ kleinanzeigen_bot/extract.py:
|
||||
"No image area found. Continuing without downloading images.": "Keine Bildbereiche gefunden. Fahre ohne Bilder-Download fort."
|
||||
|
||||
extract_ad_id_from_ad_url:
|
||||
"The ad ID could not be extracted from the given URL %s": "Die Anzeigen-ID konnte nicht aus der angegebenen URL %s extrahiert werden"
|
||||
"Failed to extract ad ID from URL '%s': %s": "Fehler beim Extrahieren der Anzeigen-ID aus der URL '%s': %s"
|
||||
|
||||
extract_own_ads_urls:
|
||||
"Ad list container #my-manageitems-adlist not found. Maybe no ads present?": "Anzeigenlistencontainer #my-manageitems-adlist nicht gefunden. Vielleicht sind keine Anzeigen vorhanden?"
|
||||
|
||||
Reference in New Issue
Block a user