feat: remove default prefix/suffix text from downloaded ads

2026-03-12 10:31:50 +01:00 · 2024-11-21 23:28:13 +01:00
parent 5086721082
commit 6a315c97ce
2 changed files with 6 additions and 4 deletions
--- a/src/kleinanzeigen_bot/init.py
+++ b/src/kleinanzeigen_bot/init.py
@@ -828,7 +828,7 @@ class KleinanzeigenBot(WebScrapingMixin):
        This downloads either all, only unsaved (new), or specific ads given by ID.
        """

-        ad_extractor = extract.AdExtractor(self.browser)
+        ad_extractor = extract.AdExtractor(self.browser, self.config)

        # use relevant download routine
        if self.ads_selector in {'all', 'new'}:  # explore ads overview for these two modes
--- a/src/kleinanzeigen_bot/extract.py
+++ b/src/kleinanzeigen_bot/extract.py
@@ -20,9 +20,10 @@ class AdExtractor(WebScrapingMixin):
    Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page.
    """

-    def __init__(self, browser:Browser):
+    def __init__(self, browser:Browser, config:dict[str, Any]):
        super().__init__()
        self.browser = browser
+        self.config = config

    async def download_ad(self, ad_id:int) -> None:
        """
@@ -230,8 +231,9 @@ class AdExtractor(WebScrapingMixin):
        LOG.info('Extracting information from ad with title \"%s\"', title)
        info['title'] = title

-        descr:str = await self.web_text(By.ID, 'viewad-description-text')
-        info['description'] = descr
+        info['description'] = (await self.web_text(By.ID, 'viewad-description-text')).strip() \
+            .removeprefix((self.config["ad_defaults"]["description"]["prefix"] or "").strip()) \
+            .removesuffix((self.config["ad_defaults"]["description"]["suffix"] or "").strip())

        # extract category
        info['category'] = await self._extract_category_from_ad_page()