feat: remove default prefix/suffix text from downloaded ads

This commit is contained in:
sebthom
2024-11-21 23:28:13 +01:00
parent 5086721082
commit 6a315c97ce
2 changed files with 6 additions and 4 deletions

View File

@@ -828,7 +828,7 @@ class KleinanzeigenBot(WebScrapingMixin):
This downloads either all, only unsaved (new), or specific ads given by ID.
"""
ad_extractor = extract.AdExtractor(self.browser)
ad_extractor = extract.AdExtractor(self.browser, self.config)
# use relevant download routine
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes

View File

@@ -20,9 +20,10 @@ class AdExtractor(WebScrapingMixin):
Wrapper class for ad extraction that uses an active bot's browser session to extract specific elements from an ad page.
"""
def __init__(self, browser:Browser):
def __init__(self, browser:Browser, config:dict[str, Any]):
super().__init__()
self.browser = browser
self.config = config
async def download_ad(self, ad_id:int) -> None:
"""
@@ -230,8 +231,9 @@ class AdExtractor(WebScrapingMixin):
LOG.info('Extracting information from ad with title \"%s\"', title)
info['title'] = title
descr:str = await self.web_text(By.ID, 'viewad-description-text')
info['description'] = descr
info['description'] = (await self.web_text(By.ID, 'viewad-description-text')).strip() \
.removeprefix((self.config["ad_defaults"]["description"]["prefix"] or "").strip()) \
.removesuffix((self.config["ad_defaults"]["description"]["suffix"] or "").strip())
# extract category
info['category'] = await self._extract_category_from_ad_page()