mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
feat: remove default prefix/suffix text from downloaded ads
This commit is contained in:
@@ -828,7 +828,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
This downloads either all, only unsaved (new), or specific ads given by ID.
|
||||
"""
|
||||
|
||||
ad_extractor = extract.AdExtractor(self.browser)
|
||||
ad_extractor = extract.AdExtractor(self.browser, self.config)
|
||||
|
||||
# use relevant download routine
|
||||
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes
|
||||
|
||||
@@ -20,9 +20,10 @@ class AdExtractor(WebScrapingMixin):
|
||||
Wrapper class for ad extraction that uses an active bot's browser session to extract specific elements from an ad page.
|
||||
"""
|
||||
|
||||
def __init__(self, browser:Browser):
|
||||
def __init__(self, browser:Browser, config:dict[str, Any]):
|
||||
super().__init__()
|
||||
self.browser = browser
|
||||
self.config = config
|
||||
|
||||
async def download_ad(self, ad_id:int) -> None:
|
||||
"""
|
||||
@@ -230,8 +231,9 @@ class AdExtractor(WebScrapingMixin):
|
||||
LOG.info('Extracting information from ad with title \"%s\"', title)
|
||||
info['title'] = title
|
||||
|
||||
descr:str = await self.web_text(By.ID, 'viewad-description-text')
|
||||
info['description'] = descr
|
||||
info['description'] = (await self.web_text(By.ID, 'viewad-description-text')).strip() \
|
||||
.removeprefix((self.config["ad_defaults"]["description"]["prefix"] or "").strip()) \
|
||||
.removesuffix((self.config["ad_defaults"]["description"]["suffix"] or "").strip())
|
||||
|
||||
# extract category
|
||||
info['category'] = await self._extract_category_from_ad_page()
|
||||
|
||||
Reference in New Issue
Block a user