feat: cache published ads data to avoid repetitive API calls during ad download (#809)

This commit is contained in:
Jens
2026-02-03 14:51:59 +01:00
committed by GitHub
parent e994ce1b1f
commit a8051c3814
5 changed files with 136 additions and 326 deletions

View File

@@ -581,11 +581,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
dicts.save_commented_model(
self.config_file_path,
default_config,
header=(
"# yaml-language-server: $schema="
"https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot"
"/main/schemas/config.schema.json"
),
header = ("# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/main/schemas/config.schema.json"),
exclude = {"ad_defaults": {"description"}},
)
@@ -2020,8 +2016,21 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
Determines which download mode was chosen with the arguments, and calls the specified download routine.
This downloads either all, only unsaved (new), or specific ads given by ID.
"""
# Fetch published ads once from manage-ads JSON to avoid repetitive API calls during extraction
# Build lookup dict inline and pass directly to extractor (no cache abstraction needed)
LOG.info("Fetching published ads...")
published_ads = await self._fetch_published_ads()
published_ads_by_id:dict[int, dict[str, Any]] = {}
for published_ad in published_ads:
try:
ad_id = published_ad.get("id")
if ad_id is not None:
published_ads_by_id[int(ad_id)] = published_ad
except (ValueError, TypeError):
LOG.warning("Skipping ad with non-numeric id: %s", published_ad.get("id"))
LOG.info("Loaded %s published ads.", len(published_ads_by_id))
ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable)
ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable, published_ads_by_id = published_ads_by_id)
# use relevant download routine
if self.ads_selector in {"all", "new"}: # explore ads overview for these two modes