feat: add hash-based ad change detection (#343) (#388)

Co-authored-by: sebthom <sebthom@users.noreply.github.com>
This commit is contained in:
1cu
2025-01-26 23:37:33 +01:00
committed by GitHub
parent 3d27755207
commit f01109c956
5 changed files with 104 additions and 30 deletions

View File

@@ -11,7 +11,7 @@ from typing import Any, Final
import json
from .i18n import get_translating_logger, pluralize
from .utils import is_integer, parse_decimal, save_dict
from .utils import is_integer, parse_decimal, save_dict, calculate_content_hash
from .web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
__all__ = [
@@ -269,6 +269,9 @@ class AdExtractor(WebScrapingMixin):
info['created_on'] = creation_date
info['updated_on'] = None # will be set later on
# Record a content hash of the downloaded ad as the baseline for hash-based change detection
info['content_hash'] = calculate_content_hash(info)
return info
async def _extract_category_from_ad_page(self) -> str: