mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
fix: serialize downloaded ad timestamps as schema-compliant strings (#863)
## ℹ️ Description

- Link to the related issue(s): Issue #
- Fixes drift where `pdm run app download` wrote timestamp values in YAML-native datetime form that could violate `schemas/ad.schema.json` string expectations.
- Ensures downloaded ads persist `created_on`/`updated_on` as JSON-serialized ISO-8601 strings and adds a regression test validating written YAML against the schema.

## 📋 Changes Summary

- Updated downloader save path to use `ad_cfg.model_dump(mode = "json")` before writing YAML in `src/kleinanzeigen_bot/extract.py`.
- Updated existing `download_ad` unit assertion to match JSON-mode serialization.
- Added `test_download_ad_writes_schema_compliant_yaml` in `tests/unit/test_extract.py` that writes a real tmp YAML file and validates it against `schemas/ad.schema.json` with `jsonschema`.
- Added dev dependency `jsonschema>=4.26.0` (and lockfile updates).
- Dependencies/config updates introduced: new dev dependency (`jsonschema`) for full schema validation in tests.

### ⚙️ Type of Change

- [x] 🐞 Bug fix (non-breaking change which fixes an issue)
- [ ] ✨ New feature (adds new functionality without breaking existing usage)
- [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations)

## ✅ Checklist

- [x] I have reviewed my changes to ensure they meet the project's standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run test`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

# Release Notes

* **Bug Fixes**
  * Improved ad data serialization to ensure consistent JSON format when saving ad configurations.
* **Tests**
  * Added schema validation tests to verify ad YAML output compliance.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -2,19 +2,28 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
import json # isort: skip
|
||||
import asyncio
|
||||
from gettext import gettext as _
|
||||
from pathlib import Path
|
||||
from typing import Any, TypedDict
|
||||
from typing import Any, Final, TypedDict
|
||||
from unittest.mock import AsyncMock, MagicMock, call, patch
|
||||
from urllib.error import URLError
|
||||
|
||||
import pytest
|
||||
from jsonschema import Draft202012Validator
|
||||
from ruamel.yaml import YAML
|
||||
|
||||
import kleinanzeigen_bot.extract as extract_module
|
||||
from kleinanzeigen_bot.model.ad_model import AdPartial, ContactPartial
|
||||
from kleinanzeigen_bot.model.config_model import Config, DownloadConfig
|
||||
from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element
|
||||
|
||||
SCHEMA_PATH:Final[Path] = Path(__file__).resolve().parents[2] / "schemas" / "ad.schema.json"
|
||||
|
||||
|
||||
def _read_text_file(path:Path) -> str:
    """Return the complete content of ``path`` decoded as UTF-8 text."""
    with path.open(encoding = "utf-8") as stream:
        return stream.read()
|
||||
|
||||
|
||||
class _DimensionsDict(TypedDict):
|
||||
ad_attributes:str
|
||||
@@ -1255,7 +1264,38 @@ class TestAdExtractorDownload:
|
||||
actual_call = mock_save_dict.call_args
|
||||
actual_path = Path(actual_call[0][0])
|
||||
assert actual_path == yaml_path
|
||||
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
|
||||
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump(mode = "json")
|
||||
|
||||
@pytest.mark.asyncio
async def test_download_ad_writes_schema_compliant_yaml(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
    """Verify that the YAML file produced by download_ad conforms to ad.schema.json."""
    ads_root = tmp_path / "downloaded-ads"
    ad_dir = ads_root / "ad_12345_Test Advertisement Title"
    extractor.download_dir = ads_root

    # Both timestamps carry a UTC offset; the assertions at the end check that
    # they come back from the written YAML as plain strings.
    ad_payload = {
        "title": "Test Advertisement Title",
        "description": "Test Description",
        "category": "Dienstleistungen",
        "created_on": "2026-03-08T00:00:00+01:00",
        "updated_on": "2026-03-09T01:02:03+01:00",
    }
    extraction_result = (AdPartial.model_validate(ad_payload), ad_dir)

    # Only the page extraction step is replaced; download_ad itself runs for real.
    with patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as extract_stub:
        extract_stub.return_value = extraction_result
        await extractor.download_ad(12345)

    # File reads are delegated to a worker thread via asyncio.to_thread.
    yaml_text = await asyncio.to_thread(_read_text_file, ad_dir / "ad_12345.yaml")
    written_ad = YAML(typ = "safe").load(yaml_text)
    schema_text = await asyncio.to_thread(_read_text_file, SCHEMA_PATH)

    Draft202012Validator(json.loads(schema_text)).validate(written_ad)
    for timestamp_key in ("created_on", "updated_on"):
        assert isinstance(written_ad[timestamp_key], str)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
|
||||
Reference in New Issue
Block a user