mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
This commit is contained in:
40
tests/unit/test_dicts.py
Normal file
40
tests/unit/test_dicts.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
"""Tests for the dicts utility module."""
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def test_save_dict_normalizes_unicode_paths(tmp_path:Path) -> None:
    """Check that save_dict copes with an NFD-encoded target path (issue #728).

    The ad directory is created on disk under its NFC (composed) name, mirroring
    what sanitize_folder_name is documented to produce. save_dict is then handed
    the same location spelled in NFD (e.g. "a" + combining diaeresis instead of
    the single character "ä"). The file is expected to end up inside the already
    existing NFC directory, and nowhere else.
    """
    from kleinanzeigen_bot.utils import dicts  # noqa: PLC0415

    # Directory on disk uses the composed (NFC) spelling of the title.
    composed_title = unicodedata.normalize("NFC", "KitchenAid Zuhälter - nie benutzt")
    existing_dir = tmp_path / f"ad_12345_{composed_title}"
    existing_dir.mkdir(parents = True)

    # The path passed to save_dict uses the decomposed (NFD) spelling instead.
    decomposed_title = unicodedata.normalize("NFD", composed_title)
    assert composed_title != decomposed_title, "NFC and NFD should be different strings"

    target_file = tmp_path / f"ad_12345_{decomposed_title}" / "ad_12345.yaml"
    dicts.save_dict(str(target_file), {"test": "data", "title": composed_title})

    # The YAML file must have landed in the pre-existing NFC directory.
    saved_in_nfc_dir = list(existing_dir.glob("*.yaml"))
    assert len(saved_in_nfc_dir) == 1, "Should have exactly one file in NFC directory"
    (only_file,) = saved_in_nfc_dir
    assert only_file.name == "ad_12345.yaml"

    # macOS/APFS maps NFC and NFD spellings onto the same directory, while on
    # Linux ext4 they would be two distinct directories unless save_dict
    # normalizes the path. In both cases only a single YAML file may exist
    # anywhere below tmp_path (no duplicate written to an NFD sibling directory).
    yaml_everywhere = list(tmp_path.rglob("*.yaml"))
    assert len(yaml_everywhere) == 1, f"Expected exactly 1 YAML file total, found {len(yaml_everywhere)}: {yaml_everywhere}"
|
||||
@@ -1225,3 +1225,71 @@ class TestAdExtractorDownload:
|
||||
assert result_dir.exists()
|
||||
assert (result_dir / "existing_image.jpg").exists() # File should be preserved
|
||||
assert ad_cfg.title == "Test Title"
|
||||
|
||||
@pytest.mark.asyncio
async def test_download_ad_with_umlauts_in_title(self, extractor:AdExtractor, tmp_path:Path) -> None:
    """Test cross-platform Unicode handling for ad titles with umlauts (issue #728).

    Verifies that:
    1. Directories are created with NFC-normalized names (via sanitize_folder_name)
    2. Files can be saved to those directories (via save_dict's NFC normalization)
    3. No FileNotFoundError occurs due to NFC/NFD mismatch on Linux/Windows

    All page-scraping helpers of the extractor are replaced by mocks, so only the
    directory handling and the subsequent YAML write touch the file system.
    """
    import unicodedata  # noqa: PLC0415

    # Title with German umlauts (ä) - common in real ads
    title_with_umlauts = "KitchenAid Zuhälter - nie benutzt"

    # Mock the page
    page_mock = MagicMock()
    page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
    extractor.page = page_mock

    base_dir = tmp_path / "downloaded-ads"
    base_dir.mkdir()

    # web_text answers are consumed in call order, see the per-item comments.
    with patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = [
        title_with_umlauts,  # Title extraction
        title_with_umlauts,  # Second title call for full extraction
        "Description text",  # Description
        "03.02.2025"  # Creation date
    ]), \
            patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = {
                "universalAnalyticsOpts": {
                    "dimensions": {
                        "dimension92": "",
                        "dimension108": ""
                    }
                }
            }), \
            patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"), \
            patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}), \
            patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")), \
            patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)), \
            patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False), \
            patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []), \
            patch.object(extractor, "_extract_contact_from_ad_page", new_callable = AsyncMock, return_value = ContactPartial(
                name = "Test", zipcode = "12345", location = "Berlin"
            )):

        ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(
            base_dir, 12345
        )

    # Verify directory was created with NFC-normalized name
    assert result_dir.exists()
    # Existence alone does not prove the normalization form, so check the
    # directory name explicitly instead of relying on the file system.
    result_dir_name = Path(result_dir).name
    assert unicodedata.is_normalized("NFC", result_dir_name), f"Directory name is not NFC-normalized: {result_dir_name!r}"
    assert ad_cfg.title == title_with_umlauts

    # Test saving YAML file to the Unicode directory path
    # Before fix: Failed on Linux/Windows due to NFC/NFD mismatch
    # After fix: Both directory and file use NFC normalization
    ad_file_path = Path(result_dir) / "ad_12345.yaml"

    from kleinanzeigen_bot.utils import dicts  # noqa: PLC0415

    header_string = "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json"

    # save_dict normalizes path to NFC, matching the NFC directory name
    dicts.save_dict(str(ad_file_path), ad_cfg.model_dump(), header = header_string)

    # Verify file was created successfully (no FileNotFoundError)
    assert ad_file_path.exists()
    assert ad_file_path.is_file()
|
||||
|
||||
@@ -144,9 +144,9 @@ def test_ensure_non_callable_truthy_and_falsy() -> None:
|
||||
# Basic sanitization
|
||||
("My Ad Title!", "My Ad Title!", "Basic sanitization"),
|
||||
|
||||
# Unicode normalization (sanitize-filename changes normalization)
|
||||
("café", "cafe\u0301", "Unicode normalization"),
|
||||
("caf\u00e9", "cafe\u0301", "Unicode normalization from escaped"),
|
||||
# Unicode normalization - sanitize-filename converts to NFD, then we normalize to NFC (issue #728)
|
||||
("café", "café", "Unicode NFC → NFD (by sanitize) → NFC (by normalize)"),
|
||||
("caf\u00e9", "café", "Unicode NFC (escaped) → NFD → NFC"),
|
||||
|
||||
# Edge cases
|
||||
("", "untitled", "Empty string"),
|
||||
|
||||
Reference in New Issue
Block a user