# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors # SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ import json, os # isort: skip from typing import Any, TypedDict from unittest.mock import AsyncMock, MagicMock, call, patch import pytest from kleinanzeigen_bot.extract import AdExtractor from kleinanzeigen_bot.model.ad_model import AdPartial, ContactPartial from kleinanzeigen_bot.model.config_model import Config, DownloadConfig from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element class _DimensionsDict(TypedDict): dimension108:str class _UniversalAnalyticsOptsDict(TypedDict): dimensions:_DimensionsDict class _BelenConfDict(TypedDict): universalAnalyticsOpts:_UniversalAnalyticsOptsDict class _SpecialAttributesDict(TypedDict, total = False): art_s:str condition_s:str class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never used belen_conf:_BelenConfDict expected:_SpecialAttributesDict class TestAdExtractorBasics: """Basic synchronous tests for AdExtractor.""" def test_constructor(self, browser_mock:MagicMock, test_bot_config:Config) -> None: """Test the constructor of AdExtractor""" extractor = AdExtractor(browser_mock, test_bot_config) assert extractor.browser == browser_mock assert extractor.config == test_bot_config @pytest.mark.parametrize(("url", "expected_id"), [ ("https://www.kleinanzeigen.de/s-anzeige/test-title/12345678", 12345678), ("https://www.kleinanzeigen.de/s-anzeige/another-test/98765432", 98765432), ("https://www.kleinanzeigen.de/s-anzeige/invalid-id/abc", -1), ("https://www.kleinanzeigen.de/invalid-url", -1), ]) def test_extract_ad_id_from_ad_url(self, test_extractor:AdExtractor, url:str, expected_id:int) -> None: """Test extraction of ad ID from different URL formats.""" assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id class TestAdExtractorPricing: """Tests for pricing related functionality.""" @pytest.mark.parametrize(("price_text", "expected_price", "expected_type"), [ ("50 €", 50, "FIXED"), ("1.234 €", 1234, "FIXED"), ("50 € VB", 50, "NEGOTIABLE"), ("VB", None, "NEGOTIABLE"), ("Zu verschenken", None, "GIVE_AWAY"), ]) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_pricing_info( self, test_extractor:AdExtractor, price_text:str, expected_price:int | None, expected_type:str ) -> None: """Test price extraction with different formats""" with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = price_text): price, price_type = await test_extractor._extract_pricing_info_from_ad_page() assert price == expected_price assert price_type == expected_type @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_pricing_info_timeout(self, test_extractor:AdExtractor) -> None: """Test price extraction when element is not found""" with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError): price, price_type = await test_extractor._extract_pricing_info_from_ad_page() assert price is None assert price_type == "NOT_APPLICABLE" class TestAdExtractorShipping: """Tests for shipping related functionality.""" @pytest.mark.parametrize(("shipping_text", "expected_type", "expected_cost"), [ ("+ Versand ab 2,99 €", "SHIPPING", 2.99), ("Nur Abholung", "PICKUP", None), ("Versand möglich", "SHIPPING", None), ]) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_shipping_info( self, test_extractor:AdExtractor, shipping_text:str, expected_type:str, expected_cost:float | None ) -> None: """Test shipping info extraction with different text formats.""" with patch.object(test_extractor, "page", MagicMock()), \ patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = shipping_text), \ patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: if expected_cost: shipping_response:dict[str, Any] = { "data": { "shippingOptionsResponse": { "options": [ {"id": "DHL_001", "priceInEuroCent": int(expected_cost * 100), "packageSize": "SMALL"} ] } } } mock_web_request.return_value = {"content": json.dumps(shipping_response)} shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page() assert shipping_type == expected_type assert costs == expected_cost if expected_cost: assert options == ["DHL_2"] else: assert options is None @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_shipping_info_with_options(self, test_extractor:AdExtractor) -> None: """Test shipping info extraction with shipping options.""" shipping_response = { "content": json.dumps({ "data": { "shippingOptionsResponse": { "options": [ {"id": "DHL_001", "priceInEuroCent": 549, "packageSize": "SMALL"} ] } } }) } with patch.object(test_extractor, "page", MagicMock()), \ patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 5,49 €"), \ patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response): shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page() assert shipping_type == "SHIPPING" assert costs == 5.49 assert options == ["DHL_2"] @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_shipping_info_with_all_matching_options(self, test_extractor:AdExtractor) -> None: """Test shipping info extraction with all matching options enabled.""" shipping_response = { "content": json.dumps({ "data": { "shippingOptionsResponse": { "options": [ {"id": "HERMES_001", "priceInEuroCent": 489, "packageSize": "SMALL"}, {"id": "HERMES_002", "priceInEuroCent": 549, "packageSize": "SMALL"}, {"id": "DHL_001", "priceInEuroCent": 619, "packageSize": "SMALL"} ] } } }) } # Enable all matching options in config test_extractor.config.download = DownloadConfig.model_validate({"include_all_matching_shipping_options": True}) with patch.object(test_extractor, "page", MagicMock()), \ patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \ patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response): shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page() assert shipping_type == "SHIPPING" assert costs == 4.89 if options is not None: assert sorted(options) == ["DHL_2", "Hermes_Päckchen", "Hermes_S"] else: assert options is None @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_shipping_info_with_excluded_options(self, test_extractor:AdExtractor) -> None: """Test shipping info extraction with excluded options.""" shipping_response = { "content": json.dumps({ "data": { "shippingOptionsResponse": { "options": [ {"id": "HERMES_001", "priceInEuroCent": 489, "packageSize": "SMALL"}, {"id": "HERMES_002", "priceInEuroCent": 549, "packageSize": "SMALL"}, {"id": "DHL_001", "priceInEuroCent": 619, "packageSize": "SMALL"} ] } } }) } # Enable all matching options and exclude DHL in config test_extractor.config.download = DownloadConfig.model_validate({ "include_all_matching_shipping_options": True, "excluded_shipping_options": ["DHL_2"] }) with patch.object(test_extractor, "page", MagicMock()), \ patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \ patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response): shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page() assert shipping_type == "SHIPPING" assert costs == 4.89 if options is not None: assert sorted(options) == ["Hermes_Päckchen", "Hermes_S"] else: assert options is None @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_shipping_info_with_excluded_matching_option(self, test_extractor:AdExtractor) -> None: """Test shipping info extraction when the matching option is excluded.""" shipping_response = { "content": json.dumps({ "data": { "shippingOptionsResponse": { "options": [ {"id": "HERMES_001", "priceInEuroCent": 489, "packageSize": "SMALL"}, {"id": "HERMES_002", "priceInEuroCent": 549, "packageSize": "SMALL"} ] } } }) } # Exclude the matching option test_extractor.config.download = DownloadConfig.model_validate({ "excluded_shipping_options": ["Hermes_Päckchen"] }) with patch.object(test_extractor, "page", MagicMock()), \ patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \ patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response): shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page() assert shipping_type == "NOT_APPLICABLE" assert costs == 4.89 assert options is None class TestAdExtractorNavigation: """Tests for navigation related functionality.""" @pytest.mark.asyncio async def test_navigate_to_ad_page_with_url(self, test_extractor:AdExtractor) -> None: """Test navigation to ad page using a URL.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" with patch.object(test_extractor, "page", page_mock), \ patch.object(test_extractor, "web_open", new_callable = AsyncMock) as mock_web_open, \ patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError): result = await test_extractor.navigate_to_ad_page("https://www.kleinanzeigen.de/s-anzeige/test/12345") assert result is True mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345") @pytest.mark.asyncio async def test_navigate_to_ad_page_with_id(self, test_extractor:AdExtractor) -> None: """Test navigation to ad page using an ID.""" ad_id = 12345 page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/{0}".format(ad_id) popup_close_mock = AsyncMock() popup_close_mock.click = AsyncMock() popup_close_mock.apply = AsyncMock(return_value = True) def find_mock(selector_type:By, selector_value:str, **_:Any) -> Element | None: if selector_type == By.CLASS_NAME and selector_value == "mfp-close": return popup_close_mock return None with patch.object(test_extractor, "page", page_mock), \ patch.object(test_extractor, "web_open", new_callable = AsyncMock) as mock_web_open, \ patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = find_mock): result = await test_extractor.navigate_to_ad_page(ad_id) assert result is True mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-suchanfrage.html?keywords={0}".format(ad_id)) popup_close_mock.click.assert_awaited_once() @pytest.mark.asyncio async def test_navigate_to_ad_page_with_popup(self, test_extractor:AdExtractor) -> None: """Test navigation to ad page with popup handling.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" input_mock = AsyncMock() input_mock.clear_input = AsyncMock() input_mock.send_keys = AsyncMock() input_mock.apply = AsyncMock(return_value = True) with patch.object(test_extractor, "page", page_mock), \ patch.object(test_extractor, "web_open", new_callable = AsyncMock), \ patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = input_mock), \ patch.object(test_extractor, "web_click", new_callable = AsyncMock) as mock_web_click, \ patch.object(test_extractor, "web_check", new_callable = AsyncMock, return_value = True): result = await test_extractor.navigate_to_ad_page(12345) assert result is True mock_web_click.assert_called_with(By.CLASS_NAME, "mfp-close") @pytest.mark.asyncio async def test_navigate_to_ad_page_invalid_id(self, test_extractor:AdExtractor) -> None: """Test navigation to ad page with invalid ID.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0" input_mock = AsyncMock() input_mock.clear_input = AsyncMock() input_mock.send_keys = AsyncMock() input_mock.apply = AsyncMock(return_value = True) input_mock.attrs = {} with patch.object(test_extractor, "page", page_mock), \ patch.object(test_extractor, "web_open", new_callable = AsyncMock), \ patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = input_mock): result = await test_extractor.navigate_to_ad_page(99999) assert result is False @pytest.mark.asyncio async def test_extract_own_ads_urls(self, test_extractor:AdExtractor) -> None: """Test extraction of own ads URLs - basic test.""" with patch.object(test_extractor, "web_open", new_callable = AsyncMock), \ patch.object(test_extractor, "web_sleep", new_callable = AsyncMock), \ patch.object(test_extractor, "web_find", new_callable = AsyncMock) as mock_web_find, \ patch.object(test_extractor, "web_find_all", new_callable = AsyncMock) as mock_web_find_all, \ patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock), \ patch.object(test_extractor, "web_execute", new_callable = AsyncMock): # --- Setup mock objects for DOM elements --- # Mocks needed for the actual execution flow ad_list_container_mock = MagicMock() pagination_section_mock = MagicMock() cardbox_mock = MagicMock() # Represents the
  • element link_mock = MagicMock() # Represents the element link_mock.attrs = {"href": "/s-anzeige/test/12345"} # Configure the desired output # Mocks for elements potentially checked but maybe not strictly needed for output # (depending on how robust the mocking is) # next_button_mock = MagicMock() # If needed for multi_page logic # --- Setup mock responses for web_find and web_find_all in CORRECT ORDER --- # 1. Initial find for ad list container (before loop) # 2. Find for pagination section (pagination check) # 3. Find for ad list container (inside loop) # 4. Find for the link (inside list comprehension) mock_web_find.side_effect = [ ad_list_container_mock, # Call 1: find #my-manageitems-adlist (before loop) pagination_section_mock, # Call 2: find .Pagination ad_list_container_mock, # Call 3: find #my-manageitems-adlist (inside loop) link_mock # Call 4: find 'div.manageitems-item-ad h3 a.text-onSurface' # Add more mocks here if the pagination navigation logic calls web_find again ] # 1. Find all 'Nächste' buttons (pagination check) - Return empty list for single page test case # 2. Find all '.cardbox' elements (inside loop) mock_web_find_all.side_effect = [ [], # Call 1: find 'button[aria-label="Nächste"]' -> No next button = single page [cardbox_mock] # Call 2: find .cardbox -> One ad item # Add more mocks here if pagination navigation calls web_find_all ] # --- Execute test and verify results --- refs = await test_extractor.extract_own_ads_urls() # --- Assertions --- assert refs == ["/s-anzeige/test/12345"] # Now it should match # Optional: Verify calls were made as expected mock_web_find.assert_has_calls([ call(By.ID, "my-manageitems-adlist"), call(By.CSS_SELECTOR, ".Pagination", timeout = 10), call(By.ID, "my-manageitems-adlist"), call(By.CSS_SELECTOR, "div.manageitems-item-ad h3 a.text-onSurface", parent = cardbox_mock), ], any_order = False) # Check order if important mock_web_find_all.assert_has_calls([ call(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section_mock), call(By.CLASS_NAME, "cardbox", parent = ad_list_container_mock), ], any_order = False) class TestAdExtractorContent: """Tests for content extraction functionality.""" # pylint: disable=protected-access @pytest.fixture def extractor_with_config(self) -> AdExtractor: """Create extractor with specific config for testing prefix/suffix handling.""" browser_mock = MagicMock(spec = Browser) return AdExtractor(browser_mock, Config()) # Empty config, will be overridden in tests @pytest.mark.asyncio async def test_extract_description_with_affixes( self, test_extractor:AdExtractor, description_test_cases:list[tuple[dict[str, Any], str, str]], test_bot_config:Config ) -> None: """Test extraction of description with various prefix/suffix configurations.""" # Mock the page page_mock = MagicMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" test_extractor.page = page_mock for config, raw_description, _ in description_test_cases: # Changed to _ since we don't use expected_description test_extractor.config = test_bot_config.with_values(config) with patch.multiple(test_extractor, web_text = AsyncMock(side_effect = [ "Test Title", # Title raw_description, # Raw description (without affixes) "03.02.2025" # Creation date ]), web_execute=AsyncMock(return_value={ "universalAnalyticsOpts": { "dimensions": { "dimension92": "", "dimension108": "" } } }), _extract_category_from_ad_page = AsyncMock(return_value = "160"), _extract_special_attributes_from_ad_page = AsyncMock(return_value = {}), _extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")), _extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)), _extract_sell_directly_from_ad_page = AsyncMock(return_value = False), _download_images_from_ad_page = AsyncMock(return_value = []), _extract_contact_from_ad_page = AsyncMock(return_value = {}) ): info = await test_extractor._extract_ad_page_info("/some/dir", 12345) assert info.description == raw_description @pytest.mark.asyncio async def test_extract_description_with_affixes_timeout( self, test_extractor:AdExtractor ) -> None: """Test handling of timeout when extracting description.""" # Mock the page page_mock = MagicMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" test_extractor.page = page_mock with patch.multiple(test_extractor, web_text = AsyncMock(side_effect = [ "Test Title", # Title succeeds TimeoutError("Timeout"), # Description times out "03.02.2025" # Date succeeds ]), web_execute=AsyncMock(return_value={ "universalAnalyticsOpts": { "dimensions": { "dimension92": "", "dimension108": "" } } }), _extract_category_from_ad_page = AsyncMock(return_value = "160"), _extract_special_attributes_from_ad_page = AsyncMock(return_value = {}), _extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")), _extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)), _extract_sell_directly_from_ad_page = AsyncMock(return_value = False), _download_images_from_ad_page = AsyncMock(return_value = []), _extract_contact_from_ad_page = AsyncMock(return_value = ContactPartial()) ): try: info = await test_extractor._extract_ad_page_info("/some/dir", 12345) assert not info.description except TimeoutError: # This is also acceptable - depends on how we want to handle timeouts pass @pytest.mark.asyncio async def test_extract_description_with_affixes_no_affixes( self, test_extractor:AdExtractor ) -> None: """Test extraction of description without any affixes in config.""" # Mock the page page_mock = MagicMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" test_extractor.page = page_mock raw_description = "Original Description" with patch.multiple(test_extractor, web_text = AsyncMock(side_effect = [ "Test Title", # Title raw_description, # Description without affixes "03.02.2025" # Creation date ]), web_execute = AsyncMock(return_value = { "universalAnalyticsOpts": { "dimensions": { "dimension92": "", "dimension108": "" } } }), _extract_category_from_ad_page = AsyncMock(return_value = "160"), _extract_special_attributes_from_ad_page = AsyncMock(return_value = {}), _extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")), _extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)), _extract_sell_directly_from_ad_page = AsyncMock(return_value = False), _download_images_from_ad_page = AsyncMock(return_value = []), _extract_contact_from_ad_page = AsyncMock(return_value = ContactPartial()) ): info = await test_extractor._extract_ad_page_info("/some/dir", 12345) assert info.description == raw_description @pytest.mark.asyncio async def test_extract_sell_directly(self, test_extractor:AdExtractor) -> None: """Test extraction of sell directly option.""" test_cases = [ ("Direkt kaufen", True), ("Other text", False), ] for text, expected in test_cases: with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = text): result = await test_extractor._extract_sell_directly_from_ad_page() assert result is expected with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError): result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None class TestAdExtractorCategory: """Tests for category extraction functionality.""" @pytest.fixture def extractor(self, test_bot_config:Config) -> AdExtractor: browser_mock = MagicMock(spec = Browser) config = test_bot_config.with_values({ "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } }) return AdExtractor(browser_mock, config) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_category(self, extractor:AdExtractor) -> None: """Test category extraction from breadcrumb.""" category_line = MagicMock() first_part = MagicMock() first_part.attrs = {"href": "/s-familie-kind-baby/c17"} second_part = MagicMock() second_part.attrs = {"href": "/s-spielzeug/c23"} with patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find: mock_web_find.side_effect = [ category_line, first_part, second_part ] result = await extractor._extract_category_from_ad_page() assert result == "17/23" mock_web_find.assert_any_call(By.ID, "vap-brdcrmb") mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line) mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_special_attributes_empty(self, extractor:AdExtractor) -> None: """Test extraction of special attributes when empty.""" with patch.object(extractor, "web_execute", new_callable = AsyncMock) as mock_web_execute: mock_web_execute.return_value = { "universalAnalyticsOpts": { "dimensions": { "dimension108": "" } } } result = await extractor._extract_special_attributes_from_ad_page(mock_web_execute.return_value) assert result == {} @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_special_attributes_not_empty(self, extractor: AdExtractor) -> None: """Test extraction of special attributes when not empty.""" special_atts = { "universalAnalyticsOpts": { "dimensions": { "dimension108": "versand_s:t|color_s:creme|groesse_s:68|condition_s:alright|type_s:accessoires|art_s:maedchen" } } } result = await extractor._extract_special_attributes_from_ad_page(special_atts) assert len(result) == 5 assert "versand_s" not in result assert "color_s" in result assert result["color_s"] == "creme" assert "groesse_s" in result assert result["groesse_s"] == "68" assert "condition_s" in result assert result["condition_s"] == "alright" assert "type_s" in result assert result["type_s"] == "accessoires" assert "art_s" in result assert result["art_s"] == "maedchen" class TestAdExtractorContact: """Tests for contact information extraction.""" @pytest.fixture def extractor(self, test_bot_config:Config) -> AdExtractor: browser_mock = MagicMock(spec = Browser) config = test_bot_config.with_values({ "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } }) return AdExtractor(browser_mock, config) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_contact_info(self, extractor:AdExtractor) -> None: """Test extraction of contact information.""" with patch.object(extractor, "page", MagicMock()), \ patch.object(extractor, "web_text", new_callable = AsyncMock) as mock_web_text, \ patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find: mock_web_text.side_effect = [ "12345 Berlin - Mitte", "Example Street 123,", "Test User", ] mock_web_find.side_effect = [ MagicMock(), # contact person element MagicMock(), # name element TimeoutError(), # phone element (simulating no phone) ] contact_info = await extractor._extract_contact_from_ad_page() assert contact_info.street == "Example Street 123" assert contact_info.zipcode == "12345" assert contact_info.location == "Berlin - Mitte" assert contact_info.name == "Test User" assert contact_info.phone is None @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_contact_info_timeout(self, extractor:AdExtractor) -> None: """Test contact info extraction when elements are not found.""" with patch.object(extractor, "page", MagicMock()), \ patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError()), \ patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError()), \ pytest.raises(TimeoutError): await extractor._extract_contact_from_ad_page() @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_contact_info_with_phone(self, extractor:AdExtractor) -> None: """Test extraction of contact information including phone number.""" with patch.object(extractor, "page", MagicMock()), \ patch.object(extractor, "web_text", new_callable = AsyncMock) as mock_web_text, \ patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find: mock_web_text.side_effect = [ "12345 Berlin - Mitte", "Example Street 123,", "Test User", "+49(0)1234 567890" ] phone_element = MagicMock() mock_web_find.side_effect = [ MagicMock(), # contact person element MagicMock(), # name element phone_element, # phone element ] contact_info = await extractor._extract_contact_from_ad_page() assert contact_info.phone == "01234567890" # Normalized phone number class TestAdExtractorDownload: """Tests for download functionality.""" @pytest.fixture def extractor(self, test_bot_config:Config) -> AdExtractor: browser_mock = MagicMock(spec = Browser) config = test_bot_config.with_values({ "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } }) return AdExtractor(browser_mock, config) @pytest.mark.asyncio async def test_download_ad_existing_directory(self, extractor:AdExtractor) -> None: """Test downloading an ad when the directory already exists.""" with patch("os.path.exists") as mock_exists, \ patch("os.path.isdir") as mock_isdir, \ patch("os.makedirs") as mock_makedirs, \ patch("os.mkdir") as mock_mkdir, \ patch("shutil.rmtree") as mock_rmtree, \ patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \ patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract: base_dir = "downloaded-ads" ad_dir = os.path.join(base_dir, "ad_12345") yaml_path = os.path.join(ad_dir, "ad_12345.yaml") # Configure mocks for directory checks existing_paths = {base_dir, ad_dir} mock_exists.side_effect = lambda path: path in existing_paths mock_isdir.side_effect = lambda path: path == base_dir mock_extract.return_value = AdPartial.model_validate({ "title": "Test Advertisement Title", "description": "Test Description", "category": "Dienstleistungen", "price": 100, "images": [], "contact": { "name": "Test User", "street": "Test Street 123", "zipcode": "12345", "location": "Test City" } }) await extractor.download_ad(12345) # Verify the correct functions were called mock_extract.assert_called_once() mock_rmtree.assert_called_once_with(ad_dir) mock_mkdir.assert_called_once_with(ad_dir) mock_makedirs.assert_not_called() # Directory already exists # Get the actual call arguments # Workaround for hard-coded path in download_ad actual_call = mock_save_dict.call_args assert actual_call is not None actual_path = actual_call[0][0].replace("/", os.path.sep) assert actual_path == yaml_path assert actual_call[0][1] == mock_extract.return_value.model_dump() @pytest.mark.asyncio # pylint: disable=protected-access async def test_download_images_no_images(self, extractor:AdExtractor) -> None: """Test image download when no images are found.""" with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError): image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345) assert len(image_paths) == 0 @pytest.mark.asyncio async def test_download_ad(self, extractor:AdExtractor) -> None: """Test downloading an entire ad.""" with patch("os.path.exists") as mock_exists, \ patch("os.path.isdir") as mock_isdir, \ patch("os.makedirs") as mock_makedirs, \ patch("os.mkdir") as mock_mkdir, \ patch("shutil.rmtree") as mock_rmtree, \ patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \ patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract: base_dir = "downloaded-ads" ad_dir = os.path.join(base_dir, "ad_12345") yaml_path = os.path.join(ad_dir, "ad_12345.yaml") # Configure mocks for directory checks mock_exists.return_value = False mock_isdir.return_value = False mock_extract.return_value = AdPartial.model_validate({ "title": "Test Advertisement Title", "description": "Test Description", "category": "Dienstleistungen", "price": 100, "images": [], "contact": { "name": "Test User", "street": "Test Street 123", "zipcode": "12345", "location": "Test City" } }) await extractor.download_ad(12345) # Verify the correct functions were called mock_extract.assert_called_once() mock_rmtree.assert_not_called() # No directory to remove mock_mkdir.assert_has_calls([ call(base_dir), call(ad_dir) ]) mock_makedirs.assert_not_called() # Using mkdir instead # Get the actual call arguments actual_call = mock_save_dict.call_args assert actual_call is not None actual_path = actual_call[0][0].replace("/", os.path.sep) assert actual_path == yaml_path assert actual_call[0][1] == mock_extract.return_value.model_dump()