""" SPDX-FileCopyrightText: © Sebastian Thomschke and contributors SPDX-License-Identifier: AGPL-3.0-or-later SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ """ import json import os from typing import Any, TypedDict from unittest.mock import MagicMock, AsyncMock, patch, call import pytest from kleinanzeigen_bot.extract import AdExtractor from kleinanzeigen_bot.web_scraping_mixin import Browser, By, Element class _DimensionsDict(TypedDict): dimension108: str class _UniversalAnalyticsOptsDict(TypedDict): dimensions: _DimensionsDict class _BelenConfDict(TypedDict): universalAnalyticsOpts: _UniversalAnalyticsOptsDict class _SpecialAttributesDict(TypedDict, total=False): art_s: str condition_s: str class _TestCaseDict(TypedDict): belen_conf: _BelenConfDict expected: _SpecialAttributesDict class TestAdExtractorBasics: """Basic synchronous tests for AdExtractor.""" @pytest.fixture def extractor(self) -> AdExtractor: browser_mock = MagicMock(spec=Browser) config_mock = { "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } } return AdExtractor(browser_mock, config_mock) def test_constructor(self) -> None: """Test the constructor of AdExtractor""" browser_mock = MagicMock(spec=Browser) config = { "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } } extractor = AdExtractor(browser_mock, config) assert extractor.browser == browser_mock assert extractor.config == config @pytest.mark.parametrize( "url,expected_id", [ ("https://www.kleinanzeigen.de/s-anzeige/test-title/12345678", 12345678), ("https://www.kleinanzeigen.de/s-anzeige/another-test/98765432", 98765432), ("https://www.kleinanzeigen.de/s-anzeige/invalid-id/abc", -1), ("https://www.kleinanzeigen.de/invalid-url", -1), ], ) def test_extract_ad_id_from_ad_url(self, extractor: AdExtractor, url: str, expected_id: int) -> None: """Test extraction of ad ID from different URL formats.""" assert extractor.extract_ad_id_from_ad_url(url) == expected_id class TestAdExtractorPricing: """Tests for pricing related functionality.""" @pytest.fixture def extractor(self) -> AdExtractor: browser_mock = MagicMock(spec=Browser) config_mock = { "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } } return AdExtractor(browser_mock, config_mock) @pytest.mark.parametrize( "price_text,expected_price,expected_type", [ ("50 €", 50, "FIXED"), ("1.234 €", 1234, "FIXED"), ("50 € VB", 50, "NEGOTIABLE"), ("VB", None, "NEGOTIABLE"), ("Zu verschenken", None, "GIVE_AWAY"), ], ) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_pricing_info( self, extractor: AdExtractor, price_text: str, expected_price: int | None, expected_type: str ) -> None: """Test price extraction with different formats""" with patch.object(extractor, 'web_text', new_callable=AsyncMock, return_value=price_text): price, price_type = await extractor._extract_pricing_info_from_ad_page() assert price == expected_price assert price_type == expected_type @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_pricing_info_timeout(self, extractor: AdExtractor) -> None: """Test price extraction when element is not found""" with patch.object(extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError): price, price_type = await extractor._extract_pricing_info_from_ad_page() assert price is None assert price_type == "NOT_APPLICABLE" class TestAdExtractorShipping: """Tests for shipping related functionality.""" @pytest.fixture def extractor(self) -> AdExtractor: browser_mock = MagicMock(spec=Browser) config_mock = { "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } } return AdExtractor(browser_mock, config_mock) @pytest.mark.parametrize( "shipping_text,expected_type,expected_cost", [ ("+ Versand ab 2,99 €", "SHIPPING", 2.99), ("Nur Abholung", "PICKUP", None), ("Versand möglich", "SHIPPING", None), ], ) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_shipping_info( self, extractor: AdExtractor, shipping_text: str, expected_type: str, expected_cost: float | None ) -> None: """Test shipping info extraction with different text formats.""" with patch.object(extractor, 'page', MagicMock()), \ patch.object(extractor, 'web_text', new_callable=AsyncMock, return_value=shipping_text), \ patch.object(extractor, 'web_request', new_callable=AsyncMock) as mock_web_request: if expected_cost: shipping_response: dict[str, Any] = { "data": { "shippingOptionsResponse": { "options": [ {"id": "DHL_001", "priceInEuroCent": int(expected_cost * 100)} ] } } } mock_web_request.return_value = {"content": json.dumps(shipping_response)} shipping_type, costs, options = await extractor._extract_shipping_info_from_ad_page() assert shipping_type == expected_type assert costs == expected_cost if expected_cost: assert options == ["DHL_2"] else: assert options is None @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_shipping_info_with_options(self, extractor: AdExtractor) -> None: """Test shipping info extraction with shipping options.""" shipping_response = { "content": json.dumps({ "data": { "shippingOptionsResponse": { "options": [ {"id": "DHL_001", "priceInEuroCent": 549} ] } } }) } with patch.object(extractor, 'page', MagicMock()), \ patch.object(extractor, 'web_text', new_callable=AsyncMock, return_value="+ Versand ab 5,49 €"), \ patch.object(extractor, 'web_request', new_callable=AsyncMock, return_value=shipping_response): shipping_type, costs, options = await extractor._extract_shipping_info_from_ad_page() assert shipping_type == "SHIPPING" assert costs == 5.49 assert options == ["DHL_2"] class TestAdExtractorNavigation: """Tests for navigation related functionality.""" @pytest.fixture def extractor(self) -> AdExtractor: browser_mock = MagicMock(spec=Browser) config_mock = { "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } } return AdExtractor(browser_mock, config_mock) @pytest.mark.asyncio async def test_navigate_to_ad_page_with_url(self, extractor: AdExtractor) -> None: """Test navigation to ad page using a URL.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" with patch.object(extractor, 'page', page_mock), \ patch.object(extractor, 'web_open', new_callable=AsyncMock) as mock_web_open, \ patch.object(extractor, 'web_find', new_callable=AsyncMock, side_effect=TimeoutError): result = await extractor.naviagte_to_ad_page("https://www.kleinanzeigen.de/s-anzeige/test/12345") assert result is True mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345") @pytest.mark.asyncio async def test_navigate_to_ad_page_with_id(self, extractor: AdExtractor) -> None: """Test navigation to ad page using an ID.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" submit_button_mock = AsyncMock() submit_button_mock.click = AsyncMock() submit_button_mock.apply = AsyncMock(return_value=True) input_mock = AsyncMock() input_mock.clear_input = AsyncMock() input_mock.send_keys = AsyncMock() input_mock.apply = AsyncMock(return_value=True) popup_close_mock = AsyncMock() popup_close_mock.click = AsyncMock() popup_close_mock.apply = AsyncMock(return_value=True) def find_mock(selector_type: By, selector_value: str, **_: Any) -> Element | None: if selector_type == By.ID and selector_value == "site-search-query": return input_mock if selector_type == By.ID and selector_value == "site-search-submit": return submit_button_mock if selector_type == By.CLASS_NAME and selector_value == "mfp-close": return popup_close_mock return None with patch.object(extractor, 'page', page_mock), \ patch.object(extractor, 'web_open', new_callable=AsyncMock) as mock_web_open, \ patch.object(extractor, 'web_input', new_callable=AsyncMock), \ patch.object(extractor, 'web_check', new_callable=AsyncMock, return_value=True), \ patch.object(extractor, 'web_find', new_callable=AsyncMock, side_effect=find_mock): result = await extractor.naviagte_to_ad_page(12345) assert result is True mock_web_open.assert_called_with('https://www.kleinanzeigen.de/') submit_button_mock.click.assert_awaited_once() popup_close_mock.click.assert_awaited_once() @pytest.mark.asyncio async def test_navigate_to_ad_page_with_popup(self, extractor: AdExtractor) -> None: """Test navigation to ad page with popup handling.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" input_mock = AsyncMock() input_mock.clear_input = AsyncMock() input_mock.send_keys = AsyncMock() input_mock.apply = AsyncMock(return_value=True) with patch.object(extractor, 'page', page_mock), \ patch.object(extractor, 'web_open', new_callable=AsyncMock), \ patch.object(extractor, 'web_find', new_callable=AsyncMock, return_value=input_mock), \ patch.object(extractor, 'web_click', new_callable=AsyncMock) as mock_web_click, \ patch.object(extractor, 'web_check', new_callable=AsyncMock, return_value=True): result = await extractor.naviagte_to_ad_page(12345) assert result is True mock_web_click.assert_called_with(By.CLASS_NAME, 'mfp-close') @pytest.mark.asyncio async def test_navigate_to_ad_page_invalid_id(self, extractor: AdExtractor) -> None: """Test navigation to ad page with invalid ID.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0" input_mock = AsyncMock() input_mock.clear_input = AsyncMock() input_mock.send_keys = AsyncMock() input_mock.apply = AsyncMock(return_value=True) input_mock.attrs = {} with patch.object(extractor, 'page', page_mock), \ patch.object(extractor, 'web_open', new_callable=AsyncMock), \ patch.object(extractor, 'web_find', new_callable=AsyncMock, return_value=input_mock): result = await extractor.naviagte_to_ad_page(99999) assert result is False @pytest.mark.asyncio async def test_extract_own_ads_urls(self, extractor: AdExtractor) -> None: """Test extraction of own ads URLs - basic test.""" with patch.object(extractor, 'web_open', new_callable=AsyncMock), \ patch.object(extractor, 'web_sleep', new_callable=AsyncMock), \ patch.object(extractor, 'web_find', new_callable=AsyncMock) as mock_web_find, \ patch.object(extractor, 'web_find_all', new_callable=AsyncMock) as mock_web_find_all, \ patch.object(extractor, 'web_scroll_page_down', new_callable=AsyncMock), \ patch.object(extractor, 'web_execute', new_callable=AsyncMock): # Setup mock objects for DOM elements splitpage = MagicMock() pagination_section = MagicMock() pagination = MagicMock() pagination_div = MagicMock() ad_list = MagicMock() cardbox = MagicMock() link = MagicMock() link.attrs = {'href': '/s-anzeige/test/12345'} # Setup mock responses for web_find mock_web_find.side_effect = [ splitpage, # .l-splitpage pagination_section, # section:nth-of-type(4) pagination, # div > div:nth-of-type(2) > div:nth-of-type(2) > div pagination_div, # div:nth-of-type(1) ad_list, # my-manageitems-adlist link # article > section > section:nth-of-type(2) > h2 > div > a ] # Setup mock responses for web_find_all mock_web_find_all.side_effect = [ [MagicMock()], # buttons in pagination [cardbox] # cardbox elements ] # Execute test and verify results refs = await extractor.extract_own_ads_urls() assert refs == ['/s-anzeige/test/12345'] class TestAdExtractorContent: """Tests for content extraction functionality.""" @pytest.fixture def extractor(self) -> AdExtractor: browser_mock = MagicMock(spec=Browser) config_mock = { "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } } return AdExtractor(browser_mock, config_mock) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_title_and_description(self, extractor: AdExtractor) -> None: """Test basic extraction of title and description.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" category_mock = AsyncMock() category_mock.attrs = {'href': '/s-kategorie/c123'} with patch.object(extractor, 'page', page_mock), \ patch.object(extractor, 'web_text', new_callable=AsyncMock) as mock_web_text, \ patch.object(extractor, 'web_find', new_callable=AsyncMock, return_value=category_mock), \ patch.object(extractor, '_extract_category_from_ad_page', new_callable=AsyncMock, return_value="17/23"), \ patch.object(extractor, '_extract_special_attributes_from_ad_page', new_callable=AsyncMock, return_value={}), \ patch.object(extractor, '_extract_pricing_info_from_ad_page', new_callable=AsyncMock, return_value=(None, "NOT_APPLICABLE")), \ patch.object(extractor, '_extract_shipping_info_from_ad_page', new_callable=AsyncMock, return_value=("NOT_APPLICABLE", None, None)), \ patch.object(extractor, '_extract_sell_directly_from_ad_page', new_callable=AsyncMock, return_value=False), \ patch.object(extractor, '_download_images_from_ad_page', new_callable=AsyncMock, return_value=[]), \ patch.object(extractor, '_extract_contact_from_ad_page', new_callable=AsyncMock, return_value={}): mock_web_text.side_effect = [ "Test Title", "Test Prefix Original Description Test Suffix", "03.02.2025" ] info = await extractor._extract_ad_page_info("/some/dir", 12345) assert isinstance(info, dict) assert info["title"] == "Test Title" assert info["description"].strip() == "Original Description" assert info["created_on"] == "2025-02-03T00:00:00" @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_sell_directly(self, extractor: AdExtractor) -> None: """Test extraction of sell directly option.""" test_cases = [ ("Direkt kaufen", True), ("Other text", False), ] for text, expected in test_cases: with patch.object(extractor, 'web_text', new_callable=AsyncMock, return_value=text): result = await extractor._extract_sell_directly_from_ad_page() assert result is expected with patch.object(extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError): result = await extractor._extract_sell_directly_from_ad_page() assert result is None class TestAdExtractorCategory: """Tests for category extraction functionality.""" @pytest.fixture def extractor(self) -> AdExtractor: browser_mock = MagicMock(spec=Browser) config_mock = { "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } } return AdExtractor(browser_mock, config_mock) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_category(self, extractor: AdExtractor) -> None: """Test category extraction from breadcrumb.""" category_line = MagicMock() first_part = MagicMock() first_part.attrs = {'href': '/s-familie-kind-baby/c17'} second_part = MagicMock() second_part.attrs = {'href': '/s-spielzeug/c23'} with patch.object(extractor, 'web_find', new_callable=AsyncMock) as mock_web_find: mock_web_find.side_effect = [ category_line, first_part, second_part ] result = await extractor._extract_category_from_ad_page() assert result == "17/23" mock_web_find.assert_any_call(By.ID, 'vap-brdcrmb') mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent=category_line) mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent=category_line) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_special_attributes_empty(self, extractor: AdExtractor) -> None: """Test extraction of special attributes when empty.""" with patch.object(extractor, 'web_execute', new_callable=AsyncMock) as mock_web_execute: mock_web_execute.return_value = { "universalAnalyticsOpts": { "dimensions": { "dimension108": "" } } } result = await extractor._extract_special_attributes_from_ad_page() assert result == {} class TestAdExtractorContact: """Tests for contact information extraction.""" @pytest.fixture def extractor(self) -> AdExtractor: browser_mock = MagicMock(spec=Browser) config_mock = { "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } } return AdExtractor(browser_mock, config_mock) @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_contact_info(self, extractor: AdExtractor) -> None: """Test extraction of contact information.""" with patch.object(extractor, 'page', MagicMock()), \ patch.object(extractor, 'web_text', new_callable=AsyncMock) as mock_web_text, \ patch.object(extractor, 'web_find', new_callable=AsyncMock) as mock_web_find: mock_web_text.side_effect = [ "12345 Berlin - Mitte", "Example Street 123,", "Test User", ] mock_web_find.side_effect = [ MagicMock(), # contact person element MagicMock(), # name element TimeoutError(), # phone element (simulating no phone) ] contact_info = await extractor._extract_contact_from_ad_page() assert isinstance(contact_info, dict) assert contact_info["street"] == "Example Street 123" assert contact_info["zipcode"] == "12345" assert contact_info["location"] == "Berlin - Mitte" assert contact_info["name"] == "Test User" assert contact_info["phone"] is None @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_contact_info_timeout(self, extractor: AdExtractor) -> None: """Test contact info extraction when elements are not found.""" with patch.object(extractor, 'page', MagicMock()), \ patch.object(extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError()), \ patch.object(extractor, 'web_find', new_callable=AsyncMock, side_effect=TimeoutError()): with pytest.raises(TimeoutError): await extractor._extract_contact_from_ad_page() @pytest.mark.asyncio # pylint: disable=protected-access async def test_extract_contact_info_with_phone(self, extractor: AdExtractor) -> None: """Test extraction of contact information including phone number.""" with patch.object(extractor, 'page', MagicMock()), \ patch.object(extractor, 'web_text', new_callable=AsyncMock) as mock_web_text, \ patch.object(extractor, 'web_find', new_callable=AsyncMock) as mock_web_find: mock_web_text.side_effect = [ "12345 Berlin - Mitte", "Example Street 123,", "Test User", "+49(0)1234 567890" ] phone_element = MagicMock() mock_web_find.side_effect = [ MagicMock(), # contact person element MagicMock(), # name element phone_element, # phone element ] contact_info = await extractor._extract_contact_from_ad_page() assert isinstance(contact_info, dict) assert contact_info["phone"] == "01234567890" # Normalized phone number class TestAdExtractorDownload: """Tests for download functionality.""" @pytest.fixture def extractor(self) -> AdExtractor: browser_mock = MagicMock(spec=Browser) config_mock = { "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } } return AdExtractor(browser_mock, config_mock) @pytest.mark.asyncio async def test_download_ad_existing_directory(self, extractor: AdExtractor) -> None: """Test downloading an ad when the directory already exists.""" with patch('os.path.exists') as mock_exists, \ patch('os.path.isdir') as mock_isdir, \ patch('os.makedirs') as mock_makedirs, \ patch('os.mkdir') as mock_mkdir, \ patch('shutil.rmtree') as mock_rmtree, \ patch('kleinanzeigen_bot.extract.save_dict', autospec=True) as mock_save_dict, \ patch.object(extractor, '_extract_ad_page_info', new_callable=AsyncMock) as mock_extract: base_dir = 'downloaded-ads' ad_dir = os.path.join(base_dir, 'ad_12345') yaml_path = os.path.join(ad_dir, 'ad_12345.yaml') # Configure mocks for directory checks existing_paths = {base_dir, ad_dir} mock_exists.side_effect = lambda path: path in existing_paths mock_isdir.side_effect = lambda path: path == base_dir mock_extract.return_value = { "title": "Test Advertisement Title", "description": "Test Description", "price": 100, "images": [], "contact": { "name": "Test User", "street": "Test Street 123", "zipcode": "12345", "location": "Test City" } } await extractor.download_ad(12345) # Verify the correct functions were called mock_extract.assert_called_once() mock_rmtree.assert_called_once_with(ad_dir) mock_mkdir.assert_called_once_with(ad_dir) mock_makedirs.assert_not_called() # Directory already exists # Get the actual call arguments # Workaround for hard-coded path in download_ad actual_call = mock_save_dict.call_args assert actual_call is not None actual_path = actual_call[0][0].replace('/', os.path.sep) assert actual_path == yaml_path assert actual_call[0][1] == mock_extract.return_value @pytest.mark.asyncio # pylint: disable=protected-access async def test_download_images_no_images(self, extractor: AdExtractor) -> None: """Test image download when no images are found.""" with patch.object(extractor, 'web_find', new_callable=AsyncMock, side_effect=TimeoutError): image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345) assert len(image_paths) == 0 @pytest.mark.asyncio async def test_download_ad(self, extractor: AdExtractor) -> None: """Test downloading an entire ad.""" with patch('os.path.exists') as mock_exists, \ patch('os.path.isdir') as mock_isdir, \ patch('os.makedirs') as mock_makedirs, \ patch('os.mkdir') as mock_mkdir, \ patch('shutil.rmtree') as mock_rmtree, \ patch('kleinanzeigen_bot.extract.save_dict', autospec=True) as mock_save_dict, \ patch.object(extractor, '_extract_ad_page_info', new_callable=AsyncMock) as mock_extract: base_dir = 'downloaded-ads' ad_dir = os.path.join(base_dir, 'ad_12345') yaml_path = os.path.join(ad_dir, 'ad_12345.yaml') # Configure mocks for directory checks mock_exists.return_value = False mock_isdir.return_value = False mock_extract.return_value = { "title": "Test Advertisement Title", "description": "Test Description", "price": 100, "images": [], "contact": { "name": "Test User", "street": "Test Street 123", "zipcode": "12345", "location": "Test City" } } await extractor.download_ad(12345) # Verify the correct functions were called mock_extract.assert_called_once() mock_rmtree.assert_not_called() # No directory to remove mock_mkdir.assert_has_calls([ call(base_dir), call(ad_dir) ]) mock_makedirs.assert_not_called() # Using mkdir instead # Get the actual call arguments actual_call = mock_save_dict.call_args assert actual_call is not None actual_path = actual_call[0][0].replace('/', os.path.sep) assert actual_path == yaml_path assert actual_call[0][1] == mock_extract.return_value