diff --git a/pdm.lock b/pdm.lock index ee50ee8..9f45f00 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,10 @@ # It is not intended for manual editing. [metadata] -groups = ["default", "dev"] +groups = ["default", "dev", "test"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:cf10c346cfa438b268bfb7d8ca5e9e89da483ad09800148dceafa07b9f8ccfa9" +content_hash = "sha256:684af2e6d958227b326df61f5c46a06de9eb11a977ddd6e822f6388b280055f8" [[metadata.targets]] requires_python = ">=3.10,<3.14" @@ -116,7 +116,7 @@ name = "colorama" version = "0.4.6" requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" summary = "Cross-platform colored terminal text." -groups = ["default", "dev"] +groups = ["default", "dev", "test"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -279,7 +279,7 @@ name = "exceptiongroup" version = "1.2.2" requires_python = ">=3.7" summary = "Backport of PEP 654 (exception groups)" -groups = ["dev"] +groups = ["dev", "test"] marker = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, @@ -291,7 +291,7 @@ name = "iniconfig" version = "2.0.0" requires_python = ">=3.7" summary = "brain-dead simple config-ini parsing" -groups = ["dev"] +groups = ["dev", "test"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -507,7 +507,7 @@ name = "packaging" version = "24.2" requires_python = ">=3.8" summary = "Core utilities for Python packages" -groups = ["dev"] +groups = ["dev", "test"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -552,7 +552,7 @@ name = "pluggy" version = "1.5.0" requires_python = ">=3.8" summary = "plugin and hook calling mechanisms for python" -groups = ["dev"] +groups = ["dev", "test"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -688,7 +688,7 @@ name = "pytest" version = "8.3.4" requires_python = ">=3.8" summary = "pytest: simple powerful testing with Python" -groups = ["dev"] +groups = ["dev", "test"] dependencies = [ "colorama; sys_platform == \"win32\"", "exceptiongroup>=1.0.0rc8; python_version < \"3.11\"", @@ -702,6 +702,20 @@ files = [ {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, ] +[[package]] +name = "pytest-asyncio" +version = "0.25.3" +requires_python = ">=3.9" +summary = "Pytest support for asyncio" +groups = ["test"] +dependencies = [ + "pytest<9,>=8.2", +] +files = [ + {file = "pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3"}, + {file = "pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a"}, +] + [[package]] name = "pytest-cov" version = "6.0.0" @@ -908,7 +922,7 @@ name = "tomli" version = "2.2.1" requires_python = ">=3.8" summary = "A lil' TOML parser" -groups = ["dev"] +groups = ["dev", "test"] files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, diff --git a/pyproject.toml b/pyproject.toml index e6cd6ca..0aa28ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -222,7 +222,14 @@ max-positional-arguments = 6 # max. number of positional args for function / me # https://docs.pytest.org/en/stable/reference.html#confval-addopts addopts = "--strict-markers -p no:cacheprovider --doctest-modules --cov=kleinanzeigen_bot --cov-report=term-missing --ignore=kleinanzeigen_bot/__main__.py" markers = [ - "itest: marks a test as an integration test (i.e. a test with external dependencies)" + "itest: marks a test as an integration test (i.e. a test with external dependencies)", + "asyncio: mark test as async" +] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +filterwarnings = [ + "ignore:Exception ignored in:pytest.PytestUnraisableExceptionWarning", + "ignore::DeprecationWarning" ] [dependency-groups] @@ -231,7 +238,8 @@ dev = [ "bandit", "toml", "tomli", - "pytest", + "pytest>=8.3.4", + "pytest-asyncio>=0.25.3", "pytest-rerunfailures", "pyinstaller", "pylint", diff --git a/tests/conftest.py b/tests/conftest.py index 46cc86e..a224b32 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,9 +4,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ """ import logging -from typing import Any, Final - -import pytest +from typing import Final from kleinanzeigen_bot import utils from kleinanzeigen_bot.i18n import get_translating_logger @@ -15,49 +13,3 @@ utils.configure_console_logging() LOG:Final[logging.Logger] = get_translating_logger("kleinanzeigen_bot") LOG.setLevel(logging.DEBUG) - - -@pytest.fixture -def sample_config() -> dict[str, Any]: - return { - "login": { - "username": "test_user", - "password": "test_password" - }, - "browser": { - "arguments": [], - "binary_location": None, - "extensions": [], - "use_private_window": True, - "user_data_dir": None, - "profile_name": None - }, - "ad_defaults": { - "description": { - "prefix": "", - "suffix": "" - } - }, - "ad_files": ["ads/*.yaml"] - } - - -@pytest.fixture -def sample_ad_config() -> dict[str, Any]: - return { - "title": "Test Item", - "description": "Test Description", - "price": "100", - "price_type": "FIXED", - "shipping_type": "PICKUP", - "active": True, - "contact": { - "name": "Test User", - "zipcode": "12345" - }, - "images": [], - "id": None, - "created_on": None, - "updated_on": None, - "republication_interval": 30 - } diff --git a/tests/unit/test_extract.py b/tests/unit/test_extract.py new file mode 100644 index 0000000..2c9688b --- /dev/null +++ b/tests/unit/test_extract.py @@ -0,0 +1,689 @@ +""" +SPDX-FileCopyrightText: © Sebastian Thomschke and contributors +SPDX-License-Identifier: AGPL-3.0-or-later +SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ +""" +import json +import os +from typing import Any, TypedDict +from unittest.mock import MagicMock, AsyncMock, patch, call +import pytest +from kleinanzeigen_bot.extract import AdExtractor +from kleinanzeigen_bot.web_scraping_mixin import Browser, By, Element + + +class _DimensionsDict(TypedDict): + dimension108: str + + +class _UniversalAnalyticsOptsDict(TypedDict): + dimensions: _DimensionsDict + + +class _BelenConfDict(TypedDict): + universalAnalyticsOpts: _UniversalAnalyticsOptsDict + + +class _SpecialAttributesDict(TypedDict, total=False): + art_s: str + condition_s: str + + +class _TestCaseDict(TypedDict): + belen_conf: _BelenConfDict + expected: _SpecialAttributesDict + + +class TestAdExtractorBasics: + """Basic synchronous tests for AdExtractor.""" + + @pytest.fixture + def extractor(self) -> AdExtractor: + browser_mock = MagicMock(spec=Browser) + config_mock = { + "ad_defaults": { + "description": { + "prefix": "Test Prefix", + "suffix": "Test Suffix" + } + } + } + return AdExtractor(browser_mock, config_mock) + + def test_constructor(self) -> None: + """Test the constructor of AdExtractor""" + browser_mock = MagicMock(spec=Browser) + config = { + "ad_defaults": { + "description": { + "prefix": "Test Prefix", + "suffix": "Test Suffix" + } + } + } + extractor = AdExtractor(browser_mock, config) + assert extractor.browser == browser_mock + assert extractor.config == config + + @pytest.mark.parametrize( + "url,expected_id", + [ + ("https://www.kleinanzeigen.de/s-anzeige/test-title/12345678", 12345678), + ("https://www.kleinanzeigen.de/s-anzeige/another-test/98765432", 98765432), + ("https://www.kleinanzeigen.de/s-anzeige/invalid-id/abc", -1), + ("https://www.kleinanzeigen.de/invalid-url", -1), + ], + ) + def test_extract_ad_id_from_ad_url(self, extractor: AdExtractor, url: str, expected_id: int) -> None: + """Test extraction of ad ID from different URL formats.""" + assert extractor.extract_ad_id_from_ad_url(url) == expected_id + + +class TestAdExtractorPricing: + """Tests for pricing related functionality.""" + + @pytest.fixture + def extractor(self) -> AdExtractor: + browser_mock = MagicMock(spec=Browser) + config_mock = { + "ad_defaults": { + "description": { + "prefix": "Test Prefix", + "suffix": "Test Suffix" + } + } + } + return AdExtractor(browser_mock, config_mock) + + @pytest.mark.parametrize( + "price_text,expected_price,expected_type", + [ + ("50 €", 50, "FIXED"), + ("1.234 €", 1234, "FIXED"), + ("50 € VB", 50, "NEGOTIABLE"), + ("VB", None, "NEGOTIABLE"), + ("Zu verschenken", None, "GIVE_AWAY"), + ], + ) + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_pricing_info( + self, extractor: AdExtractor, price_text: str, expected_price: int | None, expected_type: str + ) -> None: + """Test price extraction with different formats""" + with patch.object(extractor, 'web_text', new_callable=AsyncMock, return_value=price_text): + price, price_type = await extractor._extract_pricing_info_from_ad_page() + assert price == expected_price + assert price_type == expected_type + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_pricing_info_timeout(self, extractor: AdExtractor) -> None: + """Test price extraction when element is not found""" + with patch.object(extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError): + price, price_type = await extractor._extract_pricing_info_from_ad_page() + assert price is None + assert price_type == "NOT_APPLICABLE" + + +class TestAdExtractorShipping: + """Tests for shipping related functionality.""" + + @pytest.fixture + def extractor(self) -> AdExtractor: + browser_mock = MagicMock(spec=Browser) + config_mock = { + "ad_defaults": { + "description": { + "prefix": "Test Prefix", + "suffix": "Test Suffix" + } + } + } + return AdExtractor(browser_mock, config_mock) + + @pytest.mark.parametrize( + "shipping_text,expected_type,expected_cost", + [ + ("+ Versand ab 2,99 €", "SHIPPING", 2.99), + ("Nur Abholung", "PICKUP", None), + ("Versand möglich", "SHIPPING", None), + ], + ) + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_shipping_info( + self, extractor: AdExtractor, shipping_text: str, expected_type: str, expected_cost: float | None + ) -> None: + """Test shipping info extraction with different text formats.""" + with patch.object(extractor, 'page', MagicMock()), \ + patch.object(extractor, 'web_text', new_callable=AsyncMock, return_value=shipping_text), \ + patch.object(extractor, 'web_request', new_callable=AsyncMock) as mock_web_request: + + if expected_cost: + shipping_response: dict[str, Any] = { + "data": { + "shippingOptionsResponse": { + "options": [ + {"id": "DHL_001", "priceInEuroCent": int(expected_cost * 100)} + ] + } + } + } + mock_web_request.return_value = {"content": json.dumps(shipping_response)} + + shipping_type, costs, options = await extractor._extract_shipping_info_from_ad_page() + + assert shipping_type == expected_type + assert costs == expected_cost + if expected_cost: + assert options == ["DHL_2"] + else: + assert options is None + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_shipping_info_with_options(self, extractor: AdExtractor) -> None: + """Test shipping info extraction with shipping options.""" + shipping_response = { + "content": json.dumps({ + "data": { + "shippingOptionsResponse": { + "options": [ + {"id": "DHL_001", "priceInEuroCent": 549} + ] + } + } + }) + } + + with patch.object(extractor, 'page', MagicMock()), \ + patch.object(extractor, 'web_text', new_callable=AsyncMock, return_value="+ Versand ab 5,49 €"), \ + patch.object(extractor, 'web_request', new_callable=AsyncMock, return_value=shipping_response): + + shipping_type, costs, options = await extractor._extract_shipping_info_from_ad_page() + + assert shipping_type == "SHIPPING" + assert costs == 5.49 + assert options == ["DHL_2"] + + +class TestAdExtractorNavigation: + """Tests for navigation related functionality.""" + + @pytest.fixture + def extractor(self) -> AdExtractor: + browser_mock = MagicMock(spec=Browser) + config_mock = { + "ad_defaults": { + "description": { + "prefix": "Test Prefix", + "suffix": "Test Suffix" + } + } + } + return AdExtractor(browser_mock, config_mock) + + @pytest.mark.asyncio + async def test_navigate_to_ad_page_with_url(self, extractor: AdExtractor) -> None: + """Test navigation to ad page using a URL.""" + page_mock = AsyncMock() + page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" + + with patch.object(extractor, 'page', page_mock), \ + patch.object(extractor, 'web_open', new_callable=AsyncMock) as mock_web_open, \ + patch.object(extractor, 'web_find', new_callable=AsyncMock, side_effect=TimeoutError): + + result = await extractor.naviagte_to_ad_page("https://www.kleinanzeigen.de/s-anzeige/test/12345") + assert result is True + mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345") + + @pytest.mark.asyncio + async def test_navigate_to_ad_page_with_id(self, extractor: AdExtractor) -> None: + """Test navigation to ad page using an ID.""" + page_mock = AsyncMock() + page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" + + submit_button_mock = AsyncMock() + submit_button_mock.click = AsyncMock() + submit_button_mock.apply = AsyncMock(return_value=True) + + input_mock = AsyncMock() + input_mock.clear_input = AsyncMock() + input_mock.send_keys = AsyncMock() + input_mock.apply = AsyncMock(return_value=True) + + popup_close_mock = AsyncMock() + popup_close_mock.click = AsyncMock() + popup_close_mock.apply = AsyncMock(return_value=True) + + def find_mock(selector_type: By, selector_value: str, **_: Any) -> Element | None: + if selector_type == By.ID and selector_value == "site-search-query": + return input_mock + if selector_type == By.ID and selector_value == "site-search-submit": + return submit_button_mock + if selector_type == By.CLASS_NAME and selector_value == "mfp-close": + return popup_close_mock + return None + + with patch.object(extractor, 'page', page_mock), \ + patch.object(extractor, 'web_open', new_callable=AsyncMock) as mock_web_open, \ + patch.object(extractor, 'web_input', new_callable=AsyncMock), \ + patch.object(extractor, 'web_check', new_callable=AsyncMock, return_value=True), \ + patch.object(extractor, 'web_find', new_callable=AsyncMock, side_effect=find_mock): + + result = await extractor.naviagte_to_ad_page(12345) + assert result is True + mock_web_open.assert_called_with('https://www.kleinanzeigen.de/') + submit_button_mock.click.assert_awaited_once() + popup_close_mock.click.assert_awaited_once() + + @pytest.mark.asyncio + async def test_navigate_to_ad_page_with_popup(self, extractor: AdExtractor) -> None: + """Test navigation to ad page with popup handling.""" + page_mock = AsyncMock() + page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" + + input_mock = AsyncMock() + input_mock.clear_input = AsyncMock() + input_mock.send_keys = AsyncMock() + input_mock.apply = AsyncMock(return_value=True) + + with patch.object(extractor, 'page', page_mock), \ + patch.object(extractor, 'web_open', new_callable=AsyncMock), \ + patch.object(extractor, 'web_find', new_callable=AsyncMock, return_value=input_mock), \ + patch.object(extractor, 'web_click', new_callable=AsyncMock) as mock_web_click, \ + patch.object(extractor, 'web_check', new_callable=AsyncMock, return_value=True): + + result = await extractor.naviagte_to_ad_page(12345) + assert result is True + mock_web_click.assert_called_with(By.CLASS_NAME, 'mfp-close') + + @pytest.mark.asyncio + async def test_navigate_to_ad_page_invalid_id(self, extractor: AdExtractor) -> None: + """Test navigation to ad page with invalid ID.""" + page_mock = AsyncMock() + page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0" + + input_mock = AsyncMock() + input_mock.clear_input = AsyncMock() + input_mock.send_keys = AsyncMock() + input_mock.apply = AsyncMock(return_value=True) + input_mock.attrs = {} + + with patch.object(extractor, 'page', page_mock), \ + patch.object(extractor, 'web_open', new_callable=AsyncMock), \ + patch.object(extractor, 'web_find', new_callable=AsyncMock, return_value=input_mock): + + result = await extractor.naviagte_to_ad_page(99999) + assert result is False + + @pytest.mark.asyncio + async def test_extract_own_ads_urls(self, extractor: AdExtractor) -> None: + """Test extraction of own ads URLs - basic test.""" + with patch.object(extractor, 'web_open', new_callable=AsyncMock), \ + patch.object(extractor, 'web_sleep', new_callable=AsyncMock), \ + patch.object(extractor, 'web_find', new_callable=AsyncMock) as mock_web_find, \ + patch.object(extractor, 'web_find_all', new_callable=AsyncMock) as mock_web_find_all, \ + patch.object(extractor, 'web_scroll_page_down', new_callable=AsyncMock), \ + patch.object(extractor, 'web_execute', new_callable=AsyncMock): + + # Setup mock objects for DOM elements + splitpage = MagicMock() + pagination_section = MagicMock() + pagination = MagicMock() + pagination_div = MagicMock() + ad_list = MagicMock() + cardbox = MagicMock() + link = MagicMock() + link.attrs = {'href': '/s-anzeige/test/12345'} + + # Setup mock responses for web_find + mock_web_find.side_effect = [ + splitpage, # .l-splitpage + pagination_section, # section:nth-of-type(4) + pagination, # div > div:nth-of-type(2) > div:nth-of-type(2) > div + pagination_div, # div:nth-of-type(1) + ad_list, # my-manageitems-adlist + link # article > section > section:nth-of-type(2) > h2 > div > a + ] + + # Setup mock responses for web_find_all + mock_web_find_all.side_effect = [ + [MagicMock()], # buttons in pagination + [cardbox] # cardbox elements + ] + + # Execute test and verify results + refs = await extractor.extract_own_ads_urls() + assert refs == ['/s-anzeige/test/12345'] + + +class TestAdExtractorContent: + """Tests for content extraction functionality.""" + + @pytest.fixture + def extractor(self) -> AdExtractor: + browser_mock = MagicMock(spec=Browser) + config_mock = { + "ad_defaults": { + "description": { + "prefix": "Test Prefix", + "suffix": "Test Suffix" + } + } + } + return AdExtractor(browser_mock, config_mock) + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_title_and_description(self, extractor: AdExtractor) -> None: + """Test basic extraction of title and description.""" + page_mock = AsyncMock() + page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" + + category_mock = AsyncMock() + category_mock.attrs = {'href': '/s-kategorie/c123'} + + with patch.object(extractor, 'page', page_mock), \ + patch.object(extractor, 'web_text', new_callable=AsyncMock) as mock_web_text, \ + patch.object(extractor, 'web_find', new_callable=AsyncMock, return_value=category_mock), \ + patch.object(extractor, '_extract_category_from_ad_page', new_callable=AsyncMock, return_value="17/23"), \ + patch.object(extractor, '_extract_special_attributes_from_ad_page', new_callable=AsyncMock, return_value={}), \ + patch.object(extractor, '_extract_pricing_info_from_ad_page', new_callable=AsyncMock, return_value=(None, "NOT_APPLICABLE")), \ + patch.object(extractor, '_extract_shipping_info_from_ad_page', new_callable=AsyncMock, return_value=("NOT_APPLICABLE", None, None)), \ + patch.object(extractor, '_extract_sell_directly_from_ad_page', new_callable=AsyncMock, return_value=False), \ + patch.object(extractor, '_download_images_from_ad_page', new_callable=AsyncMock, return_value=[]), \ + patch.object(extractor, '_extract_contact_from_ad_page', new_callable=AsyncMock, return_value={}): + + mock_web_text.side_effect = [ + "Test Title", + "Test Prefix Original Description Test Suffix", + "03.02.2025" + ] + + info = await extractor._extract_ad_page_info("/some/dir", 12345) + assert isinstance(info, dict) + assert info["title"] == "Test Title" + assert info["description"].strip() == "Original Description" + assert info["created_on"] == "2025-02-03T00:00:00" + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_sell_directly(self, extractor: AdExtractor) -> None: + """Test extraction of sell directly option.""" + test_cases = [ + ("Direkt kaufen", True), + ("Other text", False), + ] + + for text, expected in test_cases: + with patch.object(extractor, 'web_text', new_callable=AsyncMock, return_value=text): + result = await extractor._extract_sell_directly_from_ad_page() + assert result is expected + + with patch.object(extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError): + result = await extractor._extract_sell_directly_from_ad_page() + assert result is None + + +class TestAdExtractorCategory: + """Tests for category extraction functionality.""" + + @pytest.fixture + def extractor(self) -> AdExtractor: + browser_mock = MagicMock(spec=Browser) + config_mock = { + "ad_defaults": { + "description": { + "prefix": "Test Prefix", + "suffix": "Test Suffix" + } + } + } + return AdExtractor(browser_mock, config_mock) + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_category(self, extractor: AdExtractor) -> None: + """Test category extraction from breadcrumb.""" + category_line = MagicMock() + first_part = MagicMock() + first_part.attrs = {'href': '/s-familie-kind-baby/c17'} + second_part = MagicMock() + second_part.attrs = {'href': '/s-spielzeug/c23'} + + with patch.object(extractor, 'web_find', new_callable=AsyncMock) as mock_web_find: + mock_web_find.side_effect = [ + category_line, + first_part, + second_part + ] + + result = await extractor._extract_category_from_ad_page() + assert result == "17/23" + + mock_web_find.assert_any_call(By.ID, 'vap-brdcrmb') + mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent=category_line) + mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent=category_line) + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_special_attributes_empty(self, extractor: AdExtractor) -> None: + """Test extraction of special attributes when empty.""" + with patch.object(extractor, 'web_execute', new_callable=AsyncMock) as mock_web_execute: + mock_web_execute.return_value = { + "universalAnalyticsOpts": { + "dimensions": { + "dimension108": "" + } + } + } + result = await extractor._extract_special_attributes_from_ad_page() + assert result == {} + + +class TestAdExtractorContact: + """Tests for contact information extraction.""" + + @pytest.fixture + def extractor(self) -> AdExtractor: + browser_mock = MagicMock(spec=Browser) + config_mock = { + "ad_defaults": { + "description": { + "prefix": "Test Prefix", + "suffix": "Test Suffix" + } + } + } + return AdExtractor(browser_mock, config_mock) + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_contact_info(self, extractor: AdExtractor) -> None: + """Test extraction of contact information.""" + with patch.object(extractor, 'page', MagicMock()), \ + patch.object(extractor, 'web_text', new_callable=AsyncMock) as mock_web_text, \ + patch.object(extractor, 'web_find', new_callable=AsyncMock) as mock_web_find: + + mock_web_text.side_effect = [ + "12345 Berlin - Mitte", + "Example Street 123,", + "Test User", + ] + + mock_web_find.side_effect = [ + MagicMock(), # contact person element + MagicMock(), # name element + TimeoutError(), # phone element (simulating no phone) + ] + + contact_info = await extractor._extract_contact_from_ad_page() + assert isinstance(contact_info, dict) + assert contact_info["street"] == "Example Street 123" + assert contact_info["zipcode"] == "12345" + assert contact_info["location"] == "Berlin - Mitte" + assert contact_info["name"] == "Test User" + assert contact_info["phone"] is None + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_contact_info_timeout(self, extractor: AdExtractor) -> None: + """Test contact info extraction when elements are not found.""" + with patch.object(extractor, 'page', MagicMock()), \ + patch.object(extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError()), \ + patch.object(extractor, 'web_find', new_callable=AsyncMock, side_effect=TimeoutError()): + + with pytest.raises(TimeoutError): + await extractor._extract_contact_from_ad_page() + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_contact_info_with_phone(self, extractor: AdExtractor) -> None: + """Test extraction of contact information including phone number.""" + with patch.object(extractor, 'page', MagicMock()), \ + patch.object(extractor, 'web_text', new_callable=AsyncMock) as mock_web_text, \ + patch.object(extractor, 'web_find', new_callable=AsyncMock) as mock_web_find: + + mock_web_text.side_effect = [ + "12345 Berlin - Mitte", + "Example Street 123,", + "Test User", + "+49(0)1234 567890" + ] + + phone_element = MagicMock() + mock_web_find.side_effect = [ + MagicMock(), # contact person element + MagicMock(), # name element + phone_element, # phone element + ] + + contact_info = await extractor._extract_contact_from_ad_page() + assert isinstance(contact_info, dict) + assert contact_info["phone"] == "01234567890" # Normalized phone number + + +class TestAdExtractorDownload: + """Tests for download functionality.""" + + @pytest.fixture + def extractor(self) -> AdExtractor: + browser_mock = MagicMock(spec=Browser) + config_mock = { + "ad_defaults": { + "description": { + "prefix": "Test Prefix", + "suffix": "Test Suffix" + } + } + } + return AdExtractor(browser_mock, config_mock) + + @pytest.mark.asyncio + async def test_download_ad_existing_directory(self, extractor: AdExtractor) -> None: + """Test downloading an ad when the directory already exists.""" + with patch('os.path.exists') as mock_exists, \ + patch('os.path.isdir') as mock_isdir, \ + patch('os.makedirs') as mock_makedirs, \ + patch('os.mkdir') as mock_mkdir, \ + patch('shutil.rmtree') as mock_rmtree, \ + patch('kleinanzeigen_bot.extract.save_dict', autospec=True) as mock_save_dict, \ + patch.object(extractor, '_extract_ad_page_info', new_callable=AsyncMock) as mock_extract: + + base_dir = 'downloaded-ads' + ad_dir = os.path.join(base_dir, 'ad_12345') + yaml_path = os.path.join(ad_dir, 'ad_12345.yaml') + + # Configure mocks for directory checks + existing_paths = {base_dir, ad_dir} + mock_exists.side_effect = lambda path: path in existing_paths + mock_isdir.side_effect = lambda path: path == base_dir + + mock_extract.return_value = { + "title": "Test Advertisement Title", + "description": "Test Description", + "price": 100, + "images": [], + "contact": { + "name": "Test User", + "street": "Test Street 123", + "zipcode": "12345", + "location": "Test City" + } + } + + await extractor.download_ad(12345) + + # Verify the correct functions were called + mock_extract.assert_called_once() + mock_rmtree.assert_called_once_with(ad_dir) + mock_mkdir.assert_called_once_with(ad_dir) + mock_makedirs.assert_not_called() # Directory already exists + + # Get the actual call arguments + # Workaround for hard-coded path in download_ad + actual_call = mock_save_dict.call_args + assert actual_call is not None + actual_path = actual_call[0][0].replace('/', os.path.sep) + assert actual_path == yaml_path + assert actual_call[0][1] == mock_extract.return_value + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_download_images_no_images(self, extractor: AdExtractor) -> None: + """Test image download when no images are found.""" + with patch.object(extractor, 'web_find', new_callable=AsyncMock, side_effect=TimeoutError): + image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345) + assert len(image_paths) == 0 + + @pytest.mark.asyncio + async def test_download_ad(self, extractor: AdExtractor) -> None: + """Test downloading an entire ad.""" + with patch('os.path.exists') as mock_exists, \ + patch('os.path.isdir') as mock_isdir, \ + patch('os.makedirs') as mock_makedirs, \ + patch('os.mkdir') as mock_mkdir, \ + patch('shutil.rmtree') as mock_rmtree, \ + patch('kleinanzeigen_bot.extract.save_dict', autospec=True) as mock_save_dict, \ + patch.object(extractor, '_extract_ad_page_info', new_callable=AsyncMock) as mock_extract: + + base_dir = 'downloaded-ads' + ad_dir = os.path.join(base_dir, 'ad_12345') + yaml_path = os.path.join(ad_dir, 'ad_12345.yaml') + + # Configure mocks for directory checks + mock_exists.return_value = False + mock_isdir.return_value = False + + mock_extract.return_value = { + "title": "Test Advertisement Title", + "description": "Test Description", + "price": 100, + "images": [], + "contact": { + "name": "Test User", + "street": "Test Street 123", + "zipcode": "12345", + "location": "Test City" + } + } + + await extractor.download_ad(12345) + + # Verify the correct functions were called + mock_extract.assert_called_once() + mock_rmtree.assert_not_called() # No directory to remove + mock_mkdir.assert_has_calls([ + call(base_dir), + call(ad_dir) + ]) + mock_makedirs.assert_not_called() # Using mkdir instead + + # Get the actual call arguments + actual_call = mock_save_dict.call_args + assert actual_call is not None + actual_path = actual_call[0][0].replace('/', os.path.sep) + assert actual_path == yaml_path + assert actual_call[0][1] == mock_extract.return_value