diff --git a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py index 685cf66..a910f43 100644 --- a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py +++ b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py @@ -1009,6 +1009,17 @@ class WebScrapingMixin: # Return primitive values as-is return data + async def _xpath_first(self, selector_value:str) -> Element | None: + matches = await self.page.xpath(selector_value, timeout = 0) + for match in matches: + if match is not None: + return cast(Element, match) + return None + + async def _xpath_all(self, selector_value:str) -> list[Element]: + matches = await self.page.xpath(selector_value, timeout = 0) + return [cast(Element, match) for match in matches if match is not None] + async def web_find(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None) -> Element: """ Locates an HTML element by the given selector type and value. @@ -1084,7 +1095,7 @@ class WebScrapingMixin: case By.XPATH: ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}") return await self.web_await( - lambda: self.page.find_element_by_text(selector_value, best_match = True), + lambda: self._xpath_first(selector_value), timeout = timeout, timeout_error_message = f"No HTML element found using XPath '{selector_value}'{timeout_suffix}", apply_multiplier = False, @@ -1129,7 +1140,7 @@ class WebScrapingMixin: case By.XPATH: ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}") return await self.web_await( - lambda: self.page.find_elements_by_text(selector_value), + lambda: self._xpath_all(selector_value), timeout = timeout, timeout_error_message = f"No HTML elements found using XPath '{selector_value}'{timeout_suffix}", apply_multiplier = False, diff --git a/tests/unit/test_web_scraping_mixin.py b/tests/unit/test_web_scraping_mixin.py index b6fbf17..bb8995d 100644 --- a/tests/unit/test_web_scraping_mixin.py +++ b/tests/unit/test_web_scraping_mixin.py @@ -666,6 +666,51 @@ class TestSelectorTimeoutMessages: assert expected_message == call.kwargs["timeout_error_message"] assert call.kwargs["apply_multiplier"] is False + @pytest.mark.asyncio + async def test_web_find_once_xpath_uses_xpath_api(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None: + """By.XPATH should use the native page.xpath API and return first non-null match.""" + xpath_match = MagicMock(spec = Element) + mock_page.xpath = AsyncMock(return_value = [None, xpath_match]) + + result = await web_scraper._web_find_once(By.XPATH, "//div[@id='hero']", 0.2) + + assert result is xpath_match + mock_page.xpath.assert_awaited_once_with("//div[@id='hero']", timeout = 0) + + @pytest.mark.asyncio + async def test_web_find_once_xpath_returns_none_when_no_matches(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None: + """_xpath_first should return None when xpath yields no usable match.""" + mock_page.xpath = AsyncMock(return_value = [None, None]) + + result = await web_scraper._xpath_first("//missing") + + assert result is None + mock_page.xpath.assert_awaited_once_with("//missing", timeout = 0) + + @pytest.mark.asyncio + async def test_web_find_all_once_xpath_uses_xpath_api(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None: + """By.XPATH should use page.xpath and return all non-null matches.""" + first_match = MagicMock(spec = Element) + second_match = MagicMock(spec = Element) + mock_page.xpath = AsyncMock(return_value = [first_match, None, second_match]) + + result = await web_scraper._web_find_all_once(By.XPATH, "//footer", 0.2) + + assert result == [first_match, second_match] + mock_page.xpath.assert_awaited_once_with("//footer", timeout = 0) + + @pytest.mark.asyncio + async def test_web_find_once_xpath_rejects_parent(self, web_scraper:WebScrapingMixin) -> None: + """XPath lookups currently do not support parent-scoped queries.""" + with pytest.raises(AssertionError, match = "Specifying a parent element currently not supported"): + await web_scraper._web_find_once(By.XPATH, "//div", 0.1, parent = AsyncMock(spec = Element)) + + @pytest.mark.asyncio + async def test_web_find_all_once_xpath_rejects_parent(self, web_scraper:WebScrapingMixin) -> None: + """XPath multi-lookups currently do not support parent-scoped queries.""" + with pytest.raises(AssertionError, match = "Specifying a parent element currently not supported"): + await web_scraper._web_find_all_once(By.XPATH, "//div", 0.1, parent = AsyncMock(spec = Element)) + @pytest.mark.asyncio @pytest.mark.parametrize( ("selector_type", "selector_value", "expected_message"),