mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
fix: use native page xpath api for xpath selectors (#853)
## ℹ️ Description *Provide a concise summary of the changes introduced in this pull request.* - Link to the related issue(s): n/a - Describe the motivation and context for this change. This replaces the stacked XPath work from #845 with a standalone fix from `main`. It makes `By.XPATH` use the native page XPath API instead of routing XPath selectors through text lookup. ## 📋 Changes Summary - Add private XPath helpers in `WebScrapingMixin` for first-match and all-match lookups. - Route `By.XPATH` in `_web_find_once()` and `_web_find_all_once()` through `page.xpath(...)`. - Add unit coverage for XPath helper behavior, empty results, and unsupported parent scoping. - No configuration changes or new dependencies. ### ⚙️ Type of Change Select the type(s) of change(s) included in this pull request: - [x] 🐞 Bug fix (non-breaking change which fixes an issue) - [ ] ✨ New feature (adds new functionality without breaking existing usage) - [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations) ## ✅ Checklist Before requesting a review, confirm the following: - [x] I have reviewed my changes to ensure they meet the project's standards. - [x] I have tested my changes and ensured that all tests pass (`pdm run test`). - [x] I have formatted the code (`pdm run format`). - [x] I have verified that linting passes (`pdm run lint`). - [x] I have updated documentation where necessary. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Refactoring** * Improved web scraping element selection reliability through streamlined XPath operations and better internal helper methods. * **Tests** * Added comprehensive unit tests for XPath-based element lookup operations to ensure consistent behavior. 
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -666,6 +666,51 @@ class TestSelectorTimeoutMessages:
|
||||
assert expected_message == call.kwargs["timeout_error_message"]
|
||||
assert call.kwargs["apply_multiplier"] is False
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_find_once_xpath_uses_xpath_api(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None:
    """Verify that By.XPATH lookups are routed through the native page.xpath API
    and that the first non-null entry of the result list is returned."""
    expected_element = MagicMock(spec = Element)
    # leading None proves that null entries are skipped, not returned
    mock_page.xpath = AsyncMock(return_value = [None, expected_element])

    found = await web_scraper._web_find_once(By.XPATH, "//div[@id='hero']", 0.2)

    assert found is expected_element
    mock_page.xpath.assert_awaited_once_with("//div[@id='hero']", timeout = 0)
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_find_once_xpath_returns_none_when_no_matches(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None:
    """Verify that _xpath_first reports None when every entry returned by
    page.xpath is null, i.e. there is no usable match."""
    # simulate a lookup where the page yields only null placeholders
    mock_page.xpath = AsyncMock(return_value = [None, None])

    outcome = await web_scraper._xpath_first("//missing")

    assert outcome is None
    mock_page.xpath.assert_awaited_once_with("//missing", timeout = 0)
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_find_all_once_xpath_uses_xpath_api(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None:
    """Verify that multi-element By.XPATH lookups use page.xpath and that the
    returned list contains every non-null match, in order."""
    elem_a = MagicMock(spec = Element)
    elem_b = MagicMock(spec = Element)
    # interior None must be filtered out of the final result
    mock_page.xpath = AsyncMock(return_value = [elem_a, None, elem_b])

    matches = await web_scraper._web_find_all_once(By.XPATH, "//footer", 0.2)

    assert matches == [elem_a, elem_b]
    mock_page.xpath.assert_awaited_once_with("//footer", timeout = 0)
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_find_once_xpath_rejects_parent(self, web_scraper:WebScrapingMixin) -> None:
    """Verify that a parent-scoped single-element XPath lookup is rejected,
    since parent scoping is not supported for XPath selectors."""
    scoped_parent = AsyncMock(spec = Element)
    with pytest.raises(AssertionError, match = "Specifying a parent element currently not supported"):
        await web_scraper._web_find_once(By.XPATH, "//div", 0.1, parent = scoped_parent)
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_find_all_once_xpath_rejects_parent(self, web_scraper:WebScrapingMixin) -> None:
    """Verify that a parent-scoped multi-element XPath lookup is rejected,
    mirroring the single-element behavior for unsupported parent scoping."""
    scoped_parent = AsyncMock(spec = Element)
    with pytest.raises(AssertionError, match = "Specifying a parent element currently not supported"):
        await web_scraper._web_find_all_once(By.XPATH, "//div", 0.1, parent = scoped_parent)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
("selector_type", "selector_value", "expected_message"),
|
||||
|
||||
Reference in New Issue
Block a user