mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
feat: Improved WebSelect Handling: Added Combobox Support, Enhanced Element Detection, and Smarter Option Matching (#679)
## ℹ️ Description Added a web-select function for input/dropdown comboboxes. PR for issue/missing feature #677 # Fixes / Enhancements Finding special-attribute elements can fail because they are currently located only via the name="..." attribute of the HTML elements. If that lookup fails, the element is now additionally looked up by ID as a fallback (for example, the "brands" input/combobox for men's shoes). When selecting a value in a <select>, matching no longer relies only on the actual option value (xxx in the example <option value="xxx">yyy</option>) but also on the displayed text (i.e. yyy in the example above). This improves UX because the user doesn't have to look up the actual "value" of the option and can instead use the text displayed in the browser directly. Test cases for web_select_combobox were not added because of missing knowledge of how to mock async code properly. ## 📋 Changes Summary ✅ Fixes & Enhancements - New WebSelect Functionality - Improved Element Detection for Special Attributes - Enhanced <select> Option Matching Logic This improves UX and test robustness — users no longer need to know the exact underlying value, as matching also works with the visible label shown in the browser. 🧩 Result These updates make dropdown and combobox interactions more intuitive, resilient, and user-friendly across diverse HTML structures. ### ⚙️ Type of Change Select the type(s) of change(s) included in this pull request: - [x] 🐞 Bug fix (non-breaking change which fixes an issue) - [x] ✨ New feature (adds new functionality without breaking existing usage) - [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations) ## ✅ Checklist Before requesting a review, confirm the following: - [x] I have reviewed my changes to ensure they meet the project's standards. - [ ] I have tested my changes and ensured that all tests pass (`pdm run test`). - [x] I have formatted the code (`pdm run format`). 
- [x] I have verified that linting passes (`pdm run lint`). - [x] I have updated documentation where necessary. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Bug Fixes** * Field lookup now falls back to locating by ID when name lookup times out. * Option selection uses a two-pass match (value then displayed text); JS-path failures now surface as timeouts. * Error and log messages localized and clarified. * **New Features** * Support for combobox-style inputs: type into the input, open dropdown, and select by visible text (handles special characters). * **Tests** * Added tests for combobox selection, missing dropdowns, no-match errors, value-path selection, and special-character handling. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: Jens <1742418+1cu@users.noreply.github.com> Co-authored-by: Claude <claude@anthropic.com>
This commit is contained in:
@@ -1068,24 +1068,31 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
try:
|
||||
# finding element by name cause id are composed sometimes eg. autos.marke_s+autos.model_s for Modell by cars
|
||||
special_attr_elem = await self.web_find(By.XPATH, f"//*[contains(@name, '{special_attribute_key}')]")
|
||||
except TimeoutError as ex:
|
||||
LOG.debug("Attribute field '%s' could not be found.", special_attribute_key)
|
||||
raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}] (not found)") from ex
|
||||
except TimeoutError:
|
||||
# Trying to find element by ID instead cause sometimes there is NO name attribute...
|
||||
try:
|
||||
special_attr_elem = await self.web_find(By.ID, special_attribute_key)
|
||||
except TimeoutError as ex:
|
||||
LOG.debug(_("Attribute field '%s' could not be found."), special_attribute_key)
|
||||
raise TimeoutError(_("Failed to set attribute '%s'") % special_attribute_key) from ex
|
||||
|
||||
try:
|
||||
elem_id:str = str(special_attr_elem.attrs.id)
|
||||
if special_attr_elem.local_name == "select":
|
||||
LOG.debug("Attribute field '%s' seems to be a select...", special_attribute_key)
|
||||
LOG.debug(_("Attribute field '%s' seems to be a select..."), special_attribute_key)
|
||||
await self.web_select(By.ID, elem_id, special_attribute_value_str)
|
||||
elif special_attr_elem.attrs.type == "checkbox":
|
||||
LOG.debug("Attribute field '%s' seems to be a checkbox...", special_attribute_key)
|
||||
LOG.debug(_("Attribute field '%s' seems to be a checkbox..."), special_attribute_key)
|
||||
await self.web_click(By.ID, elem_id)
|
||||
elif special_attr_elem.attrs.type == "text" and special_attr_elem.attrs.get("role") == "combobox":
|
||||
LOG.debug(_("Attribute field '%s' seems to be a Combobox (i.e. text input with filtering dropdown)..."), special_attribute_key)
|
||||
await self.web_select_combobox(By.ID, elem_id, special_attribute_value_str)
|
||||
else:
|
||||
LOG.debug("Attribute field '%s' seems to be a text input...", special_attribute_key)
|
||||
LOG.debug(_("Attribute field '%s' seems to be a text input..."), special_attribute_key)
|
||||
await self.web_input(By.ID, elem_id, special_attribute_value_str)
|
||||
except TimeoutError as ex:
|
||||
LOG.debug("Attribute field '%s' is not of kind radio button.", special_attribute_key)
|
||||
raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex
|
||||
LOG.debug(_("Failed to set attribute field '%s' via known input types."), special_attribute_key)
|
||||
raise TimeoutError(_("Failed to set attribute '%s'") % special_attribute_key) from ex
|
||||
LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value_str)
|
||||
|
||||
async def __set_shipping(self, ad_cfg:Ad, mode:AdUpdateStrategy = AdUpdateStrategy.REPLACE) -> None:
|
||||
|
||||
@@ -121,10 +121,12 @@ kleinanzeigen_bot/__init__.py:
|
||||
"Setting special attribute [%s] to [%s]...": "Setze spezielles Attribut [%s] auf [%s]..."
|
||||
"Successfully set attribute field [%s] to [%s]...": "Attributfeld [%s] erfolgreich auf [%s] gesetzt..."
|
||||
"Attribute field '%s' could not be found.": "Attributfeld '%s' konnte nicht gefunden werden."
|
||||
"Failed to set attribute '%s'": "Fehler beim Setzen des Attributs '%s'"
|
||||
"Attribute field '%s' seems to be a select...": "Attributfeld '%s' scheint ein Auswahlfeld zu sein..."
|
||||
"Attribute field '%s' is not of kind radio button.": "Attributfeld '%s' ist kein Radiobutton."
|
||||
"Failed to set attribute field '%s' via known input types.": "Fehler beim Setzen des Attributfelds '%s' über bekannte Eingabetypen."
|
||||
"Attribute field '%s' seems to be a checkbox...": "Attributfeld '%s' scheint eine Checkbox zu sein..."
|
||||
"Attribute field '%s' seems to be a text input...": "Attributfeld '%s' scheint ein Texteingabefeld zu sein..."
|
||||
"Attribute field '%s' seems to be a Combobox (i.e. text input with filtering dropdown)...": "Attributfeld '%s' scheint eine Combobox zu sein (d.h. Texteingabefeld mit Dropdown-Filter)..."
|
||||
|
||||
download_ads:
|
||||
"Scanning your ad overview...": "Scanne Anzeigenübersicht..."
|
||||
@@ -403,6 +405,14 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
|
||||
web_check:
|
||||
"Unsupported attribute: %s": "Nicht unterstütztes Attribut: %s"
|
||||
|
||||
web_select:
|
||||
"Option not found by value or displayed text: %s": "Option nicht gefunden nach Wert oder angezeigtem Text: %s"
|
||||
|
||||
web_select_combobox:
|
||||
"Combobox input field does not have 'aria-controls' attribute.": "Das Eingabefeld der Combobox hat kein 'aria-controls'-Attribut."
|
||||
"Combobox missing aria-controls attribute": "Combobox fehlt aria-controls Attribut"
|
||||
"No matching option found in combobox: '%s'": "Keine passende Option in Combobox gefunden: '%s'"
|
||||
|
||||
close_browser_session:
|
||||
"Closing Browser session...": "Schließe Browser-Sitzung..."
|
||||
|
||||
|
||||
@@ -969,23 +969,110 @@ class WebScrapingMixin:
|
||||
lambda: self.web_check(selector_type, selector_value, Is.CLICKABLE), timeout = timeout,
|
||||
timeout_error_message = f"No clickable HTML element with selector: {selector_type}='{selector_value}' found"
|
||||
)
|
||||
elem = await self.web_find(selector_type, selector_value)
|
||||
await elem.apply(f"""
|
||||
function (element) {{
|
||||
for(let i=0; i < element.options.length; i++)
|
||||
{{
|
||||
if(element.options[i].value == "{selected_value}") {{
|
||||
element.selectedIndex = i;
|
||||
element.dispatchEvent(new Event('change', {{ bubbles: true }}));
|
||||
break;
|
||||
elem = await self.web_find(selector_type, selector_value, timeout = timeout)
|
||||
|
||||
js_value = json.dumps(selected_value) # safe escaping for JS
|
||||
try:
|
||||
await elem.apply(f"""
|
||||
function (element) {{
|
||||
const wanted = String({js_value});
|
||||
|
||||
// 1) Try by value
|
||||
for (let i = 0; i < element.options.length; i++) {{
|
||||
if (element.options[i].value === wanted) {{
|
||||
element.selectedIndex = i;
|
||||
element.dispatchEvent(new Event('change', {{ bubbles: true }}));
|
||||
return;
|
||||
}}
|
||||
}}
|
||||
|
||||
// 2) Fallback by displayed text (trimmed)
|
||||
const needle = wanted.trim();
|
||||
for (let i = 0; i < element.options.length; i++) {{
|
||||
const opt = element.options[i];
|
||||
const shown = (opt.label ?? opt.text ?? opt.textContent ?? '').trim();
|
||||
if (shown === needle) {{
|
||||
element.selectedIndex = i;
|
||||
element.dispatchEvent(new Event('change', {{ bubbles: true }}));
|
||||
return;
|
||||
}}
|
||||
}}
|
||||
|
||||
throw new Error("Option not found by value or displayed text: " + wanted);
|
||||
}}
|
||||
}}
|
||||
throw new Error("Option with value {selected_value} not found.");
|
||||
}}
|
||||
""")
|
||||
""")
|
||||
except Exception as ex:
|
||||
# Normalize selection failures to TimeoutError
|
||||
raise TimeoutError(_("Option not found by value or displayed text: %s") % selected_value) from ex
|
||||
await self.web_sleep()
|
||||
return elem
|
||||
|
||||
async def web_select_combobox(self, selector_type:By, selector_value:str, selected_value:str | int, timeout:int | float | None = None) -> Element:
    """
    Picks an entry of a combobox, i.e. a text input backed by a filtering dropdown.

    The value is typed into the input found via the given selector. The dropdown is then
    looked up by the ID stored in the input's "aria-controls" attribute, and the first <li>
    whose visible text equals the value (whitespace-collapsed, trimmed, case-insensitive)
    is clicked.

    :param timeout: timeout in seconds; the "default" timeout is used when None
    :return: the dropdown element referenced by "aria-controls"
    :raises TimeoutError: when the input has no "aria-controls" attribute or no option matches
    """
    effective_timeout = self._timeout("default") if timeout is None else timeout

    # Replace whatever is in the input with the wanted value
    combo_input = await self.web_find(selector_type, selector_value, timeout = effective_timeout)
    await combo_input.clear_input()
    await combo_input.send_keys(str(selected_value))
    await self.web_sleep()

    # "aria-controls" on the input holds the ID of the dropdown element
    listbox_id = combo_input.attrs.get("aria-controls")
    if not listbox_id:
        LOG.error(_("Combobox input field does not have 'aria-controls' attribute."))
        raise TimeoutError(_("Combobox missing aria-controls attribute"))

    listbox = await self.web_find(By.ID, listbox_id, timeout = effective_timeout)
    wanted_js = json.dumps(selected_value)  # JSON literal, so quotes/backslashes are safe inside the script

    # The script clicks the first matching <li> and returns true, or returns false when nothing matches.
    click_script = f"""
        function (element) {{
            const selected = String({wanted_js});
            const normalize = s => (s ?? '').replace(/\\s+/g, ' ').trim().toLowerCase();
            // Normalize whitespace and convert to lowercase for comparison

            // Get all <li> elements inside the dropdown
            const items = element.querySelectorAll(':scope > li[role="option"], :scope > li');

            for (const li of items) {{
                // The visible label is typically inside the last <span>
                const labelEl = li.querySelector(':scope > span:last-of-type');
                const label = normalize(labelEl ? labelEl.textContent : li.textContent);

                // Compare normalized lowercase values
                if (label === normalize(selected)) {{
                    // Scroll to make sure the element is visible
                    try {{
                        li.scrollIntoView({{block: 'nearest'}});
                    }} catch (e) {{}}

                    // Click the matched element
                    li.click();
                    return true;
                }}
            }}

            // Return false if no matching item was found
            return false;
        }}
    """
    if await listbox.apply(click_script):
        await self.web_sleep()
        return listbox

    LOG.error(_("No matching option found in combobox: '%s'"), selected_value)
    raise TimeoutError(_("No matching option found in combobox: '%s'") % selected_value)
|
||||
|
||||
async def _validate_chrome_version_configuration(self) -> None:
|
||||
"""
|
||||
Validate Chrome version configuration for Chrome 136+ security requirements.
|
||||
|
||||
@@ -191,6 +191,119 @@ class TestWebScrapingErrorHandling:
|
||||
await web_scraper.web_input(By.ID, "test-id", "test text")
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_select_combobox_missing_dropdown_options(self, web_scraper:WebScrapingMixin) -> None:
    """A combobox input without an "aria-controls" attribute raises after the value was typed."""
    combo_input = AsyncMock(spec = Element)
    combo_input.send_keys = AsyncMock()
    combo_input.clear_input = AsyncMock()
    combo_input.attrs = {}  # no "aria-controls" entry
    web_scraper.web_sleep = AsyncMock()  # type: ignore[method-assign]
    web_scraper.web_find = AsyncMock(return_value = combo_input)  # type: ignore[method-assign]

    with pytest.raises(TimeoutError, match = "Combobox missing aria-controls attribute"):
        await web_scraper.web_select_combobox(By.ID, "combo-id", "Option", timeout = 0.1)

    # The value is typed before the attribute is inspected
    combo_input.clear_input.assert_awaited_once()
    combo_input.send_keys.assert_awaited_once_with("Option")
    assert web_scraper.web_sleep.await_count == 1  # only the sleep after typing happened
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_select_combobox_selects_matching_option(self, web_scraper:WebScrapingMixin) -> None:
    """The dropdown element is returned when the selection script reports a match."""
    listbox = AsyncMock(spec = Element)
    listbox.apply = AsyncMock(return_value = True)  # script reports a clicked option

    combo_input = AsyncMock(spec = Element)
    combo_input.attrs = {"aria-controls": "dropdown-id"}
    combo_input.send_keys = AsyncMock()
    combo_input.clear_input = AsyncMock()

    # First lookup yields the input, second one the dropdown
    web_scraper.web_find = AsyncMock(side_effect = [combo_input, listbox])  # type: ignore[method-assign]
    web_scraper.web_sleep = AsyncMock()  # type: ignore[method-assign]

    selected = await web_scraper.web_select_combobox(By.ID, "combo-id", "Visible Label")

    assert selected is listbox
    combo_input.clear_input.assert_awaited_once()
    combo_input.send_keys.assert_awaited_once_with("Visible Label")
    listbox.apply.assert_awaited_once()
    assert web_scraper.web_sleep.await_count == 2
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_select_combobox_no_matching_option_raises(self, web_scraper:WebScrapingMixin) -> None:
    """A TimeoutError is raised when the selection script reports that no option matched."""
    listbox = AsyncMock(spec = Element)
    listbox.apply = AsyncMock(return_value = False)  # script found no matching option

    combo_input = AsyncMock(spec = Element)
    combo_input.attrs = {"aria-controls": "dropdown-id"}
    combo_input.send_keys = AsyncMock()
    combo_input.clear_input = AsyncMock()

    # First lookup yields the input, second one the dropdown
    web_scraper.web_find = AsyncMock(side_effect = [combo_input, listbox])  # type: ignore[method-assign]
    web_scraper.web_sleep = AsyncMock()  # type: ignore[method-assign]

    with pytest.raises(TimeoutError, match = "No matching option found in combobox"):
        await web_scraper.web_select_combobox(By.ID, "combo-id", "Missing Label")

    listbox.apply.assert_awaited_once()
    assert web_scraper.web_sleep.await_count == 1  # sleep after typing only; the error precedes the second sleep
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_select_combobox_special_characters(self, web_scraper:WebScrapingMixin) -> None:
    """Test that quotes and backslashes in the value reach the selection script JSON-escaped."""
    input_field = AsyncMock(spec = Element)
    input_field.attrs = {"aria-controls": "dropdown-id"}
    input_field.clear_input = AsyncMock()
    input_field.send_keys = AsyncMock()

    dropdown_elem = AsyncMock(spec = Element)
    dropdown_elem.apply = AsyncMock(return_value = True)

    # First lookup yields the input, second one the dropdown
    web_scraper.web_find = AsyncMock(side_effect = [input_field, dropdown_elem])  # type: ignore[method-assign]
    web_scraper.web_sleep = AsyncMock()  # type: ignore[method-assign]

    # Value containing double quotes and a single backslash
    special_value = 'Value with "quotes" and \\ backslash'
    result = await web_scraper.web_select_combobox(By.ID, "combo-id", special_value)

    assert result is dropdown_elem
    # The input receives the raw, unescaped value
    input_field.send_keys.assert_awaited_once_with(special_value)

    # The script must contain the escaped forms only. Accepting a bare '"quotes"' here
    # would let the test pass even if the value were interpolated without any escaping.
    js_source = dropdown_elem.apply.call_args[0][0]
    assert r'\"quotes\"' in js_source
    assert r"\\ backslash" in js_source
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_select_by_value(self, web_scraper:WebScrapingMixin) -> None:
    """web_select returns the located element when the selection script completes without error."""
    dropdown = AsyncMock(spec = Element)
    dropdown.apply = AsyncMock()  # script finishes without raising

    web_scraper.web_sleep = AsyncMock()  # type: ignore[method-assign]
    web_scraper.web_find = AsyncMock(return_value = dropdown)  # type: ignore[method-assign]
    web_scraper.web_await = AsyncMock(return_value = True)  # type: ignore[method-assign]
    web_scraper.web_check = AsyncMock(return_value = True)  # type: ignore[method-assign]

    returned = await web_scraper.web_select(By.ID, "select-id", "option-value")

    assert returned is dropdown
    dropdown.apply.assert_awaited_once()
    web_scraper.web_sleep.assert_awaited_once()
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_select_raises_on_missing_option(self, web_scraper:WebScrapingMixin) -> None:
    """web_select surfaces a failing selection script as a TimeoutError."""
    # Stand-in for the script throwing because no option matched
    script_failure = Exception("Option not found by value or displayed text: missing")
    dropdown = AsyncMock(spec = Element)
    dropdown.apply = AsyncMock(side_effect = script_failure)

    web_scraper.web_find = AsyncMock(return_value = dropdown)  # type: ignore[method-assign]
    web_scraper.web_await = AsyncMock(return_value = True)  # type: ignore[method-assign]
    web_scraper.web_check = AsyncMock(return_value = True)  # type: ignore[method-assign]

    with pytest.raises(TimeoutError, match = "Option not found by value or displayed text"):
        await web_scraper.web_select(By.ID, "select-id", "missing-option")
|
||||
|
||||
async def test_web_input_success_returns_element(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None:
|
||||
"""Successful web_input should send keys, wait, and return the element."""
|
||||
mock_element = AsyncMock(spec = Element)
|
||||
|
||||
Reference in New Issue
Block a user