feat: Improved WebSelect Handling: Added Combobox Support, Enhanced Element Detection, and Smarter Option Matching (#679)

## ℹ️ Description

Added a `web_select_combobox` function for input/dropdown comboboxes.
PR for issue/missing feature #677

# Fixes / Enhancements

Finding special-attribute elements can fail because they are currently
only looked up via the name="..." attribute of the HTML elements. If
that lookup fails, the element is now additionally looked up by ID as a
fallback (for example, the "brands" input/combobox for men's shoes).

When selecting a value in a <select>, matching no longer relies only on
the actual option value (xxx in the example <option
value="xxx">yyy</option>) but also on the displayed text (i.e. yyy in
the example above). This improves UX because the user doesn't have to
look up the underlying "value" of the option but can instead use the
text displayed in the browser.


Test cases for web_select_combobox were not added due to missing
knowledge about how to mock async code properly.


## 📋 Changes Summary

 Fixes & Enhancements
- New WebSelect Functionality
- Improved Element Detection for Special Attributes
- Enhanced <select> Option Matching Logic

This improves UX and test robustness — users no longer need to know the
exact underlying value, as matching also works with the visible label
shown in the browser.

🧩 Result

These updates make dropdown and combobox interactions more intuitive,
resilient, and user-friendly across diverse HTML structures.


### ⚙️ Type of Change
Select the type(s) of change(s) included in this pull request:
- [x] 🐞 Bug fix (non-breaking change which fixes an issue)
- [x]  New feature (adds new functionality without breaking existing
usage)
- [ ] 💥 Breaking change (changes that might break existing user setups,
scripts, or configurations)


##  Checklist
Before requesting a review, confirm the following:
- [x] I have reviewed my changes to ensure they meet the project's
standards.
- [ ] I have tested my changes and ensured that all tests pass (`pdm run
test`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **Bug Fixes**
  * Field lookup now falls back to locating by ID when name lookup times
    out.
  * Option selection uses a two-pass match (value then displayed text);
    JS-path failures now surface as timeouts.
  * Error and log messages localized and clarified.

* **New Features**
  * Support for combobox-style inputs: type into the input, open dropdown,
    and select by visible text (handles special characters).

* **Tests**
  * Added tests for combobox selection, missing dropdowns, no-match
    errors, value-path selection, and special-character handling.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Jens <1742418+1cu@users.noreply.github.com>
Co-authored-by: Claude <claude@anthropic.com>
This commit is contained in:
Bjoern147
2025-12-05 21:03:31 +01:00
committed by GitHub
parent 220c01f257
commit 5f68c09899
4 changed files with 239 additions and 22 deletions

View File

@@ -1068,24 +1068,31 @@ class KleinanzeigenBot(WebScrapingMixin):
try: try:
# finding element by name cause id are composed sometimes eg. autos.marke_s+autos.model_s for Modell by cars # finding element by name cause id are composed sometimes eg. autos.marke_s+autos.model_s for Modell by cars
special_attr_elem = await self.web_find(By.XPATH, f"//*[contains(@name, '{special_attribute_key}')]") special_attr_elem = await self.web_find(By.XPATH, f"//*[contains(@name, '{special_attribute_key}')]")
except TimeoutError:
# Trying to find element by ID instead cause sometimes there is NO name attribute...
try:
special_attr_elem = await self.web_find(By.ID, special_attribute_key)
except TimeoutError as ex: except TimeoutError as ex:
LOG.debug("Attribute field '%s' could not be found.", special_attribute_key) LOG.debug(_("Attribute field '%s' could not be found."), special_attribute_key)
raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}] (not found)") from ex raise TimeoutError(_("Failed to set attribute '%s'") % special_attribute_key) from ex
try: try:
elem_id:str = str(special_attr_elem.attrs.id) elem_id:str = str(special_attr_elem.attrs.id)
if special_attr_elem.local_name == "select": if special_attr_elem.local_name == "select":
LOG.debug("Attribute field '%s' seems to be a select...", special_attribute_key) LOG.debug(_("Attribute field '%s' seems to be a select..."), special_attribute_key)
await self.web_select(By.ID, elem_id, special_attribute_value_str) await self.web_select(By.ID, elem_id, special_attribute_value_str)
elif special_attr_elem.attrs.type == "checkbox": elif special_attr_elem.attrs.type == "checkbox":
LOG.debug("Attribute field '%s' seems to be a checkbox...", special_attribute_key) LOG.debug(_("Attribute field '%s' seems to be a checkbox..."), special_attribute_key)
await self.web_click(By.ID, elem_id) await self.web_click(By.ID, elem_id)
elif special_attr_elem.attrs.type == "text" and special_attr_elem.attrs.get("role") == "combobox":
LOG.debug(_("Attribute field '%s' seems to be a Combobox (i.e. text input with filtering dropdown)..."), special_attribute_key)
await self.web_select_combobox(By.ID, elem_id, special_attribute_value_str)
else: else:
LOG.debug("Attribute field '%s' seems to be a text input...", special_attribute_key) LOG.debug(_("Attribute field '%s' seems to be a text input..."), special_attribute_key)
await self.web_input(By.ID, elem_id, special_attribute_value_str) await self.web_input(By.ID, elem_id, special_attribute_value_str)
except TimeoutError as ex: except TimeoutError as ex:
LOG.debug("Attribute field '%s' is not of kind radio button.", special_attribute_key) LOG.debug(_("Failed to set attribute field '%s' via known input types."), special_attribute_key)
raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex raise TimeoutError(_("Failed to set attribute '%s'") % special_attribute_key) from ex
LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value_str) LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value_str)
async def __set_shipping(self, ad_cfg:Ad, mode:AdUpdateStrategy = AdUpdateStrategy.REPLACE) -> None: async def __set_shipping(self, ad_cfg:Ad, mode:AdUpdateStrategy = AdUpdateStrategy.REPLACE) -> None:

View File

@@ -121,10 +121,12 @@ kleinanzeigen_bot/__init__.py:
"Setting special attribute [%s] to [%s]...": "Setze spezielles Attribut [%s] auf [%s]..." "Setting special attribute [%s] to [%s]...": "Setze spezielles Attribut [%s] auf [%s]..."
"Successfully set attribute field [%s] to [%s]...": "Attributfeld [%s] erfolgreich auf [%s] gesetzt..." "Successfully set attribute field [%s] to [%s]...": "Attributfeld [%s] erfolgreich auf [%s] gesetzt..."
"Attribute field '%s' could not be found.": "Attributfeld '%s' konnte nicht gefunden werden." "Attribute field '%s' could not be found.": "Attributfeld '%s' konnte nicht gefunden werden."
"Failed to set attribute '%s'": "Fehler beim Setzen des Attributs '%s'"
"Attribute field '%s' seems to be a select...": "Attributfeld '%s' scheint ein Auswahlfeld zu sein..." "Attribute field '%s' seems to be a select...": "Attributfeld '%s' scheint ein Auswahlfeld zu sein..."
"Attribute field '%s' is not of kind radio button.": "Attributfeld '%s' ist kein Radiobutton." "Failed to set attribute field '%s' via known input types.": "Fehler beim Setzen des Attributfelds '%s' über bekannte Eingabetypen."
"Attribute field '%s' seems to be a checkbox...": "Attributfeld '%s' scheint eine Checkbox zu sein..." "Attribute field '%s' seems to be a checkbox...": "Attributfeld '%s' scheint eine Checkbox zu sein..."
"Attribute field '%s' seems to be a text input...": "Attributfeld '%s' scheint ein Texteingabefeld zu sein..." "Attribute field '%s' seems to be a text input...": "Attributfeld '%s' scheint ein Texteingabefeld zu sein..."
"Attribute field '%s' seems to be a Combobox (i.e. text input with filtering dropdown)...": "Attributfeld '%s' scheint eine Combobox zu sein (d.h. Texteingabefeld mit Dropdown-Filter)..."
download_ads: download_ads:
"Scanning your ad overview...": "Scanne Anzeigenübersicht..." "Scanning your ad overview...": "Scanne Anzeigenübersicht..."
@@ -403,6 +405,14 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
web_check: web_check:
"Unsupported attribute: %s": "Nicht unterstütztes Attribut: %s" "Unsupported attribute: %s": "Nicht unterstütztes Attribut: %s"
web_select:
"Option not found by value or displayed text: %s": "Option nicht gefunden nach Wert oder angezeigtem Text: %s"
web_select_combobox:
"Combobox input field does not have 'aria-controls' attribute.": "Das Eingabefeld der Combobox hat kein 'aria-controls'-Attribut."
"Combobox missing aria-controls attribute": "Combobox fehlt aria-controls Attribut"
"No matching option found in combobox: '%s'": "Keine passende Option in Combobox gefunden: '%s'"
close_browser_session: close_browser_session:
"Closing Browser session...": "Schließe Browser-Sitzung..." "Closing Browser session...": "Schließe Browser-Sitzung..."

View File

@@ -969,23 +969,110 @@ class WebScrapingMixin:
lambda: self.web_check(selector_type, selector_value, Is.CLICKABLE), timeout = timeout, lambda: self.web_check(selector_type, selector_value, Is.CLICKABLE), timeout = timeout,
timeout_error_message = f"No clickable HTML element with selector: {selector_type}='{selector_value}' found" timeout_error_message = f"No clickable HTML element with selector: {selector_type}='{selector_value}' found"
) )
elem = await self.web_find(selector_type, selector_value) elem = await self.web_find(selector_type, selector_value, timeout = timeout)
js_value = json.dumps(selected_value) # safe escaping for JS
try:
await elem.apply(f""" await elem.apply(f"""
function (element) {{ function (element) {{
for(let i=0; i < element.options.length; i++) const wanted = String({js_value});
{{
if(element.options[i].value == "{selected_value}") {{ // 1) Try by value
for (let i = 0; i < element.options.length; i++) {{
if (element.options[i].value === wanted) {{
element.selectedIndex = i; element.selectedIndex = i;
element.dispatchEvent(new Event('change', {{ bubbles: true }})); element.dispatchEvent(new Event('change', {{ bubbles: true }}));
break; return;
}} }}
}} }}
throw new Error("Option with value {selected_value} not found.");
// 2) Fallback by displayed text (trimmed)
const needle = wanted.trim();
for (let i = 0; i < element.options.length; i++) {{
const opt = element.options[i];
const shown = (opt.label ?? opt.text ?? opt.textContent ?? '').trim();
if (shown === needle) {{
element.selectedIndex = i;
element.dispatchEvent(new Event('change', {{ bubbles: true }}));
return;
}}
}}
throw new Error("Option not found by value or displayed text: " + wanted);
}} }}
""") """)
except Exception as ex:
# Normalize selection failures to TimeoutError
raise TimeoutError(_("Option not found by value or displayed text: %s") % selected_value) from ex
await self.web_sleep() await self.web_sleep()
return elem return elem
async def web_select_combobox(self, selector_type:By, selector_value:str, selected_value:str | int, timeout:int | float | None = None) -> Element:
    """
    Selects an option from a text-input combobox (a text input with a filtering dropdown).

    The value is typed into the input to filter the dropdown, then the first direct <li> child of the
    dropdown whose visible text equals the value is clicked. The comparison ignores case, leading/trailing
    whitespace and collapses runs of whitespace. The dropdown is looked up by the ID stored in the
    input's "aria-controls" attribute.

    :param selector_type: strategy used to locate the combobox input field
    :param selector_value: selector value of the combobox input field
    :param selected_value: visible text of the option to select
    :param timeout: timeout in seconds; when None, ``self._timeout("default")`` is used
    :return: the dropdown element referenced by "aria-controls"
    :raises TimeoutError: when the input or dropdown cannot be located, the input has no
        "aria-controls" attribute, or no dropdown option matches the given value
    """
    if timeout is None:
        timeout = self._timeout("default")

    input_field = await self.web_find(selector_type, selector_value, timeout = timeout)

    # Typing the value makes the page filter/open the dropdown before we look for the option.
    await input_field.clear_input()
    await input_field.send_keys(str(selected_value))
    await self.web_sleep()

    # The input's "aria-controls" attribute holds the ID of the dropdown list element.
    dropdown_id = input_field.attrs.get("aria-controls")
    if not dropdown_id:
        LOG.error(_("Combobox input field does not have 'aria-controls' attribute."))
        raise TimeoutError(_("Combobox missing aria-controls attribute"))

    dropdown_elem = await self.web_find(By.ID, dropdown_id, timeout = timeout)

    js_value = json.dumps(selected_value)  # safe escaping for JS

    # The in-page function returns true after clicking the matching <li>, false when nothing matched.
    ok = await dropdown_elem.apply(f"""
        function (element) {{
            // Collapse whitespace, trim and lowercase so the comparison is tolerant.
            const normalize = s => (s ?? '').replace(/\\s+/g, ' ').trim().toLowerCase();

            // Normalize the searched text once instead of once per list item.
            const wanted = normalize(String({js_value}));

            // Direct <li> children of the dropdown (this includes those with role="option").
            const items = element.querySelectorAll(':scope > li');

            for (const li of items) {{
                // The visible label is typically inside the last <span>
                const labelEl = li.querySelector(':scope > span:last-of-type');
                const label = normalize(labelEl ? labelEl.textContent : li.textContent);

                if (label === wanted) {{
                    // Scroll to make sure the element is visible; failures here are not fatal
                    try {{
                        li.scrollIntoView({{block: 'nearest'}});
                    }} catch (e) {{}}

                    li.click();
                    return true;
                }}
            }}

            // No matching item was found
            return false;
        }}
    """)

    if not ok:
        LOG.error(_("No matching option found in combobox: '%s'"), selected_value)
        raise TimeoutError(_("No matching option found in combobox: '%s'") % selected_value)

    await self.web_sleep()
    return dropdown_elem
async def _validate_chrome_version_configuration(self) -> None: async def _validate_chrome_version_configuration(self) -> None:
""" """
Validate Chrome version configuration for Chrome 136+ security requirements. Validate Chrome version configuration for Chrome 136+ security requirements.

View File

@@ -191,6 +191,119 @@ class TestWebScrapingErrorHandling:
await web_scraper.web_input(By.ID, "test-id", "test text") await web_scraper.web_input(By.ID, "test-id", "test text")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_web_select_combobox_missing_dropdown_options(self, web_scraper:WebScrapingMixin) -> None:
    """A combobox input without an aria-controls attribute must fail with a TimeoutError."""
    # Combobox input that does not reference any dropdown.
    combo_input = AsyncMock(spec = Element)
    combo_input.send_keys = AsyncMock()
    combo_input.clear_input = AsyncMock()
    combo_input.attrs = {}

    web_scraper.web_sleep = AsyncMock()  # type: ignore[method-assign]
    web_scraper.web_find = AsyncMock(return_value = combo_input)  # type: ignore[method-assign]

    with pytest.raises(TimeoutError, match = "Combobox missing aria-controls attribute"):
        await web_scraper.web_select_combobox(By.ID, "combo-id", "Option", timeout = 0.1)

    # The value is still typed before the attribute is inspected, followed by exactly one sleep.
    combo_input.send_keys.assert_awaited_once_with("Option")
    combo_input.clear_input.assert_awaited_once()
    assert web_scraper.web_sleep.await_count == 1  # Only one sleep before checking aria-controls
@pytest.mark.asyncio
async def test_web_select_combobox_selects_matching_option(self, web_scraper:WebScrapingMixin) -> None:
    """A truthy result from the dropdown's in-page lookup makes the call return the dropdown element."""
    # Dropdown whose in-page matcher reports success.
    option_list = AsyncMock(spec = Element)
    option_list.apply = AsyncMock(return_value = True)

    # Combobox input that points at the dropdown via aria-controls.
    combo_input = AsyncMock(spec = Element)
    combo_input.attrs = {"aria-controls": "dropdown-id"}
    combo_input.clear_input = AsyncMock()
    combo_input.send_keys = AsyncMock()

    # The first lookup resolves the input, the second one the dropdown.
    web_scraper.web_sleep = AsyncMock()  # type: ignore[method-assign]
    web_scraper.web_find = AsyncMock(side_effect = [combo_input, option_list])  # type: ignore[method-assign]

    selected = await web_scraper.web_select_combobox(By.ID, "combo-id", "Visible Label")

    assert selected is option_list
    combo_input.clear_input.assert_awaited_once()
    combo_input.send_keys.assert_awaited_once_with("Visible Label")
    option_list.apply.assert_awaited_once()
    assert web_scraper.web_sleep.await_count == 2
@pytest.mark.asyncio
async def test_web_select_combobox_no_matching_option_raises(self, web_scraper:WebScrapingMixin) -> None:
    """A falsy result from the dropdown's in-page lookup must surface as a TimeoutError."""
    # Dropdown whose in-page matcher reports that nothing matched.
    option_list = AsyncMock(spec = Element)
    option_list.apply = AsyncMock(return_value = False)

    combo_input = AsyncMock(spec = Element)
    combo_input.attrs = {"aria-controls": "dropdown-id"}
    combo_input.clear_input = AsyncMock()
    combo_input.send_keys = AsyncMock()

    web_scraper.web_sleep = AsyncMock()  # type: ignore[method-assign]
    web_scraper.web_find = AsyncMock(side_effect = [combo_input, option_list])  # type: ignore[method-assign]

    with pytest.raises(TimeoutError, match = "No matching option found in combobox"):
        await web_scraper.web_select_combobox(By.ID, "combo-id", "Missing Label")

    option_list.apply.assert_awaited_once()
    assert web_scraper.web_sleep.await_count == 1  # One sleep after typing, error before second sleep
@pytest.mark.asyncio
async def test_web_select_combobox_special_characters(self, web_scraper:WebScrapingMixin) -> None:
    """Test combobox selection with special characters (quotes, backslashes and newlines)."""
    input_field = AsyncMock(spec = Element)
    input_field.attrs = {"aria-controls": "dropdown-id"}
    input_field.clear_input = AsyncMock()
    input_field.send_keys = AsyncMock()

    dropdown_elem = AsyncMock(spec = Element)
    dropdown_elem.apply = AsyncMock(return_value = True)

    web_scraper.web_find = AsyncMock(side_effect = [input_field, dropdown_elem])  # type: ignore[method-assign]
    web_scraper.web_sleep = AsyncMock()  # type: ignore[method-assign]

    # Value containing double quotes, a single backslash and a real newline character
    special_value = 'Value with "quotes", \\ backslash and a\nnewline'
    result = await web_scraper.web_select_combobox(By.ID, "combo-id", special_value)

    assert result is dropdown_elem
    # The raw, unescaped value is what gets typed into the input field
    input_field.send_keys.assert_awaited_once_with(special_value)

    # The JavaScript source must contain the JSON-escaped form of every special character,
    # otherwise the value could break out of the JS string literal.
    call_args = dropdown_elem.apply.call_args[0][0]
    assert r'\"quotes\"' in call_args  # quotes are backslash-escaped
    assert r"\\ backslash" in call_args  # the single backslash is doubled
    assert r"a\nnewline" in call_args  # the newline becomes the two-character escape \n
@pytest.mark.asyncio
async def test_web_select_by_value(self, web_scraper:WebScrapingMixin) -> None:
    """web_select returns the located element when the in-page selection script does not fail."""
    # <select> stand-in whose script execution succeeds silently.
    dropdown = AsyncMock(spec = Element)
    dropdown.apply = AsyncMock()

    # Stub out every helper web_select relies on, so no browser is needed.
    web_scraper.web_sleep = AsyncMock()  # type: ignore[method-assign]
    web_scraper.web_find = AsyncMock(return_value = dropdown)  # type: ignore[method-assign]
    web_scraper.web_await = AsyncMock(return_value = True)  # type: ignore[method-assign]
    web_scraper.web_check = AsyncMock(return_value = True)  # type: ignore[method-assign]

    returned = await web_scraper.web_select(By.ID, "select-id", "option-value")

    assert returned is dropdown
    dropdown.apply.assert_awaited_once()
    web_scraper.web_sleep.assert_awaited_once()
@pytest.mark.asyncio
async def test_web_select_raises_on_missing_option(self, web_scraper:WebScrapingMixin) -> None:
    """A failure of the in-page selection script is reported by web_select as a TimeoutError."""
    # Stand-in for the JS error thrown when neither value nor displayed text matches.
    js_failure = Exception("Option not found by value or displayed text: missing")
    dropdown = AsyncMock(spec = Element)
    dropdown.apply = AsyncMock(side_effect = js_failure)

    web_scraper.web_find = AsyncMock(return_value = dropdown)  # type: ignore[method-assign]
    web_scraper.web_await = AsyncMock(return_value = True)  # type: ignore[method-assign]
    web_scraper.web_check = AsyncMock(return_value = True)  # type: ignore[method-assign]

    with pytest.raises(TimeoutError, match = "Option not found by value or displayed text"):
        await web_scraper.web_select(By.ID, "select-id", "missing-option")
async def test_web_input_success_returns_element(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None: async def test_web_input_success_returns_element(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None:
"""Successful web_input should send keys, wait, and return the element.""" """Successful web_input should send keys, wait, and return the element."""
mock_element = AsyncMock(spec = Element) mock_element = AsyncMock(spec = Element)