fix: correct sell_directly extraction using JSON API (#765)

This commit is contained in:
Jens
2026-01-17 16:34:31 +01:00
committed by GitHub
parent 12dc3d2e13
commit 183c01078e
3 changed files with 211 additions and 15 deletions

View File

@@ -608,14 +608,36 @@ class AdExtractor(WebScrapingMixin):
async def _extract_sell_directly_from_ad_page(self) -> bool | None:
"""
Extracts the sell directly option from an ad page.
Extracts the sell directly option from an ad page using the JSON API.
:return: a boolean indicating whether the sell directly option is active (optional)
:return: bool | None - True if buyNowEligible, False if not eligible, None if unknown
"""
try:
buy_now_is_active:bool = "Direkt kaufen" in (await self.web_text(By.ID, "payment-buttons-sidebar"))
return buy_now_is_active
except TimeoutError:
# Extract current ad ID from the page URL first
current_ad_id = self.extract_ad_id_from_ad_url(self.page.url)
if current_ad_id == -1:
LOG.warning("Could not extract ad ID from URL: %s", self.page.url)
return None
# Fetch the management JSON data using web_request
response = await self.web_request("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
json_data = json.loads(response["content"])
# Find the current ad in the ads list
if isinstance(json_data, dict) and "ads" in json_data:
ads_list = json_data["ads"]
if isinstance(ads_list, list):
# Filter ads to find the current ad by ID
current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None)
if current_ad and "buyNowEligible" in current_ad:
buy_now_eligible = current_ad["buyNowEligible"]
return buy_now_eligible if isinstance(buy_now_eligible, bool) else None
# If the key doesn't exist or ad not found, return None (unknown)
return None
except (TimeoutError, json.JSONDecodeError, KeyError, TypeError) as e:
LOG.debug("Could not determine sell_directly status: %s", e)
return None
async def _extract_contact_from_ad_page(self) -> ContactPartial:

View File

@@ -257,6 +257,9 @@ kleinanzeigen_bot/extract.py:
"Legacy breadcrumb selectors not found within %.1f seconds (collected ids: %s)": "Ältere Breadcrumb-Selektoren nicht innerhalb von %.1f Sekunden gefunden (gesammelte IDs: %s)"
"Unable to locate breadcrumb fallback selectors within %(seconds).1f seconds.": "Ältere Breadcrumb-Selektoren konnten nicht innerhalb von %(seconds).1f Sekunden gefunden werden."
_extract_sell_directly_from_ad_page:
"Could not extract ad ID from URL: %s": "Konnte Anzeigen-ID nicht aus der URL extrahieren: %s"
#################################################
kleinanzeigen_bot/utils/i18n.py:
#################################################

View File

@@ -718,20 +718,191 @@ class TestAdExtractorContent:
@pytest.mark.asyncio
async def test_extract_sell_directly(self, test_extractor:AdExtractor) -> None:
    """
    Test extraction of the sell directly option via the management JSON API.

    Covers the short-circuit for URLs without an ad ID, the regular True/False results,
    every "unknown" outcome (flag missing, ad not listed, non-boolean flag, unexpected
    response structure, invalid JSON) and a request timeout.
    """
    api_url = "https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json"
    ad_page_url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"

    # The extractor derives the ad ID from the URL of the current page
    test_extractor.page = MagicMock()

    # A URL without an ad ID must return None before any request is made
    test_extractor.page.url = "https://www.kleinanzeigen.de/invalid-url"
    with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
        result = await test_extractor._extract_sell_directly_from_ad_page()
        assert result is None
        mock_web_request.assert_not_awaited()

    # All remaining scenarios run against a valid ad URL (ad ID 123456789)
    test_extractor.page.url = ad_page_url

    # (scenario, raw response content, expected result)
    scenarios = [
        (
            "buyNowEligible is true",
            json.dumps({"ads": [{"id": 123456789, "buyNowEligible": True}, {"id": 987654321, "buyNowEligible": False}]}),
            True,
        ),
        (
            "buyNowEligible is false",
            json.dumps({"ads": [{"id": 123456789, "buyNowEligible": False}, {"id": 987654321, "buyNowEligible": True}]}),
            False,
        ),
        (
            "buyNowEligible missing from the current ad",
            json.dumps({"ads": [{"id": 123456789}, {"id": 987654321, "buyNowEligible": True}]}),
            None,
        ),
        (
            "current ad not found in the ads list",
            json.dumps({"ads": [{"id": 987654321, "buyNowEligible": True}]}),
            None,
        ),
        (
            "response content is not valid JSON",
            "invalid json",
            None,
        ),
        (
            "ads list is empty",
            json.dumps({"ads": []}),
            None,
        ),
        (
            "buyNowEligible is a non-boolean value (string \"true\")",
            json.dumps({"ads": [{"id": 123456789, "buyNowEligible": "true"}, {"id": 987654321, "buyNowEligible": False}]}),
            None,
        ),
        (
            "buyNowEligible is a non-boolean value (integer 1)",
            json.dumps({"ads": [{"id": 123456789, "buyNowEligible": 1}, {"id": 987654321, "buyNowEligible": False}]}),
            None,
        ),
        (
            "json_data is not a dict",
            json.dumps(["not", "a", "dict"]),
            None,
        ),
        (
            "json_data is a dict without an \"ads\" key",
            json.dumps({"other_key": "value"}),
            None,
        ),
        (
            "ads is not a list",
            json.dumps({"ads": "not a list"}),
            None,
        ),
    ]

    for scenario, content, expected in scenarios:
        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
            mock_web_request.return_value = {"content": content}

            result = await test_extractor._extract_sell_directly_from_ad_page()
            assert result is expected, scenario

            # Exactly one request to the management endpoint per extraction
            mock_web_request.assert_awaited_once_with(api_url)

    # A timeout of the request is swallowed and reported as unknown
    with patch.object(test_extractor, "web_request", new_callable = AsyncMock, side_effect = TimeoutError) as mock_web_request:
        result = await test_extractor._extract_sell_directly_from_ad_page()
        assert result is None
        mock_web_request.assert_awaited_once_with(api_url)
class TestAdExtractorCategory:
"""Tests for category extraction functionality."""