mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
fix: correct sell_directly extraction using JSON API (#765)
This commit is contained in:
@@ -608,14 +608,36 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
|
|
||||||
async def _extract_sell_directly_from_ad_page(self) -> bool | None:
    """
    Extracts the sell directly option from an ad page using the JSON API.

    The ad ID is parsed from the current page URL and looked up in the "ads" list of the
    ad management JSON response; that ad's "buyNowEligible" flag is returned.

    :return: bool | None - True if buyNowEligible, False if not eligible, None if unknown
        (ad ID not extractable from the URL, request timed out, response malformed,
        ad not present in the list, or the flag is missing / not a boolean)
    """
    try:
        # Extract current ad ID from the page URL first
        current_ad_id = self.extract_ad_id_from_ad_url(self.page.url)
        if current_ad_id == -1:
            LOG.warning("Could not extract ad ID from URL: %s", self.page.url)
            return None

        # Fetch the management JSON data using web_request
        response = await self.web_request("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
        json_data = json.loads(response["content"])

        # Anything other than {"ads": [...]} means the status is unknown
        ads_list = json_data.get("ads") if isinstance(json_data, dict) else None
        if not isinstance(ads_list, list):
            return None

        # Find the current ad by ID. Entries that are not JSON objects (e.g. null) are skipped:
        # calling .get() on them would raise AttributeError, which is not handled below.
        current_ad = next(
            (ad for ad in ads_list if isinstance(ad, dict) and ad.get("id") == current_ad_id),
            None,
        )
        if current_ad is None:
            return None

        # Only a real boolean is trusted; a missing key or values like "true" / 1 count as unknown
        buy_now_eligible = current_ad.get("buyNowEligible")
        return buy_now_eligible if isinstance(buy_now_eligible, bool) else None

    except (TimeoutError, json.JSONDecodeError, KeyError, TypeError) as e:
        LOG.debug("Could not determine sell_directly status: %s", e)
        return None
|
||||||
|
|
||||||
async def _extract_contact_from_ad_page(self) -> ContactPartial:
|
async def _extract_contact_from_ad_page(self) -> ContactPartial:
|
||||||
|
|||||||
@@ -257,6 +257,9 @@ kleinanzeigen_bot/extract.py:
|
|||||||
"Legacy breadcrumb selectors not found within %.1f seconds (collected ids: %s)": "Ältere Breadcrumb-Selektoren nicht innerhalb von %.1f Sekunden gefunden (gesammelte IDs: %s)"
|
"Legacy breadcrumb selectors not found within %.1f seconds (collected ids: %s)": "Ältere Breadcrumb-Selektoren nicht innerhalb von %.1f Sekunden gefunden (gesammelte IDs: %s)"
|
||||||
"Unable to locate breadcrumb fallback selectors within %(seconds).1f seconds.": "Ältere Breadcrumb-Selektoren konnten nicht innerhalb von %(seconds).1f Sekunden gefunden werden."
|
"Unable to locate breadcrumb fallback selectors within %(seconds).1f seconds.": "Ältere Breadcrumb-Selektoren konnten nicht innerhalb von %(seconds).1f Sekunden gefunden werden."
|
||||||
|
|
||||||
|
_extract_sell_directly_from_ad_page:
|
||||||
|
"Could not extract ad ID from URL: %s": "Konnte Anzeigen-ID nicht aus der URL extrahieren: %s"
|
||||||
|
|
||||||
#################################################
|
#################################################
|
||||||
kleinanzeigen_bot/utils/i18n.py:
|
kleinanzeigen_bot/utils/i18n.py:
|
||||||
#################################################
|
#################################################
|
||||||
|
|||||||
@@ -718,20 +718,191 @@ class TestAdExtractorContent:
|
|||||||
@pytest.mark.asyncio
async def test_extract_sell_directly(self, test_extractor:AdExtractor) -> None:
    """Test extraction of sell directly option."""
    api_url = "https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json"
    valid_ad_url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"

    def as_response(payload:object) -> dict[str, object]:
        # Shape of a web_request result whose body is the JSON-encoded payload
        return {"return_value": {"content": json.dumps(payload)}}

    # Mock the page so the ad ID can be extracted from its URL
    test_extractor.page = MagicMock()

    # An URL without an ad ID must short-circuit: None is returned and the API is never queried
    test_extractor.page.url = "https://www.kleinanzeigen.de/invalid-url"
    with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
        result = await test_extractor._extract_sell_directly_from_ad_page()
        assert result is None
        mock_web_request.assert_not_awaited()

    # All remaining scenarios use a valid ad URL (ad ID 123456789)
    test_extractor.page.url = valid_ad_url

    # (description, AsyncMock configuration for web_request, expected result)
    scenarios:list[tuple[str, dict[str, object], bool | None]] = [
        (
            "buyNowEligible = true",
            as_response({"ads": [{"id": 123456789, "buyNowEligible": True}, {"id": 987654321, "buyNowEligible": False}]}),
            True,
        ),
        (
            "buyNowEligible = false",
            as_response({"ads": [{"id": 123456789, "buyNowEligible": False}, {"id": 987654321, "buyNowEligible": True}]}),
            False,
        ),
        (
            "buyNowEligible missing from the current ad",
            as_response({"ads": [{"id": 123456789}, {"id": 987654321, "buyNowEligible": True}]}),
            None,
        ),
        (
            "current ad not found in the ads list",
            as_response({"ads": [{"id": 987654321, "buyNowEligible": True}]}),
            None,
        ),
        (
            "timeout error",
            {"side_effect": TimeoutError},
            None,
        ),
        (
            "JSON decode error",
            {"return_value": {"content": "invalid json"}},
            None,
        ),
        (
            "ads list is empty",
            as_response({"ads": []}),
            None,
        ),
        (
            "buyNowEligible is a non-boolean value (string \"true\")",
            as_response({"ads": [{"id": 123456789, "buyNowEligible": "true"}, {"id": 987654321, "buyNowEligible": False}]}),
            None,
        ),
        (
            "buyNowEligible is a non-boolean value (integer 1)",
            as_response({"ads": [{"id": 123456789, "buyNowEligible": 1}, {"id": 987654321, "buyNowEligible": False}]}),
            None,
        ),
        (
            "json_data is not a dict",
            as_response(["not", "a", "dict"]),
            None,
        ),
        (
            "json_data is a dict without an \"ads\" key",
            as_response({"other_key": "value"}),
            None,
        ),
        (
            "ads is not a list",
            as_response({"ads": "not a list"}),
            None,
        ),
    ]

    for description, mock_config, expected in scenarios:
        with patch.object(test_extractor, "web_request", new_callable = AsyncMock, **mock_config) as mock_web_request:
            result = await test_extractor._extract_sell_directly_from_ad_page()
            assert result is expected, description

            # Verify web_request was called with the correct URL
            mock_web_request.assert_awaited_once_with(api_url)
|
||||||
|
|
||||||
|
|
||||||
class TestAdExtractorCategory:
|
class TestAdExtractorCategory:
|
||||||
"""Tests for category extraction functionality."""
|
"""Tests for category extraction functionality."""
|
||||||
|
|||||||
Reference in New Issue
Block a user