feat: add support for multiple matching shipping options (#483)

This commit is contained in:
Benedikt
2025-04-29 21:02:09 +02:00
committed by GitHub
parent 3e8072973a
commit 9bcc669c48
3 changed files with 145 additions and 11 deletions

View File

@@ -422,11 +422,6 @@ class AdExtractor(WebScrapingMixin):
(await self.web_request("https://gateway.kleinanzeigen.de/postad/api/v1/shipping-options?posterType=PRIVATE")) (await self.web_request("https://gateway.kleinanzeigen.de/postad/api/v1/shipping-options?posterType=PRIVATE"))
["content"])["data"]["shippingOptionsResponse"]["options"] ["content"])["data"]["shippingOptionsResponse"]["options"]
internal_shipping_opt = [x for x in shipping_costs if x["priceInEuroCent"] == ship_costs * 100]
if not internal_shipping_opt:
return "NOT_APPLICABLE", ship_costs, shipping_options
# map to internal shipping identifiers used by kleinanzeigen-bot # map to internal shipping identifiers used by kleinanzeigen-bot
shipping_option_mapping = { shipping_option_mapping = {
"DHL_001": "DHL_2", "DHL_001": "DHL_2",
@@ -440,11 +435,41 @@ class AdExtractor(WebScrapingMixin):
"HERMES_004": "Hermes_L" "HERMES_004": "Hermes_L"
} }
shipping_option = shipping_option_mapping.get(internal_shipping_opt[0]["id"]) # Convert Euro to cents and round to nearest integer
if not shipping_option: price_in_cent = round(ship_costs * 100)
# Get excluded shipping options from config
excluded_options = self.config.get("download", {}).get("excluded_shipping_options", [])
# If include_all_matching_shipping_options is enabled, get all options for the same package size
if self.config.get("download", {}).get("include_all_matching_shipping_options", False):
# Find all options with the same price to determine the package size
matching_options = [opt for opt in shipping_costs if opt["priceInEuroCent"] == price_in_cent]
if not matching_options:
return "NOT_APPLICABLE", ship_costs, shipping_options return "NOT_APPLICABLE", ship_costs, shipping_options
# Use the package size of the first matching option
matching_size = matching_options[0]["packageSize"]
# Get all options of the same size
shipping_options = [
shipping_option_mapping[opt["id"]]
for opt in shipping_costs
if opt["packageSize"] == matching_size
and opt["id"] in shipping_option_mapping
and shipping_option_mapping[opt["id"]] not in excluded_options
]
else:
# Only use the matching option if it's not excluded
matching_option = next((x for x in shipping_costs if x["priceInEuroCent"] == price_in_cent), None)
if not matching_option:
return "NOT_APPLICABLE", ship_costs, shipping_options
shipping_option = shipping_option_mapping.get(matching_option["id"])
if not shipping_option or shipping_option in excluded_options:
return "NOT_APPLICABLE", ship_costs, shipping_options
shipping_options = [shipping_option] shipping_options = [shipping_option]
except TimeoutError: # no pricing box -> no shipping given except TimeoutError: # no pricing box -> no shipping given
ship_type = "NOT_APPLICABLE" ship_type = "NOT_APPLICABLE"

View File

@@ -26,6 +26,12 @@ ad_defaults:
# Jobs > Praktika: 102/125 # Jobs > Praktika: 102/125
categories: {} categories: {}
download:
# if true, every shipping option with the same package size as the option matching the ad's shipping cost is included
include_all_matching_shipping_options: false
# list of shipping options to exclude, e.g. ["DHL_2", "DHL_5"]
excluded_shipping_options: []
publishing: publishing:
delete_old_ads: "AFTER_PUBLISH" # one of: AFTER_PUBLISH, BEFORE_PUBLISH, NEVER delete_old_ads: "AFTER_PUBLISH" # one of: AFTER_PUBLISH, BEFORE_PUBLISH, NEVER
delete_old_ads_by_title: true # only works if delete_old_ads is set to BEFORE_PUBLISH delete_old_ads_by_title: true # only works if delete_old_ads is set to BEFORE_PUBLISH

View File

@@ -107,7 +107,7 @@ class TestAdExtractorShipping:
"data": { "data": {
"shippingOptionsResponse": { "shippingOptionsResponse": {
"options": [ "options": [
{"id": "DHL_001", "priceInEuroCent": int(expected_cost * 100)} {"id": "DHL_001", "priceInEuroCent": int(expected_cost * 100), "packageSize": "SMALL"}
] ]
} }
} }
@@ -132,7 +132,7 @@ class TestAdExtractorShipping:
"data": { "data": {
"shippingOptionsResponse": { "shippingOptionsResponse": {
"options": [ "options": [
{"id": "DHL_001", "priceInEuroCent": 549} {"id": "DHL_001", "priceInEuroCent": 549, "packageSize": "SMALL"}
] ]
} }
} }
@@ -149,6 +149,109 @@ class TestAdExtractorShipping:
assert costs == 5.49 assert costs == 5.49
assert options == ["DHL_2"] assert options == ["DHL_2"]
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_with_all_matching_options(self, test_extractor: AdExtractor) -> None:
    """Test shipping info extraction with all matching options enabled.

    The mocked ad page advertises shipping from 4,89 EUR (489 cents), which
    equals the price of one option in the mocked shipping API response. All
    three mocked options share the package size "SMALL", so with
    ``include_all_matching_shipping_options`` enabled every one of them is
    expected in the result (as internal kleinanzeigen-bot identifiers).
    """
    shipping_response = {
        "content": json.dumps({
            "data": {
                "shippingOptionsResponse": {
                    "options": [
                        {"id": "HERMES_001", "priceInEuroCent": 489, "packageSize": "SMALL"},
                        {"id": "HERMES_002", "priceInEuroCent": 549, "packageSize": "SMALL"},
                        {"id": "DHL_001", "priceInEuroCent": 619, "packageSize": "SMALL"}
                    ]
                }
            }
        })
    }

    # Enable all matching options in config
    test_extractor.config["download"] = {"include_all_matching_shipping_options": True}

    with patch.object(test_extractor, "page", MagicMock()), \
            patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \
            patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response):

        shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page()

        assert shipping_type == "SHIPPING"
        assert costs == 4.89
        # Assert unconditionally: a missing option list must fail the test
        # instead of being accepted by a tautological else branch. This also
        # narrows the optional return value for the sorted() call below.
        assert options is not None
        assert sorted(options) == ["DHL_2", "Hermes_Päckchen", "Hermes_S"]
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_with_excluded_options(self, test_extractor: AdExtractor) -> None:
    """Test shipping info extraction with excluded options.

    Uses the same three "SMALL" options as the all-matching-options test, but
    additionally lists "DHL_2" under ``excluded_shipping_options``. Only the
    two remaining internal identifiers are expected in the result.
    """
    shipping_response = {
        "content": json.dumps({
            "data": {
                "shippingOptionsResponse": {
                    "options": [
                        {"id": "HERMES_001", "priceInEuroCent": 489, "packageSize": "SMALL"},
                        {"id": "HERMES_002", "priceInEuroCent": 549, "packageSize": "SMALL"},
                        {"id": "DHL_001", "priceInEuroCent": 619, "packageSize": "SMALL"}
                    ]
                }
            }
        })
    }

    # Enable all matching options and exclude DHL in config
    test_extractor.config["download"] = {
        "include_all_matching_shipping_options": True,
        "excluded_shipping_options": ["DHL_2"]
    }

    with patch.object(test_extractor, "page", MagicMock()), \
            patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \
            patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response):

        shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page()

        assert shipping_type == "SHIPPING"
        assert costs == 4.89
        # Assert unconditionally: a missing option list must fail the test
        # instead of being accepted by a tautological else branch. This also
        # narrows the optional return value for the sorted() call below.
        assert options is not None
        assert sorted(options) == ["Hermes_Päckchen", "Hermes_S"]
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_with_excluded_matching_option(self, test_extractor: AdExtractor) -> None:
    """Verify the single-option path when the price-matched option is excluded.

    ``include_all_matching_shipping_options`` is left unset, so only the option
    whose price equals the advertised 4,89 EUR (489 cents) is considered. Its
    internal identifier is listed in ``excluded_shipping_options``
    (presumably "Hermes_Päckchen" is the mapping of HERMES_001 - the mapping
    table is defined in the extractor), so extraction must report
    NOT_APPLICABLE with no options while keeping the parsed cost.
    """
    api_options = [
        {"id": "HERMES_001", "priceInEuroCent": 489, "packageSize": "SMALL"},
        {"id": "HERMES_002", "priceInEuroCent": 549, "packageSize": "SMALL"},
    ]
    api_payload = {"data": {"shippingOptionsResponse": {"options": api_options}}}
    mocked_response = {"content": json.dumps(api_payload)}

    # Exclude the option that matches the advertised price
    test_extractor.config["download"] = {"excluded_shipping_options": ["Hermes_Päckchen"]}

    page_patch = patch.object(test_extractor, "page", MagicMock())
    text_patch = patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €")
    request_patch = patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = mocked_response)

    with page_patch, text_patch, request_patch:
        result = await test_extractor._extract_shipping_info_from_ad_page()

    shipping_type, costs, options = result
    assert shipping_type == "NOT_APPLICABLE"
    assert costs == 4.89
    assert options is None
class TestAdExtractorNavigation: class TestAdExtractorNavigation:
"""Tests for navigation related functionality.""" """Tests for navigation related functionality."""