From 9bcc669c483e06bef17e3965966a75cdcbd6935e Mon Sep 17 00:00:00 2001 From: Benedikt <1311868+benjidea@users.noreply.github.com> Date: Tue, 29 Apr 2025 21:02:09 +0200 Subject: [PATCH] feat: add support for multiple matching shipping options (#483) --- src/kleinanzeigen_bot/extract.py | 43 +++++-- .../resources/config_defaults.yaml | 6 + tests/unit/test_extract.py | 107 +++++++++++++++++- 3 files changed, 145 insertions(+), 11 deletions(-) diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py index 82e4daa..52b7bf9 100644 --- a/src/kleinanzeigen_bot/extract.py +++ b/src/kleinanzeigen_bot/extract.py @@ -422,11 +422,6 @@ class AdExtractor(WebScrapingMixin): (await self.web_request("https://gateway.kleinanzeigen.de/postad/api/v1/shipping-options?posterType=PRIVATE")) ["content"])["data"]["shippingOptionsResponse"]["options"] - internal_shipping_opt = [x for x in shipping_costs if x["priceInEuroCent"] == ship_costs * 100] - - if not internal_shipping_opt: - return "NOT_APPLICABLE", ship_costs, shipping_options - # map to internal shipping identifiers used by kleinanzeigen-bot shipping_option_mapping = { "DHL_001": "DHL_2", @@ -440,11 +435,41 @@ class AdExtractor(WebScrapingMixin): "HERMES_004": "Hermes_L" } - shipping_option = shipping_option_mapping.get(internal_shipping_opt[0]["id"]) - if not shipping_option: - return "NOT_APPLICABLE", ship_costs, shipping_options + # Convert Euro to cents and round to nearest integer + price_in_cent = round(ship_costs * 100) + + # Get excluded shipping options from config + excluded_options = self.config.get("download", {}).get("excluded_shipping_options", []) + + # If include_all_matching_shipping_options is enabled, get all options for the same package size + if self.config.get("download", {}).get("include_all_matching_shipping_options", False): + # Find all options with the same price to determine the package size + matching_options = [opt for opt in shipping_costs if opt["priceInEuroCent"] == price_in_cent] + if not matching_options: + return "NOT_APPLICABLE", ship_costs, shipping_options + + # Use the package size of the first matching option + matching_size = matching_options[0]["packageSize"] + + # Get all options of the same size + shipping_options = [ + shipping_option_mapping[opt["id"]] + for opt in shipping_costs + if opt["packageSize"] == matching_size + and opt["id"] in shipping_option_mapping + and shipping_option_mapping[opt["id"]] not in excluded_options + ] + else: + # Only use the matching option if it's not excluded + matching_option = next((x for x in shipping_costs if x["priceInEuroCent"] == price_in_cent), None) + if not matching_option: + return "NOT_APPLICABLE", ship_costs, shipping_options + + shipping_option = shipping_option_mapping.get(matching_option["id"]) + if not shipping_option or shipping_option in excluded_options: + return "NOT_APPLICABLE", ship_costs, shipping_options + shipping_options = [shipping_option] - shipping_options = [shipping_option] except TimeoutError: # no pricing box -> no shipping given ship_type = "NOT_APPLICABLE" diff --git a/src/kleinanzeigen_bot/resources/config_defaults.yaml b/src/kleinanzeigen_bot/resources/config_defaults.yaml index 8097543..3455e58 100644 --- a/src/kleinanzeigen_bot/resources/config_defaults.yaml +++ b/src/kleinanzeigen_bot/resources/config_defaults.yaml @@ -26,6 +26,12 @@ ad_defaults: # Jobs > Praktika: 102/125 categories: {} +download: + # if true, all shipping options matching the package size will be included + include_all_matching_shipping_options: false + # list of shipping options to exclude, e.g. ["DHL_2", "DHL_5"] + excluded_shipping_options: [] + publishing: delete_old_ads: "AFTER_PUBLISH" # one of: AFTER_PUBLISH, BEFORE_PUBLISH, NEVER delete_old_ads_by_title: true # only works if delete_old_ads is set to BEFORE_PUBLISH diff --git a/tests/unit/test_extract.py b/tests/unit/test_extract.py index f4b88b2..b382de5 100644 --- a/tests/unit/test_extract.py +++ b/tests/unit/test_extract.py @@ -107,7 +107,7 @@ class TestAdExtractorShipping: "data": { "shippingOptionsResponse": { "options": [ - {"id": "DHL_001", "priceInEuroCent": int(expected_cost * 100)} + {"id": "DHL_001", "priceInEuroCent": int(expected_cost * 100), "packageSize": "SMALL"} ] } } @@ -132,7 +132,7 @@ class TestAdExtractorShipping: "data": { "shippingOptionsResponse": { "options": [ - {"id": "DHL_001", "priceInEuroCent": 549} + {"id": "DHL_001", "priceInEuroCent": 549, "packageSize": "SMALL"} ] } } @@ -149,6 +149,109 @@ class TestAdExtractorShipping: assert costs == 5.49 assert options == ["DHL_2"] + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_shipping_info_with_all_matching_options(self, test_extractor: AdExtractor) -> None: + """Test shipping info extraction with all matching options enabled.""" + shipping_response = { + "content": json.dumps({ + "data": { + "shippingOptionsResponse": { + "options": [ + {"id": "HERMES_001", "priceInEuroCent": 489, "packageSize": "SMALL"}, + {"id": "HERMES_002", "priceInEuroCent": 549, "packageSize": "SMALL"}, + {"id": "DHL_001", "priceInEuroCent": 619, "packageSize": "SMALL"} + ] + } + } + }) + } + + # Enable all matching options in config + test_extractor.config["download"] = {"include_all_matching_shipping_options": True} + + with patch.object(test_extractor, "page", MagicMock()), \ + patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \ + patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response): + + shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page() + + assert shipping_type == "SHIPPING" + assert costs == 4.89 + if options is not None: + assert sorted(options) == ["DHL_2", "Hermes_Päckchen", "Hermes_S"] + else: + assert options is None + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_shipping_info_with_excluded_options(self, test_extractor: AdExtractor) -> None: + """Test shipping info extraction with excluded options.""" + shipping_response = { + "content": json.dumps({ + "data": { + "shippingOptionsResponse": { + "options": [ + {"id": "HERMES_001", "priceInEuroCent": 489, "packageSize": "SMALL"}, + {"id": "HERMES_002", "priceInEuroCent": 549, "packageSize": "SMALL"}, + {"id": "DHL_001", "priceInEuroCent": 619, "packageSize": "SMALL"} + ] + } + } + }) + } + + # Enable all matching options and exclude DHL in config + test_extractor.config["download"] = { + "include_all_matching_shipping_options": True, + "excluded_shipping_options": ["DHL_2"] + } + + with patch.object(test_extractor, "page", MagicMock()), \ + patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \ + patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response): + + shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page() + + assert shipping_type == "SHIPPING" + assert costs == 4.89 + if options is not None: + assert sorted(options) == ["Hermes_Päckchen", "Hermes_S"] + else: + assert options is None + + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_shipping_info_with_excluded_matching_option(self, test_extractor: AdExtractor) -> None: + """Test shipping info extraction when the matching option is excluded.""" + shipping_response = { + "content": json.dumps({ + "data": { + "shippingOptionsResponse": { + "options": [ + {"id": "HERMES_001", "priceInEuroCent": 489, "packageSize": "SMALL"}, + {"id": "HERMES_002", "priceInEuroCent": 549, "packageSize": "SMALL"} + ] + } + } + }) + } + + # Exclude the matching option + test_extractor.config["download"] = { + "excluded_shipping_options": ["Hermes_Päckchen"] + } + + with patch.object(test_extractor, "page", MagicMock()), \ + patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \ + patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response): + + shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page() + + assert shipping_type == "NOT_APPLICABLE" + assert costs == 4.89 + assert options is None + class TestAdExtractorNavigation: """Tests for navigation related functionality."""