diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index a3cab77..de32e90 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -855,8 +855,8 @@ class KleinanzeigenBot(WebScrapingMixin): "new_with_tag": "Neu mit Etikett", "new": "Neu", "like_new": "Sehr Gut", - "alright": "Gut", - "ok": "In Ordnung", + "ok": "Gut", + "alright": "In Ordnung", "defect": "Defekt", } mapped_condition = condition_mapping.get(condition_value) diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py index 71d8f1b..80669d1 100644 --- a/src/kleinanzeigen_bot/extract.py +++ b/src/kleinanzeigen_bot/extract.py @@ -277,7 +277,19 @@ class AdExtractor(WebScrapingMixin): title:str = await self.web_text(By.ID, "viewad-title") LOG.info('Extracting information from ad with title "%s"', title) + # contains info about ad in different dimensions in form of a key:value dict + # dimension108 contains special attributes + # dimension92 contains fake category, which becomes a special attribute on ad page + belen_conf = await self.web_execute("window.BelenConf") + info["category"] = await self._extract_category_from_ad_page() + + # append subcategory and change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer" + # take subcategory from dimension92 as key 'art_s' sometimes is a special attribute (e.g. gender for clothes) + # the subcategory isn't really necessary, but when set, the appropriate special attribute gets preselected + if belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension92"]: + info["category"] += f"/{belen_conf['universalAnalyticsOpts']['dimensions']['dimension92']}" + info["title"] = title # Get raw description text @@ -296,11 +308,8 @@ class AdExtractor(WebScrapingMixin): info["description"] = description_text.strip() - info["special_attributes"] = await self._extract_special_attributes_from_ad_page() - if "art_s" in info["special_attributes"]: - # change e.g. 
category "161/172" to "161/172/lautsprecher_kopfhoerer" - info["category"] = f"{info['category']}/{info['special_attributes']['art_s']}" - del info["special_attributes"]["art_s"] + info["special_attributes"] = await self._extract_special_attributes_from_ad_page(belen_conf) + if "schaden_s" in info["special_attributes"]: # change f to 'nein' and 't' to 'ja' info["special_attributes"]["schaden_s"] = info["special_attributes"]["schaden_s"].translate(str.maketrans({"t": "ja", "f": "nein"})) @@ -347,18 +356,16 @@ class AdExtractor(WebScrapingMixin): return category - async def _extract_special_attributes_from_ad_page(self) -> dict[str, Any]: + async def _extract_special_attributes_from_ad_page(self, belen_conf:dict[str, Any]) -> dict[str, Any]: """ Extracts the special attributes from an ad page. If no items are available then special_attributes is empty :return: a dictionary (possibly empty) where the keys are the attribute names, mapped to their values """ - belen_conf = await self.web_execute("window.BelenConf") # e.g. 
"art_s:lautsprecher_kopfhoerer|condition_s:like_new|versand_s:t" special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"] - special_attributes = dict(item.split(":") for item in special_attributes_str.split("|") if ":" in item) special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith(".versand_s") and k != "versand_s"} return special_attributes diff --git a/tests/unit/test_extract.py b/tests/unit/test_extract.py index fee3976..330b422 100644 --- a/tests/unit/test_extract.py +++ b/tests/unit/test_extract.py @@ -433,6 +433,14 @@ class TestAdExtractorContent: raw_description, # Raw description (without affixes) "03.02.2025" # Creation date ]), + web_execute=AsyncMock(return_value={ + "universalAnalyticsOpts": { + "dimensions": { + "dimension92": "", + "dimension108": "" + } + } + }), _extract_category_from_ad_page = AsyncMock(return_value = "160"), _extract_special_attributes_from_ad_page = AsyncMock(return_value = {}), _extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")), @@ -461,6 +469,14 @@ class TestAdExtractorContent: TimeoutError("Timeout"), # Description times out "03.02.2025" # Date succeeds ]), + web_execute=AsyncMock(return_value={ + "universalAnalyticsOpts": { + "dimensions": { + "dimension92": "", + "dimension108": "" + } + } + }), _extract_category_from_ad_page = AsyncMock(return_value = "160"), _extract_special_attributes_from_ad_page = AsyncMock(return_value = {}), _extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")), @@ -494,6 +510,14 @@ class TestAdExtractorContent: raw_description, # Description without affixes "03.02.2025" # Creation date ]), + web_execute = AsyncMock(return_value = { + "universalAnalyticsOpts": { + "dimensions": { + "dimension92": "", + "dimension108": "" + } + } + }), _extract_category_from_ad_page = AsyncMock(return_value = "160"), _extract_special_attributes_from_ad_page = AsyncMock(return_value = 
{}), _extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")), @@ -575,9 +599,35 @@ class TestAdExtractorCategory: } } } - result = await extractor._extract_special_attributes_from_ad_page() + result = await extractor._extract_special_attributes_from_ad_page(mock_web_execute.return_value) assert result == {} + @pytest.mark.asyncio + # pylint: disable=protected-access + async def test_extract_special_attributes_not_empty(self, extractor: AdExtractor) -> None: + """Test extraction of special attributes when not empty.""" + + special_atts = { + "universalAnalyticsOpts": { + "dimensions": { + "dimension108": "versand_s:t|color_s:creme|groesse_s:68|condition_s:alright|type_s:accessoires|art_s:maedchen" + } + } + } + result = await extractor._extract_special_attributes_from_ad_page(special_atts) + assert len(result) == 5 + assert "versand_s" not in result + assert "color_s" in result + assert result["color_s"] == "creme" + assert "groesse_s" in result + assert result["groesse_s"] == "68" + assert "condition_s" in result + assert result["condition_s"] == "alright" + assert "type_s" in result + assert result["type_s"] == "accessoires" + assert "art_s" in result + assert result["art_s"] == "maedchen" + class TestAdExtractorContact: """Tests for contact information extraction."""