From 71eb6321919a3d19fb0d3c1586d8d0e592c320df Mon Sep 17 00:00:00 2001 From: Jeppy Date: Tue, 23 Jul 2024 10:21:35 +0200 Subject: [PATCH] FIX extract special attributes from ad page Format of special attribute changed to "key:value|key:value". Instead of transforming the string to JSON, directly create a dictionary from belen_conf. --- src/kleinanzeigen_bot/extract.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py index 612251d..824b5aa 100644 --- a/src/kleinanzeigen_bot/extract.py +++ b/src/kleinanzeigen_bot/extract.py @@ -3,7 +3,7 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors SPDX-License-Identifier: AGPL-3.0-or-later SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ """ -import json, logging, os, shutil, re +import logging, os, shutil import urllib.request as urllib_request from datetime import datetime from typing import Any, Final @@ -290,15 +290,8 @@ class AdExtractor(WebScrapingMixin): """ belen_conf = await self.web_execute("window.BelenConf") special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"] - # Surrounding any word with " and add curly braces - special_attributes_fixed_str = "{" + re.sub('(\\w+)', '"\\g<1>"', special_attributes_str) + "}" - special_attributes = json.loads(special_attributes_fixed_str) - if not isinstance(special_attributes, dict): - raise ValueError( - "Failed to parse special attributes from ad page." - f"Expected a dictionary, but got a {type(special_attributes)}" - ) - special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')} + special_attributes = dict(item.split(":") for item in special_attributes_str.split("|")) + special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s') and k != "versand_s"} return special_attributes async def _extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]: