From 45fd4bac9faccbec319cb3983af6c63958dfce98 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sat, 10 Dec 2022 19:59:47 +0100 Subject: [PATCH] ADD download shipping options and sell directly LIMITATION extracts only the cheapest shipping option --- README.md | 2 ++ kleinanzeigen_bot/__init__.py | 3 ++- kleinanzeigen_bot/extract.py | 36 ++++++++++++++++++++++++++++++++--- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2c6552c..488ec25 100644 --- a/README.md +++ b/README.md @@ -203,6 +203,8 @@ Options: -v, --verbose - enables verbose output - only useful when troubleshooting issues ``` +Limitation of `download`: It's only possible to extract the cheapest given shipping option. + ### Configuration All configuration files can be in YAML or JSON format. diff --git a/kleinanzeigen_bot/__init__.py b/kleinanzeigen_bot/__init__.py index 0cc7129..b4070cb 100644 --- a/kleinanzeigen_bot/__init__.py +++ b/kleinanzeigen_bot/__init__.py @@ -842,7 +842,8 @@ class KleinanzeigenBot(SeleniumMixin): info['price'], info['price_type'] = extractor.extract_pricing_info_from_ad_page() # process shipping - info['shipping_type'], info['shipping_costs'] = extractor.extract_shipping_info_from_ad_page() + info['shipping_type'], info['shipping_costs'], info['shipping_options'] = extractor.extract_shipping_info_from_ad_page() + info['sell_directly'] = extractor.extract_sell_directly_from_ad_page() # fetch images info['images'] = self.download_images_from_ad_page(directory, id_, LOG) diff --git a/kleinanzeigen_bot/extract.py b/kleinanzeigen_bot/extract.py index 0c4f951..c49df71 100644 --- a/kleinanzeigen_bot/extract.py +++ b/kleinanzeigen_bot/extract.py @@ -85,13 +85,13 @@ class AdExtractor(SeleniumMixin): except NoSuchElementException: # no 'commercial' ad, has no pricing box etc. 
return None, 'NOT_APPLICABLE' - def extract_shipping_info_from_ad_page(self) -> (str, float | None): + def extract_shipping_info_from_ad_page(self) -> (str, float | None, list | None): """ Extracts shipping information from an ad page. :return: the shipping type, and the shipping price (optional) """ - ship_type, ship_costs = 'NOT_APPLICABLE', None + ship_type, ship_costs, shipping_options = 'NOT_APPLICABLE', None, None try: shipping_text = self.webdriver.find_element(By.CSS_SELECTOR, '.boxedarticle--details--shipping') \ .text.strip() @@ -105,10 +105,40 @@ class AdExtractor(SeleniumMixin): shipping_price = float(parse_decimal(shipping_price_parts[-2])) ship_type = 'SHIPPING' ship_costs = shipping_price + + # extract shipping options + # It is only possible to extract the cheapest shipping option, + # as the other options are not shown + shipping_option_mapping = { + "DHL_2": "5,49", + "Hermes_Päckchen": "4,50", + "Hermes_S": "4,95", + "DHL_5": "6,99", + "Hermes_M": "5,95", + "DHL_10": "9,49", + "DHL_31,5": "16,49", + "Hermes_L": "10,95", + } + for shipping_option, shipping_price in shipping_option_mapping.items(): + if shipping_price in shipping_text: + shipping_options = [shipping_option] + break except NoSuchElementException: # no pricing box -> no shipping given ship_type = 'NOT_APPLICABLE' - return ship_type, ship_costs + return ship_type, ship_costs, shipping_options + + def extract_sell_directly_from_ad_page(self) -> bool | None: + """ + Extracts the sell directly option from an ad page. + + :return: a boolean indicating whether the sell directly option is active (optional) + """ + try: + buy_now_is_active = self.webdriver.find_element(By.ID, 'j-buy-now').text == "Direkt kaufen" + return buy_now_is_active + except NoSuchElementException: + return None def extract_contact_from_ad_page(self) -> dict: """