From 86c3aeea85ddc36d48596538426cca1954692e5c Mon Sep 17 00:00:00 2001
From: sebthom <sebthom@users.noreply.github.com>
Date: Thu, 21 Nov 2024 22:53:35 +0100
Subject: [PATCH] fix: downloaded images have wrong file extension #348

---
 src/kleinanzeigen_bot/extract.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py
index 82e0d81..feae96a 100644
--- a/src/kleinanzeigen_bot/extract.py
+++ b/src/kleinanzeigen_bot/extract.py
@@ -5,6 +5,7 @@ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanze
 """
 import logging, os, shutil
 import urllib.request as urllib_request
+import mimetypes
 from datetime import datetime
 from typing import Any, Final
 
@@ -77,12 +78,15 @@ class AdExtractor(WebScrapingMixin):
                 current_img_url = img_element.attrs['src']  # URL of the image
                 if current_img_url is None:
                     continue
-                file_ending = current_img_url.split('.')[-1].lower()
-                img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending
-                if current_img_url.startswith('https'):  # verify https (for Bandit linter)
-                    urllib_request.urlretrieve(current_img_url, img_path)  # nosec B310
-                dl_counter += 1
-                img_paths.append(img_path.split('/')[-1])
+
+                with urllib_request.urlopen(current_img_url) as response:  # nosec B310
+                    content_type = response.info().get_content_type()
+                    file_ending = mimetypes.guess_extension(content_type) or ""  # guess_extension() returns None for unknown types
+                    img_path = f"{directory}/{img_fn_prefix}{img_nr}{file_ending}"
+                    with open(img_path, 'wb') as f:
+                        shutil.copyfileobj(response, f)
+                    dl_counter += 1
+                    img_paths.append(img_path.rsplit('/', maxsplit = 1)[-1])
 
                 # navigate to next image (if exists)
                 if img_nr < n_images: