feat: add multi-language support

2026-03-12 10:31:50 +01:00 · 2024-12-27 13:04:30 +01:00
parent 0aa1975325
commit 9d54a949e7
10 changed files with 613 additions and 100 deletions
--- a/src/kleinanzeigen_bot/extract.py
+++ b/src/kleinanzeigen_bot/extract.py
@@ -9,10 +9,15 @@ import mimetypes
 from datetime import datetime
 from typing import Any, Final

+from .i18n import get_translating_logger, pluralize
 from .utils import is_integer, parse_decimal, save_dict
 from .web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin

-LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.AdExtractor")
+__all__ = [
+    "AdExtractor",
+]
+
+LOG:Final[logging.Logger] = get_translating_logger(__name__)


 class AdExtractor(WebScrapingMixin):
@@ -42,7 +47,7 @@ class AdExtractor(WebScrapingMixin):

        new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
        if os.path.exists(new_base_dir):
-            LOG.info('Deleting current folder of ad...')
+            LOG.info('Deleting current folder of ad %s...', ad_id)
            shutil.rmtree(new_base_dir)
        os.mkdir(new_base_dir)
        LOG.info('New directory for ad created at %s.', new_base_dir)
@@ -68,7 +73,7 @@ class AdExtractor(WebScrapingMixin):
            image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')

            n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
-            LOG.info('Found %d images.', n_images)
+            LOG.info('Found %s.', pluralize("image", n_images))

            img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
            img_fn_prefix = 'ad_' + str(ad_id) + '__img'
@@ -97,13 +102,13 @@ class AdExtractor(WebScrapingMixin):
                        new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
                        img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div)
                    except TimeoutError:
-                        LOG.error('NEXT button in image gallery somehow missing, abort image fetching.')
+                        LOG.error('NEXT button in image gallery somehow missing, aborting image fetching.')
                        break
                img_nr += 1
-            LOG.info('Downloaded %d image(s).', dl_counter)
+            LOG.info('Downloaded %s.', pluralize("image", dl_counter))

        except TimeoutError:  # some ads do not require images
-            LOG.warning('No image area found. Continue without downloading images.')
+            LOG.warning('No image area found. Continuing without downloading images.')

        return img_paths

@@ -153,10 +158,10 @@ class AdExtractor(WebScrapingMixin):
                parent = await self.web_find(By.CSS_SELECTOR, 'div:nth-of-type(1)', parent = pagination)))
        if n_buttons > 1:
            multi_page = True
-            LOG.info('It seems like you have many ads!')
+            LOG.info('It looks like you have many ads!')
        else:
            multi_page = False
-            LOG.info('It seems like all your ads fit on one overview page.')
+            LOG.info('It looks like all your ads fit on one overview page.')

        refs:list[str] = []
        while True:  # loop reference extraction until no more forward page
@@ -208,7 +213,7 @@ class AdExtractor(WebScrapingMixin):
        # close (warning) popup, if given
        try:
            await self.web_find(By.ID, 'vap-ovrly-secure')
-            LOG.warning('A popup appeared.')
+            LOG.warning('A popup appeared!')
            await self.web_click(By.CLASS_NAME, 'mfp-close')
            await self.web_sleep()
        except TimeoutError: