mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
This commit is contained in:
@@ -45,36 +45,13 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
os.mkdir(relative_directory)
|
os.mkdir(relative_directory)
|
||||||
LOG.info("Created ads directory at ./%s.", relative_directory)
|
LOG.info("Created ads directory at ./%s.", relative_directory)
|
||||||
|
|
||||||
# First, extract ad info to get the title
|
# Extract ad info and determine final directory path
|
||||||
temp_dir = os.path.join(relative_directory, f"ad_{ad_id}")
|
ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling(
|
||||||
ad_cfg:AdPartial = await self._extract_ad_page_info(temp_dir, ad_id)
|
relative_directory, ad_id
|
||||||
|
)
|
||||||
# Create folder name with ad title
|
|
||||||
sanitized_title = misc.sanitize_folder_name(ad_cfg.title, self.config.download.folder_name_max_length)
|
|
||||||
new_base_dir = os.path.join(relative_directory, f"ad_{ad_id}_{sanitized_title}")
|
|
||||||
|
|
||||||
# If the folder with title already exists, delete it
|
|
||||||
if os.path.exists(new_base_dir):
|
|
||||||
LOG.info("Deleting current folder of ad %s...", ad_id)
|
|
||||||
shutil.rmtree(new_base_dir)
|
|
||||||
|
|
||||||
# If the old folder without title exists, handle based on configuration
|
|
||||||
if os.path.exists(temp_dir):
|
|
||||||
if self.config.download.rename_existing_folders:
|
|
||||||
LOG.info("Renaming folder from %s to %s for ad %s...",
|
|
||||||
os.path.basename(temp_dir), os.path.basename(new_base_dir), ad_id)
|
|
||||||
os.rename(temp_dir, new_base_dir)
|
|
||||||
else:
|
|
||||||
# Use the existing folder without renaming
|
|
||||||
new_base_dir = temp_dir
|
|
||||||
LOG.info("Using existing folder for ad %s at %s.", ad_id, new_base_dir)
|
|
||||||
else:
|
|
||||||
# Create new directory with title
|
|
||||||
os.mkdir(new_base_dir)
|
|
||||||
LOG.info("New directory for ad created at %s.", new_base_dir)
|
|
||||||
|
|
||||||
# Save the ad configuration file
|
# Save the ad configuration file
|
||||||
ad_file_path = new_base_dir + "/" + f"ad_{ad_id}.yaml"
|
ad_file_path = final_dir + "/" + f"ad_{ad_id}.yaml"
|
||||||
dicts.save_dict(
|
dicts.save_dict(
|
||||||
ad_file_path,
|
ad_file_path,
|
||||||
ad_cfg.model_dump(),
|
ad_cfg.model_dump(),
|
||||||
@@ -283,23 +260,32 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
pass
|
pass
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
async def _extract_title_from_ad_page(self) -> str:
|
||||||
|
"""
|
||||||
|
Extracts the title from an ad page.
|
||||||
|
Assumes that the web driver currently shows an ad page.
|
||||||
|
|
||||||
|
:return: the ad title
|
||||||
|
"""
|
||||||
|
return await self.web_text(By.ID, "viewad-title")
|
||||||
|
|
||||||
async def _extract_ad_page_info(self, directory:str, ad_id:int) -> AdPartial:
|
async def _extract_ad_page_info(self, directory:str, ad_id:int) -> AdPartial:
|
||||||
"""
|
"""
|
||||||
Extracts all necessary information from an ad´s page.
|
Extracts ad information and downloads images to the specified directory.
|
||||||
|
NOTE: Requires that the driver session currently is on the ad page.
|
||||||
|
|
||||||
:param directory: the path of the ad´s previously created directory
|
:param directory: the directory to download images to
|
||||||
:param ad_id: the ad ID, already extracted by a calling function
|
:param ad_id: the ad ID
|
||||||
|
:return: an AdPartial object containing the ad information
|
||||||
"""
|
"""
|
||||||
info:dict[str, Any] = {"active": True}
|
info:dict[str, Any] = {"active": True}
|
||||||
|
|
||||||
# extract basic info
|
# extract basic info
|
||||||
info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED"
|
info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED"
|
||||||
title:str = await self.web_text(By.ID, "viewad-title")
|
|
||||||
LOG.info('Extracting information from ad with title "%s"', title)
|
|
||||||
|
|
||||||
# contains info about ad in different dimensions in form of a key:value dict
|
# Extract title
|
||||||
# dimension108 contains special attributes
|
title = await self._extract_title_from_ad_page()
|
||||||
# dimension92 contains fake category, which becomes an special attribute on ad page
|
|
||||||
belen_conf = await self.web_execute("window.BelenConf")
|
belen_conf = await self.web_execute("window.BelenConf")
|
||||||
|
|
||||||
info["category"] = await self._extract_category_from_ad_page()
|
info["category"] = await self._extract_category_from_ad_page()
|
||||||
@@ -348,9 +334,9 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
|
|
||||||
# convert creation date to ISO format
|
# convert creation date to ISO format
|
||||||
created_parts = creation_date.split(".")
|
created_parts = creation_date.split(".")
|
||||||
creation_date = created_parts[2] + "-" + created_parts[1] + "-" + created_parts[0] + " 00:00:00"
|
creation_date_str = created_parts[2] + "-" + created_parts[1] + "-" + created_parts[0] + " 00:00:00"
|
||||||
creation_date = datetime.fromisoformat(creation_date).isoformat()
|
creation_date_dt = datetime.fromisoformat(creation_date_str)
|
||||||
info["created_on"] = creation_date
|
info["created_on"] = creation_date_dt
|
||||||
info["updated_on"] = None # will be set later on
|
info["updated_on"] = None # will be set later on
|
||||||
|
|
||||||
ad_cfg = AdPartial.model_validate(info)
|
ad_cfg = AdPartial.model_validate(info)
|
||||||
@@ -360,6 +346,55 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
|
|
||||||
return ad_cfg
|
return ad_cfg
|
||||||
|
|
||||||
|
async def _extract_ad_page_info_with_directory_handling(
|
||||||
|
self, relative_directory:str, ad_id:int
|
||||||
|
) -> tuple[AdPartial, str]:
|
||||||
|
"""
|
||||||
|
Extracts ad information and handles directory creation/renaming.
|
||||||
|
|
||||||
|
:param relative_directory: Base directory for downloads
|
||||||
|
:param ad_id: The ad ID
|
||||||
|
:return: AdPartial with directory information
|
||||||
|
"""
|
||||||
|
# First, extract basic info to get the title
|
||||||
|
info:dict[str, Any] = {"active": True}
|
||||||
|
|
||||||
|
# extract basic info
|
||||||
|
info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED"
|
||||||
|
title = await self._extract_title_from_ad_page()
|
||||||
|
LOG.info('Extracting title from ad %s: "%s"', ad_id, title)
|
||||||
|
|
||||||
|
# Determine the final directory path
|
||||||
|
sanitized_title = misc.sanitize_folder_name(title, self.config.download.folder_name_max_length)
|
||||||
|
final_dir = os.path.join(relative_directory, f"ad_{ad_id}_{sanitized_title}")
|
||||||
|
temp_dir = os.path.join(relative_directory, f"ad_{ad_id}")
|
||||||
|
|
||||||
|
# Handle existing directories
|
||||||
|
if os.path.exists(final_dir):
|
||||||
|
# If the folder with title already exists, delete it
|
||||||
|
LOG.info("Deleting current folder of ad %s...", ad_id)
|
||||||
|
shutil.rmtree(final_dir)
|
||||||
|
|
||||||
|
if os.path.exists(temp_dir):
|
||||||
|
if self.config.download.rename_existing_folders:
|
||||||
|
# Rename the old folder to the new name with title
|
||||||
|
LOG.info("Renaming folder from %s to %s for ad %s...",
|
||||||
|
os.path.basename(temp_dir), os.path.basename(final_dir), ad_id)
|
||||||
|
os.rename(temp_dir, final_dir)
|
||||||
|
else:
|
||||||
|
# Use the existing folder without renaming
|
||||||
|
final_dir = temp_dir
|
||||||
|
LOG.info("Using existing folder for ad %s at %s.", ad_id, final_dir)
|
||||||
|
else:
|
||||||
|
# Create new directory with title
|
||||||
|
os.mkdir(final_dir)
|
||||||
|
LOG.info("New directory for ad created at %s.", final_dir)
|
||||||
|
|
||||||
|
# Now extract complete ad info (including images) to the final directory
|
||||||
|
ad_cfg = await self._extract_ad_page_info(final_dir, ad_id)
|
||||||
|
|
||||||
|
return ad_cfg, final_dir
|
||||||
|
|
||||||
async def _extract_category_from_ad_page(self) -> str:
|
async def _extract_category_from_ad_page(self) -> str:
|
||||||
"""
|
"""
|
||||||
Extracts a category of an ad in numerical form.
|
Extracts a category of an ad in numerical form.
|
||||||
|
|||||||
@@ -174,10 +174,6 @@ kleinanzeigen_bot/extract.py:
|
|||||||
#################################################
|
#################################################
|
||||||
download_ad:
|
download_ad:
|
||||||
"Created ads directory at ./%s.": "Verzeichnis für Anzeigen erstellt unter ./%s."
|
"Created ads directory at ./%s.": "Verzeichnis für Anzeigen erstellt unter ./%s."
|
||||||
"Deleting current folder of ad %s...": "Lösche aktuellen Ordner der Anzeige %s..."
|
|
||||||
"New directory for ad created at %s.": "Neues Verzeichnis für Anzeige erstellt unter %s."
|
|
||||||
"Renaming folder from %s to %s for ad %s...": "Benenne Ordner von %s zu %s für Anzeige %s um..."
|
|
||||||
"Using existing folder for ad %s at %s.": "Verwende bestehenden Ordner für Anzeige %s unter %s."
|
|
||||||
|
|
||||||
_download_images_from_ad_page:
|
_download_images_from_ad_page:
|
||||||
"Found %s.": "%s gefunden."
|
"Found %s.": "%s gefunden."
|
||||||
@@ -210,8 +206,12 @@ kleinanzeigen_bot/extract.py:
|
|||||||
"There is no ad under the given ID.": "Es gibt keine Anzeige unter der angegebenen ID."
|
"There is no ad under the given ID.": "Es gibt keine Anzeige unter der angegebenen ID."
|
||||||
"A popup appeared!": "Ein Popup ist erschienen!"
|
"A popup appeared!": "Ein Popup ist erschienen!"
|
||||||
|
|
||||||
_extract_ad_page_info:
|
_extract_ad_page_info_with_directory_handling:
|
||||||
"Extracting information from ad with title \"%s\"": "Extrahiere Informationen aus Anzeige mit Titel \"%s\""
|
"Extracting title from ad %s: \"%s\"": "Extrahiere Titel aus Anzeige %s: \"%s\""
|
||||||
|
"Deleting current folder of ad %s...": "Lösche aktuellen Ordner der Anzeige %s..."
|
||||||
|
"New directory for ad created at %s.": "Neues Verzeichnis für Anzeige erstellt unter %s."
|
||||||
|
"Renaming folder from %s to %s for ad %s...": "Benenne Ordner von %s zu %s für Anzeige %s um..."
|
||||||
|
"Using existing folder for ad %s at %s.": "Verwende bestehenden Ordner für Anzeige %s unter %s."
|
||||||
|
|
||||||
_extract_contact_from_ad_page:
|
_extract_contact_from_ad_page:
|
||||||
"No street given in the contact.": "Keine Straße in den Kontaktdaten angegeben."
|
"No street given in the contact.": "Keine Straße in den Kontaktdaten angegeben."
|
||||||
|
|||||||
@@ -735,7 +735,7 @@ class TestAdExtractorDownload:
|
|||||||
patch("os.rename") as mock_rename, \
|
patch("os.rename") as mock_rename, \
|
||||||
patch("shutil.rmtree") as mock_rmtree, \
|
patch("shutil.rmtree") as mock_rmtree, \
|
||||||
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
||||||
patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
|
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
|
||||||
|
|
||||||
base_dir = "downloaded-ads"
|
base_dir = "downloaded-ads"
|
||||||
final_dir = os.path.join(base_dir, "ad_12345_Test Advertisement Title")
|
final_dir = os.path.join(base_dir, "ad_12345_Test Advertisement Title")
|
||||||
@@ -746,26 +746,32 @@ class TestAdExtractorDownload:
|
|||||||
mock_exists.side_effect = lambda path: path in existing_paths
|
mock_exists.side_effect = lambda path: path in existing_paths
|
||||||
mock_isdir.side_effect = lambda path: path == base_dir
|
mock_isdir.side_effect = lambda path: path == base_dir
|
||||||
|
|
||||||
mock_extract.return_value = AdPartial.model_validate({
|
# Mock the new method that handles directory creation and extraction
|
||||||
"title": "Test Advertisement Title",
|
mock_extract_with_dir.return_value = (
|
||||||
"description": "Test Description",
|
AdPartial.model_validate({
|
||||||
"category": "Dienstleistungen",
|
"title": "Test Advertisement Title",
|
||||||
"price": 100,
|
"description": "Test Description",
|
||||||
"images": [],
|
"category": "Dienstleistungen",
|
||||||
"contact": {
|
"price": 100,
|
||||||
"name": "Test User",
|
"images": [],
|
||||||
"street": "Test Street 123",
|
"contact": {
|
||||||
"zipcode": "12345",
|
"name": "Test User",
|
||||||
"location": "Test City"
|
"street": "Test Street 123",
|
||||||
}
|
"zipcode": "12345",
|
||||||
})
|
"location": "Test City"
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
final_dir
|
||||||
|
)
|
||||||
|
|
||||||
await extractor.download_ad(12345)
|
await extractor.download_ad(12345)
|
||||||
|
|
||||||
# Verify the correct functions were called
|
# Verify the correct functions were called
|
||||||
mock_extract.assert_called_once()
|
mock_extract_with_dir.assert_called_once()
|
||||||
mock_rmtree.assert_called_once_with(final_dir) # Delete the final directory with title
|
# Directory handling is now done inside _extract_ad_page_info_with_directory_handling
|
||||||
mock_mkdir.assert_called_once_with(final_dir) # Create the final directory with title
|
# so we don't expect rmtree/mkdir to be called directly in download_ad
|
||||||
|
mock_rmtree.assert_not_called() # Directory handling is done internally
|
||||||
|
mock_mkdir.assert_not_called() # Directory handling is done internally
|
||||||
mock_makedirs.assert_not_called() # Directory already exists
|
mock_makedirs.assert_not_called() # Directory already exists
|
||||||
mock_rename.assert_not_called() # No renaming needed
|
mock_rename.assert_not_called() # No renaming needed
|
||||||
|
|
||||||
@@ -774,15 +780,7 @@ class TestAdExtractorDownload:
|
|||||||
assert actual_call is not None
|
assert actual_call is not None
|
||||||
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
||||||
assert actual_path == yaml_path
|
assert actual_path == yaml_path
|
||||||
assert actual_call[0][1] == mock_extract.return_value.model_dump()
|
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
# pylint: disable=protected-access
|
|
||||||
async def test_download_images_no_images(self, extractor:AdExtractor) -> None:
|
|
||||||
"""Test image download when no images are found."""
|
|
||||||
with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
|
|
||||||
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
|
|
||||||
assert len(image_paths) == 0
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_download_ad(self, extractor:AdExtractor) -> None:
|
async def test_download_ad(self, extractor:AdExtractor) -> None:
|
||||||
@@ -794,7 +792,7 @@ class TestAdExtractorDownload:
|
|||||||
patch("os.rename") as mock_rename, \
|
patch("os.rename") as mock_rename, \
|
||||||
patch("shutil.rmtree") as mock_rmtree, \
|
patch("shutil.rmtree") as mock_rmtree, \
|
||||||
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
||||||
patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
|
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
|
||||||
|
|
||||||
base_dir = "downloaded-ads"
|
base_dir = "downloaded-ads"
|
||||||
final_dir = os.path.join(base_dir, "ad_12345_Test Advertisement Title")
|
final_dir = os.path.join(base_dir, "ad_12345_Test Advertisement Title")
|
||||||
@@ -804,29 +802,31 @@ class TestAdExtractorDownload:
|
|||||||
mock_exists.return_value = False
|
mock_exists.return_value = False
|
||||||
mock_isdir.return_value = False
|
mock_isdir.return_value = False
|
||||||
|
|
||||||
mock_extract.return_value = AdPartial.model_validate({
|
# Mock the new method that handles directory creation and extraction
|
||||||
"title": "Test Advertisement Title",
|
mock_extract_with_dir.return_value = (
|
||||||
"description": "Test Description",
|
AdPartial.model_validate({
|
||||||
"category": "Dienstleistungen",
|
"title": "Test Advertisement Title",
|
||||||
"price": 100,
|
"description": "Test Description",
|
||||||
"images": [],
|
"category": "Dienstleistungen",
|
||||||
"contact": {
|
"price": 100,
|
||||||
"name": "Test User",
|
"images": [],
|
||||||
"street": "Test Street 123",
|
"contact": {
|
||||||
"zipcode": "12345",
|
"name": "Test User",
|
||||||
"location": "Test City"
|
"street": "Test Street 123",
|
||||||
}
|
"zipcode": "12345",
|
||||||
})
|
"location": "Test City"
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
final_dir
|
||||||
|
)
|
||||||
|
|
||||||
await extractor.download_ad(12345)
|
await extractor.download_ad(12345)
|
||||||
|
|
||||||
# Verify the correct functions were called
|
# Verify the correct functions were called
|
||||||
mock_extract.assert_called_once()
|
mock_extract_with_dir.assert_called_once()
|
||||||
mock_rmtree.assert_not_called() # No directory to remove
|
# Directory handling is now done inside _extract_ad_page_info_with_directory_handling
|
||||||
mock_mkdir.assert_has_calls([
|
mock_rmtree.assert_not_called() # Directory handling is done internally
|
||||||
call(base_dir),
|
mock_mkdir.assert_has_calls([call(base_dir)]) # Only base directory creation
|
||||||
call(final_dir) # Create the final directory with title
|
|
||||||
])
|
|
||||||
mock_makedirs.assert_not_called() # Using mkdir instead
|
mock_makedirs.assert_not_called() # Using mkdir instead
|
||||||
mock_rename.assert_not_called() # No renaming needed
|
mock_rename.assert_not_called() # No renaming needed
|
||||||
|
|
||||||
@@ -835,7 +835,7 @@ class TestAdExtractorDownload:
|
|||||||
assert actual_call is not None
|
assert actual_call is not None
|
||||||
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
||||||
assert actual_path == yaml_path
|
assert actual_path == yaml_path
|
||||||
assert actual_call[0][1] == mock_extract.return_value.model_dump()
|
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_download_ad_use_existing_folder(self, extractor:AdExtractor) -> None:
|
async def test_download_ad_use_existing_folder(self, extractor:AdExtractor) -> None:
|
||||||
@@ -847,7 +847,7 @@ class TestAdExtractorDownload:
|
|||||||
patch("os.rename") as mock_rename, \
|
patch("os.rename") as mock_rename, \
|
||||||
patch("shutil.rmtree") as mock_rmtree, \
|
patch("shutil.rmtree") as mock_rmtree, \
|
||||||
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
||||||
patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
|
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
|
||||||
|
|
||||||
base_dir = "downloaded-ads"
|
base_dir = "downloaded-ads"
|
||||||
temp_dir = os.path.join(base_dir, "ad_12345")
|
temp_dir = os.path.join(base_dir, "ad_12345")
|
||||||
@@ -859,24 +859,28 @@ class TestAdExtractorDownload:
|
|||||||
mock_exists.side_effect = lambda path: path in existing_paths
|
mock_exists.side_effect = lambda path: path in existing_paths
|
||||||
mock_isdir.side_effect = lambda path: path == base_dir
|
mock_isdir.side_effect = lambda path: path == base_dir
|
||||||
|
|
||||||
mock_extract.return_value = AdPartial.model_validate({
|
# Mock the new method that handles directory creation and extraction
|
||||||
"title": "Test Advertisement Title",
|
mock_extract_with_dir.return_value = (
|
||||||
"description": "Test Description",
|
AdPartial.model_validate({
|
||||||
"category": "Dienstleistungen",
|
"title": "Test Advertisement Title",
|
||||||
"price": 100,
|
"description": "Test Description",
|
||||||
"images": [],
|
"category": "Dienstleistungen",
|
||||||
"contact": {
|
"price": 100,
|
||||||
"name": "Test User",
|
"images": [],
|
||||||
"street": "Test Street 123",
|
"contact": {
|
||||||
"zipcode": "12345",
|
"name": "Test User",
|
||||||
"location": "Test City"
|
"street": "Test Street 123",
|
||||||
}
|
"zipcode": "12345",
|
||||||
})
|
"location": "Test City"
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
temp_dir # Use existing temp directory
|
||||||
|
)
|
||||||
|
|
||||||
await extractor.download_ad(12345)
|
await extractor.download_ad(12345)
|
||||||
|
|
||||||
# Verify the correct functions were called
|
# Verify the correct functions were called
|
||||||
mock_extract.assert_called_once()
|
mock_extract_with_dir.assert_called_once()
|
||||||
mock_rmtree.assert_not_called() # No directory to remove
|
mock_rmtree.assert_not_called() # No directory to remove
|
||||||
mock_mkdir.assert_not_called() # Base directory already exists
|
mock_mkdir.assert_not_called() # Base directory already exists
|
||||||
mock_makedirs.assert_not_called() # Using mkdir instead
|
mock_makedirs.assert_not_called() # Using mkdir instead
|
||||||
@@ -887,7 +891,7 @@ class TestAdExtractorDownload:
|
|||||||
assert actual_call is not None
|
assert actual_call is not None
|
||||||
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
||||||
assert actual_path == yaml_path
|
assert actual_path == yaml_path
|
||||||
assert actual_call[0][1] == mock_extract.return_value.model_dump()
|
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_download_ad_rename_existing_folder_when_enabled(self, extractor:AdExtractor) -> None:
|
async def test_download_ad_rename_existing_folder_when_enabled(self, extractor:AdExtractor) -> None:
|
||||||
@@ -902,7 +906,7 @@ class TestAdExtractorDownload:
|
|||||||
patch("os.rename") as mock_rename, \
|
patch("os.rename") as mock_rename, \
|
||||||
patch("shutil.rmtree") as mock_rmtree, \
|
patch("shutil.rmtree") as mock_rmtree, \
|
||||||
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
||||||
patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
|
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
|
||||||
|
|
||||||
base_dir = "downloaded-ads"
|
base_dir = "downloaded-ads"
|
||||||
temp_dir = os.path.join(base_dir, "ad_12345")
|
temp_dir = os.path.join(base_dir, "ad_12345")
|
||||||
@@ -915,32 +919,45 @@ class TestAdExtractorDownload:
|
|||||||
mock_exists.side_effect = lambda path: path in existing_paths
|
mock_exists.side_effect = lambda path: path in existing_paths
|
||||||
mock_isdir.side_effect = lambda path: path == base_dir
|
mock_isdir.side_effect = lambda path: path == base_dir
|
||||||
|
|
||||||
mock_extract.return_value = AdPartial.model_validate({
|
# Mock the new method that handles directory creation and extraction
|
||||||
"title": "Test Advertisement Title",
|
mock_extract_with_dir.return_value = (
|
||||||
"description": "Test Description",
|
AdPartial.model_validate({
|
||||||
"category": "Dienstleistungen",
|
"title": "Test Advertisement Title",
|
||||||
"price": 100,
|
"description": "Test Description",
|
||||||
"images": [],
|
"category": "Dienstleistungen",
|
||||||
"contact": {
|
"price": 100,
|
||||||
"name": "Test User",
|
"images": [],
|
||||||
"street": "Test Street 123",
|
"contact": {
|
||||||
"zipcode": "12345",
|
"name": "Test User",
|
||||||
"location": "Test City"
|
"street": "Test Street 123",
|
||||||
}
|
"zipcode": "12345",
|
||||||
})
|
"location": "Test City"
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
final_dir
|
||||||
|
)
|
||||||
|
|
||||||
await extractor.download_ad(12345)
|
await extractor.download_ad(12345)
|
||||||
|
|
||||||
# Verify the correct functions were called
|
# Verify the correct functions were called
|
||||||
mock_extract.assert_called_once()
|
mock_extract_with_dir.assert_called_once() # Extract to final directory
|
||||||
mock_rmtree.assert_not_called() # No directory to remove
|
# Directory handling (including renaming) is now done inside _extract_ad_page_info_with_directory_handling
|
||||||
mock_mkdir.assert_not_called() # Base directory already exists
|
mock_rmtree.assert_not_called() # Directory handling is done internally
|
||||||
|
mock_mkdir.assert_not_called() # Directory handling is done internally
|
||||||
mock_makedirs.assert_not_called() # Using mkdir instead
|
mock_makedirs.assert_not_called() # Using mkdir instead
|
||||||
mock_rename.assert_called_once_with(temp_dir, final_dir) # Rename temp to final
|
mock_rename.assert_not_called() # Directory handling is done internally
|
||||||
|
|
||||||
# Get the actual call arguments
|
# Get the actual call arguments
|
||||||
actual_call = mock_save_dict.call_args
|
actual_call = mock_save_dict.call_args
|
||||||
assert actual_call is not None
|
assert actual_call is not None
|
||||||
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
||||||
assert actual_path == yaml_path
|
assert actual_path == yaml_path
|
||||||
assert actual_call[0][1] == mock_extract.return_value.model_dump()
|
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
async def test_download_images_no_images(self, extractor:AdExtractor) -> None:
|
||||||
|
"""Test image download when no images are found."""
|
||||||
|
with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
|
||||||
|
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
|
||||||
|
assert len(image_paths) == 0
|
||||||
|
|||||||
Reference in New Issue
Block a user