FIX Download path not found

This commit is contained in:
Philipp K
2022-12-17 10:50:18 +01:00
committed by Sebastian Thomschke
parent d08b335351
commit 886d812393
3 changed files with 14 additions and 16 deletions

View File

@@ -222,7 +222,7 @@ The following parameters can be configured:
# wild card patterns to select ad configuration files # wild card patterns to select ad configuration files
# if relative paths are specified, then they are relative to this configuration file # if relative paths are specified, then they are relative to this configuration file
ad_files: ad_files:
- "my_ads/**/ad_*.{json,yml,yaml}" - "./**/ad_*.{json,yml,yaml}"
# default values for ads, can be overwritten in each ad configuration file # default values for ads, can be overwritten in each ad configuration file
ad_defaults: ad_defaults:

View File

@@ -207,14 +207,15 @@ class KleinanzeigenBot(SeleniumMixin):
LOG.info("App version: %s", self.get_version()) LOG.info("App version: %s", self.get_version())
def load_ads(self, *, ignore_inactive:bool = True) -> list[tuple[str, dict[str, Any], dict[str, Any]]]: def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list[tuple[str, dict[str, Any], dict[str, Any]]]:
LOG.info("Searching for ad config files...") LOG.info("Searching for ad config files...")
ad_files = set() ad_files = set()
data_root_dir = os.path.dirname(self.config_file_path) data_root_dir = os.path.dirname(self.config_file_path)
for file_pattern in self.config["ad_files"]: for file_pattern in self.config["ad_files"]:
for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB): for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
ad_files.add(abspath(ad_file, relative_to = data_root_dir)) if not str(ad_file).endswith('ad_fields.yaml'):
ad_files.add(abspath(ad_file, relative_to = data_root_dir))
LOG.info(" -> found %s", pluralize("ad config file", ad_files)) LOG.info(" -> found %s", pluralize("ad config file", ad_files))
if not ad_files: if not ad_files:
return [] return []
@@ -235,7 +236,7 @@ class KleinanzeigenBot(SeleniumMixin):
LOG.info(" -> SKIPPED: inactive ad [%s]", ad_file) LOG.info(" -> SKIPPED: inactive ad [%s]", ad_file)
continue continue
if self.ads_selector == "new" and ad_cfg["id"]: if self.ads_selector == "new" and ad_cfg["id"] and check_id:
LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file) LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file)
continue continue
@@ -878,11 +879,12 @@ class KleinanzeigenBot(SeleniumMixin):
:param id_: the ad ID :param id_: the ad ID
""" """
# create sub-directory for ad to download: # create sub-directory for ad(s) to download (if necessary):
relative_directory = str(self.config['ad_files'][0]).split('**', maxsplit = 1)[0] relative_directory = 'downloaded-ads'
# make sure configured base directory exists # make sure configured base directory exists
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory): if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
os.mkdir(relative_directory) os.mkdir(relative_directory)
LOG.info('Created ads directory at /%s.', relative_directory)
new_base_dir = os.path.join(relative_directory, f'ad_{id_}') new_base_dir = os.path.join(relative_directory, f'ad_{id_}')
if os.path.exists(new_base_dir): if os.path.exists(new_base_dir):
@@ -928,14 +930,10 @@ class KleinanzeigenBot(SeleniumMixin):
# check which ads already saved # check which ads already saved
saved_ad_ids = [] saved_ad_ids = []
data_root_dir = os.path.dirname(self.config_file_path) ads = self.load_ads(ignore_inactive=False, check_id=False) # do not skip because of existing IDs
for file_pattern in self.config["ad_files"]: for ad_ in ads:
for ad_file in glob.glob(file_pattern, root_dir = os.path.dirname(self.config_file_path), ad_id = int(ad_[2]['id'])
flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB): saved_ad_ids.append(ad_id)
ad_file_path = abspath(ad_file, relative_to = data_root_dir)
ad_dict = utils.load_dict(ad_file_path)
ad_id = int(ad_dict['id'])
saved_ad_ids.append(ad_id)
LOG.info('Start fetch task for your unsaved ads!') LOG.info('Start fetch task for your unsaved ads!')
new_count = 0 new_count = 0
@@ -949,7 +947,7 @@ class KleinanzeigenBot(SeleniumMixin):
if self.navigate_to_ad_page(url = ref_pair[0]): if self.navigate_to_ad_page(url = ref_pair[0]):
self.download_ad_page(id_) self.download_ad_page(id_)
new_count += 1 new_count += 1
LOG.info('%d new ads were downloaded from your profile.', new_count) LOG.info('%d new ad(s) were downloaded from your profile.', new_count)
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s) elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s)
ids = [int(n) for n in self.ads_selector.split(',')] ids = [int(n) for n in self.ads_selector.split(',')]

View File

@@ -1,5 +1,5 @@
ad_files: ad_files:
- "**/ad_*.{json,yml,yaml}" - "./**/ad_*.{json,yml,yaml}"
# default values for ads, can be overwritten in each ad configuration file # default values for ads, can be overwritten in each ad configuration file
ad_defaults: ad_defaults: