# Source code for etl_toolbox.file_functions

'''
.. epigraph:: Functions for working with files and directories
'''

import os
import re


def get_file_list_from_dir(dir_path, recursive=False, include_regex=None):
    r"""
    Returns a list of the files in a directory

    Files are listed directory by directory, starting with ``dir_path``
    itself. File names within a directory, and the sub-directories visited,
    are processed in sorted order so the result is deterministic across
    platforms and filesystems.

    Usage:
        >>> get_file_list_from_dir('test_data/test_dir')  # doctest:+SKIP
        ['test_data/test_dir/1.csv',
         'test_data/test_dir/2.csv',
         'test_data/test_dir/3.json']

    :param dir_path: Path of the directory to list. If it does not exist
        or cannot be read, an empty list is returned (``os.walk`` ignores
        listing errors by default).

    :param recursive: If set to ``True``, the returned list will include
        files from ``dir`` and all of its subdirectories.
        Default is ``False``.

        Example:
            >>> get_file_list_from_dir('test_data/test_dir',
            ...                        recursive=True)  # doctest:+SKIP
            ['test_data/test_dir/1.csv',
             'test_data/test_dir/2.csv',
             'test_data/test_dir/3.json',
             'test_data/test_dir/a/1.csv',
             'test_data/test_dir/b/3.csv',
             'test_data/test_dir/b/c/2.txt']
    :type recursive: boolean, optional

    :param include_regex: Only include files whose path matches this regex.
        The pattern is applied with ``re.match``, i.e. it is anchored at
        the start of the normalized path.
        Default is ``None`` (list is unfiltered).

        Example:
            >>> get_file_list_from_dir('test_data/test_dir',
            ...                        include_regex=r'.*\.csv$')  # doctest:+SKIP
            ['test_data/test_dir/1.csv',
             'test_data/test_dir/2.csv']
    :type include_regex: string, optional

    :return: Returns list of file paths.
    """
    file_list = []

    # Collect file paths
    for root, dirs, files in os.walk(dir_path):
        # os.walk passes through the raw directory-listing order, which is
        # OS/filesystem dependent. Sorting ``dirs`` in place fixes the order
        # in which sub-directories are visited (top-down walk honours in-place
        # edits), and sorting ``files`` fixes the order inside each directory.
        dirs.sort()
        file_list.extend(
            os.path.normpath(os.path.join(root, name)) for name in sorted(files)
        )

        # The first iteration is ``dir_path`` itself; stop there unless the
        # caller asked for sub-directories too.
        if not recursive:
            break

    # Apply filter
    if include_regex is not None:
        file_list = [path for path in file_list if re.match(include_regex, path)]

    return file_list