From 29d75489b2aac59dad0bb48012ad72da45189a54 Mon Sep 17 00:00:00 2001 From: Brian Gregg Date: Wed, 3 Jun 2026 15:51:01 -0400 Subject: [PATCH] Updated get_file_info to handle files with multiple periods in their name. --- scan-batch-dir | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/scan-batch-dir b/scan-batch-dir index f474a2e..840757f 100755 --- a/scan-batch-dir +++ b/scan-batch-dir @@ -636,25 +636,28 @@ def process_unknown(file_path:str): def get_file_info(file_path: str): """ Obtain information about the file_path file. - Args: file_path (str) The Path to the file. - Returns: - pid (str) The PID of the file. + pid (str) The PID of the file (everything before the first period). parent (str) The parent directory of the file. file (str) The filename and extension of the file. dir (str) The directory that the file is located in. - ext (str) The file extension of the file. + ext (str) The file extension of the file (everything after the last period). """ - dir,ext = os.path.splitext(file_path) # Split the file_path into directory and extension. - file_name = os.path.basename(dir) # Get the file_name of the object which will usually be the pid. - pid = file_name # Assign pid to the last piece of the directory. - parent = os.path.basename(os.path.dirname(file_path)) dir = os.path.dirname(file_path) - file = f"{pid}{ext}" + full_name = os.path.basename(file_path) # e.g. "abc.def.ghi.txt" + parent = os.path.basename(dir) + + if '.' in full_name: + pid = full_name.split('.')[0] # Everything before the first period. + ext = '.' + full_name.rsplit('.', 1)[1] # Everything after the last period. + else: + pid = full_name + ext = '' - return(pid,parent,file,dir,ext) + file = full_name + return (pid, parent, file, dir, ext) def add_update_dataframe(df: pd.DataFrame, pid: str, row_data):