Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions scan-batch-dir
Original file line number Diff line number Diff line change
Expand Up @@ -159,30 +159,39 @@ def read_google_sheet(spreadsheet_id: str, sheet_name="Sheet1", credentials_file
data = sheet.get('values', [])

if not data:
logger.warn(f"read_google_sheet - No data found in the specified worksheet.")
logger.warning(f"read_google_sheet - No data found in the specified worksheet.")
print(f"No data found in the specified worksheet.")

# Return empty DataFrame
return pd.DataFrame()

else:
logger.info(f"read_google_sheet - Read of Google Sheet Successful.")
print(f"Read of Google Sheet Successful.")
# Find the true max width across ALL rows (including the header),
# since the API silently drops trailing empty cells from every row.
max_columns = max(len(row) for row in data)

# Convert to DataFrame
# First row as headers, rest as data
headers = data[0]
rows = data[1:] if len(data) > 1 else []
# Pad every row (including the header row) to the full width.
fill_value = None
padded_data = [row + [fill_value] * (max_columns - len(row)) for row in data]

# Pad rows with fewer columns with fill_value
fill_value = None
max_columns = len(headers)
padded_rows = [row + [fill_value] * (max_columns - len(row)) for row in rows]
headers = padded_data[0]
rows = padded_data[1:] if len(padded_data) > 1 else []

logger.info(f"read_google_sheet - Read of Google Sheet Successful.")
print(f"Read of Google Sheet Successful.")

# If any header cell is None/empty after padding, give it a positional
# placeholder name so that no DataFrame column label is null or empty.
# Note: repeated non-empty header names are NOT deduplicated here.
headers = [
col if col not in (None, "") else f"Unnamed_{i}"
for i, col in enumerate(headers)
]

# Create DataFrame
df = pd.DataFrame(padded_rows, columns=headers)
# Create DataFrame
df = pd.DataFrame(padded_rows, columns=headers)

return df
logger.info(f"read_google_sheet - Read of Google Sheet Successful.")
print(f"Read of Google Sheet Successful.")
return df

except Exception as e:
print(f"Error accessing Google Sheet: {str(e)}")
Expand Down