PRAISELab-PicusLab · Vamsi-CHVVK · Jun 12, 2026
diff --git a/ETL_Execution_Evidence.ipynb b/ETL_Execution_Evidence.ipynb
@@ -0,0 +1,85 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# ETL Pipeline Execution Evidence\n",
+    "This notebook demonstrates the execution of the custom ETL pipeline retrieving data from the OpenAlex API, standardizing it, validating it, and preparing it for the Bibliometrix dashboard."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "# Ensure local modules can be imported\n",
+    "sys.path.append(os.path.abspath(\".\"))\n",
+    "\n",
+    "from www.services.etl import ETLPipeline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Live Query Execution via API"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query = \"machine learning\"\n",
+    "print(f\"Executing live API query to OpenAlex for: {query}...\")\n",
+    "df_standardized = ETLPipeline.convert2df(source_data=\"API\", source_type=\"OpenAlex\", is_api=True, query=query)\n",
+    "print(f\"\\nSuccessfully retrieved and standardized {len(df_standardized)} records.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Validation and Normalized Output\n",
+    "Displaying the first 5 normalized rows demonstrating standard Web of Science columns (e.g., UT, TI, CR, PY)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.set_option(\"display.max_columns\", None)\n",
+    "df_standardized.head(5)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/app.py b/app.py
diff --git a/execution_log.txt b/execution_log.txt
diff --git a/functions/get_data.py b/functions/get_data.py
@@ -42,21 +42,24 @@ def get_data(input, database, df, reset_callback=None):
             else:
                 # Process single file (original logic)
                 type = file[0]["name"]
-                json = biblio_json(file[0]["datapath"], source, type, author)
-                df.set(pd.read_json(StringIO(json)))
+
+                # Base level: load the upload through ETLPipeline.convert2df instead of the legacy 'biblio_json' parser. NOTE(review): 'author' is no longer forwarded; confirm this is intended.
+                clean_df = ETLPipeline.convert2df(source_data=file[0]["datapath"], source_type=source, is_api=False, original_filename=type)
+                df.set(clean_df)
+
                 # Reset all analysis results when new dataset is loaded
                 if reset_callback:
                     reset_callback()
 
                 if type.endswith(".zip"):
                     text = ui.p(
-                        f"{database}'s ZIP archive uploaded and extracted successfully! "
+                        f"{database}'s ZIP archive uploaded, extracted, and Standardized successfully! "
                         f"Multiple files have been processed and combined. "
                         f"The dataset contains {df.get().shape[0]} rows and {df.get().shape[1]} columns."
                     )
                 else:
                     text = ui.p(
-                        f"{database}'s file uploaded successfully! You can now proceed to analyze your data. "
+                        f"{database}'s file uploaded and Standardized successfully! You can now proceed to analyze your data. "
                         f"The dataset contains {df.get().shape[0]} rows and {df.get().shape[1]} columns."
                     )
         except Exception as e:

diff --git a/generate_notebook.py b/generate_notebook.py
@@ -0,0 +1,91 @@
+import json
+
+notebook = {  # in-memory notebook document; serialized to ETL_Execution_Evidence.ipynb at the end of this script
+ 'cells': [
+  {
+   'cell_type': 'markdown',
+   'metadata': {},
+   'source': [
+    '# ETL Pipeline Execution Evidence\n',
+    'This notebook demonstrates the execution of the custom ETL pipeline retrieving data from the OpenAlex API, standardizing it, validating it, and preparing it for the Bibliometrix dashboard.'
+   ]
+  },
+  {
+   'cell_type': 'code',
+   'execution_count': None,
+   'metadata': {},
+   'outputs': [],
+   'source': [
+    'import sys\n',
+    'import os\n',
+    'import pandas as pd\n',
+    '# Ensure local modules can be imported\n',
+    'sys.path.append(os.path.abspath("."))\n',  # assumes the notebook is launched from the repository root so 'www' is importable; TODO confirm
+    '\n',
+    'from www.services.etl import ETLPipeline'
+   ]
+  },
+  {
+   'cell_type': 'markdown',
+   'metadata': {},
+   'source': [
+    '## Live Query Execution via API'
+   ]
+  },
+  {
+   'cell_type': 'code',
+   'execution_count': None,
+   'metadata': {},
+   'outputs': [],
+   'source': [
+    'query = "machine learning"\n',
+    'print(f"Executing live API query to OpenAlex for: {query}...")\n',
+    'df_standardized = ETLPipeline.convert2df(source_data="API", source_type="OpenAlex", is_api=True, query=query)\n',  # presumably performs a network request when is_api=True; verify against ETLPipeline
+    'print(f"\\nSuccessfully retrieved and standardized {len(df_standardized)} records.")'
+   ]
+  },
+  {
+   'cell_type': 'markdown',
+   'metadata': {},
+   'source': [
+    '## Validation and Normalized Output\n',
+    'Displaying the first 5 normalized rows demonstrating standard Web of Science columns (e.g., UT, TI, CR, PY).'
+   ]
+  },
+  {
+   'cell_type': 'code',
+   'execution_count': None,
+   'metadata': {},
+   'outputs': [],
+   'source': [
+    'pd.set_option("display.max_columns", None)\n',
+    'df_standardized.head(5)'
+   ]
+  }
+ ],
+ 'metadata': {
+  'kernelspec': {
+   'display_name': 'Python 3',
+   'language': 'python',
+   'name': 'python3'
+  },
+  'language_info': {
+   'codemirror_mode': {
+    'name': 'ipython',
+    'version': 3
+   },
+   'file_extension': '.py',
+   'mimetype': 'text/x-python',
+   'name': 'python',
+   'nbconvert_exporter': 'python',
+   'pygments_lexer': 'ipython3',
+   'version': '3.12.0'
+  }
+ },
+ 'nbformat': 4,  # with nbformat_minor below: notebook format 4.4
+ 'nbformat_minor': 4
+}
+
+with open('ETL_Execution_Evidence.ipynb', 'w', encoding='utf-8') as f:  # JSON text is UTF-8 (RFC 8259); do not depend on the locale default encoding
+    json.dump(notebook, f, indent=1)  # serialize the notebook dict with one-space indentation
+print('Notebook created successfully.')
diff --git a/run.bat b/run.bat
@@ -0,0 +1,7 @@
+@echo off
+rem Run from this script's own folder so app.py resolves no matter where the script is launched from.
+cd /d "%~dp0"
+echo Starting Biblioshiny Dashboard...
+echo Your web browser will open automatically.
+python -m shiny run --launch-browser app.py
+pause