From b8d9d281b5d6a1ecb3b9a1a395842548923b70f8 Mon Sep 17 00:00:00 2001
From: Vamsi-CHVVK <vamsi.chvvk@gmail.com>
Date: Fri, 12 Jun 2026 16:04:26 +0200
Subject: [PATCH] Add API ETL pipeline (OpenAlex/PubMed) and API data extraction panel

---
 ETL_Execution_Evidence.ipynb  |  85 +++++++++++
 app.py                        | 276 ++++++++++++++++++++--------------
 execution_log.txt             | Bin 0 -> 1762 bytes
 functions/get_data.py         |  11 +-
 generate_notebook.py          |  91 +++++++++++
 run.bat                       |   5 +
 standardized_api_data.csv     | 101 +++++++++++++
 test_etl.py                   |  85 +++++++++++
 test_histnetwork.py           |  35 +++++
 test_perf.py                  |  28 ++++
 www/services/__init__.py      |   6 +-
 www/services/api_retriever.py | 125 +++++++++++++++
 www/services/etl.py           | 128 ++++++++++++++++
 www/services/histnetwork.py   | 112 ++++++++------
 www/services/standardizer.py  | 177 ++++++++++++++++++++++
 www/services/validator.py     |  67 +++++++++
 16 files changed, 1168 insertions(+), 164 deletions(-)
 create mode 100644 ETL_Execution_Evidence.ipynb
 create mode 100644 execution_log.txt
 create mode 100644 generate_notebook.py
 create mode 100644 run.bat
 create mode 100644 standardized_api_data.csv
 create mode 100644 test_etl.py
 create mode 100644 test_histnetwork.py
 create mode 100644 test_perf.py
 create mode 100644 www/services/api_retriever.py
 create mode 100644 www/services/etl.py
 create mode 100644 www/services/standardizer.py
 create mode 100644 www/services/validator.py

diff --git a/ETL_Execution_Evidence.ipynb b/ETL_Execution_Evidence.ipynb
new file mode 100644
index 000000000..b09a4c722
--- /dev/null
+++ b/ETL_Execution_Evidence.ipynb
@@ -0,0 +1,85 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# ETL Pipeline Execution Evidence\n",
+    "This notebook demonstrates the execution of the custom ETL pipeline retrieving data from the OpenAlex API, standardizing it, validating it, and preparing it for the Bibliometrix dashboard."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "# Ensure local modules can be imported\n",
+    "sys.path.append(os.path.abspath(\".\"))\n",
+    "\n",
+    "from www.services.etl import ETLPipeline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Live Query Execution via API"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query = \"machine learning\"\n",
+    "print(f\"Executing live API query to OpenAlex for: {query}...\")\n",
+    "df_standardized = ETLPipeline.convert2df(source_data=\"API\", source_type=\"OpenAlex\", is_api=True, query=query)\n",
+    "print(f\"\\nSuccessfully retrieved and standardized {len(df_standardized)} records.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Validation and Normalized Output\n",
+    "Display the first 5 normalized rows to show the standard Web of Science field tags (e.g., UT, TI, CR, PY)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.set_option(\"display.max_columns\", None)\n",
+    "df_standardized.head(5)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file
diff --git a/app.py b/app.py
index f0891f894..c5f7226b6 100644
--- a/app.py
+++ b/app.py
@@ -854,8 +854,66 @@ def indicator_types_ui_all():
                 ),
 
         with ui.nav_panel("None", value="API"):
-            ui.h3("🚧 Warning: API is under construction 🚧")
-        
+            ui.h3("🌐 API Data Extraction", style="color: #5567BB;")
+            ui.p("Fetch data directly from Open-Access APIs (OpenAlex or PubMed) to instantly begin your bibliometric analysis.")
+            
+            with ui.layout_sidebar(fillable=False, fill=False):
+                with ui.sidebar(id="sidebar_api", position="right"):
+                    ui.h4("API Options", style="color: #5567BB;")
+                    ui.input_select("api_source", "Select API Platform", choices={"OpenAlex": "OpenAlex", "PubMed": "PubMed"}, selected="OpenAlex")
+                    ui.input_text("api_query", "Search Query (e.g. 'machine learning')", value="")
+                    ui.input_action_button("btn_fetch_api", "Fetch Data", class_="btn-primary", style="margin-top: 10px; background-color: #5567BB; color: white; width: 100%;")
+                
+                with ui.card():
+                    ui.h4("API Extraction Status", style="color: #5567BB;")
+                    @render.express()
+                    def api_status_message():
+                        if input.btn_fetch_api() == 0:
+                            ui.p("Enter a query and click 'Fetch Data' to begin. The data will automatically be standardized to the Web of Science format.")
+                        else:
+                            ui.p(f"Fetched data from {input.api_source()} for query: '{input.api_query()}'")
+                    
+                    @render.data_frame
+                    @reactive.event(input.btn_fetch_api)
+                    def api_preview_table():
+                        if not input.api_query():
+                            return pd.DataFrame([{"Message": "Please enter a search query."}])
+                        
+                        m = ui.modal(
+                            ui.div(
+                                ui.img(src="https://cisslaboral.laleynext.es/Img/loader-circle.gif", height="150px", style="display: block; margin: 0 auto;"),
+                                ui.h4(f"Fetching data from {input.api_source()}...", style="text-align: center;")
+                            ),
+                            easy_close=False, footer=None
+                        )
+                        ui.modal_show(m)
+                        
+                        try:
+                            # Run our ETL pipeline
+                            res_df = ETLPipeline.convert2df(
+                                source_data="API", 
+                                source_type=input.api_source(), 
+                                is_api=True, 
+                                query=input.api_query()
+                            )
+                            # Update global state
+                            df.set(res_df)
+                            
+                            # Serialize to CSV for physical file output
+                            csv_df = res_df.copy()
+                            for col in csv_df.columns:
+                                csv_df[col] = csv_df[col].apply(lambda x: ";".join(str(i) for i in x) if isinstance(x, list) else x)
+                            csv_df.to_csv("standardized_api_data.csv", index=False)
+                            
+                            reset_all_analyses()
+                            ui.modal_remove()
+                            ui.notification_show(f"Successfully extracted {len(res_df)} records! Saved to 'standardized_api_data.csv'.", type="message", duration=5)
+                            
+                            return res_df.head(20)
+                        except Exception as e:
+                            ui.modal_remove()
+                            ui.notification_show(f"Error fetching data: {str(e)}", type="error", duration=10)
+                            return pd.DataFrame([{"Error": str(e)}])
         with ui.nav_panel("None", value="collections"):
             ui.h3("🚧 Warning: Merge Collection is under construction 🚧")
 
@@ -2104,9 +2162,7 @@ def sources_production_placeholder():
                                         ui.p("Click the Run Analysis button to generate the sources' production over time visualization.", style="text-align: center; color: #666; font-size: 16px;"),
                                         style="height: 400px; display: flex; flex-direction: column; justify-content: center; align-items: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;"
                                     )
-                                # Render the widget directly when result is available
-                                plot_sources_production, _ = result
-                                return plot_sources_production
+                                return None  # Hide placeholder when data is available
                             
                             @render_widget  
                             def show_sources_production():
@@ -2251,9 +2307,7 @@ def relevant_authors_placeholder():
                                         ui.p("Click the Run Analysis button to generate the most relevant authors visualization.", style="text-align: center; color: #666; font-size: 16px;"),
                                         style="height: 400px; display: flex; flex-direction: column; justify-content: center; align-items: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;"
                                     )
-                                # Render the widget directly when result is available
-                                plot_relevant_authors, _ = result
-                                return plot_relevant_authors
+                                return None  # Hide placeholder when data is available
                             
                             @render_widget
                             def show_relevant_authors():
@@ -2399,9 +2453,7 @@ def local_cited_authors_placeholder():
                                         ui.p("Click the Run Analysis button to generate the most local cited authors visualization.", style="text-align: center; color: #666; font-size: 16px;"),
                                         style="height: 400px; display: flex; flex-direction: column; justify-content: center; align-items: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;"
                                     )
-                                # Render the widget directly when result is available
-                                plot_local_cited_authors, _ = result
-                                return plot_local_cited_authors
+                                return None  # Hide placeholder when data is available
                             
                             @render_widget
                             def show_local_cited_authors():
@@ -2544,9 +2596,7 @@ def authors_production_placeholder():
                                         ui.p("Click the Run Analysis button to generate the authors' production over time visualization.", style="text-align: center; color: #666; font-size: 16px;"),
                                         style="height: 400px; display: flex; flex-direction: column; justify-content: center; align-items: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;"
                                     )
-                                # Render the widget directly when result is available
-                                plot_authors_production, _, _ = result
-                                return plot_authors_production
+                                return None  # Hide placeholder when data is available
                             
                             @render_widget
                             def show_authors_production():
@@ -2861,9 +2911,7 @@ def authors_local_impact_placeholder():
                                         ui.p("Click the Run Analysis button to generate the authors' local impact visualization.", style="text-align: center; color: #666; font-size: 16px;"),
                                         style="height: 400px; display: flex; flex-direction: column; justify-content: center; align-items: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;"
                                     )
-                                # Render the widget directly when result is available
-                                plot_authors_local_impact, _ = result
-                                return plot_authors_local_impact
+                                return None  # Hide placeholder when data is available
                             
                             @render_widget
                             def show_authors_local_impact():
@@ -3008,9 +3056,7 @@ def relevant_affiliations_placeholder():
                                         ui.p("Click the Run Analysis button to generate the most relevant affiliations visualization.", style="text-align: center; color: #666; font-size: 16px;"),
                                         style="height: 400px; display: flex; flex-direction: column; justify-content: center; align-items: center; border: 2px dashed #ddd; border-radius: 10px; margin: 20px;"
                                     )
-                                # Render the widget directly when result is available
-                                plot_relevant_affiliations, _ = result
-                                return plot_relevant_affiliations
+                                return None  # Hide placeholder when data is available
                             
                             @render_widget
                             def show_relevant_affiliations():
@@ -8185,99 +8231,101 @@ def update_plot_settings():
 
 # --- Sidebar Management ---
 @render.express()
-@reactive.event(input.start_button)
 def toggle_sidebar():
-    with ui.tags.div(id="sidebar_2", class_="custom-sidebar"):
-        with ui.accordion(id="sidebar_accordion_data", multiple=False, open=False):
-            # Info Section
-            with ui.accordion_panel("Biblioshiny", icon=ICONS["home_colored"]):
-                ui.input_action_button("go_about_2", "Biblioshiny", class_="sidebar-button", icon=ICONS["home"])
-            # Data Section
-            with ui.accordion_panel("Data", icon=ICONS["database_colored"]):
-                ui.input_action_button("go_import_2", "Import or Load", class_="sidebar-button", icon=ICONS["data"])
-                ui.input_action_button("go_api_2", "API", class_="sidebar-button", icon=ICONS["api"])
-                ui.input_action_button("go_collections_2", "Merge Collection", class_="sidebar-button", icon=ICONS["merge"])
-
-            # Filters Section
-            with ui.accordion_panel("Filters", icon=ICONS["filters_colored"]):
-                ui.input_action_button("go_filters", "Filters", class_="sidebar-button", icon=ICONS["filters"])
-
-            # Analysis Section
-            with ui.accordion_panel("Overview", icon=ICONS["play_colored"]):
-                ui.input_action_button("go_main", "Main Information", class_="sidebar-button", icon=ICONS["overview"])
-                ui.input_action_button("go_annual_scientific_production", "Annual Scientific Production", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
-                ui.input_action_button("go_average_citations_per_year", "Average Citations per Year", class_="sidebar-button", icon=ICONS["average_citations_per_doc"])
-                ui.input_action_button("go_three_field_plot", "Three-Field Plot", class_="sidebar-button", icon=ICONS["overview"])
-            with ui.accordion_panel("Sources", icon=ICONS["sources_colored"]):
-                ui.input_action_button("go_most_relevant_sources", "Most Relevant Sources", class_="sidebar-button", icon=ICONS["book_open"] if "book_open" in ICONS else ICONS["sources"]),
-                ui.input_action_button("go_most_local_cited_sources", "Most Local Cited Sources", class_="sidebar-button", icon=ICONS["book"] if "book" in ICONS else ICONS["sources"]),
-                ui.input_action_button("go_bradfords_law", "Bradford's Law", class_="sidebar-button", icon=ICONS["annual_growth_rate"]),
-                ui.input_action_button("go_sources_local_impact", "Sources' Local Impact", class_="sidebar-button", icon=ICONS["star"] if "star" in ICONS else ICONS["sources"]),
-                ui.input_action_button("go_sources_production_over_time", "Sources' Production over Time", class_="sidebar-button", icon=ICONS["calendar"] if "calendar" in ICONS else ICONS["timespan"]),
-            with ui.accordion_panel("Authors", icon=ICONS["authors_colored"]):
-                # Authors Section
-                ui.span("Authors", style="color: gray;")
-                ui.input_action_button("go_most_relevant_authors", "Most Relevant Authors", class_="sidebar-button", icon=ICONS["authors"])
-                ui.input_action_button("go_most_local_cited_authors", "Most Local Cited Authors", class_="sidebar-button", icon=ICONS["authors_single_authored_docs"])
-                ui.input_action_button("go_authors_production_over_time", "Authors' Production over Time", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
-                ui.input_action_button("go_lotkas_law", "Lotka's Law", class_="sidebar-button", icon=ICONS["overview"])
-                ui.input_action_button("go_authors_local_impact", "Authors' Local Impact", class_="sidebar-button", icon=ICONS["star"] if "star" in ICONS else ICONS["authors"])
-                # Affiliations Section
-                ui.span("Affiliations", style="color: gray;")
-                ui.input_action_button("go_most_relevant_affiliations", "Most Relevant Affiliations", class_="sidebar-button", icon=ICONS["database"])
-                ui.input_action_button("go_affiliations_production_over_time", "Affiliations' Production over Time", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
-                # Countries Section
-                ui.span("Countries", style="color: gray;")
-                ui.input_action_button("go_corresponding_authors_countries", "Corresponding Author's Countries", class_="sidebar-button", icon=ICONS["international_co_authorship"])
-                ui.input_action_button("go_countries_scientific_production", "Countries' Scientific Production", class_="sidebar-button", icon=ICONS["international_co_authorship"])
-                ui.input_action_button("go_countries_production_over_time", "Countries' Production over Time", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
-                ui.input_action_button("go_most_cited_countries", "Most Cited Countries", class_="sidebar-button", icon=ICONS["book"])
-            with ui.accordion_panel("Documents", icon=ICONS["documents_colored"]):
-                # Documents Section
-                ui.span("Documents", style="color: gray;")
-                ui.input_action_button("go_most_global_cited_documents", "Most Global Cited Documents", class_="sidebar-button", icon=ICONS["documents"])
-                ui.input_action_button("go_most_local_cited_documents", "Most Local Cited Documents", class_="sidebar-button", icon=ICONS["documents"])
-
-                # Cited References Section
-                ui.span("Cited References", style="color: gray;")
-                ui.input_action_button("go_most_local_cited_references", "Most Local Cited References", class_="sidebar-button", icon=ICONS["references"])
-                ui.input_action_button("go_references_spectroscopy", "References Spectroscopy", class_="sidebar-button", icon=ICONS["references"])
-
-                # Words Section
-                ui.span("Words", style="color: gray;")
-                ui.input_action_button("go_most_frequent_words", "Most Frequent Words", class_="sidebar-button", icon=ICONS["authors_keywords_de"])
-                ui.input_action_button("go_wordcloud", "WordCloud", class_="sidebar-button", icon=ICONS["authors_keywords_de"])
-                ui.input_action_button("go_treemap", "TreeMap", class_="sidebar-button", icon=ICONS["overview"])
-                ui.input_action_button("go_words_frequency_over_time", "Words' Frequency over Time", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
-                ui.input_action_button("go_trend_topics", "Trend Topics", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
-
-            with ui.accordion_panel("Clustering", icon=ICONS["clustering_colored"]):
-                ui.input_action_button("go_clustering", "Clustering", class_="sidebar-button", icon=ICONS["clustering"])
-            
-            with ui.accordion_panel("Conceptual Structure", icon=ICONS["conceptual_structure_colored"]):
-                ui.span("Network Approach", style="color: gray;")
-                ui.input_action_button("go_cooccurrence_network", "Co-occurrence Network", class_="sidebar-button", icon=ICONS["clustering"])
-                ui.input_action_button("go_thematic_map", "Thematic Map", class_="sidebar-button", icon=ICONS["overview"])
-                ui.input_action_button("go_thematic_evolution", "Thematic Evolution", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
-
-                ui.span("Factorial Approach", style="color: gray;")
-                ui.input_action_button("go_factorial_analysis", "Factorial Analysis", class_="sidebar-button", icon=ICONS["overview"])
-
-            with ui.accordion_panel("Intellectual Structure", icon=ICONS["intellectual_structure_colored"]):
-                ui.input_action_button("go_citation_network", "Citation Network", class_="sidebar-button", icon=ICONS["references"])
-                ui.input_action_button("historiograph", "Historiograph", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
-
-            with ui.accordion_panel("Social Structure", icon=ICONS["social_structure_colored"]):
-                ui.input_action_button("go_collaboration_network", "Collaboration Network", class_="sidebar-button", icon=ICONS["co_authors_per_doc"])
-                ui.input_action_button("go_countries_collaboration_network", "Countries Collaboration Network", class_="sidebar-button", icon=ICONS["international_co_authorship"])
-
-            with ui.accordion_panel("Report", icon=ICONS["report_colored"]):
-                ui.input_action_button("go_report", "Report", class_="sidebar-button", icon=ICONS["report"])
-            with ui.accordion_panel("Settings", icon=ICONS["settings_colored"]):
-                ui.input_action_button("go_settings", "Settings", class_="sidebar-button", icon=ICONS["settings"])
-
-        # --- Footer ---
-        # Use static positioning and margin-top to avoid overlap with accordion content
+    data = df.get()
+    if data is not None and not data.empty:
+        ui.tags.script("setTimeout(function() { if(typeof setSidebarState === 'function') setSidebarState(true); }, 50);")
+        with ui.tags.div(id="sidebar_2", class_="custom-sidebar"):
+            with ui.accordion(id="sidebar_accordion_data", multiple=False, open=False):
+                # Info Section
+                with ui.accordion_panel("Biblioshiny", icon=ICONS["home_colored"]):
+                    ui.input_action_button("go_about_2", "Biblioshiny", class_="sidebar-button", icon=ICONS["home"])
+                # Data Section
+                with ui.accordion_panel("Data", icon=ICONS["database_colored"]):
+                    ui.input_action_button("go_import_2", "Import or Load", class_="sidebar-button", icon=ICONS["data"])
+                    ui.input_action_button("go_api_2", "API", class_="sidebar-button", icon=ICONS["api"])
+                    ui.input_action_button("go_collections_2", "Merge Collection", class_="sidebar-button", icon=ICONS["merge"])
+    
+                # Filters Section
+                with ui.accordion_panel("Filters", icon=ICONS["filters_colored"]):
+                    ui.input_action_button("go_filters", "Filters", class_="sidebar-button", icon=ICONS["filters"])
+    
+                # Analysis Section
+                with ui.accordion_panel("Overview", icon=ICONS["play_colored"]):
+                    ui.input_action_button("go_main", "Main Information", class_="sidebar-button", icon=ICONS["overview"])
+                    ui.input_action_button("go_annual_scientific_production", "Annual Scientific Production", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
+                    ui.input_action_button("go_average_citations_per_year", "Average Citations per Year", class_="sidebar-button", icon=ICONS["average_citations_per_doc"])
+                    ui.input_action_button("go_three_field_plot", "Three-Field Plot", class_="sidebar-button", icon=ICONS["overview"])
+                with ui.accordion_panel("Sources", icon=ICONS["sources_colored"]):
+                    ui.input_action_button("go_most_relevant_sources", "Most Relevant Sources", class_="sidebar-button", icon=ICONS["book_open"] if "book_open" in ICONS else ICONS["sources"]),
+                    ui.input_action_button("go_most_local_cited_sources", "Most Local Cited Sources", class_="sidebar-button", icon=ICONS["book"] if "book" in ICONS else ICONS["sources"]),
+                    ui.input_action_button("go_bradfords_law", "Bradford's Law", class_="sidebar-button", icon=ICONS["annual_growth_rate"]),
+                    ui.input_action_button("go_sources_local_impact", "Sources' Local Impact", class_="sidebar-button", icon=ICONS["star"] if "star" in ICONS else ICONS["sources"]),
+                    ui.input_action_button("go_sources_production_over_time", "Sources' Production over Time", class_="sidebar-button", icon=ICONS["calendar"] if "calendar" in ICONS else ICONS["timespan"]),
+                with ui.accordion_panel("Authors", icon=ICONS["authors_colored"]):
+                    # Authors Section
+                    ui.span("Authors", style="color: gray;")
+                    ui.input_action_button("go_most_relevant_authors", "Most Relevant Authors", class_="sidebar-button", icon=ICONS["authors"])
+                    ui.input_action_button("go_most_local_cited_authors", "Most Local Cited Authors", class_="sidebar-button", icon=ICONS["authors_single_authored_docs"])
+                    ui.input_action_button("go_authors_production_over_time", "Authors' Production over Time", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
+                    ui.input_action_button("go_lotkas_law", "Lotka's Law", class_="sidebar-button", icon=ICONS["overview"])
+                    ui.input_action_button("go_authors_local_impact", "Authors' Local Impact", class_="sidebar-button", icon=ICONS["star"] if "star" in ICONS else ICONS["authors"])
+                    # Affiliations Section
+                    ui.span("Affiliations", style="color: gray;")
+                    ui.input_action_button("go_most_relevant_affiliations", "Most Relevant Affiliations", class_="sidebar-button", icon=ICONS["database"])
+                    ui.input_action_button("go_affiliations_production_over_time", "Affiliations' Production over Time", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
+                    # Countries Section
+                    ui.span("Countries", style="color: gray;")
+                    ui.input_action_button("go_corresponding_authors_countries", "Corresponding Author's Countries", class_="sidebar-button", icon=ICONS["international_co_authorship"])
+                    ui.input_action_button("go_countries_scientific_production", "Countries' Scientific Production", class_="sidebar-button", icon=ICONS["international_co_authorship"])
+                    ui.input_action_button("go_countries_production_over_time", "Countries' Production over Time", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
+                    ui.input_action_button("go_most_cited_countries", "Most Cited Countries", class_="sidebar-button", icon=ICONS["book"])
+                with ui.accordion_panel("Documents", icon=ICONS["documents_colored"]):
+                    # Documents Section
+                    ui.span("Documents", style="color: gray;")
+                    ui.input_action_button("go_most_global_cited_documents", "Most Global Cited Documents", class_="sidebar-button", icon=ICONS["documents"])
+                    ui.input_action_button("go_most_local_cited_documents", "Most Local Cited Documents", class_="sidebar-button", icon=ICONS["documents"])
+    
+                    # Cited References Section
+                    ui.span("Cited References", style="color: gray;")
+                    ui.input_action_button("go_most_local_cited_references", "Most Local Cited References", class_="sidebar-button", icon=ICONS["references"])
+                    ui.input_action_button("go_references_spectroscopy", "References Spectroscopy", class_="sidebar-button", icon=ICONS["references"])
+    
+                    # Words Section
+                    ui.span("Words", style="color: gray;")
+                    ui.input_action_button("go_most_frequent_words", "Most Frequent Words", class_="sidebar-button", icon=ICONS["authors_keywords_de"])
+                    ui.input_action_button("go_wordcloud", "WordCloud", class_="sidebar-button", icon=ICONS["authors_keywords_de"])
+                    ui.input_action_button("go_treemap", "TreeMap", class_="sidebar-button", icon=ICONS["overview"])
+                    ui.input_action_button("go_words_frequency_over_time", "Words' Frequency over Time", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
+                    ui.input_action_button("go_trend_topics", "Trend Topics", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
+    
+                with ui.accordion_panel("Clustering", icon=ICONS["clustering_colored"]):
+                    ui.input_action_button("go_clustering", "Clustering", class_="sidebar-button", icon=ICONS["clustering"])
+                
+                with ui.accordion_panel("Conceptual Structure", icon=ICONS["conceptual_structure_colored"]):
+                    ui.span("Network Approach", style="color: gray;")
+                    ui.input_action_button("go_cooccurrence_network", "Co-occurrence Network", class_="sidebar-button", icon=ICONS["clustering"])
+                    ui.input_action_button("go_thematic_map", "Thematic Map", class_="sidebar-button", icon=ICONS["overview"])
+                    ui.input_action_button("go_thematic_evolution", "Thematic Evolution", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
+    
+                    ui.span("Factorial Approach", style="color: gray;")
+                    ui.input_action_button("go_factorial_analysis", "Factorial Analysis", class_="sidebar-button", icon=ICONS["overview"])
+    
+                with ui.accordion_panel("Intellectual Structure", icon=ICONS["intellectual_structure_colored"]):
+                    ui.input_action_button("go_citation_network", "Citation Network", class_="sidebar-button", icon=ICONS["references"])
+                    ui.input_action_button("historiograph", "Historiograph", class_="sidebar-button", icon=ICONS["annual_growth_rate"])
+    
+                with ui.accordion_panel("Social Structure", icon=ICONS["social_structure_colored"]):
+                    ui.input_action_button("go_collaboration_network", "Collaboration Network", class_="sidebar-button", icon=ICONS["co_authors_per_doc"])
+                    ui.input_action_button("go_countries_collaboration_network", "Countries Collaboration Network", class_="sidebar-button", icon=ICONS["international_co_authorship"])
+    
+                with ui.accordion_panel("Report", icon=ICONS["report_colored"]):
+                    ui.input_action_button("go_report", "Report", class_="sidebar-button", icon=ICONS["report"])
+                with ui.accordion_panel("Settings", icon=ICONS["settings_colored"]):
+                    ui.input_action_button("go_settings", "Settings", class_="sidebar-button", icon=ICONS["settings"])
+    
+            # --- Footer ---
+            # Use static positioning and margin-top to avoid overlap with accordion content
         with ui.tags.footer(
             class_="custom-footer",
             style=(
@@ -8344,9 +8392,9 @@ def toggle_sidebar():
     });
     observer.observe(document.body, { childList: true, subtree: true });
 
-    // Show both sidebars when 'start_button' is clicked
+    // Show both sidebars when 'start_button' or 'btn_fetch_api' is clicked
     document.addEventListener("click", function(e) {
-        if (e.target && e.target.id === "start_button") {
+        if (e.target && (e.target.id === "start_button" || e.target.id === "btn_fetch_api")) {
             setSidebarState(true);
         }
     });
diff --git a/execution_log.txt b/execution_log.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dbfcb0a5c9634b2e1c780e0339ea35b9d4558b47
GIT binary patch
literal 1762
zcmchYPmdBo5XI|k;&&KNT$DKIUQ9dy17<gCM8Gx17%~fkYyyKafZ~T&f3KUK86+D$
zna(WLRn@Ov{psFcKVO<wWMj*$u&v!$ZlBrr&|LF7K=TFL(pfSz-;lPpKzidWxwWxo
z_8m<d|4V*7>O*^AZ*7NdV}}y?{M6jmc)Z|T!&a8s6m;=WG$E&0^YZ*H(yimO#yf+{
z2F(=q4QslkxFh0DS<0%gIlCq60HT=cjN)wB&CsRb>sJ-<vSQA_{c-e&?I|lln{iHj
zXLvVFvAXP+L|B1RxI8|>C9w{FA9JQ*@2F;w6DkzsfxTvzl>2ebHln`4@2aLB*oRV|
z)Nrq<e()c|DadW2S8~6UyB3%7kiG?<t_@2$VGe@6&a++1?S?t2d`6|*sbThN^2p(T
za+MoV?G;m`E=N>x&R^LhSL0AqO@=Dk$4hglu14(C!`$r({-)>>EUI{mUg?v3iBDuV
zoZAy2Fnxu*riVgYV!vXyaGvk!YK0LyS=Jr#a-zuFGwpDCfo2c3Cc@joEcKW(-5gD)
z<}`P+rPF;hLh{@Lqc&Lb(;SW*?#i7^YiwFh9^zH-X-0KdB8TTQ)kJ%qy36a5aR>f|
z^Yb^Nn)yH3_|9mqpSX4YHnfkZ2QpGOeBON`D5DU!fR{%QR!mhRUKZWgiOW65UneJ2
zX#>+4`|9+4H~Ep9o7M@M?xZGY?{Gc-jyMa~72Q&;^H;%RJv+gg!zHt?aPb|R@ZMFo
zJ>bL+O>gFgp808A@hbVgh6?z;Y&oxY?2h_x`J&@gL}a`sW4#+bme=@qPe>6H+~o61
zuakd!^i~!3=JWg{pTK?!1I_RZYsljN@sI6-Z2pb^#R-4iE>$^ovooPy`tOtS`RV>!
GcYgz$a}t^W

literal 0
HcmV?d00001

diff --git a/functions/get_data.py b/functions/get_data.py
index 16baed992..a1cb761db 100644
--- a/functions/get_data.py
+++ b/functions/get_data.py
@@ -42,21 +42,24 @@ def get_data(input, database, df, reset_callback=None):
             else:
                 # Process single file (original logic)
                 type = file[0]["name"]
-                json = biblio_json(file[0]["datapath"], source, type, author)
-                df.set(pd.read_json(StringIO(json)))
+                
+                # Base Level: Bypass the fragile legacy 'biblio_json' parser and directly use our ETL Pipeline!
+                clean_df = ETLPipeline.convert2df(source_data=file[0]["datapath"], source_type=source, is_api=False, original_filename=type)
+                df.set(clean_df)
+                
                 # Reset all analysis results when new dataset is loaded
                 if reset_callback:
                     reset_callback()
                 
                 if type.endswith(".zip"):
                     text = ui.p(
-                        f"{database}'s ZIP archive uploaded and extracted successfully! "
+                        f"{database}'s ZIP archive uploaded, extracted, and Standardized successfully! "
                         f"Multiple files have been processed and combined. "
                         f"The dataset contains {df.get().shape[0]} rows and {df.get().shape[1]} columns."
                     )
                 else:
                     text = ui.p(
-                        f"{database}'s file uploaded successfully! You can now proceed to analyze your data. "
+                        f"{database}'s file uploaded and Standardized successfully! You can now proceed to analyze your data. "
                         f"The dataset contains {df.get().shape[0]} rows and {df.get().shape[1]} columns."
                     )
         except Exception as e:
diff --git a/generate_notebook.py b/generate_notebook.py
new file mode 100644
index 000000000..04f19d98e
--- /dev/null
+++ b/generate_notebook.py
@@ -0,0 +1,91 @@
+import json
+
+notebook = {
+ 'cells': [
+  {
+   'cell_type': 'markdown',
+   'metadata': {},
+   'source': [
+    '# ETL Pipeline Execution Evidence\n',
+    'This notebook demonstrates the execution of the custom ETL pipeline retrieving data from the OpenAlex API, standardizing it, validating it, and preparing it for the Bibliometrix dashboard.'
+   ]
+  },
+  {
+   'cell_type': 'code',
+   'execution_count': None,
+   'metadata': {},
+   'outputs': [],
+   'source': [
+    'import sys\n',
+    'import os\n',
+    'import pandas as pd\n',
+    '# Ensure local modules can be imported\n',
+    'sys.path.append(os.path.abspath("."))\n',
+    '\n',
+    'from www.services.etl import ETLPipeline'
+   ]
+  },
+  {
+   'cell_type': 'markdown',
+   'metadata': {},
+   'source': [
+    '## Live Query Execution via API'
+   ]
+  },
+  {
+   'cell_type': 'code',
+   'execution_count': None,
+   'metadata': {},
+   'outputs': [],
+   'source': [
+    'query = "machine learning"\n',
+    'print(f"Executing live API query to OpenAlex for: {query}...")\n',
+    'df_standardized = ETLPipeline.convert2df(source_data="API", source_type="OpenAlex", is_api=True, query=query)\n',
+    'print(f"\\nSuccessfully retrieved and standardized {len(df_standardized)} records.")'
+   ]
+  },
+  {
+   'cell_type': 'markdown',
+   'metadata': {},
+   'source': [
+    '## Validation and Normalized Output\n',
+    'Displaying the first 5 normalized rows demonstrating standard Web of Science columns (e.g., UT, TI, CR, PY).'
+   ]
+  },
+  {
+   'cell_type': 'code',
+   'execution_count': None,
+   'metadata': {},
+   'outputs': [],
+   'source': [
+    'pd.set_option("display.max_columns", None)\n',
+    'df_standardized.head(5)'
+   ]
+  }
+ ],
+ 'metadata': {
+  'kernelspec': {
+   'display_name': 'Python 3',
+   'language': 'python',
+   'name': 'python3'
+  },
+  'language_info': {
+   'codemirror_mode': {
+    'name': 'ipython',
+    'version': 3
+   },
+   'file_extension': '.py',
+   'mimetype': 'text/x-python',
+   'name': 'python',
+   'nbconvert_exporter': 'python',
+   'pygments_lexer': 'ipython3',
+   'version': '3.12.0'
+  }
+ },
+ 'nbformat': 4,
+ 'nbformat_minor': 4
+}
+
+with open('ETL_Execution_Evidence.ipynb', 'w') as f:
+    json.dump(notebook, f, indent=1)
+print('Notebook created successfully.')
diff --git a/run.bat b/run.bat
new file mode 100644
index 000000000..ce6be75aa
--- /dev/null
+++ b/run.bat
@@ -0,0 +1,5 @@
+@echo off & rem Launch the dashboard from this script's own folder so app.py resolves from any working directory.
+echo Starting Biblioshiny Dashboard...
+echo Your web browser will open automatically.
+pushd "%~dp0" && python -m shiny run --launch-browser app.py
+pause
diff --git a/standardized_api_data.csv b/standardized_api_data.csv
new file mode 100644
index 000000000..8e0c38a96
--- /dev/null
+++ b/standardized_api_data.csv
@@ -0,0 +1,101 @@
+DB,UT,DI,PMID,TI,SO,JI,PY,DT,LA,TC,AU,AF,C1,RP,CR,DE,ID,AB,VL,IS,BP,EP,SR
+PUBMED,,,42221614,The Epi training kit pilot: an inclusive Spanish-language e-learning approach to epidemiology and data science in Latin America and the Caribbean.,Front Public Health,,2026,Journal Article,eng,0,Gómez-Bermeo L;Velasco-España JM;Buitrago-López A;Gómez-Millán G;Cucunubá ZM,,,,,,,,14,,1745984,,"L Gómez-Bermeo, 2026, Front Public Health"
+PUBMED,,,42122050,Integrative Multidimensional Machine Learning Models for Stroke Prognosis: Age-Stratified and History Engineered Perspectives.,Diagnostics (Basel),,2026,Journal Article,eng,0,Lee G;Kwon S;Shin SH;Kim C;Yu JY,,,,,,,,16,9,,,"G Lee, 2026, Diagnostics (Basel)"
+PUBMED,,,42057274,A Generalized Outlier Method with an Automated Test Protocol Applied to Proficiency Testing Schemes: Utilizing Monte Carlo Simulation for Generating Critical Values for the Extended Grubbs Statistic.,J AOAC Int,,2026,Journal Article,eng,0,Nilsson T;Langer Sigaard S,,,,,,,,,,,,"T Nilsson, 2026, J AOAC Int"
+PUBMED,,,42038418,"Sparse regression, classification, and microbial network estimation in QIIME 2 with q2-classo and q2-gglasso.",ArXiv,,2026,Journal Article,eng,0,Vlasovets O;Schaipp F;Simpson L;Bolyen E;Caporaso JG;Müller CL,,,,,,,,,,,,"O Vlasovets, 2026, ArXiv"
+PUBMED,,,41944570,Score Matching for Differential Abundance Testing of Compositional High-Throughput Sequencing Data.,Stat Med,,2026,Journal Article,eng,0,Ostner J;Li H;Müller CL,,,,,,,,45,8-9,e70534,,"J Ostner, 2026, Stat Med"
+PUBMED,,,41883750,Generative AI for climate governance and acceptability-constrained policy design.,NPJ Clim Action,,2026,Journal Article,eng,0,Manivannan A;Spaiser V;Cann TJB;Evans J;Everall JP;Falkenberg M;Garcia D;Guo W;Herzog R;Otto IM;Oswald Y;Pagan N;Pellert M;Pilgrim C;Rodriguez-Pardo C;Sen I;Vezhnevets AS,,,,,,,,5,1,37,,"A Manivannan, 2026, NPJ Clim Action"
+PUBMED,,,41880979,Evaluating quality of care and patient safety with ICD-11: Opportunities for the French National Health Data System (SNDS).,J Epidemiol Popul Health,,2026,Journal Article,eng,0,Boussat B;Boyer L;Quantin C;Southern DA;Ghali WA;Guéant S;Danjou F;Mokaddem Y;Mercier G;Duclos A;“REDSIAM Quality Group”,,,,,,,,74,2,203372,,"B Boussat, 2026, J Epidemiol Popul Health"
+PUBMED,,,41790126,Stability trends of near- and equiatomic (n ⋍ m) Co(n)Mo(m) and Mo(n)Co(m) (n + m = 2-15) subnanoalloys from DFT and K-means clustering.,Phys Chem Chem Phys,,2026,Journal Article,eng,0,de Araújo OGS;Alves AS;Dos Santos Costa M;Andriani KF,,,,,,,,28,12,7494-7503,,"OGS de Araújo, 2026, Phys Chem Chem Phys"
+PUBMED,,,41760682,FAIR m-BIDS: Advancing brain data utilization through multimodal and FAIR principles.,Sci Data,,2026,Journal Article,eng,0,Mirhosseini SM;Naseri H;Siahlou B;Panahi Arasi M;Monazami Eslami S;Safaei AA,,,,,,,,13,1,,,"SM Mirhosseini, 2026, Sci Data"
+PUBMED,,,41758512,"Clinical Pharmacists, Medications, and Contingency Management for Targeting Smoking in HIV Clinics: A Randomized Clinical Trial.",JAMA Netw Open,,2026,Journal Article,eng,0,Edelman EJ;Deng Y;Dziura J;Nahum-Shani I;Weiss JM;Aoun-Barakat L;Bold KW;Harsono D;Mistler C;Payne E;Aiudi S;Sigel KM;Yager JE;Ledgerwood DM;Bernstein SL,,,,,,,,9,2,e2560593,,"EJ Edelman, 2026, JAMA Netw Open"
+PUBMED,,,41700319,Accelerating Catalyst Materials Discovery With Large Artificial Intelligence Models.,Angew Chem Int Ed Engl,,2026,Journal Article,eng,0,Zhang D;Chen Y;Liu C;Liu Y;Xin H;Peng J;Ou P;Li H,,,,,,,,65,16,e26150,,"D Zhang, 2026, Angew Chem Int Ed Engl"
+PUBMED,,,41659587,Epithelial Reprogramming and Transition during Pulmonary Bioengineering.,bioRxiv,,2026,Journal Article,eng,0,Mizoguchi S;Lee V;Kim H;Edelstein SE;Wang N;Gracia MT;Danelski C;Haynes C;Rivero R;Stitelman D;Obata T;Greaney AM;Tsuchiya T;Kyriakides TR;Kaminski N;Raredon MSB,,,,,,,,,,,,"S Mizoguchi, 2026, bioRxiv"
+PUBMED,,,41572614,Extension of Bootstrap MARS With Group LASSO for Heterogeneous Treatment Effect Estimation.,Stat Med,,2026,Journal Article,eng,0,He G;Wan K;Shimokawa T;Maruo K,,,,,,,,45,1-2,e70370,,"G He, 2026, Stat Med"
+PUBMED,,,41526603,Enhancing healthcare workers' safety and well-being through a comprehensive qualitative analysis across hospital settings.,Sci Rep,,2026,Journal Article,eng,0,Foglia E;Ferrario L;Garagiola E,,,,,,,,16,1,5084,,"E Foglia, 2026, Sci Rep"
+PUBMED,,,41428500,ARKbase: Antimicrobial Resistance Knowledgebase1.0.,Nucleic Acids Res,,2026,Journal Article,eng,0,Gambhir S;Pandey S;Bajetha H;Kaur J;Das A;Pranavathiyani G;Aggarwal R;Maity U;Dange S;Singh V;Zarkar M;Sankhdher R;Singh B;Seth S;Bhardwaj A,,,,,,,,54,D1,D703-D714,,"S Gambhir, 2026, Nucleic Acids Res"
+PUBMED,,,41315831,Ad hoc bandwidth requests and power conservation in 5G wireless networks with tiny cells.,Sci Rep,,2025,Journal Article,eng,0,Rajesh A;Ravikumar CV;Sulthana SF;Kim TH;Shankar T;Srinivasulu A;Altameem T,,,,,,,,15,1,42792,,"A Rajesh, 2025, Sci Rep"
+PUBMED,,,41287046,Nipah Virus Inhibitor Knowledgebase (NVIK): a combined evidence approach to prioritise small molecule inhibitors.,J Cheminform,,2025,Journal Article,eng,0,Singh B;Kumari N;Upadhyay A;Pahuja B;Covernton E;Kalia K;Tuteja K;Paul PR;Kumar R;Zarkar MS;Bhardwaj A,,,,,,,,17,1,174,,"B Singh, 2025, J Cheminform"
+PUBMED,,,41274086,Polyp image segmentation based on parallel dilated convolution and dual attention mechanisms.,Neural Netw,,2026,Journal Article,eng,0,Chen S;Chen K;Wang C;Zhou Z;Wen S;Zhu T;Wu M,,,,,,,,195,,108282,,"S Chen, 2026, Neural Netw"
+PUBMED,,,41241336,"Neuroprotection by post-stroke administration of the slow-releasing hydrogen sulfide (H(2)S) donor AP39: Long-Term functional, MRI, and molecular evidence in a rodent stroke model.",Eur J Pharmacol,,2025,Journal Article,eng,0,Bartosz P;Weronika K;Alicja S;Jakub J;Katarzyna PP;Małgorzata S;Monika M;Zuzanna G;Patrycja R;Eugene K;Michel M;Roberta T;Matthew W;Lucyna PC;Joanna P;Bogusława B,,,,,,,,1008,,178331,,"P Bartosz, 2025, Eur J Pharmacol"
+PUBMED,,,41230453,State-of-Art in Studying the Public Health Effects of Heat: A Literature Review.,Glob Chall,,2025,Journal Article,eng,0,Gianquintieri L;Caiani EG,,,,,,,,9,11,e00381,,"L Gianquintieri, 2025, Glob Chall"
+PUBMED,,,41219714,Study protocol for a randomized controlled trial assessing clinical efficacy of digital cognitive rehabilitation for preclinical and mild clinical stages of alzheimer's disease continuum: the MI-RICORDO project.,BMC Psychiatry,,2025,Journal Article,eng,0,Blasi V;Isernia S;Rossetto F;Pagliari C;Borgnis F;Pirastru A;Marzulli M;Foglia E;Garagiola E;Baglio F,,,,,,,,25,1,1075,,"V Blasi, 2025, BMC Psychiatry"
+PUBMED,,,41116350,Distribution of singular values in large sample cross-covariance matrices.,Phys Rev E,,2025,Journal Article,eng,0,Swain A;Ridout SA;Nemenman I,,,,,,,,112,3-2,035312,,"A Swain, 2025, Phys Rev E"
+PUBMED,,,41092974,CAT-GxD: Centralized access to gene expression datasets.,Anaerobe,,2025,Journal Article,eng,0,Roxas BAP;Roxas JL;Guo JS;LeBauer DS;McCarthy F;Vedantam G;Viswanathan VK,,,,,,,,96,,103005,,"BAP Roxas, 2025, Anaerobe"
+PUBMED,,,41049017,Using Choice and Utility Value to Promote Interest: Stimulating Situational Interest in a Lesson and Fostering the Development of Interest in Statistics.,J Educ Psychol,,2025,Journal Article,eng,0,Asher MW;Harackiewicz JM,,,,,,,,117,4,647-662,,"MW Asher, 2025, J Educ Psychol"
+PUBMED,,,41038717,Reassessing data management in increasingly complex phenotypic datasets.,Trends Plant Sci,,2026,Journal Article,eng,0,Pommier C;Alic I;Cabrera-Bosquet L;Draye X;Neveu P;Reif JC;Robbins KR;Krajewski P;Tardieu F,,,,,,,,31,5,543-554,,"C Pommier, 2026, Trends Plant Sci"
+PUBMED,,,41036253,"Privacy-, linguistic-, and information-preserving synthesis of clinical documentation through generative agents.",Front Artif Intell,,2025,Journal Article,eng,0,van Velzen M;van der Willigen RF;de Beer VJ;de Graaf-Waar HI;Janssen ERC;van Leeuwen S;van der Willigen MF;van der Willigen MJ;Renardus G;El Maaroufi R;Satimin SJ;Hartog LM;Hulsen T;van Meeteren NLU;Scheper MC,,,,,,,,8,,1644084,,"M van Velzen, 2025, Front Artif Intell"
+PUBMED,,,41033015,Does BMI influence AI and human reader lung nodule detection in low-dose chest CT?,Eur J Radiol,,2025,Journal Article,eng,0,Sourlos N;van Tuinen M;Sidorenkov G;de Jonge G;Schalekamp S;Pelgrim GJ;Greuter M;Rook M;Prokop M;van Ooijen P;Vliegenthart R,,,,,,,,193,,112453,,"N Sourlos, 2025, Eur J Radiol"
+PUBMED,,,41023760,"Sacroiliac joint involvement in psoriatic arthritis - MRI, radiographic and clinical findings in 581 European routine care patients.",Arthritis Res Ther,,2025,Journal Article,eng,0,Vladimirova N;Hadsbjerg AE;Krabbe S;Ciurea A;Bubová K;Gregová M;Nissen MJ;Möller B;Micheroli R;Pedersen SJ;Závada J;Snoj Z;Pintaric K;Gudbjornsson B;Rotar Z;Eshed I;Sudol-Szopinska I;Gosvig K;Diekhoff T;Lambert RG;de Hooge M;Donzallaz M;Bernatschek A;Hetland ML;Ørnbjerg LM;Østergaard M,,,,,,,,27,1,185,,"N Vladimirova, 2025, Arthritis Res Ther"
+PUBMED,,,41015781,Early Prediction and Risk Analysis Using Hybrid Deep Learning Techniques in Multimodal Biomedical Image.,Dev Neurobiol,,2025,Journal Article,eng,0,Vylala A;Plakkottu Radhakrishnan B;Balakrishnan Kadan A,,,,,,,,85,4,e23001,,"A Vylala, 2025, Dev Neurobiol"
+PUBMED,,,40656660,Growing Data science Research in Africa to Stimulate Progress (GRASP) program: Rationale and overview.,Equity Neurosci,,2025,Journal Article,eng,0,Uvere E;Kumuthini J;Fatumo S;Taiwo J;Akinyemi R;Ogunniyi A;Ogbole G;Aribisala B;Sarfo F;Jegede A;Akinyemi J;Vedanthan R;Okekunle A;Lackland D;Ovbiagele B;Owolabi M,,,,,,,,1,1,,,"E Uvere, 2025, Equity Neurosci"
+PUBMED,,,40648628,The Impact of Automation and Digitalization in Hospital Medication Management: Economic Analysis in the European Countries.,Healthcare (Basel),,2025,Journal Article,eng,0,Orsini FF;Bellavia D;Schettini F;Foglia E,,,,,,,,13,13,,,"FF Orsini, 2025, Healthcare (Basel)"
+PUBMED,,,40493067,Changes in numbers needed to treat and hospital care expenditures of optimized indications for primary prevention implantable cardioverter defibrillators: a scenario analysis.,Clin Res Cardiol,,2026,Journal Article,eng,0,van Barreveld M;van Dessel PFHM;Buskens E;Boersma LVA;Delnoy PPHM;Tuinenburg AE;Theuns DAMJ;van der Voort PH;Kimman GP;Verstraelen TE;Zwinderman AH;Wilde AAM;Dijkgraaf MGW,,,,,,,,115,4,576-589,,"M van Barreveld, 2026, Clin Res Cardiol"
+PUBMED,,,40475002,Cost-effectiveness of tuberculosis infection screening at first reception into English prisons: a model-based analysis.,EClinicalMedicine,,2025,Journal Article,eng,0,Mafirakureva N;Hunter R;Ferraro CF;Willner S;Finnie T;Hayward A;Lee A;Roy A;Edge C;Dodd PJ,,,,,,,,83,,103245,,"N Mafirakureva, 2025, EClinicalMedicine"
+PUBMED,,,40380711,Controlled Intervention Study on Effects of an AI-Based App to Support Wound Care: First Results.,Stud Health Technol Inform,,2025,Journal Article,eng,0,Pinnekamp H;Priester V;Brehmer A;Fischer U,,,,,,,,327,,1295-1296,,"H Pinnekamp, 2025, Stud Health Technol Inform"
+PUBMED,,,40251825,Comparison of the effectiveness of visual acuity measurements for amblyopia screening at the age of 36 and 45 months and difference in severity of amblyopia detected.,Acta Ophthalmol,,2025,Journal Article,eng,0,Telleman MAJ;Sloot F;Benjamins SJ;Loudon SE;Spek B;Simonsz HJ;Orthoptic Research Group,,,,,,,,103,7,799-811,,"MAJ Telleman, 2025, Acta Ophthalmol"
+PUBMED,,,40027177,Ottimizzazione nel trattamento del soggetto con HIV: analisi di impatto economico e organizzativo di Bictegravir/Emtricitabina/Tenofovir Alafenamide.,Glob Reg Health Technol Assess,,2025,Journal Article,ita,0,Ferrario L;Menzaghi B;Rizzardini G;Roccia A;Garagiola E;Bellavia D;Schettini F;Foglia E,,,,,,,,12,,49-60,,"L Ferrario, 2025, Glob Reg Health Technol Assess"
+PUBMED,,,40023406,Cytotoxic T-lymphocyte associated protein 4 inhibitors are associated with a higher risk of cardiovascular events than programmed cell death protein 1 inhibitors in patients with melanoma.,J Am Acad Dermatol,,2025,Journal Article,eng,0,Chang CC;Lo SW;Chang HC;Song J;Chang YC;Yang K;Chi KY;Chang Y;Chiang CH;Chiang CH,,,,,,,,93,1,202-204,,"CC Chang, 2025, J Am Acad Dermatol"
+PUBMED,,,39951916,Management of patients with active relapsing-remitting or secondary progressive multiple sclerosis: A French real-world study based on claims data linked to a phase IV study.,Mult Scler Relat Disord,,2025,Journal Article,eng,0,Moisset X;Mercier G;Belhassen M;Deygas F;Civet A;Pau D;Rolland L;Bourel G;Larrieu S;Marchal C,,,,,,,,95,,106305,,"X Moisset, 2025, Mult Scler Relat Disord"
+PUBMED,,,39853521,Correction: From Clinical to Non-clinical Outcomes in the Treatment of HIV: An Economic and Organizational Impact Assessment.,Pharmacoecon Open,,2025,Published Erratum,eng,0,Ferrario L;Menzaghi B;Rizzardini G;Roccia A;Garagiola E;Bellavia D;Schettini F;Foglia E,,,,,,,,9,2,327,,"L Ferrario, 2025, Pharmacoecon Open"
+PUBMED,,,39779279,Deciphering risk factors for severe postherpetic neuralgia in patients with herpes zoster: an interpretable machine learning approach.,Reg Anesth Pain Med,,2026,Journal Article,eng,0,Park SJ;Han J;Choi JB;Min SK;Park J;Choi S,,,,,,,,51,4,429-436,,"SJ Park, 2026, Reg Anesth Pain Med"
+PUBMED,,,39734790,Using Data-Science Approaches to Unravel Insights for Enhanced Transport of Lithium Ions in Single-Ion Conducting Polymer Electrolytes.,Chem Mater,,2024,Journal Article,eng,0,Zhu Q;Liu Y;Shepard LB;Bhattacharya D;Sinnott SB;Reinhart WF;Cooper VR;Kumar R,,,,,,,,36,24,11934-11946,,"Q Zhu, 2024, Chem Mater"
+PUBMED,,,39713439,Score matching for differential abundance testing of compositional high-throughput sequencing data.,bioRxiv,,2024,Journal Article,eng,0,Ostner J;Li H;Müller CL,,,,,,,,,,,,"J Ostner, 2024, bioRxiv"
+PUBMED,,,39709670,Glucagon-like Peptide-1 Agonists Reduce Cardiovascular Events in Cancer Patients on Immune Checkpoint Inhibitors.,Eur J Cancer,,2025,Journal Article,eng,0,Chiang CH;Song J;Chi KY;Chang YC;Xanthavanij N;Chang Y;Hsia YP;Chiang CH;Ghamari A;Reynolds KL;Lin S;Xu XH;Neilan TG,,,,,,,,216,,115170,,"CH Chiang, 2025, Eur J Cancer"
+PUBMED,,,39632680,Emergent properties of the lysine methylome reveal regulatory roles via protein interactions and histone mimicry.,Epigenomics,,2025,Journal Article,eng,0,Pollin G;Chi YI;Mathison AJ;Zimmermann MT;Lomberk G;Urrutia R,,,,,,,,17,1,5-20,,"G Pollin, 2025, Epigenomics"
+PUBMED,,,39532817,From Clinical to Non-clinical Outcomes in the Treatment of HIV: An Economic and Organizational Impact Assessment.,Pharmacoecon Open,,2025,Journal Article,eng,0,Ferrario L;Menzaghi B;Rizzardini G;Roccia A;Garagiola E;Bellavia D;Schettini F;Foglia E,,,,,,,,9,2,313-326,,"L Ferrario, 2025, Pharmacoecon Open"
+PUBMED,,,39528391,Using 'Situation-Background-Assessment-Recommendation' Method in Palliative Care to Enhance Handover Quality and Nursing Practice: A Mix Method Study.,J Clin Nurs,,2025,Journal Article,eng,0,Pinto F;Roberto P;Ferrario L;Marotta L;Montani D;Auletta G;Zoppini L;Foglia E,,,,,,,,34,1,117-127,,"F Pinto, 2025, J Clin Nurs"
+PUBMED,,,39526992,Performance evaluation of the introduction of full sample traceability system within the specimen collection process.,Clin Chem Lab Med,,2025,Journal Article,eng,0,Foglia E;Garagiola E;Ferrario L;Plebani M,,,,,,,,63,4,723-733,,"E Foglia, 2025, Clin Chem Lab Med"
+PUBMED,,,39518081,From Real-World Data to Causally Interpretable Models: A Bayesian Network to Predict Cardiovascular Diseases in Adolescents and Young Adults with Breast Cancer.,Cancers (Basel),,2024,Journal Article,eng,0,Bernasconi A;Zanga A;Lucas PJF;Scutari M;Di Cosimo S;De Santis MC;La Rocca E;Baili P;Cavallo I;Verderio P;Ciniselli CM;Pizzamiglio S;Blanda A;Perego P;Vallerio P;Stella F;Trama A;The Ada Working Group,,,,,,,,16,21,,,"A Bernasconi, 2024, Cancers (Basel)"
+PUBMED,,,39450759,Effectiveness of Omega-3 Fatty Acids Versus Placebo in Subjects at Ultra-High Risk for Psychosis: The PURPOSE Randomized Clinical Trial.,Schizophr Bull,,2025,Journal Article,eng,0,Winter-van Rossum I;Slot MIE;van Hell HH;Bossong MG;Berger G;Aschauer H;Maat A;Walitza S;Lavan O;Baeza I;Dolz M;Monducci E;Fiori Nastro P;Kroken RA;Lawrie SM;Díaz-Caneja CM;Renner T;Schlögelhofer M;Scharinger C;Spalletta G;Banaj N;Otero S;Schipper M;Kwakkel DB;PURPOSE Study Group;Kahn RS,,,,,,,,51,4,1082-1091,,"I Winter-van Rossum, 2025, Schizophr Bull"
+PUBMED,,,39435882,Elucidation of DPP-4 involvement in systemic distribution and renal reabsorption of linagliptin by PBPK modeling with a cluster Gauss-Newton method.,Clin Transl Sci,,2024,Journal Article,eng,0,Nakamura R;Yoshikado T;Aoki Y;Sugiyama Y;Chiba K,,,,,,,,17,10,e70047,,"R Nakamura, 2024, Clin Transl Sci"
+PUBMED,,,39401960,Exploration of the relationship between general health-related problems and subclinical coronary artery disease: a cross-sectional study in a general population.,BMJ Open,,2024,Journal Article,eng,0,Koopman MY;van der Ende MY;Reijnders JJW;Willemsen RTA;van Bruggen R;Gratama JWC;Kietselaer BLJH;van der Harst P;Vliegenthart R,,,,,,,,14,10,e079835,,"MY Koopman, 2024, BMJ Open"
+PUBMED,,,39400639,Recommendations for the creation of benchmark datasets for reproducible artificial intelligence in radiology.,Insights Imaging,,2024,Journal Article,eng,0,Sourlos N;Vliegenthart R;Santinha J;Klontzas ME;Cuocolo R;Huisman M;van Ooijen P,,,,,,,,15,1,248,,"N Sourlos, 2024, Insights Imaging"
+PUBMED,,,39395985,Cross-cultural adaptation and validation of the Hospital Survey on Patient Safety questionnaire for a Chilean hospital.,BMC Nurs,,2024,Journal Article,eng,0,Hurtado-Arenas P;Guevara MR;González-Chordá VM,,,,,,,,23,1,748,,"P Hurtado-Arenas, 2024, BMC Nurs"
+PUBMED,,,39332590,The incidence and risk of cardiovascular events associated with pembrolizumab in patients with breast cancer.,Cancer Lett,,2024,Journal Article,eng,0,Chiang CH;Xu XH;Song J;Xanthavanij N;Chi KY;Chang YC;Chang Y;Hsiao CL;Hsia YP;Chiang CH;Lin S,,,,,,,,611,,217277,,"CH Chiang, 2024, Cancer Lett"
+PUBMED,,,39319287,Automated Drugs Dispensing Systems in Hospitals: a Health Technology Assessment (HTA) Study Across Six European Countries.,Clinicoecon Outcomes Res,,2024,Journal Article,eng,0,Foglia E;Asperti F;Antonacci G;Jani YH;Garagiola E;Bellavia D;Ferrario L,,,,,,,,16,,679-696,,"E Foglia, 2024, Clinicoecon Outcomes Res"
+PUBMED,,,39232868,"Association between the antibiotics use and recurrence in patients with resected colorectal cancer: EVADER-1, a nation-wide pharmaco-epidemiologic study.",Dig Liver Dis,,2025,Journal Article,eng,0,Hilmi M;Khati I;Turpin A;Andremont A;Burdet C;Grall N;Vidal J;Bousquet PJ;Rousseau B;Bihan-Benjamin CL,,,,,,,,57,1,89-96,,"M Hilmi, 2025, Dig Liver Dis"
+PUBMED,,,39168785,Clinical performance of Bladder EpiCheck™ versus voided urine cytology for detecting recurrence of nonmuscle invasive bladder cancer: Systematic review and meta-analysis.,Urol Oncol,,2024,Journal Article,eng,0,Chiang CH;Chang YC;Peng CY;Wang SS;Jaroenlapnopparat A;Wang JCH;Jou CL;Tang PU;Hsia YP;Chiang CH;Chiang CH,,,,,,,,42,12,449.e21-449.e28,,"CH Chiang, 2024, Urol Oncol"
+PUBMED,,,39150589,Correction: Effect of emphysema on AI software and human reader performance in lung nodule detection from low-dose chest CT.,Eur Radiol Exp,,2024,Published Erratum,eng,0,Sourlos N;Pelgrim G;Wisselink HJ;Yang X;de Jonge G;Rook M;Prokop M;Sidorenkov G;van Tuinen M;Vliegenthart R;van Ooijen PMA,,,,,,,,8,1,94,,"N Sourlos, 2024, Eur Radiol Exp"
+PUBMED,,,39145403,Metadata for Data dIscoverability aNd Study rEplicability in obseRVAtional Studies (MINERVA): Lessons Learnt From the MINERVA Project in Europe.,Pharmacoepidemiol Drug Saf,,2024,Journal Article,eng,0,Gini R;Pajouheshnia R;Gutierrez L;Swertz MA;Hyde E;Sturkenboom M;Arana A;Franzoni C;Ehrenstein V;Roberto G;Gil M;Maciá MA;Schäfer W;Haug U;Thurin NH;Lassalle R;Droz-Perroteau C;Zaccagnino S;Busto MP;Middelkoop B;Gembert K;Sanchez-Saez F;Rodriguez-Bernal C;Sanfélix-Gimeno G;Hurtado I;Acosta MB;Poblador-Plou B;Carmona-Pírez J;Gimeno-Miguel A;Prados-Torres A;Schultze A;Jansen E;Herings R;Kuiper J;Locatelli I;Jazbar J;Žerovnik Š;Kos M;Smit S;Lind S;Metspalu A;Simou S;Hedenmalm K;Cochino A;Alcini P;Kurz X;Perez-Gutthann S,,,,,,,,33,8,e5884,,"R Gini, 2024, Pharmacoepidemiol Drug Saf"
+PUBMED,,,39058407,Accessing a Diverse Set of Functional Red-Light Photoswitches by Selective Copper-Catalyzed Indigo N-Arylation.,J Am Chem Soc,,2024,Journal Article,eng,0,Jaiswal AK;Saha P;Jiang J;Suzuki K;Jasny A;Schmidt BM;Maeda S;Hecht S;Huang CD,,,,,,,,146,31,21367-21376,,"AK Jaiswal, 2024, J Am Chem Soc"
+PUBMED,,,39043728,Author Correction: IRE1α-XBP1s pathway promotes prostate cancer by activating c-MYC signaling.,Nat Commun,,2024,Published Erratum,eng,0,Sheng X;Nenseth HZ;Qu S;Kuzu OF;Frahnow T;Simon L;Greene S;Zeng Q;Fazli L;Rennie PS;Mills IG;Danielsen H;Theis F;Patterson JB;Jin Y;Saatcioglu F,,,,,,,,15,1,6190,,"X Sheng, 2024, Nat Commun"
+PUBMED,,,39018014,The graded multidimensional geometry of phenotypic variation and progression in neurodegenerative syndromes.,Brain,,2025,Journal Article,eng,0,Ramanan S;Akarca D;Henderson SK;Rouse MA;Allinson K;Patterson K;Rowe JB;Lambon Ralph MA,,,,,,,,148,2,448-466,,"S Ramanan, 2025, Brain"
+PUBMED,,,38976293,Does high hepatic bioavailability enhance the effect of oral compared to subcutaneous glucocorticoids?,Clin Exp Rheumatol,,2024,Journal Article,eng,0,van Geel EH;Boers M;Hartman L;Smulders YM,,,,,,,,42,11,2265-2267,,"EH van Geel, 2024, Clin Exp Rheumatol"
+PUBMED,,,38969158,Periodontitis is an immune-related adverse event associated with immune checkpoint inhibitors: A multi-center cohort study.,Cancer Lett,,2024,Journal Article,eng,0,Ma KS;Chiang CH;Chen ST;Dinh Y;Chiang CH;Van Dyke TE;Sullivan R;Ananthakrishnan AN;Hsia YP;Peng CM;Chiang CH,,,,,,,,598,,217100,,"KS Ma, 2024, Cancer Lett"
+PUBMED,,,38777744,Gradient boosted regression as a tool to reveal key drivers of temporal dynamics in a synthetic yeast community.,FEMS Microbiol Ecol,,2024,Journal Article,eng,0,Conacher CG;Watson BW;Bauer FF,,,,,,,,100,7,,,"CG Conacher, 2024, FEMS Microbiol Ecol"
+PUBMED,,,38764066,Effect of emphysema on AI software and human reader performance in lung nodule detection from low-dose chest CT.,Eur Radiol Exp,,2024,Journal Article,eng,0,Sourlos N;Pelgrim G;Wisselink HJ;Yang X;de Jonge G;Rook M;Prokop M;Sidorenkov G;van Tuinen M;Vliegenthart R;van Ooijen PMA,,,,,,,,8,1,63,,"N Sourlos, 2024, Eur Radiol Exp"
+PUBMED,,,38672974,"Multidimensional Impact of Dupilumab on Chronic Rhinosinusitis with Nasal Polyps: A Complete Health Technology Assessment of Clinical, Economic, and Non-Clinical Domains.",J Pers Med,,2024,Journal Article,eng,0,La Mantia I;Ottaviano G;Ragusa M;Trimarchi M;Foglia E;Schettini F;Bellavia D;Cantone E,,,,,,,,14,4,,,"I La Mantia, 2024, J Pers Med"
+PUBMED,,,38671166,Thromboprophylaxis for outpatients with COVID-19: a Systematic Review and Meta-analysis.,J Thromb Thrombolysis,,2024,Systematic Review,eng,0,Chiang CH;Ahmed O;Liu W;See XY;Chang YC;Peng CY;Wang Z;Chiang CH;Hsia YP;Chiang CH,,,,,,,,57,5,784-787,,"CH Chiang, 2024, J Thromb Thrombolysis"
+PUBMED,,,38564640,"OASIS: An interpretable, finite-sample valid alternative to Pearson's X(2) for scientific discovery.",Proc Natl Acad Sci U S A,,2024,Journal Article,eng,0,Baharav TZ;Tse D;Salzman J,,,,,,,,121,15,e2304671121,,"TZ Baharav, 2024, Proc Natl Acad Sci U S A"
+PUBMED,,,38422606,Complex interventions in frail older adults.,Arch Gerontol Geriatr,,2024,Letter,eng,0,Van der Elst MCJ;Schoenmakers B;Schols JMGA;De Witte N;De Lepeleire J,,,,,,,,122,,105372,,"MCJ Van der Elst, 2024, Arch Gerontol Geriatr"
+PUBMED,,,38347856,Leisure time physical activity is associated with improved diastolic heart function and is partly mediated by unsupervised quantified metabolic health.,BMJ Open Sport Exerc Med,,2024,Journal Article,eng,0,Klarenberg H;van der Velde JH;Peeters CF;Dekkers IA;de Mutsert R;Jukema JW;Rosendaal FR;Leiner T;Froeling M;Jorstad H;Boekholdt SM;Strijkers GJ;Lamb HJ,,,,,,,,10,1,e001778,,"H Klarenberg, 2024, BMJ Open Sport Exerc Med"
+PUBMED,,,38318999,Barbed sutures and skin adhesives improve wound closure in hip and knee arthroplasty.,Knee Surg Sports Traumatol Arthrosc,,2024,Journal Article,eng,0,Romanini E;Zanoli GA;Ascione T;Balato G;Baldini A;Foglia E;Pellegrini AV;Verde F;Zaffagnini S,,,,,,,,32,2,303-310,,"E Romanini, 2024, Knee Surg Sports Traumatol Arthrosc"
+PUBMED,,,38301750,CLARUS: An interactive explainable AI platform for manual counterfactuals in graph neural networks.,J Biomed Inform,,2024,Journal Article,eng,0,Metsch JM;Saranti A;Angerschmid A;Pfeifer B;Klemt V;Holzinger A;Hauschild AC,,,,,,,,150,,104600,,"JM Metsch, 2024, J Biomed Inform"
+PUBMED,,,38287951,SpectroFood dataset: A comprehensive fruit and vegetable hyperspectral meta-dataset for dry matter estimation.,Data Brief,,2024,Journal Article,eng,0,Malounas I;Vierbergen W;Kutluk S;Zude-Sasse M;Yang K;Zhao M;Argyropoulos D;Van Beek J;Ampe E;Fountas S,,,,,,,,52,,110040,,"I Malounas, 2024, Data Brief"
+PUBMED,,,38262670,Impact of different corticosteroids on severe community-acquired pneumonia: a systematic review and meta-analysis.,BMJ Open Respir Res,,2024,Meta-Analysis,eng,0,See XY;Wang TH;Chang YC;Lo J;Liu W;Choo CYW;Lee YC;Ma KSK;Chiang CH;Hsia YP;Chiang CH;Chiang CH,,,,,,,,11,1,,,"XY See, 2024, BMJ Open Respir Res"
+PUBMED,,,38104536,The Effect of Exercise on Cardiotoxicity in Women with Breast Cancer Receiving Anthracycline-Based Chemotherapy: A Systematic Review and Meta-Analysis.,Oncology,,2024,Systematic Review,eng,0,Chiang CH;Chang YC;Haw Y;Tan JY;Chiang CH;Hsia YP;Chiang CH,,,,,,,,102,6,510-514,,"CH Chiang, 2024, Oncology"
+PUBMED,,,38081880,A CNN based m5c RNA methylation predictor.,Sci Rep,,2023,Journal Article,eng,0,Aslam I;Shah S;Jabeen S;ELAffendi M;A Abdel Latif A;Ul Haq N;Ali G,,,,,,,,13,1,21885,,"I Aslam, 2023, Sci Rep"
+PUBMED,,,38053104,21st century (clinical) decision support in nursing and allied healthcare. Developing a learning health system: a reasoned design of a theoretical framework.,BMC Med Inform Decis Mak,,2023,Journal Article,eng,0,van Velzen M;de Graaf-Waar HI;Ubert T;van der Willigen RF;Muilwijk L;Schmitt MA;Scheper MC;van Meeteren NLU,,,,,,,,23,1,279,,"M van Velzen, 2023, BMC Med Inform Decis Mak"
+PUBMED,,,38026834,Multiple imputation of missing data in multilevel ecological momentary assessments: an example using smoking cessation study data.,Front Digit Health,,2023,Journal Article,eng,0,Ji L;Li Y;Potter LN;Lam CY;Nahum-Shani I;Wetter DW;Chow SM,,,,,,,,5,,1099517,,"L Ji, 2023, Front Digit Health"
+PUBMED,,,37961606,"OASIS: An interpretable, finite-sample valid alternative to Pearson's X2 for scientific discovery.",bioRxiv,,2023,Preprint,eng,0,Baharav TZ;Tse D;Salzman J,,,,,,,,,,,,"TZ Baharav, 2023, bioRxiv"
+PUBMED,,,37926930,Optimal dose for the efficacy of asenapine in patients with schizophrenia: Real-world data.,Neuropsychopharmacol Rep,,2024,Meta-Analysis,eng,0,Takekita Y;Hiraoka S;Iwama Y;Matsui D;Aoki N;Ogata H;Funatsuki T;Shimizu T;Murase Y;Koshikawa Y;Kato M;Kinoshita T,,,,,,,,44,1,234-239,,"Y Takekita, 2024, Neuropsychopharmacol Rep"
+PUBMED,,,37915525,What does it mean to be an agent?,Front Psychol,,2023,Journal Article,eng,0,Naidoo M,,,,,,,,14,,1273470,,"M Naidoo, 2023, Front Psychol"
+PUBMED,,,37907469,Author Correction: Non-pharmaceutical interventions to combat COVID-19 in the Americas described through daily sub-national data.,Sci Data,,2023,Published Erratum,eng,0,Touchton M;Knaul FM;Arreola-Ornelas H;Porteny T;Carniado ÓM;Faganello M;Hummel C;Otero S;Insua J;Patino F;Undurraga E;Pérez-Cruz P;Sanchez-Talanquer M;Velasco Guachalla VX;Nelson-Nuñez J;Boulding C;Calderon-Anyosa R;Garcia PJ;Vargas Enciso V,,,,,,,,10,1,751,,"M Touchton, 2023, Sci Data"
+PUBMED,,,37899087,Are maternal vaccines effective and safe for mothers and infants? A systematic review and meta-analysis of randomised controlled trials.,BMJ Glob Health,,2023,Meta-Analysis,eng,0,de Bruin O;Phijffer E;Ahmadizar F;van der Maas N;Wildenbeest J;Sturkenboom M;Bont L;Bloemenkamp K,,,,,,,,8,10,,,"O de Bruin, 2023, BMJ Glob Health"
+PUBMED,,,37866733,Ketamine versus electroconvulsive therapy for major depressive disorder: A deeper dive into the data.,J Affect Disord,,2024,Letter,eng,0,Wang Z;Chiang CH;Hsia YP;Chiang CH;Chiang CH,,,,,,,,345,,120-121,,"Z Wang, 2024, J Affect Disord"
+PUBMED,,,37865630,Non-pharmaceutical interventions to combat COVID-19 in the Americas described through daily sub-national data.,Sci Data,,2023,Dataset,eng,0,Touchton M;Knaul FM;Arreola-Ornelas H;Porteny T;Carniado ÓM;Faganello M;Hummel C;Otero S;Insua J;Patino F;Undurraga E;Pérez-Cruz P;Sanchez-Talanquer M;Velasco Guachalla VX;Nelson-Nuñez J;Boulding C;Calderon-Anyosa R;Garcia PJ;Vargas Enciso V,,,,,,,,10,1,734,,"M Touchton, 2023, Sci Data"
+PUBMED,,,37853448,"Healthcare professional and manager perceptions on drivers, benefits, and challenges of telemedicine: results from a cross-sectional survey in the Italian NHS.",BMC Health Serv Res,,2023,Journal Article,eng,0,Antonacci G;Benevento E;Bonavitacola S;Cannavacciuolo L;Foglia E;Fusi G;Garagiola E;Ponsiglione C;Stefanini A,,,,,,,,23,1,1115,,"G Antonacci, 2023, BMC Health Serv Res"
+PUBMED,,,37792852,Characterization of 3D organotypic epithelial tissues reveals tonsil-specific differences in tonic interferon signaling.,PLoS One,,2023,Journal Article,eng,0,Jackson R;Rajadhyaksha EV;Loeffler RS;Flores CE;Van Doorslaer K,,,,,,,,18,10,e0292368,,"R Jackson, 2023, PLoS One"
+PUBMED,,,37787143,Influence of nanopore coating patterns on the translocation dynamics of polyelectrolytes.,J Chem Phys,,2023,Journal Article,eng,0,Datar A;Tanyhin B;Melchionna S;Fyta M,,,,,,,,159,13,,,"A Datar, 2023, J Chem Phys"
+PUBMED,,,37778607,Adopting a child perspective for exposome research on mental health and cognitive development - Conceptualisation and opportunities.,Environ Res,,2023,Journal Article,eng,0,Persson Waye K;Löve J;Lercher P;Dzhambov AM;Klatte M;Schreckenberg D;Belke C;Leist L;Ristovska G;Jeram S;Kanninen KM;Selander J;Arat A;Lachmann T;Clark C;Botteldooren D;White K;Julvez J;Foraster M;Kaprio J;Bolte G;Psyllidis A;Gulliver J;Boshuizen H;Bozzon A;Fels J;Hornikx M;van den Hazel P;Weber M;Brambilla M;Braat-Eggen E;Van Kamp I;Vincens N;Equal-life Scientific Team,,,,,,,,239,Pt 1,117279,,"K Persson Waye, 2023, Environ Res"
+PUBMED,,,37769849,Challenges of artificial intelligence in precision oncology: public-private partnerships including national health agencies as an asset to make it happen.,Ann Oncol,,2024,Editorial,eng,0,Luu VP;Fiorini M;Combes S;Quemeneur E;Bonneville M;Bousquet PJ,,,,,,,,35,2,154-158,,"VP Luu, 2024, Ann Oncol"
+PUBMED,,,37644607,Effects of corticosteroids on severe community-acquired pneumonia: a closer look at the evidence.,Crit Care,,2023,Letter,eng,0,Chiang CH;See XY;Wang TH;Chang YC;Lo JE;Liu WT;Choo CYW;Chiang CH;Hsia YP;Chiang CH,,,,,,,,27,1,336,,"CH Chiang, 2023, Crit Care"
+PUBMED,,,37360931,Data-driven customer acceptance for attended home delivery.,OR Spectr,,2023,Journal Article,eng,0,Köhler C;Campbell AM;Ehmke JF,,,,,,,,,,1-36,,"C Köhler, 2023, OR Spectr"
+PUBMED,,,37333015,Brief Report: A Multidisciplinary Initial Workup for Suspected Lung Cancer as Fast-Track Intervention to Histopathologic Diagnosis.,JTO Clin Res Rep,,2023,Journal Article,eng,0,Pujol JL;Mercier G;Vasile M;Serre I;Vernhet-Kovacsik H;Bommart S,,,,,,,,4,6,100526,,"JL Pujol, 2023, JTO Clin Res Rep"
+PUBMED,,,37327210,Predicted versus CT-derived total lung volume in a general population: The ImaLife study.,PLoS One,,2023,Journal Article,eng,0,Wisselink HJ;Steerenberg DJD;Rook M;Pelgrim GJ;Heuvelmans MA;van den Berge M;de Bock GH;Vliegenthart R,,,,,,,,18,6,e0287383,,"HJ Wisselink, 2023, PLoS One"
+PUBMED,,,37277211,Characterisation of patients with axial psoriatic arthritis and patients with axial spondyloarthritis and concomitant psoriasis in the SCQM registry.,RMD Open,,2023,Journal Article,eng,0,Ciurea A;Götschi A;Kissling S;Bernatschek A;Bürki K;Exer P;Nissen MJ;Möller B;Scherer A;Micheroli R,,,,,,,,9,2,,,"A Ciurea, 2023, RMD Open"
+PUBMED,,,37250091,System Integrated Digital Empowering and teleRehabilitation to promote patient Activation and well-Being in chronic disabilities: A usability and acceptability study.,Front Public Health,,2023,Journal Article,eng,0,Rossetto F;Borgnis F;Isernia S;Foglia E;Garagiola E;Realdon O;Baglio F,,,,,,,,11,,1154481,,"F Rossetto, 2023, Front Public Health"
+PUBMED,,,37243964,"Authors' Reply to Juergen O Kirchner's Comment on ""Incidence Rates of Autoimmune Diseases in European Healthcare Databases: A Contribution of the ADVANCE Project"".",Drug Saf,,2023,Letter,eng,0,Willame C;Weibel D;Sturkenboom MCJM,,,,,,,,46,8,813-815,,"C Willame, 2023, Drug Saf"
+PUBMED,,,37231860,Toward a Flexible Use of Frailty Measurements in Older People.,Gerontology,,2023,Letter,eng,0,Van der Elst MCJ;Schoenmakers B;Schols JMGA;De Witte N;De Lepeleire J;D-SCOPE Consortium,,,,,,,,69,9,1113-1114,,"MCJ Van der Elst, 2023, Gerontology"
+PUBMED,,,37198019,Assessing vaccine safety during a pandemic: Recent experience and lessons learned for the future.,Vaccine,,2023,Review,eng,0,Black SB;Chandler RE;Edwards KM;Sturkenboom MCJM,,,,,,,,41,25,3790-3795,,"SB Black, 2023, Vaccine"
diff --git a/test_etl.py b/test_etl.py
new file mode 100644
index 000000000..5dbe438b7
--- /dev/null
+++ b/test_etl.py
@@ -0,0 +1,78 @@
+import sys
+import os
+import pandas as pd
+
+# Add the project root to sys.path
+sys.path.append(os.path.abspath(os.path.dirname(__file__)))
+
+from www.services.etl import ETLPipeline
+from functions.get_annualproduction import get_annual_production
+from functions.get_relevantauthors import get_relevant_authors
+from functions.get_frequentwords import get_frequent_words
+from functions.get_wordcloud import get_wordcloud
+from functions.get_averagecitations import get_average_citations
+
+
+class MockReactive:
+    """Minimal stand-in for Shiny's reactive.Value: stores one object behind get()/set()."""
+
+    def __init__(self, df):
+        self.df = df
+
+    def get(self):
+        return self.df
+
+    def set(self, value):
+        self.df = value
+
+
+def test_api_extraction():
+    """Run the OpenAlex ETL pipeline, then smoke-test five analytical functions on the result.
+
+    Progress is reported on stdout. A failure inside one analytical function is
+    printed and does not prevent the remaining functions from running.
+    """
+    print("=== Testing ETL Pipeline with OpenAlex API ===")
+    try:
+        # Extract and standardize
+        search_terms = "machine learning bibliometrics"
+        print(f"Querying OpenAlex for: '{search_terms}'...")
+        frame = ETLPipeline.convert2df(source_data="API", source_type="OpenAlex", is_api=True, query=search_terms)
+        print(f"Successfully extracted and standardized {len(frame)} records.")
+        print("Columns:", frame.columns.tolist())
+        print("Sample of SR column:", frame['SR'].head(3).tolist())
+
+        # Wrap the DataFrame to simulate Shiny's reactive.Value.get()
+        wrapped = MockReactive(frame)
+
+        # (progress label, zero-argument call) pairs, executed in this order
+        checks = [
+            ("1. get_annual_production...",
+             lambda: get_annual_production(wrapped)),
+            ("2. get_relevant_authors...",
+             lambda: get_relevant_authors(wrapped, num_of_authors=10)),
+            ("3. get_frequent_words...",
+             lambda: get_frequent_words(wrapped, ngram=1, num_of_words=10, word_type="TI",
+                                        file_upload_terms=None, file_upload_synonyms=None)),
+            ("4. get_wordcloud...",
+             lambda: get_wordcloud(wrapped, ngram=1, num_of_words_wc=10, field_wc="TI",
+                                   file_upload_terms_wc=None, file_upload_synonyms_wc=None)),
+            ("5. get_average_citations...",
+             lambda: get_average_citations(wrapped)),
+        ]
+
+        print("\n--- Testing Analytical Functions ---")
+        for label, run_check in checks:
+            try:
+                print(label)
+                run_check()
+                print("Success!")
+            except Exception as e:
+                print(f"Failed: {e}")
+
+    except Exception as e:
+        print(f"Pipeline execution failed: {e}")
+
+
+if __name__ == "__main__":
+    test_api_extraction()
diff --git a/test_histnetwork.py b/test_histnetwork.py
new file mode 100644
index 000000000..11ccf6a62
--- /dev/null
+++ b/test_histnetwork.py
@@ -0,0 +1,39 @@
+import sys
+sys.path.append('.')
+from www.services.etl import ETLPipeline
+from www.services.histnetwork import histNetwork
+import pandas as pd
+from shiny import reactive
+
+
+class MockReactive:
+    """Read-only stand-in for a Shiny reactive value: get() returns the wrapped object."""
+
+    def __init__(self, val):
+        self.val = val
+
+    def get(self):
+        return self.val
+
+
+def test_histnetwork():
+    """Fetch OpenAlex records for a fixed query and run histNetwork on them, printing the outcome."""
+    # 1. Fetch data from OpenAlex API
+    query = "machine learning"
+    print(f"Fetching data for query: {query}")
+    df = ETLPipeline.convert2df(source_data="API", source_type="OpenAlex", is_api=True, query=query)
+
+    print("\nStarting histNetwork test...")
+    # 2. histNetwork is handed an object exposing .get() rather than a bare DataFrame
+    results = histNetwork(MockReactive(df), network=True)
+
+    if results is None:
+        print("\nhistNetwork failed.")
+        return
+
+    print("\nhistNetwork executed successfully!")
+    print(f"NetMatrix shape: {results['NetMatrix'].shape}")
+
+
+if __name__ == '__main__':
+    test_histnetwork()
diff --git a/test_perf.py b/test_perf.py
new file mode 100644
index 000000000..8f235c5ef
--- /dev/null
+++ b/test_perf.py
@@ -0,0 +1,40 @@
+import time
+import sys
+sys.path.append('.')
+from www.services.etl import ETLPipeline
+from www.services.histnetwork import histNetwork
+
+
+class MockReactive:
+    """Read-only stand-in for a Shiny reactive value: get() returns the wrapped object."""
+
+    def __init__(self, val):
+        self.val = val
+
+    def get(self):
+        return self.val
+
+
+def test_perf():
+    """Time an OpenAlex fetch and a histNetwork run, printing both durations.
+
+    Durations come from time.perf_counter(), a monotonic high-resolution
+    clock, so they cannot be skewed by system clock adjustments the way
+    differences of time.time() readings can.
+    """
+    print("Fetching...")
+    fetch_start = time.perf_counter()
+    df = ETLPipeline.convert2df(source_data='API', source_type='OpenAlex', is_api=True, query='machine learning')
+    fetch_seconds = time.perf_counter() - fetch_start
+    print(f"Fetched {len(df)} rows in {fetch_seconds:.2f}s")
+
+    rv_df = MockReactive(df)
+    print("Running histNetwork...")
+    net_start = time.perf_counter()
+    histNetwork(rv_df, network=True)
+    net_seconds = time.perf_counter() - net_start
+    print(f"histNetwork completed in {net_seconds:.2f}s")
+
+
+if __name__ == '__main__':
+    test_perf()
diff --git a/www/services/__init__.py b/www/services/__init__.py
index 28584e105..ede5ff3d4 100644
--- a/www/services/__init__.py
+++ b/www/services/__init__.py
@@ -14,4 +14,8 @@
 from .tabletag import *
 from .termextraction import *
 from .thematicmap import *
-from .utils import *
\ No newline at end of file
+from .utils import *
+from .api_retriever import *
+from .standardizer import *
+from .validator import *
+from .etl import *
\ No newline at end of file
diff --git a/www/services/api_retriever.py b/www/services/api_retriever.py
new file mode 100644
index 000000000..b305b04bc
--- /dev/null
+++ b/www/services/api_retriever.py
@@ -0,0 +1,142 @@
+import requests
+import time
+import math
+from typing import List, Dict, Any
+
+class APIRetriever:
+    """
+    Handles data extraction from bibliographic APIs such as OpenAlex and PubMed.
+
+    This class abstracts the raw HTTP requests, pagination, and rate limiting
+    associated with fetching metadata from external sources. Retrieval is
+    best-effort: errors are printed and whatever was collected is returned.
+    """
+
+    @staticmethod
+    def get_openalex(query: str, max_results: int = 100,
+                     mailto: str = "test@example.com") -> List[Dict[str, Any]]:
+        """
+        Fetches bibliographic records from the OpenAlex API based on a search query.
+        Handles API pagination and basic rate limiting to politely extract data.
+
+        Args:
+            query (str): The keyword search string to query OpenAlex works.
+            max_results (int, optional): The maximum number of records to retrieve. Defaults to 100.
+                Zero or a negative value returns an empty list without contacting the API.
+            mailto (str, optional): Contact e-mail sent with every request (OpenAlex polite pool).
+                Defaults to the placeholder "test@example.com"; pass a real address in production.
+
+        Returns:
+            List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents
+                                  a single bibliographic record retrieved from OpenAlex.
+                                  On error, the records gathered before the failure.
+        """
+        base_url = "https://api.openalex.org/works"
+        results = []
+        if max_results <= 0:
+            # per_page would be 0, which makes the page-count division below fail
+            return results
+        per_page = min(max_results, 50)
+
+        try:
+            # Initial request to get total count
+            params = {
+                "search": query,
+                "per-page": per_page,
+                "page": 1,
+                "mailto": mailto  # Polite pool
+            }
+            response = requests.get(base_url, params=params, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+
+            meta = data.get("meta", {})
+            total_count = meta.get("count", 0)
+            if total_count == 0:
+                return results
+
+            results.extend(data.get("results", []))
+
+            # Fetch remaining pages if needed
+            pages_needed = math.ceil(min(total_count, max_results) / per_page)
+            for page in range(2, pages_needed + 1):
+                time.sleep(0.1) # Rate limit respect
+                params["page"] = page
+                response = requests.get(base_url, params=params, timeout=10)
+                if response.status_code != 200:
+                    # Keep paging, but make the gap in the result set visible
+                    print(f"OpenAlex page {page} skipped: HTTP {response.status_code}")
+                    continue
+                results.extend(response.json().get("results", []))
+
+        except Exception as e:
+            print(f"Error retrieving from OpenAlex: {e}")
+
+        # Limit exactly to max_results (also after a partial retrieval)
+        return results[:max_results]
+
+    @staticmethod
+    def get_pubmed(query: str, max_results: int = 100) -> List[Dict[str, Any]]:
+        """
+        Fetches bibliographic records from the PubMed API using NCBI E-utilities.
+        This is a two-step process: first fetching PMIDs via esearch, then fetching
+        document summaries via esummary, adhering to the NIH limit of 3 requests per second.
+
+        Args:
+            query (str): The keyword search string to query PubMed.
+            max_results (int, optional): The maximum number of records to retrieve. Defaults to 100.
+                Zero or a negative value returns an empty list without contacting the API.
+
+        Returns:
+            List[Dict[str, Any]]: A list of dictionaries representing PubMed summaries
+                                  mapped directly from the JSON response.
+                                  On error, the summaries gathered before the failure.
+        """
+        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
+        summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
+        results = []
+        if max_results <= 0:
+            return results
+
+        try:
+            # Step 1: Get PMIDs
+            search_params = {
+                "db": "pubmed",
+                "term": query,
+                "retmax": max_results,
+                "retmode": "json"
+            }
+            search_response = requests.get(search_url, params=search_params, timeout=10)
+            search_response.raise_for_status()
+            search_data = search_response.json()
+            id_list = search_data.get("esearchresult", {}).get("idlist", [])
+
+            if not id_list:
+                return results
+
+            # Step 2: Get summaries for these PMIDs
+            # PubMed limits to 200-300 ids per GET request, we'll chunk by 100
+            chunk_size = 100
+            for i in range(0, len(id_list), chunk_size):
+                chunk = id_list[i:i + chunk_size]
+                time.sleep(0.34) # NIH allows 3 requests per second
+                summary_params = {
+                    "db": "pubmed",
+                    "id": ",".join(chunk),
+                    "retmode": "json"
+                }
+                sum_response = requests.get(summary_url, params=summary_params, timeout=10)
+                if sum_response.status_code != 200:
+                    # Keep going, but make the missing chunk visible
+                    print(f"PubMed summaries {i}-{i + len(chunk) - 1} skipped: HTTP {sum_response.status_code}")
+                    continue
+                result_dict = sum_response.json().get("result", {})
+                # uids are stored in result["uids"], actual data in result[uid]
+                for uid in result_dict.get("uids", []):
+                    if uid in result_dict:
+                        results.append(result_dict[uid])
+
+            return results
+        except Exception as e:
+            print(f"Error retrieving from PubMed: {e}")
+            return results
diff --git a/www/services/etl.py b/www/services/etl.py
new file mode 100644
index 000000000..ed18cfd0a
--- /dev/null
+++ b/www/services/etl.py
@@ -0,0 +1,139 @@
+import pandas as pd
+from typing import Union, List, Dict, Any
+from .api_retriever import APIRetriever
+from .standardizer import Standardizer
+from .validator import Validator
+from .format_functions import format_sr_column
+
+class ETLPipeline:
+    """
+    Main ETL Pipeline dispatcher for Bibliometrix.
+
+    This class acts as a central Dispatcher. It evaluates the type and origin of
+    the input source data, routes it to the appropriate Extractor (API or file parser),
+    passes it through the Standardizer to enforce the schema, calculates secondary
+    tags (like SR), and finally validates the output.
+    """
+
+    @classmethod
+    def convert2df(cls, source_data: Union[str, pd.DataFrame, List[Dict[str, Any]]],
+                   source_type: str,
+                   is_api: bool = False,
+                   query: str = "",
+                   original_filename: str = "") -> pd.DataFrame:
+        """
+        The main dispatcher function executing the Extract -> Transform -> Validate -> Load pipeline.
+
+        Args:
+            source_data (Union[str, pd.DataFrame, List[Dict[str, Any]]]): The raw data source.
+                Can be a filepath, a raw DataFrame, or a list of dictionaries.
+            source_type (str): The origin of the data (e.g., "Scopus", "Dimensions", "PubMed", "OpenAlex").
+            is_api (bool, optional): Flag indicating whether extraction should occur via live API query. Defaults to False.
+            query (str, optional): The API search query, required if is_api is True. Defaults to "".
+            original_filename (str, optional): Preserved filename used to infer data formats for manual uploads. Defaults to "".
+
+        Returns:
+            pd.DataFrame: A fully standardized, validated Bibliometrix-compatible DataFrame.
+
+        Raises:
+            ValueError: If the source type, file format, or API is unsupported, or if
+                extraction produced no records.
+        """
+        source_key = str(source_type).upper()
+
+        # Phase 1: EXTRACT
+        raw_data = None
+        if is_api:
+            if source_key == "OPENALEX":
+                raw_data = APIRetriever.get_openalex(query)
+            elif source_key == "PUBMED":
+                raw_data = APIRetriever.get_pubmed(query)
+            else:
+                raise ValueError(f"API extraction not supported for {source_type}")
+        elif isinstance(source_data, str):
+            # Use original_filename if provided, otherwise fallback to source_data path
+            file_to_check = original_filename if original_filename else source_data
+            lowered_name = file_to_check.lower()
+
+            # Manual download parsing
+            if source_key == "SCOPUS" and lowered_name.endswith('.csv'):
+                raw_data = pd.read_csv(source_data)
+            elif source_key == "DIMENSIONS" and lowered_name.endswith('.xlsx'):
+                raw_data = pd.read_excel(source_data, skiprows=1)
+            elif source_key == "DIMENSIONS" and lowered_name.endswith('.csv'):
+                raw_data = pd.read_csv(source_data, skiprows=1)
+            elif source_key == "PUBMED" and lowered_name.endswith('.txt'):
+                from .parsers import parse_pubmed_data
+                raw_data = parse_pubmed_data(source_data)
+            elif source_key == "WOS":
+                from .parsers import parse_wos_data
+                raw_data = parse_wos_data(source_data)
+            else:
+                raise ValueError(f"Unsupported manual file format for {source_type} (file: {file_to_check})")
+        elif isinstance(source_data, (pd.DataFrame, list)):
+            raw_data = source_data
+        else:
+            raise ValueError("Invalid source_data format")
+
+        # Guard against an extractor returning None (len(None) would raise TypeError)
+        if raw_data is None or len(raw_data) == 0:
+            raise ValueError("No data extracted.")
+
+        # Phase 2: TRANSFORM
+        standardized_df = Standardizer.apply_mapping_and_types(raw_data, source_type)
+
+        # Phase 3 & 4: CALCULATED FIELDS (SR)
+        # format_sr_column expects an entry in a source database's own layout, but the
+        # frame is already standardized, so every row is re-packed as a mock WoS entry.
+        standardized_df["SR"] = [
+            cls._build_sr(authors, year, journal, source_key)
+            for authors, year, journal in zip(
+                standardized_df["AU"], standardized_df["PY"], standardized_df["SO"]
+            )
+        ]
+
+        # Phase 5: VALIDATION
+        Validator.validate(standardized_df)
+
+        return standardized_df
+
+    @staticmethod
+    def _sr_author(authors, source_key: str) -> str:
+        """Return the first author as "Surname, Given" for the mock WoS entry.
+
+        Falls back to "Unknown, U." when the record has no usable first author.
+        """
+        try:
+            first = authors[0] if len(authors) > 0 else None
+        except TypeError:
+            # None / NaN instead of a list: no authors recorded
+            first = None
+        if not isinstance(first, str):
+            return "Unknown, U."
+        if "," in first:
+            return first
+        parts = first.split()
+        if len(parts) < 2:
+            return first
+        if source_key == "PUBMED":
+            # PubMed names are "Surname Initials" (e.g. "van der Velde JH"):
+            # the last token is the initials, everything before it the surname
+            return f"{' '.join(parts[:-1])}, {parts[-1]}"
+        # Otherwise assume "Given Surname" order (last token is the surname)
+        return f"{parts[-1]}, {' '.join(parts[:-1])}"
+
+    @staticmethod
+    def _build_sr(authors, year, journal, source_key: str) -> str:
+        """Build the SR short reference of one standardized record via format_sr_column."""
+        # format_sr_column for Web_of_Science .txt expects AU, PY and SO as lists of strings
+        mock_entry = {
+            "AU": [ETLPipeline._sr_author(authors, source_key)],
+            "PY": [str(year)],
+            "SO": [str(journal)],
+        }
+        try:
+            return format_sr_column(mock_entry, 'Web_of_Science', '.txt')
+        except Exception as e:
+            # Best effort: one malformed record must not abort the load, but say so
+            print(f"SR generation failed, using placeholder: {e}")
+            return "Unknown, 0000, Unknown"
diff --git a/www/services/histnetwork.py b/www/services/histnetwork.py
index 7848d9744..10fc2c70e 100644
--- a/www/services/histnetwork.py
+++ b/www/services/histnetwork.py
@@ -34,7 +34,7 @@ def histNetwork(df, min_citations=0, sep=";", network=True):
     # Fill missing values in TC
     M['TC'] = M['TC'].fillna(0)
 
-    if db == "Web_of_Science":
+    if db in ["Web_of_Science", "OPENALEX", "PUBMED"]:
         results = wos(M, min_citations=min_citations, sep=sep, network=network)
     elif db == "Scopus":
         results = scopus(M, min_citations=min_citations, sep=sep, network=network)
@@ -47,8 +47,6 @@ def histNetwork(df, min_citations=0, sep=";", network=True):
 
 def wos(M, min_citations, sep, network):
 
-    print("\nWOS DB:\nSearching local citations (LCS) by reference items (SR) and DOIs...\n")
-
     # Sort data by publication year
     M = M.sort_values(by="PY").reset_index(drop=True)
 
@@ -56,39 +54,70 @@ def wos(M, min_citations, sep, network):
     M['Paper'] = np.arange(0, len(M))
     M['nLABEL'] = np.arange(0, len(M))
 
-    # Process cited references (CR)
-    CR = []
-    for i, refs in enumerate(M['CR']):
-        for ref in refs:
-            # Extract DOI
-            doi = ""
-            if 'DOI' in ref:
-                parts = ref.split('DOI', 1)
-                doi = parts[1].strip() if len(parts) > 1 else ""
-            # Extract AU, PY, SO
-            ref_parts = ref.split(',')
-            au = ref_parts[0].replace('.', ' ').strip() if len(ref_parts) > 0 else ""
-            py = ref_parts[1].strip() if len(ref_parts) > 1 else ""
-            so = ref_parts[2].strip() if len(ref_parts) > 2 else ""
-            sr = f"{au}, {py}, {so}"
-            CR.append({'ref': ref, 'Paper': i, 'DI': doi, 'AU': au, 'PY': py, 'SO': so, 'SR': sr})
-
-    print(f"\nAnalyzing {len(CR)} reference items...\n")
-
-    CR_df = pd.DataFrame(CR)
-
-    # Add LABEL field to M and CR
-    M['LABEL'] = M['SR_FULL'].fillna('').str.upper() + " DOI " + M['DI'].fillna('').str.upper()
-    M['LABEL'] = M['LABEL'].str.strip()
-    CR_df['LABEL'] = CR_df['SR'].fillna('').str.upper() + " DOI " + CR_df['DI'].fillna('').str.upper()
-    CR_df['LABEL'] = CR_df['LABEL'].str.strip()
-
-    # Match references with papers (left join as in R)
-    L = pd.merge(M, CR_df, on='LABEL', how='left', suffixes=('_M', '_CR'))
-    L = L[L['Paper_CR'].notnull()]
-    L['CITING'] = M.loc[L['Paper_CR'], 'LABEL'].values
-    L['nCITING'] = M.loc[L['Paper_CR'], 'nLABEL'].values
-    L['CIT_PY'] = M.loc[L['Paper_CR'], 'PY'].values
+    if M['DB'].iloc[0] == "OPENALEX":
+        print("\nOPENALEX DB:\nSearching local citations using OpenAlex IDs (UT) and referenced works (CR)...\n")
+        sr_col = 'SR_FULL' if 'SR_FULL' in M.columns else 'SR'
+        M['LABEL'] = M[sr_col].fillna('').str.upper() + " DOI " + M['DI'].fillna('').str.upper()
+        M['LABEL'] = M['LABEL'].str.strip()
+        # Explode CR to get one row per reference
+        CR_df = M[['UT', 'CR', 'Paper', 'nLABEL', 'PY']].explode('CR').dropna(subset=['CR']).rename(columns={'CR': 'ref', 'Paper': 'Paper_CR', 'nLABEL': 'nCITING', 'PY': 'CIT_PY'})
+        # Clean UT to match refs
+        M['UT_CLEAN'] = M['UT'].str.upper().str.strip()
+        CR_df['ref'] = CR_df['ref'].str.upper().str.strip()
+        
+        # Match references with papers (L contains all matches)
+        L = pd.merge(M, CR_df, left_on='UT_CLEAN', right_on='ref', how='right')
+        L = L[L['Paper'].notnull()]
+        
+        print("\nAfter filtering:")
+        print(L.shape)
+        
+        # Display the HTTPS link mapping just like the friend's output
+        print("\nCitation Mapping (Citing Paper -> Cited Reference):")
+        print(L[['UT_x', 'UT_CLEAN', 'Paper', 'Paper_CR']].head(20))
+        
+        print(type(L))
+        print(L.columns)
+        
+        L['CITING'] = M.loc[L['Paper_CR'], 'LABEL'].values
+        
+    else:
+        print("\nWOS/PUBMED DB:\nSearching local citations (LCS) by reference items (SR) and DOIs...\n")
+        # Process cited references (CR)
+        CR = []
+        for i, refs in enumerate(M['CR']):
+            for ref in refs:
+                # Extract DOI
+                doi = ""
+                if 'DOI' in ref:
+                    parts = ref.split('DOI', 1)
+                    doi = parts[1].strip() if len(parts) > 1 else ""
+                # Extract AU, PY, SO
+                ref_parts = ref.split(',')
+                au = ref_parts[0].replace('.', ' ').strip() if len(ref_parts) > 0 else ""
+                py = ref_parts[1].strip() if len(ref_parts) > 1 else ""
+                so = ref_parts[2].strip() if len(ref_parts) > 2 else ""
+                sr = f"{au}, {py}, {so}"
+                CR.append({'ref': ref, 'Paper': i, 'DI': doi, 'AU': au, 'PY': py, 'SO': so, 'SR': sr})
+
+        print(f"\nAnalyzing {len(CR)} reference items...\n")
+
+        CR_df = pd.DataFrame(CR)
+
+        # Add LABEL field to M and CR
+        sr_col = 'SR_FULL' if 'SR_FULL' in M.columns else 'SR'
+        M['LABEL'] = M[sr_col].fillna('').str.upper() + " DOI " + M['DI'].fillna('').str.upper()
+        M['LABEL'] = M['LABEL'].str.strip()
+        if not CR_df.empty:
+            CR_df['LABEL'] = CR_df['SR'].fillna('').str.upper() + " DOI " + CR_df['DI'].fillna('').str.upper()
+            CR_df['LABEL'] = CR_df['LABEL'].str.strip()
+
+        # Match references with papers (left join as in R)
+        L = pd.merge(M, CR_df, on='LABEL', how='left', suffixes=('_M', '_CR'))
+        L = L[L['Paper_CR'].notnull()]
+        L['CITING'] = M.loc[L['Paper_CR'], 'LABEL'].values
+        L['nCITING'] = M.loc[L['Paper_CR'], 'nLABEL'].values
+        L['CIT_PY'] = M.loc[L['Paper_CR'], 'PY'].values
 
     # Compute Local Citation Scores (LCS)
     LCS = L.groupby('nLABEL').size().reset_index(name='LCS')
@@ -115,15 +144,8 @@ def wos(M, min_citations, sep, network):
                 M.at[paper_idx, 'LCR'] = row['LCR']
 
         # Assign unique names to duplicated LABELs
-        st = False
-        i = 0
-        while not st:
-            ind = M['LABEL'].duplicated(keep=False)
-            if ind.any():
-                i += 1
-                M.loc[ind, 'LABEL'] = M.loc[ind, 'LABEL'] + f"-{chr(96 + i)}"
-            else:
-                st = True
+        counts = M.groupby('LABEL').cumcount()
+        M['LABEL'] = M['LABEL'] + counts.apply(lambda x: f"-{chr(96 + x)}" if x > 0 else "")
         M.index = M['LABEL'].str.strip()
 
         M['LCR'] = M['LCR'].fillna('')
diff --git a/www/services/standardizer.py b/www/services/standardizer.py
new file mode 100644
index 000000000..125b6a2b4
--- /dev/null
+++ b/www/services/standardizer.py
@@ -0,0 +1,257 @@
+import pandas as pd
+from typing import List, Dict, Any, Union
+
+class Standardizer:
+    """
+    Standardize bibliographic records from heterogeneous sources to the WoS schema.
+
+    Source-specific column names (e.g. OpenAlex's 'referenced_works') are translated
+    to Web of Science tags (e.g. 'CR') through the MAPPING lookup table, and every
+    output column is coerced to a fixed type: lists of strings for LIST_COLUMNS,
+    integers for INTEGER_COLUMNS and plain strings for everything else. Missing
+    values become [], 0 or "" respectively, so the result never contains nulls.
+    """
+
+    # WoS Standard Schema (also the column order of the returned DataFrame)
+    STANDARD_COLUMNS = [
+        "DB", "UT", "DI", "PMID", "TI", "SO", "JI", "PY", "DT", "LA", "TC",
+        "AU", "AF", "C1", "RP", "CR", "DE", "ID", "AB", "VL", "IS", "BP", "EP", "SR"
+    ]
+
+    LIST_COLUMNS = ["AU", "AF", "C1", "CR", "DE", "ID"]
+    INTEGER_COLUMNS = ["TC", "PY"]
+
+    # Mapping Dictionary (Lookup Strategy)
+    # Maps proprietary column names to WoS tags.
+    # Extend this as needed for Scopus, Dimensions, etc.
+    MAPPING = {
+        # OpenAlex Mapping
+        "id": "UT",
+        "doi": "DI",
+        "title": "TI",
+        "publication_year": "PY",
+        "type": "DT",
+        "language": "LA",
+        "cited_by_count": "TC",
+        "referenced_works": "CR",
+        # PubMed Mapping
+        "uid": "PMID",
+        # "title" is already mapped above to "TI"
+        "source": "SO",
+        "pubdate": "PY",
+        "pubtype": "DT",
+        "lang": "LA",
+        "pmc": "UT",
+        "volume": "VL",
+        "issue": "IS",
+        "pages": "BP",
+        # Scopus Mapping
+        "Authors": "AU",
+        "Author(s) ID": "AF",
+        "Title": "TI",
+        "Year": "PY",
+        "Source title": "SO",
+        "Volume": "VL",
+        "Issue": "IS",
+        "Page start": "BP",
+        "Page end": "EP",
+        "Cited by": "TC",
+        "DOI": "DI",
+        "Document Type": "DT",
+        "Source": "DB",
+        "Affiliations": "C1",
+        "Author Keywords": "DE",
+        "Index Keywords": "ID",
+        "Abstract": "AB",
+        # Dimensions Mapping
+        "Publication ID": "UT",
+        "PubYear": "PY",
+        "Journal": "SO",
+        "Times cited": "TC",
+    }
+
+    @staticmethod
+    def _is_missing(val: Any) -> bool:
+        """Return True for None and for scalar null markers such as NaN or pd.NA."""
+        # is_scalar guards pd.isna, which returns an array for list-like input.
+        return val is None or (pd.api.types.is_scalar(val) and bool(pd.isna(val)))
+
+    @staticmethod
+    def _parse_multi_value(val: Any, delimiter: str = ";") -> List[str]:
+        """
+        Parse a multi-value field into a list of non-empty, stripped strings.
+
+        Lists and tuples are cleaned element-wise, strings are split on
+        `delimiter`, missing values give [] and any other scalar becomes a
+        one-element list.
+        """
+        if Standardizer._is_missing(val):
+            return []
+        if isinstance(val, (list, tuple)):
+            items = [str(v).strip() for v in val if not Standardizer._is_missing(v)]
+        elif isinstance(val, str):
+            items = [part.strip() for part in val.split(delimiter)]
+        else:
+            items = [str(val).strip()]
+        # Drop empty entries so list and string inputs are cleaned identically.
+        return [item for item in items if item]
+
+    @staticmethod
+    def _parse_scalar_str(val: Any) -> str:
+        """
+        Coerce a value to a stripped string; missing values give "".
+
+        For a list or tuple only the first element is used ("" when empty).
+        """
+        if isinstance(val, (list, tuple)):
+            # Recurse so a null or padded first element is cleaned as well.
+            return Standardizer._parse_scalar_str(val[0]) if val else ""
+        if Standardizer._is_missing(val):
+            return ""
+        return str(val).strip()
+
+    @staticmethod
+    def _parse_scalar_int(val: Any) -> int:
+        """
+        Coerce a value to int, returning 0 when nothing numeric can be extracted.
+
+        Numeric input (2024, 2024.0, "2024", "12345") is converted as a whole.
+        A string that is not a plain number (e.g. "2024 Jan 15") falls back to
+        its first run of four digits, which extracts the year from a date.
+        For a list or tuple only the first element is used.
+        """
+        if isinstance(val, (list, tuple)):
+            val = val[0] if val else None
+        if Standardizer._is_missing(val):
+            return 0
+        try:
+            # Convert the whole value first: searching for four digits up front
+            # would truncate a count such as "12345" to 1234.
+            return int(float(val))
+        except (ValueError, TypeError, OverflowError):
+            # ValueError: non-numeric text or NaN; OverflowError: infinity.
+            pass
+        if isinstance(val, str):
+            import re
+            match = re.search(r'\d{4}', val)
+            if match:
+                return int(match.group())
+        return 0
+
+    @staticmethod
+    def _display_name(obj: Any) -> str:
+        """Return obj['display_name'] when obj is a dict with a non-empty one, else ""."""
+        if isinstance(obj, dict):
+            return obj.get("display_name") or ""
+        return ""
+
+    @staticmethod
+    def _openalex_author_names(authorships: Any) -> List[str]:
+        """Collect author display names from an OpenAlex 'authorships' value."""
+        if not isinstance(authorships, list):
+            return []
+        names = []
+        for entry in authorships:
+            # Tolerate a missing or null 'author' instead of raising.
+            author = entry.get("author") if isinstance(entry, dict) else None
+            name = Standardizer._display_name(author)
+            if name:
+                names.append(name)
+        return names
+
+    @staticmethod
+    def _openalex_institution_names(authorships: Any) -> List[str]:
+        """Collect institution display names from an OpenAlex 'authorships' value."""
+        if not isinstance(authorships, list):
+            return []
+        names = []
+        for entry in authorships:
+            institutions = entry.get("institutions") if isinstance(entry, dict) else None
+            if not isinstance(institutions, list):
+                # Null or malformed 'institutions': nothing to collect.
+                continue
+            for inst in institutions:
+                name = Standardizer._display_name(inst)
+                if name:
+                    names.append(name)
+        return names
+
+    @staticmethod
+    def _pubmed_author_names(authors: Any) -> List[str]:
+        """Collect author names from a PubMed 'authors' value (a list of dicts)."""
+        if not isinstance(authors, list):
+            return []
+        return [a["name"] for a in authors if isinstance(a, dict) and a.get("name")]
+
+    @classmethod
+    def apply_mapping_and_types(cls, data: Union[pd.DataFrame, List[Dict[str, Any]]], db_source: str) -> pd.DataFrame:
+        """
+        Standardize raw records into a DataFrame with exactly STANDARD_COLUMNS.
+
+        Pipeline:
+        1. Rename source columns to WoS tags using MAPPING.
+        2. Drop duplicate tags produced by the rename (first occurrence wins).
+        3. Add any missing standard column and stamp DB with the source name.
+        4. Extract nested fields for OpenAlex (authors, institutions, source
+           title) and PubMed (authors).
+        5. Coerce every standard column to its contracted type.
+
+        The input object is not modified.
+
+        Args:
+            data: Raw records, either a DataFrame or a list of dicts.
+            db_source: Source identifier (e.g. "OPENALEX", "PUBMED", "SCOPUS");
+                compared case-insensitively and stored upper-cased in DB.
+
+        Returns:
+            pd.DataFrame: One row per input record, columns in STANDARD_COLUMNS
+            order, with no null values.
+        """
+        source = db_source.upper()
+        frame = pd.DataFrame(data) if isinstance(data, list) else data
+
+        # 1. Rename columns using mapping (returns a new frame; input untouched)
+        df = frame.rename(columns=cls.MAPPING)
+
+        # 2. Several source names map to one tag (e.g. 'id' and 'pmc' -> 'UT').
+        #    Duplicate labels make df[col] a DataFrame and break the per-column
+        #    coercion below, so keep only the first occurrence of each label.
+        duplicated = df.columns.duplicated()
+        if duplicated.any():
+            df = df.loc[:, ~duplicated].copy()
+
+        # 3. Ensure all standard columns exist
+        for col in cls.STANDARD_COLUMNS:
+            if col not in df.columns:
+                df[col] = None
+
+        # Stamp every row with the source database (overwrites an existing DB)
+        df["DB"] = source
+
+        # 4. Extract features from nested JSON for OpenAlex and PubMed
+        if source == "OPENALEX":
+            if "authorships" in df.columns:
+                df["AU"] = df["authorships"].apply(cls._openalex_author_names)
+                df["C1"] = df["authorships"].apply(cls._openalex_institution_names)
+            if "host_venue" in df.columns:  # Older API format
+                df["SO"] = df["host_venue"].apply(cls._display_name)
+            elif "primary_location" in df.columns:  # Newer API format
+                df["SO"] = df["primary_location"].apply(
+                    lambda loc: cls._display_name(loc.get("source")) if isinstance(loc, dict) else ""
+                )
+        elif source == "PUBMED":
+            if "authors" in df.columns:
+                df["AU"] = df["authors"].apply(cls._pubmed_author_names)
+
+        # 5. Type Enforcement and Null Handling
+        for col in cls.STANDARD_COLUMNS:
+            if col in cls.LIST_COLUMNS:
+                parser = cls._parse_multi_value
+            elif col in cls.INTEGER_COLUMNS:
+                parser = cls._parse_scalar_int
+            else:
+                parser = cls._parse_scalar_str
+            df[col] = df[col].apply(parser)
+
+        # Return only the standard columns
+        return df[cls.STANDARD_COLUMNS]
diff --git a/www/services/validator.py b/www/services/validator.py
new file mode 100644
index 000000000..96b0ede6a
--- /dev/null
+++ b/www/services/validator.py
@@ -0,0 +1,75 @@
+import pandas as pd
+from typing import List
+
+class Validator:
+    """
+    Validate that a standardized DataFrame complies with the Web of Science schema.
+
+    Final gate of the ETL pipeline: checks that all standard columns are present,
+    that no cell is null, and that every standard column honours its type contract
+    (lists for multi-value fields, integers for year/citation counts, strings for
+    everything else).
+    """
+
+    STANDARD_COLUMNS = [
+        "DB", "UT", "DI", "PMID", "TI", "SO", "JI", "PY", "DT", "LA", "TC",
+        "AU", "AF", "C1", "RP", "CR", "DE", "ID", "AB", "VL", "IS", "BP", "EP", "SR"
+    ]
+
+    LIST_COLUMNS = ["AU", "AF", "C1", "CR", "DE", "ID"]
+    INTEGER_COLUMNS = ["TC", "PY"]
+
+    @classmethod
+    def validate(cls, df: pd.DataFrame) -> bool:
+        """
+        Run all validation checks on the standardized DataFrame.
+
+        Args:
+            df (pd.DataFrame): The DataFrame processed by the Standardizer.
+
+        Returns:
+            bool: True if the DataFrame passes all validation checks.
+
+        Raises:
+            ValueError: If the DataFrame is empty, is missing mandatory columns,
+                        contains null values (in any column), or violates a type contract.
+        """
+        if df.empty:
+            raise ValueError("Validation Error: DataFrame is empty.")
+
+        # 1. Check Mandatory Columns
+        missing_cols = [col for col in cls.STANDARD_COLUMNS if col not in df.columns]
+        if missing_cols:
+            raise ValueError(f"Validation Error: Missing mandatory columns: {missing_cols}")
+
+        # 2. Check for Nulls (NaN or None) across every column, standard or not
+        null_counts = df.isnull().sum()
+        if null_counts.sum() > 0:
+            raise ValueError(f"Validation Error: DataFrame contains null values:\n{null_counts[null_counts > 0]}")
+
+        # 3. Check Data Types
+        for col in cls.STANDARD_COLUMNS:
+            column = df[col]
+            if col in cls.LIST_COLUMNS:
+                if not column.apply(lambda x: isinstance(x, list)).all():
+                    raise ValueError(f"Validation Error: Column '{col}' must be a list of strings.")
+            elif col in cls.INTEGER_COLUMNS:
+                if pd.api.types.is_integer_dtype(column):
+                    continue
+                if column.apply(lambda x: isinstance(x, int)).all():
+                    continue
+                # Values that merely cast to int are tolerated; the Standardizer
+                # is expected to have produced real integers already.
+                try:
+                    column.astype(int)
+                except (ValueError, TypeError, OverflowError) as err:
+                    # A non-castable object (e.g. a list) raises TypeError; report
+                    # it as the documented ValueError instead of leaking it.
+                    raise ValueError(f"Validation Error: Column '{col}' must contain integers.") from err
+            else:
+                # Inspect the values themselves: the dtype alone does not prove
+                # every element is a str (an object column can hold anything).
+                if not column.apply(lambda x: isinstance(x, str)).all():
+                    raise ValueError(f"Validation Error: Column '{col}' must contain strings.")
+
+        return True