gutenbergtools · rtonsing · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026 · Jun 10, 2026
diff --git a/dopull/dopull.py b/dopull/dopull.py
@@ -19,7 +19,7 @@
 except ImportError:
     pwd = None
 
-VERSION = "2026.05.29"
+VERSION = "2026.06.09"
 
 SCRIPT_DIR = Path(__file__).resolve().parent
 # Parent directory of where to look for files to push out.
@@ -33,11 +33,9 @@
 LOGFILE = Path(os.getenv("LOGFILE", str(SCRIPT_DIR / "logs/dopull.log")))
 # Lock file to prevent multiple dopulls running at the same time.
 PULLRUNNING = Path(os.getenv("PULLRUNNING", str(SCRIPT_DIR / ".dopull-running")))
-# Trigger directory for JSON processing on ibiblio (kept for compatibility with shell config).
 IBIBLIO = "gutenberg.login.ibiblio.org"
 PRIVATE = os.getenv('PRIVATE') or ''
 IBIBLIO_DOPULL_DIR = os.path.join(PRIVATE, 'logs', 'dopull')
-IBIBLIO_JSON_DIR = os.path.join(PRIVATE, 'logs', 'json')
 # Email address to send trouble reports to.
 BOSS = os.getenv("BOSS", "pterodactyl@fastmail.com")
 LOGGER = logging.getLogger("dopull")
@@ -125,7 +123,6 @@ def main() -> int:
     • For each trigger file found in "push" directory,
         ◦ Get owner of file (user)
         ◦ Trigger ebook update by copying it to the ibiblio dopull dir.
-        ◦ If file is .json, trigger ebook indexing by copying it to the ibiblio JSON dir.
         ◦ Move file to DONE archive
         ◦ Send success/fail email to user
     """
@@ -179,16 +176,6 @@ def process_trigger_file(trigger_file: Path) -> str:
             append_out(f"Failed to trigger ibiblio update for {filename}: {e}")
             return "failure"
 
-        # Handle .json files for ebook indexing.
-        if trigger_file.suffix.lower() == ".json":
-            try:
-                dest = f"{IBIBLIO}:{IBIBLIO_JSON_DIR}/{filename}"
-                subprocess.run(["scp", str(trigger_file), dest], check=True)
-                append_out(f"Copied {filename} to ibiblio to trigger ebook indexing.")
-            except Exception as e:
-                append_out(f"Failed to trigger ebook indexing for {filename}: {e}")
-                return "failure"
-
         # If we got to here, all is OK, move trigger file to the DONE directory,
         # otherwise, it will be retried on the next run.
         try:

diff --git a/puller.py b/puller.py
@@ -42,11 +42,24 @@
 # These are where .zip.trig files go on ibiblio :
 DOPULL_LOG_DIR = os.path.join(PRIVATE, 'logs', 'dopull')
 DOPUSH_LOG_DIR = os.path.join(PRIVATE, 'logs', 'dopush')
+JSON_LOG_DIR = os.path.join(PRIVATE, 'logs', 'json')
 
 
 def scan_dopull_log():
-    """ 
-    Scan the dopull log directory for new files.
+    """
+    Scan DOPULL_LOG_DIR for new files.
+    Note: this does 3 things:
+    1. For all trigger files, it pulls the latest files from the upstream repo into the FILES directory.
+    2. Moves .json files to JSON_LOG_DIR for database processing, and creates a .zip.trig trigger file in DOPUSH_LOG_DIR.
+    3. Moves .zip.trig files to DOPUSH_LOG_DIR for database updates.
+    Both directories are processed by FileInfo.py. In the future, FileInfo.py should be updated to do the
+    appropriate processing for each file type; for now, this is a simple way to get the files where they
+    need to go without needing to change FileInfo.py.
+
+    If both a .zip.trig and a .json file are present for the same ebook number
+    (Workflow creates a .json file, then Errata Workbench creates a .zip.trig file),
+    this should be OK: the repo has all the changes, and we need the trigger file in any case.
+    The repo pull will occur twice, but the second will have no changes, and this should be too rare to worry about.
     """
     for filename in sorted(os.listdir(DOPULL_LOG_DIR)):
         mode = os.stat(os.path.join(DOPULL_LOG_DIR, filename))[stat.ST_MODE]
@@ -55,21 +68,46 @@ def scan_dopull_log():
             continue
 
         ebook_num = 0
-        m = re.match(r'^(\d+)\.zip\.trig$', filename)
+        m = re.match(r'^(\d+)\.(zip\.trig|json)$', filename)
         if m:
-            ebook_num = int(m.group(1))
+            ebook_num = m.group(1)
+            if not ebook_num.isdigit():
+                logging.error(f'Skipping invalid filename (non-numeric book number): {filename}')
+                continue
             logging.info(ebook_num)
             origin = f'{UPSTREAM_REPO_DIR}{ebook_num}.git/'
-            target_path = os.path.join(FILES, str(ebook_num))
+            target_path = os.path.join(FILES, ebook_num)
             logging.info(f'origin: {origin}, target_path: {target_path}')
-
-            if update_folder(origin, target_path):
-                shutil.move(os.path.join(DOPULL_LOG_DIR, filename),
-                             os.path.join(DOPUSH_LOG_DIR, filename))
-            else:
-                logging.error(f'failed to update {ebook_num}')
+
+            # Get the latest files from the upstream repo
+            if not update_folder(origin, target_path):
+                logging.error(f'failed to get files for {ebook_num}')
+                continue
+
+            # Now trigger database/catalog update
+            try:
+                if filename.endswith('.json'):
+                    # For .json files, move them to the JSON_LOG_DIR to add to the database
+                    shutil.move(os.path.join(DOPULL_LOG_DIR, filename),
+                                 os.path.join(JSON_LOG_DIR, filename))
+                    logging.info(f'moved {filename} to JSON log directory for processing.')
+
+                    # Create a corresponding .zip.trig trigger file
+                    trigger_file = os.path.join(DOPUSH_LOG_DIR, ebook_num + '.zip.trig')
+                    if not os.path.exists(trigger_file):
+                        with open(trigger_file, 'w') as file:
+                            pass
+                else:
+                    # Move file to the DOPUSH_LOG_DIR to trigger updating
+                    trigger_push = os.path.join(DOPUSH_LOG_DIR, filename)
+                    if not os.path.exists(trigger_push):
+                        shutil.move(os.path.join(DOPULL_LOG_DIR, filename), trigger_push)
+            except Exception as e:
+                logging.error(f'failed to trigger update for {ebook_num}: {e}')
+
     return
 
+
 def main():
     sys.exit(scan_dopull_log())