diff --git a/Doc/library/site.rst b/Doc/library/site.rst index 656fbd142dfb0f..b67573db5dddc5 100644 --- a/Doc/library/site.rst +++ b/Doc/library/site.rst @@ -356,7 +356,27 @@ Module contents This function used to be called unconditionally. -.. function:: addsitedir(sitedir, known_paths=None, *, defer_processing_start_files=False) +.. class:: StartupState(known_paths=None) + + Instances of this class are used as an accumulator for interpreter startup + configuration data, such as ``.pth`` and ``.start`` files, from one or more + site directories. These are used to batch the processing of these startup + files. The optional *known_paths* argument is a set of case-normalized + paths used to prevent duplicate :data:`sys.path` entries. With ``None`` + (the default), this set is built from the current :data:`sys.path`. + :func:`main` implicitly uses an instance of this class. + + .. method:: process() + + Apply the accumulated state by first adding the path extensions to + :data:`sys.path`, then executing the :file:`.start` file entry points + and :file:`.pth` file ``import`` lines (:ref:`deprecated + `). + + .. versionadded:: 3.15 + + +.. function:: addsitedir(sitedir, known_paths=None, *, startup_state=None) Add a directory to sys.path and parse the :file:`.pth` and :file:`.start` files found in that directory. Typically used in :mod:`sitecustomize` or @@ -366,17 +386,39 @@ Module contents used to prevent duplicate :data:`sys.path` entries. When ``None`` (the default), the set is built from the current :data:`sys.path`. - While :file:`.pth` and :file:`.start` files are always parsed, set - *defer_processing_start_files* to ``True`` to prevent processing the - startup data found in those files, so that you can process them explicitly - (this is typically used by the :func:`main` function). 
+ Pass an instance of :class:`StartupState` as *startup_state* to accumulate + startup data from multiple site directories before explicitly processing + with :meth:`StartupState.process`. The *known_paths* and *startup_state* + arguments cannot both be given. + + For example: + + .. code-block:: python + + state = site.StartupState() + for sitedir in site_dirs: + site.addsitedir(sitedir, startup_state=state) + state.process() + + Semantics and return values: + + * When only *sitedir* is given, startup configuration is processed before + the function returns, and ``None`` is returned. + * When *known_paths* is given, startup configuration is processed before the + function returns, and the updated *known_paths* is returned. + * When *startup_state* is given, startup configuration is **not** + processed, and the state instance is returned. It is up to the caller to + call :meth:`StartupState.process` on this instance. + * It is a :exc:`TypeError` to pass both *known_paths* and *startup_state*. .. versionchanged:: 3.15 - Also processes :file:`.start` files. See :ref:`site-start-files`. - All :file:`.pth` and :file:`.start` files are now read and - accumulated before any path extensions, ``import`` line execution, - or entry point invocations take place. + Also processes :file:`.start` files. See :ref:`site-start-files`. All + :file:`.pth` and :file:`.start` files are now read and accumulated + before any path extensions, entry point invocations, or ``import`` line + execution take place. + + The *startup_state* keyword-only argument was added. .. function:: getsitepackages() @@ -447,4 +489,3 @@ value greater than 2 if there is an error. * :pep:`370` -- Per user site-packages directory * :pep:`829` -- Startup entry points and the deprecation of import lines in ``.pth`` files * :ref:`sys-path-init` -- The initialization of :data:`sys.path`. 
- diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 8e6b1faa523f68..2c20f7d7b98b54 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -478,7 +478,15 @@ matching :file:`.start` file is found, ``import`` lines in :file:`.pth` files are ignored. There is no change to :data:`sys.path` extension lines in :file:`.pth` files. -(Contributed by Barry Warsaw in :gh:`148641`.) +The :mod:`site` module also provides :class:`site.StartupState` to batch +startup processing for multiple site directories, ensuring all static path +extensions are applied before any startup code is executed. :func:`site.main` +uses an instance of this class implicitly to batch process all startup +configuration files during normal interpreter startup. Callers of +:func:`site.addsitedir` can pass one of these explicitly to control startup +configuration file batch processing. + +(Contributed by Barry Warsaw in :gh:`148641` and :gh:`150542`.) .. _whatsnew315-abi3t: diff --git a/Lib/site.py b/Lib/site.py index 239ee0d6f57bce..e9c00e258aa171 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -157,34 +157,22 @@ def _init_pathinfo(): # PEP 829 implementation notes. # # Startup information (.pth and .start file information) can be processed in -# implicit or explicit batches. Implicit batches are handled by the site.py -# machinery automatically, while explicit batches are driven by user code and -# processed on boundaries defined by that code. -# -# addsitedir() calls which use the default defer_processing_start_files=False -# are self-contained: they create a per-call _StartupState, populate it from -# the site directory's .pth/.start files, run process() on it, and then throw -# the state away. This is implicit batching and in that case the -# _startup_state global variable stays None. +# implicit or explicit batches. 
Implicit batches are self-contained +# addsitedir() calls: they create a per-call StartupState, populate it from the +# site directory's .pth and .start files, run process() on it, and then throw the +# state away. # # main() needs different semantics: it accumulates state across multiple -# addsitedir() calls (user-site plus all global site-packages) so that -# every sys.path extension is visible *before* any startup code (.pth -# import lines and .start entry points) runs. Callers opt into this by -# passing defer_processing_start_files=True, which preserves the _StartupState -# into the global _startup_state. Subsequent addsitedir() calls (with -# or without defer_processing_start_files=True) then write into that -# same shared state, and a later process_startup_files() call flushes -# all the state and resets the global to None. +# addsitedir() calls (user-site plus all global site-packages) so that every +# sys.path extension is visible *before* any startup code (.start entry points +# and .pth import lines) runs. Callers can opt into the same behavior by +# creating a StartupState and passing it to addsitedir(); callers are responsible +# in this case for calling StartupState.process() themselves. # -# Here's the CRITICAL reentrancy invariant: process_startup_files() must clear -# the global _startup_state *before* calling state.process(), so that any -# reentrant site.addsitedir() calls reached from an exec'd .pth import line or -# a .start entry point falls into the per-call branch and gets its own fresh -# state. Otherwise the recursive addsitedir() would mutate the very dicts -# that the outer state.process() is iterating. This is the bug reported in -# gh-149504. -_startup_state = None +# Here's the CRITICAL reentrancy invariant: recursive site.addsitedir() calls +# reached from a .start entry point or an exec'd .pth import line must not +# mutate the StartupState currently being processed. 
Reentrant calls without +# an explicit startup_state therefore create their own fresh per-call state. def _read_pthstart_file(sitedir, name, suffix): @@ -238,10 +226,10 @@ def _read_pthstart_file(sitedir, name, suffix): return content.splitlines(), filename -class _StartupState: +class StartupState: """Per-batch accumulator for .pth and .start file processing. - A _StartupState collects sys.path extensions, deprecated .pth import + A StartupState collects sys.path extensions, deprecated .pth import lines, and .start entry points read from one or more site-packages directories. Calling process() applies them in PEP 829 order: paths are added to sys.path first, then import lines from .pth files (skipping @@ -250,34 +238,83 @@ class _StartupState: State lives entirely on the instance; there is no module-level pending state. This is what makes the module reentrancy-safe: a site.addsitedir() call reached recursively from an exec'd import line or a .start entry - point operates on a different _StartupState than the one being processed + point operates on a different StartupState than the one being processed by the outer call. The internal data is intentionally private; the public methods (read_pth_file, read_start_file, process) are the only supported write APIs. """ - __slots__ = ('_syspaths', '_importexecs', '_entrypoints') + __slots__ = ( + '_known_paths', + '_processed_sitedirs', + '_path_entries', + '_importexecs', + '_entrypoints', + ) + + def __init__(self, known_paths=None): + """Create an independent startup state. - def __init__(self): - # All three dicts map "" -> list + *known_paths* is a set of case-normalized paths already present + on sys.path, used to avoid duplicate path entries. When None + (the default), it is initialized from the current sys.path. + """ + self._known_paths = ( + _init_pathinfo() + if known_paths is None + else known_paths) + self._processed_sitedirs = set() + # The sys.path append ledger. 
This is a list of 2-tuples of the form + # (pthfile, path) where `pthfile` is the .pth file which is extending + # the path, and `path` is the directory to add to sys.path. Note that + # to preserve the interleaving semantics (i.e. .pth file paths are + # added after the sitedir in which the .pth file is found), `path` + # could be a sitedir, in which case `pthfile` will always be None. + self._path_entries = [] + # Both dicts map "" -> list # of items collected from that file. Mapping by filename lets us # cross-reference a .pth and its matching .start (PEP 829 import # suppression rule) and lets _print_error report the source file # when an entry fails. - self._syspaths = {} self._importexecs = {} self._entrypoints = {} - def read_pth_file(self, sitedir, name, known_paths): + def add_sitedir(self, sitedir, *, process_known_sitedirs=True): + sitedir, sitedircase = makepath(sitedir) + # Have we already processed this sitedir? + if sitedircase in self._processed_sitedirs: + return None + # In legacy known_paths mode, a known sitedir means its startup files + # were already processed by an earlier addsitedir() call, so skip it + # to preserve idempotency (gh-75723). In explicit StartupState mode, + # known_paths only tracks sys.path entries; a sitedir may already be + # on sys.path (for example from $PYTHONPATH, gh-149819) but still need + # its .pth and .start files processed once. The separate + # _processed_sitedirs set is what lets explicit batches distinguish + # "already on sys.path" from "startup files already read". + if not process_known_sitedirs and sitedircase in self._known_paths: + return None + # Record that we've processed this sitedir. + self._processed_sitedirs.add(sitedircase) + if sitedircase not in self._known_paths: + self._known_paths.add(sitedircase) + # Add the sitedir to the sys.path extension ledger. There is no + # .pth file to record. 
+ self._path_entries.append((None, sitedir)) + return sitedir + + def read_pth_file(self, sitedir, name, known_paths=None): """Parse a .pth file, accumulating sys.path extensions and import lines. Errors on individual lines do not abort processing of the rest of - the file (PEP 829). ``known_paths`` is the per-batch dedup + the file (PEP 829). ``known_paths`` is the per-batch deduplication ledger: any path already in it is skipped, and newly accepted paths are added to it so that subsequent .pth files in the same batch don't add them more than once. """ + if known_paths is None: + known_paths = self._known_paths lines, filename = _read_pthstart_file(sitedir, name, ".pth") if lines is None: return @@ -308,15 +345,16 @@ def read_pth_file(self, sitedir, name, known_paths): _trace(f"Error in {filename!r}, line {n:d}: {line!r}", exc) continue - # PEP 829 dedup: skip paths already seen in this batch. See - # _startup_state docstring above for batch lifetimes. + # PEP 829 dedup: skip paths already seen in this batch. if dircase in known_paths: _trace( f"In {filename!r}, line {n:d}: " f"skipping duplicate sys.path entry: {dir_}" ) else: - self._syspaths.setdefault(filename, []).append(dir_) + # Add this directory to the sys.path extension ledger, while + # also recording the .pth file it was found in. + self._path_entries.append((filename, dir_)) known_paths.add(dircase) def read_start_file(self, sitedir, name): @@ -355,18 +393,22 @@ def process(self): def _extend_syspath(self): # Duplicates have already been filtered (in existing sys.path or # across .pth files via known_paths), and entries are already - # abspath/normpath'd, so all that remains is to confirm the path - # exists. - for filename, dirs in self._syspaths.items(): - for dir_ in dirs: - if os.path.exists(dir_): + # abspath/normpath'd, so all that remains is to confirm that .pth + # file path entries exist before appending them. 
filename will be + # None for sitedir entries in the ledger, and these have already been + # checked for existence, so no need to do so again. + for filename, dir_ in self._path_entries: + if dir_ in sys.path: + continue + if filename is None or os.path.exists(dir_): + if filename is not None: _trace(f"Extending sys.path with {dir_} from {filename}") - sys.path.append(dir_) - else: - _print_error( - f"In {filename}: {dir_} does not exist; " - f"skipping sys.path append" - ) + sys.path.append(dir_) + else: + _print_error( + f"In {filename}: {dir_} does not exist; " + f"skipping sys.path append" + ) def _exec_imports(self): # For each `import` line we've seen in a .pth file, exec() it in @@ -435,26 +477,6 @@ def _execute_start_entrypoints(self): ) -def process_startup_files(): - """Flush any pending startup-file state accumulated during a batch. - - Used by main() (and any external caller that drove addsitedir() with - defer_processing_start_files=True) to apply the accumulated paths - and run the deferred import lines / entry points. - - Reentrancy: the active batch state is detached from _startup_state - *before* state.process() runs. This way, if an exec'd import line - or .start entry point itself calls site.addsitedir(), that call - creates its own per-call _StartupState rather than mutating the dicts - being iterated here. See gh-149504. - """ - global _startup_state - if _startup_state is None: - return - state, _startup_state = _startup_state, None - state.process() - - def addpackage(sitedir, name, known_paths): """Process a .pth file within the site-packages directory.""" if known_paths is None: @@ -463,101 +485,97 @@ def addpackage(sitedir, name, known_paths): else: reset = False - # If a batch is already in progress (for example, main() is still - # accumulating sitedirs), participate in the batch by writing into the - # shared _startup_state and letting the eventual process_startup_files() - # flush it. 
Otherwise this is a standalone call, so create a unique - # per-call state, populate it, and process it before returning. - if _startup_state is None: - state = _StartupState() - state.read_pth_file(sitedir, name, known_paths) - state.process() - else: - _startup_state.read_pth_file(sitedir, name, known_paths) + # Although never documented, the semantics of addpackage() is to fully + # process a single sitedir. + state = StartupState(known_paths) + state.read_pth_file(sitedir, name) + state.process() return None if reset else known_paths -def addsitedir(sitedir, known_paths=None, *, defer_processing_start_files=False): - """Add 'sitedir' argument to sys.path if missing and handle startup - files.""" - global _startup_state +def addsitedir(sitedir, known_paths=None, *, startup_state=None): + """Add a site directory and process its startup files. + + If *startup_state* is given, add the directory's startup data to that + state without processing it, and the caller is responsible for calling + startup_state.process(). Otherwise, process the directory eagerly. + The *known_paths* and *startup_state* arguments cannot both be given. + """ _trace(f"Adding directory: {sitedir!r}") - if known_paths is None: + if known_paths is not None and startup_state is not None: + raise TypeError("known_paths and startup_state are mutually exclusive") + + # Select the processing mode. known_paths is the deduplication ledger, + # reset controls the historical return value, flush_now says whether this + # call processes startup data eagerly, and process_known_sitedirs controls + # whether site directories already present in known_paths still have their + # startup files read. + if startup_state is not None: + # Explicit batch mode: accumulate startup data in the caller's state. + # The caller is responsible for calling startup_state.process(). 
+ known_paths = startup_state._known_paths + reset = False + flush_now = False + process_known_sitedirs = True + elif known_paths is None: + # Standalone mode: derive known paths from current sys.path, process + # eagerly, and preserve the historical return value of None. known_paths = _init_pathinfo() reset = True + startup_state = StartupState(known_paths) + flush_now = True + process_known_sitedirs = False else: + # Legacy known_paths mode: process eagerly and return the caller's + # updated known_paths set. reset = False - sitedir, sitedircase = makepath(sitedir) - - # If the normcase'd new sitedir isn't already known, record it to - # prevent re-processing, append it to sys.path (only if not already - # present), and process all .pth and .start files found in that - # directory. Use a direct sys.path membership check for the append - # guard so that callers (like main()) can pass a fresh known_paths - # set while avoiding duplicate sys.path entries (gh-149819). - if sitedircase not in known_paths: - known_paths.add(sitedircase) - if sitedir not in sys.path: - sys.path.append(sitedir) + startup_state = StartupState(known_paths) + flush_now = True + process_known_sitedirs = False - try: - names = os.listdir(sitedir) - except OSError: - return None if reset else known_paths - - # Pick the _StartupState we'll write into. There are three cases: - # - # 1. A batch is already active (_startup_state is set, e.g. because - # main() previously called us with - # defer_processing_start_files=True). Participate in this batch by - # sharing the same state. Don't flush the state since the batch's - # eventual process_startup_files() will do that. - # - # 2. There is no active batch but the caller passed - # defer_processing_start_files=True. Preserve a fresh - # _StartupState into the global _startup_state so that subsequent - # addsitedir() calls participate in this batch, and so that the - # caller's later process_startup_files() finds it. - # - # 3. 
This is a standalone call (there is no active batch and - # defer_processing_start_files=False). Create a unique per-call - # state, populate it, process it, and then clear it. Per-call - # state is what makes reentrant addsitedir() safe; a recursive call - # from inside process() lands here too and gets its own independent - # state. - - if _startup_state is not None: - state = _startup_state - flush_now = False - elif defer_processing_start_files: - state = _startup_state = _StartupState() - flush_now = False - else: - state = _StartupState() - flush_now = True - - # The following phases are defined by PEP 829. - # Phases 1-3: Read .pth files, accumulating paths and import lines. - pth_names = sorted( - name for name in names - if name.endswith(".pth") and not name.startswith(".") - ) - for name in pth_names: - state.read_pth_file(sitedir, name, known_paths) - - # Phases 6-7: Discover .start files and accumulate their entry points. - # Import lines from .pth files with a matching .start file are - # discarded at flush time by _StartupState._exec_imports(). - start_names = sorted( - name for name in names - if name.endswith(".start") and not name.startswith(".") - ) - for name in start_names: - state.read_start_file(sitedir, name) + sitedir = startup_state.add_sitedir( + sitedir, + process_known_sitedirs=process_known_sitedirs, + ) + if sitedir is None: + if not flush_now: + return startup_state + return None if reset else known_paths + try: + names = os.listdir(sitedir) + except OSError: if flush_now: - state.process() + startup_state.process() + if not flush_now: + return startup_state + return None if reset else known_paths + + # The following phases are defined by PEP 829. + # Phases 1-3: Read .pth files, accumulating paths and import lines. 
+ pth_names = sorted( + name for name in names + if name.endswith(".pth") and not name.startswith(".") + ) + for name in pth_names: + startup_state.read_pth_file(sitedir, name) + + # Phases 6-7: Discover .start files and accumulate their entry points. + # Import lines from .pth files with a matching .start file are + # discarded at flush time by StartupState._exec_imports(). + start_names = sorted( + name for name in names + if name.endswith(".start") and not name.startswith(".") + ) + for name in start_names: + startup_state.read_start_file(sitedir, name) + + if flush_now: + startup_state.process() + + if not flush_now: + return startup_state return None if reset else known_paths @@ -671,21 +689,29 @@ def getusersitepackages(): return USER_SITE -def addusersitepackages(known_paths, *, defer_processing_start_files=False): - """Add a per user site-package to sys.path - Each user has its own python directory with site-packages in the - home directory. +def addusersitepackages(known_paths, *, startup_state=None): + """Add the per-user site-packages directory, if enabled. + + The user site directory is added only when user site-packages are enabled + and the directory exists. If *startup_state* is given, the directory's + startup data is accumulated there for later processing, and the caller is + responsible for calling startup_state.process(); otherwise it is processed + eagerly. Return *known_paths*, updated with any paths added by addsitedir(). """ - # get the per user site-package path - # this call will also make sure USER_BASE and USER_SITE are set + # Get the per-user site directory. This call will also make sure + # $USER_BASE and $USER_SITE are set. 
_trace("Processing user site-packages") user_site = getusersitepackages() if ENABLE_USER_SITE and os.path.isdir(user_site): - addsitedir(user_site, known_paths, defer_processing_start_files=defer_processing_start_files) + if startup_state is None: + addsitedir(user_site, known_paths) + else: + addsitedir(user_site, startup_state=startup_state) return known_paths + def getsitepackages(prefixes=None): """Returns a list containing all global site-packages directories. @@ -725,15 +751,28 @@ def getsitepackages(prefixes=None): sitepackages.append(os.path.join(prefix, "Lib", "site-packages")) return sitepackages -def addsitepackages(known_paths, prefixes=None, *, defer_processing_start_files=False): - """Add site-packages to sys.path""" + +def addsitepackages(known_paths, prefixes=None, *, startup_state=None): + """Add global site-packages directories, if they exist. + + Site-packages directories are computed from *prefixes*, or from the global + prefixes when *prefixes* is None. If *startup_state* is given, each + directory's startup data is accumulated there for later processing, and the + caller is responsible for calling startup_state.process(); otherwise each + directory is processed eagerly. Return *known_paths*, updated with any + paths added by addsitedir(). + """ _trace("Processing global site-packages") for sitedir in getsitepackages(prefixes): if os.path.isdir(sitedir): - addsitedir(sitedir, known_paths, defer_processing_start_files=defer_processing_start_files) + if startup_state is None: + addsitedir(sitedir, known_paths) + else: + addsitedir(sitedir, startup_state=startup_state) return known_paths + def setquit(): """Define new builtins 'quit' and 'exit'. 
@@ -899,7 +938,7 @@ def write_history(): atexit.register(write_history) -def venv(known_paths): +def venv(known_paths, *, startup_state=None): global PREFIXES, ENABLE_USER_SITE env = os.environ @@ -944,7 +983,7 @@ def venv(known_paths): _warn(f'Unexpected value in sys.exec_prefix, expected {site_prefix}, got {sys.exec_prefix}', RuntimeWarning) # Doing this here ensures venv takes precedence over user-site - addsitepackages(known_paths, [sys.prefix]) + addsitepackages(known_paths, [sys.prefix], startup_state=startup_state) if system_site == "true": PREFIXES += [sys.base_prefix, sys.base_exec_prefix] @@ -1009,18 +1048,18 @@ def main(): # Fix __file__ of already imported modules too. abs_paths() - known_paths = venv(known_paths=set()) + known_paths = set() + startup_state = StartupState(known_paths) + known_paths = venv(known_paths, startup_state=startup_state) if ENABLE_USER_SITE is None: ENABLE_USER_SITE = check_enableusersite() - known_paths = addusersitepackages(known_paths, defer_processing_start_files=True) - known_paths = addsitepackages(known_paths, defer_processing_start_files=True) + known_paths = addusersitepackages(known_paths, startup_state=startup_state) + known_paths = addsitepackages(known_paths, startup_state=startup_state) # PEP 829: flush accumulated data from all .pth and .start files. # Paths are extended first, then deprecated import lines are exec'd, # and finally .start entry points are executed — ensuring sys.path is - # fully populated before any startup code runs. process_startup_files() - # also clears the pending state so a later addsitedir() call does - # not re-apply already-processed data. - process_startup_files() + # fully populated before any startup code runs. 
+ startup_state.process() setquit() setcopyright() sethelper() diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index e2a81b82321ede..ad722ef5cf9222 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -187,20 +187,18 @@ def test_addsitedir(self): self.pth_file_tests(pth_file) def test_addsitedir_explicit_flush(self): - # addsitedir() reads .pth files and, with - # defer_processing_start_files=True, accumulates pending state - # without flushing. A subsequent process_startup_files() call - # then applies the paths and runs the import lines. + # addsitedir() reads .pth files and, with an explicit StartupState, + # accumulates pending state without flushing. A subsequent + # state.process() call then applies the paths and runs the import + # lines. pth_file = PthFile() # Ensure we have a clean slate. pth_file.cleanup(prep=True) with pth_file.create(): - # Pass defer_processing_start_files=True to prevent flushing. - site.addsitedir( - pth_file.base_dir, set(), - defer_processing_start_files=True) + state = site.StartupState(known_paths=set()) + site.addsitedir(pth_file.base_dir, startup_state=state) self.assertNotIn(pth_file.imported, sys.modules) - site.process_startup_files() + state.process() self.pth_file_tests(pth_file) def test_addsitedir_dotfile(self): @@ -915,16 +913,9 @@ class StartFileTests(unittest.TestCase): def setUp(self): self.enterContext(import_helper.DirsOnSysPath()) self.tmpdir = self.sitedir = self.enterContext(os_helper.temp_dir()) - # Each test gets its own _StartupState to drive the parser and - # processor methods directly. Defensively clear any _startup_state - # that a prior test may have left set via defer_processing_start_files - # without a corresponding process_startup_files() flush. 
- self.state = site._StartupState() - site._startup_state = None - self.addCleanup(self._reset_startup_state) - - def _reset_startup_state(self): - site._startup_state = None + # Each test gets its own StartupState to batch the parsing and + # explicitly invoke the processing. + self.state = site.StartupState() def _make_start(self, content, name='testpkg', basedir=None): """Write a .start file and return its basename. @@ -974,16 +965,17 @@ def _make_mod(self, contents, name='mod', *, package=False, on_path=False): sys.path.insert(0, extdir) return extdir - def _all_entrypoints(self): + def _all_entrypoints(self, state=None): """Flatten state._entrypoints into a list of (filename, entry) tuples.""" result = [] - for filename, entries in self.state._entrypoints.items(): + state = self.state if state is None else state + for filename, entries in state._entrypoints.items(): for entry in entries: result.append((filename, entry)) return result - def _just_entrypoints(self): - return [entry for filename, entry in self._all_entrypoints()] + def _just_entrypoints(self, state=None): + return [entry for filename, entry in self._all_entrypoints(state)] # There are two classes of tests here. Tests that start with `test_impl_` # know details about the implementation and they access non-public methods @@ -993,7 +985,21 @@ def _just_entrypoints(self): # integration semantics and functionality as a caller of the public # surfaces would see. 
- # --- _StartupState.read_start_file tests --- + # --- Basic StartupState implementation tests --- + + def test_impl_startupstate_defaults_to_sys_path(self): + sys.path.insert(0, self.sitedir) + state = site.StartupState() + self.assertIn(site.makepath(self.sitedir)[1], state._known_paths) + + def test_impl_startupstate_uses_supplied_known_paths(self): + known_paths = set() + state = site.StartupState(known_paths) + site.addsitedir(self.sitedir, startup_state=state) + self.assertIs(state._known_paths, known_paths) + self.assertIn(site.makepath(self.sitedir)[1], known_paths) + + # --- StartupState.read_start_file tests --- def test_impl_read_start_file_basic(self): self._make_start("os.path:join\n", name='foo') @@ -1120,7 +1126,7 @@ def test_impl_read_start_file_invalid_utf8_silently_skipped(self): self.assertEqual(self.state._entrypoints, {}) self.assertEqual(err.getvalue(), "") - # --- _StartupState.read_pth_file tests --- + # --- StartupState.read_pth_file tests --- def test_impl_read_pth_file_paths(self): subdir = os.path.join(self.sitedir, 'mylib') @@ -1128,7 +1134,7 @@ def test_impl_read_pth_file_paths(self): self._make_pth("mylib\n", name='foo') self.state.read_pth_file(self.sitedir, 'foo.pth', set()) fullname = os.path.join(self.sitedir, 'foo.pth') - self.assertIn(subdir, self.state._syspaths[fullname]) + self.assertIn((fullname, subdir), self.state._path_entries) def test_impl_read_pth_file_imports_collected(self): self._make_pth("import sys\n", name='foo') @@ -1141,7 +1147,7 @@ def test_impl_read_pth_file_imports_collected(self): def test_impl_read_pth_file_comments_and_blanks(self): self._make_pth("# comment\n\n \n", name='foo') self.state.read_pth_file(self.sitedir, 'foo.pth', set()) - self.assertEqual(self.state._syspaths, {}) + self.assertEqual(self.state._path_entries, []) self.assertEqual(self.state._importexecs, {}) def test_impl_read_pth_file_deduplication(self): @@ -1154,9 +1160,7 @@ def test_impl_read_pth_file_deduplication(self): 
self.state.read_pth_file(self.sitedir, 'a.pth', known_paths) self.state.read_pth_file(self.sitedir, 'b.pth', known_paths) # There is only one entry across both files. - all_dirs = [] - for dirs in self.state._syspaths.values(): - all_dirs.extend(dirs) + all_dirs = [dir_ for filename, dir_ in self.state._path_entries] self.assertEqual(all_dirs, [subdir]) def test_impl_read_pth_file_bad_line_continues(self): @@ -1167,7 +1171,7 @@ def test_impl_read_pth_file_bad_line_continues(self): with captured_stderr(): self.state.read_pth_file(self.sitedir, 'foo.pth', set()) fullname = os.path.join(self.sitedir, 'foo.pth') - self.assertIn(subdir, self.state._syspaths.get(fullname, [])) + self.assertIn((fullname, subdir), self.state._path_entries) def _flags_with_verbose(self, verbose): # Build a sys.flags clone with verbose overridden but every @@ -1222,9 +1226,9 @@ def test_impl_read_pth_file_locale_fallback(self): ): self.state.read_pth_file(self.sitedir, 'foo.pth', set()) fullname = os.path.join(self.sitedir, 'foo.pth') - self.assertIn(subdir, self.state._syspaths.get(fullname, [])) + self.assertIn((fullname, subdir), self.state._path_entries) - # --- _StartupState._execute_start_entrypoints tests --- + # --- StartupState._execute_start_entrypoints tests --- def test_impl_execute_entrypoints_with_callable(self): # An entry point with a callable. @@ -1304,7 +1308,7 @@ def bump(): import countmod self.assertEqual(countmod.call_count, 2) - # --- _StartupState._exec_imports tests --- + # --- StartupState._exec_imports tests --- def test_impl_exec_imports_suppressed_by_matching_start(self): # Import lines from foo.pth are suppressed when foo.start exists. 
@@ -1314,10 +1318,8 @@ def bump(): global call_count call_count += 1 """, name='countmod', package=False, on_path=True) - pth_fullname = os.path.join(self.sitedir, 'foo.pth') - start_fullname = os.path.join(self.sitedir, 'foo.start') - self.state._importexecs[pth_fullname] = ['import countmod; countmod.bump()'] - self.state._entrypoints[start_fullname] = ['os.path:join'] + self._make_start("os.path:join\n", name='foo') + self._make_pth("import countmod; countmod.bump()\n", name='foo') self.state._exec_imports() import countmod self.assertEqual(countmod.call_count, 0) @@ -1353,18 +1355,18 @@ def startup(): import epmod self.assertFalse(epmod.called) - # --- _StartupState._extend_syspath tests --- + # --- StartupState._extend_syspath tests --- def test_impl_extend_syspath_existing_dir(self): subdir = os.path.join(self.sitedir, 'extlib') os.mkdir(subdir) - self.state._syspaths['test.pth'] = [subdir] + self.state._path_entries.append(('test.pth', subdir)) self.state._extend_syspath() self.assertIn(subdir, sys.path) def test_impl_extend_syspath_nonexistent_dir(self): nonesuch = os.path.join(self.sitedir, 'nosuchdir') - self.state._syspaths['test.pth'] = [nonesuch] + self.state._path_entries.append(('test.pth', nonesuch)) with captured_stderr() as err: self.state._extend_syspath() self.assertNotIn(nonesuch, sys.path) @@ -1372,6 +1374,13 @@ def test_impl_extend_syspath_nonexistent_dir(self): # --- addsitedir integration tests --- + def test_addsitedir_rejects_known_paths_with_startup_state(self): + with self.assertRaises(TypeError): + site.addsitedir( + self.sitedir, + known_paths=set(), + startup_state=site.StartupState()) + def test_addsitedir_pth_import_skipped_when_matching_start_exists(self): # PEP 829: an empty .start file disables the matching .pth's import # lines, even when the .start has no entry points of its own. 
@@ -1415,137 +1424,143 @@ def test_addsitedir_dedups_paths_across_pth_files(self): def test_addsitedir_discovers_start_files(self): # addsitedir() should discover .start files and accumulate entries. - # With defer_processing_start_files=True the preserved state lives on - # site._startup_state and isn't flushed until the caller invokes - # process_startup_files(). self._make_start("os.path:join\n", name='foo') - site.addsitedir( - self.sitedir, set(), - defer_processing_start_files=True, - ) + state = site.StartupState(known_paths=set()) + site.addsitedir(self.sitedir, startup_state=state) fullname = os.path.join(self.sitedir, 'foo.start') self.assertIn( - 'os.path:join', site._startup_state._entrypoints[fullname] + 'os.path:join', state._entrypoints[fullname] ) - def test_impl_exec_imports_skips_when_matching_start(self): - # When foo.start exists, import lines in foo.pth are skipped - # at flush time by _StartupState._exec_imports(). - self._make_start("os.path:join\n", name='foo') - self._make_pth("import sys\n", name='foo') - site.addsitedir( - self.sitedir, set(), - defer_processing_start_files=True, - ) - pth_fullname = os.path.join(self.sitedir, 'foo.pth') - start_fullname = os.path.join(self.sitedir, 'foo.start') - # Import line was collected... - self.assertIn( - 'import sys', - site._startup_state._importexecs.get(pth_fullname, []), - ) - # ...but _exec_imports() will skip it because foo.start exists. - site._startup_state._exec_imports() - def test_addsitedir_pth_paths_still_work_with_start(self): # Path lines in .pth files still work even when a .start file exists. 
subdir = os.path.join(self.sitedir, 'mylib') os.mkdir(subdir) self._make_start("os.path:join\n", name='foo') self._make_pth("mylib\n", name='foo') - site.addsitedir( - self.sitedir, set(), - defer_processing_start_files=True, - ) + state = site.StartupState(known_paths=set()) + site.addsitedir(self.sitedir, startup_state=state) fullname = os.path.join(self.sitedir, 'foo.pth') - self.assertIn( - subdir, site._startup_state._syspaths.get(fullname, []) - ) + self.assertIn((fullname, subdir), state._path_entries) def test_addsitedir_start_alphabetical_order(self): # Multiple .start files are discovered alphabetically. - # _all_entrypoints() reads from self.state, so swap in the - # preserved batch state for the duration of the assertion. self._make_start("os.path:join\n", name='zzz') self._make_start("os.path:exists\n", name='aaa') - site.addsitedir( - self.sitedir, set(), - defer_processing_start_files=True, - ) - self.state = site._startup_state - all_entries = self._all_entrypoints() - entries = [entry for _, entry in all_entries] + state = site.StartupState(known_paths=set()) + site.addsitedir(self.sitedir, startup_state=state) + entries = self._just_entrypoints(state) idx_a = entries.index('os.path:exists') idx_z = entries.index('os.path:join') self.assertLess(idx_a, idx_z) - def test_addsitedir_pth_before_start(self): - # PEP 829: .pth files are scanned before .start files. - # Create a .pth and .start with the same basename; verify - # the .pth data is collected before .start data. + def test_addsitedir_pth_and_start(self): + # Create a .pth and .start with the same basename; verify both the + # .pth data and .start data are collected.
subdir = os.path.join(self.sitedir, 'mylib') os.mkdir(subdir) self._make_pth("mylib\n", name='foo') self._make_start("os.path:join\n", name='foo') - site.addsitedir( - self.sitedir, set(), - defer_processing_start_files=True, - ) + state = site.StartupState(known_paths=set()) + site.addsitedir(self.sitedir, startup_state=state) # Both should be collected. pth_fullname = os.path.join(self.sitedir, 'foo.pth') start_fullname = os.path.join(self.sitedir, 'foo.start') - self.assertIn( - subdir, site._startup_state._syspaths.get(pth_fullname, []) - ) + self.assertIn((pth_fullname, subdir), state._path_entries) self.assertIn( 'os.path:join', - site._startup_state._entrypoints.get(start_fullname, []), + state._entrypoints.get(start_fullname, []), ) def test_impl_addsitedir_skips_dotfile_start(self): - # .start files starting with '.' are skipped. Defer flushing so - # the preserved batch state stays inspectable on - # site._startup_state; otherwise process_startup_files() would - # detach and consume it regardless of whether the dotfile was - # picked up. + # .start files starting with '.' are skipped. + # This will create `.hidden.start`. self._make_start("os.path:join\n", name='.hidden') - site.addsitedir( - self.sitedir, set(), - defer_processing_start_files=True, - ) - self.assertEqual(site._startup_state._entrypoints, {}) + state = site.StartupState(known_paths=set()) + site.addsitedir(self.sitedir, startup_state=state) + self.assertEqual(state._entrypoints, {}) def test_addsitedir_standalone_flushes(self): - # When called with defer_processing_start_files=False (the - # default), addsitedir creates a per-call _StartupState and - # processes it before returning, so the caller sees the effect - # immediately. No batch state is left behind on - # site._startup_state. + # Standalone addsitedir creates a per-call StartupState and processes + # it before returning, so the caller sees the effect immediately. 
subdir = os.path.join(self.sitedir, 'flushlib') os.mkdir(subdir) self._make_pth("flushlib\n", name='foo') - site.addsitedir(self.sitedir) # known_paths=None + # No arguments means state is implied and processing is eager. + site.addsitedir(self.sitedir) self.assertIn(subdir, sys.path) - self.assertIsNone(site._startup_state) - def test_addsitedir_defer_does_not_flush(self): - # With defer_processing_start_files=True, addsitedir accumulates - # pending state but does not flush; sys.path is updated only when - # process_startup_files() is called explicitly. The accumulated - # state lives on the lazily-promoted site._startup_state. + def test_addsitedir_explicit_startup_state_does_not_flush(self): + # With an explicit StartupState, addsitedir accumulates pending state + # but does not flush it; sys.path is updated only when process() is + # called explicitly. subdir = os.path.join(self.sitedir, 'acclib') os.mkdir(subdir) self._make_pth("acclib\n", name='foo') - site.addsitedir( - self.sitedir, set(), - defer_processing_start_files=True, - ) + state = site.StartupState(known_paths=set()) + site.addsitedir(self.sitedir, startup_state=state) # Path is pending, not yet on sys.path. self.assertNotIn(subdir, sys.path) fullname = os.path.join(self.sitedir, 'foo.pth') - self.assertIn( - subdir, site._startup_state._syspaths.get(fullname, []) - ) + self.assertIn((fullname, subdir), state._path_entries) + + def test_addsitedir_startup_state_preserves_site_relative_order(self): + # As pointed out by @ncoghlan in + # https://github.com/python/cpython/issues/150228#issuecomment-4528614952 + # a subtle ordering change was inadvertently introduced where the + # interspersing of the sitedirs with the sys.path extensions they defined + # was lost during batch mode. You'd see all the sitedirs, then all path + # extensions. This test ensures that the old interspersing behavior + # has been restored. 
+ # + # Let's start by creating two sitedirs, each with an extension directory + # which will be added to sys.path by .pth files in the respective sitedirs. + sitedir2 = self.enterContext(os_helper.temp_dir()) + extdir1 = os.path.join(self.sitedir, 'ext1') + extdir2 = os.path.join(sitedir2, 'ext2') + os.mkdir(extdir1) + os.mkdir(extdir2) + self._make_pth(extdir1 + "\n", name='one') + self._make_pth(extdir2 + "\n", name='two', basedir=sitedir2) + # Now create an explicit batch, add each sitedir, then process the + # entire batch. + state = site.StartupState(known_paths=set()) + site.addsitedir(self.sitedir, startup_state=state) + site.addsitedir(sitedir2, startup_state=state) + state.process() + # Ensure that on sys.path we see this interspersed order: + # [sitedir1, extdir1, sitedir2, extdir2] + indexes = [ + sys.path.index(path) for path in ( + self.sitedir, extdir1, sitedir2, extdir2 + )] + # If the indexes are already sorted, the interspersed order was kept. + self.assertEqual(indexes, sorted(indexes)) + + def test_addsitedir_startup_state_paths_before_entrypoints(self): + # Ensure that sys.path extensions are available by the time + # .start file entry points are called. + extdir = self._make_mod("""\ +called = False +def hook(): + global called + called = True +""") + self.assertNotIn(extdir, sys.path) + self._make_pth("extdir\n", name='extlib') + self._make_start("mod:hook\n", name='extlib') + # Before the startup state is explicitly processed, neither + # the path extension is added, nor the entry point called. + state = site.StartupState(known_paths=set()) + site.addsitedir(self.sitedir, startup_state=state) + self.assertNotIn(extdir, sys.path) + self.assertNotIn('mod', sys.modules) + # After processing the batch, sys.path is extended and + # the entry point was called.
+ state.process() + self.assertIn(extdir, sys.path) + import mod + self.assertTrue(mod.called) def test_pth_path_is_available_to_start_entrypoint(self): # Core PEP 829 invariant: all .pth path extensions are applied to diff --git a/Misc/NEWS.d/next/Library/2026-05-27-11-18-36.gh-issue-150228.pNPiO-.rst b/Misc/NEWS.d/next/Library/2026-05-27-11-18-36.gh-issue-150228.pNPiO-.rst new file mode 100644 index 00000000000000..5b9399d46ac579 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-27-11-18-36.gh-issue-150228.pNPiO-.rst @@ -0,0 +1,7 @@ +:class:`site.StartupState` is publicly exposed to allow for :pep:`829` +explicit batch processing of startup configuration files. :func:`~site.addsitedir` +now takes an optional instance of this class so callers can control their +own batch processing. The previously added +*defer_processing_start_files* argument has been removed. The module-level +function ``process_startup_files()`` has also been removed (use +:meth:`site.StartupState.process` instead).