Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
[1.0.4] - 2026-XX-XX
--------------------

**Features**

- CLI commands that load a tree sequence now read from stdin when the input
path argument is omitted. (:user:`chris-a-talbot`, :user:`jeromekelleher`,
:issue:`3468`, :pr:`3469`)

--------------------
[1.0.3] - 2026-05-14
--------------------
Expand Down
165 changes: 165 additions & 0 deletions python/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import io
import os
import subprocess
import sys
import tempfile
import unittest
Expand Down Expand Up @@ -310,6 +311,29 @@ def test_vcf_allow_position_zero(self, flags, expected):
assert args.tree_sequence == tree_sequence
assert args.allow_position_zero == expected

@pytest.mark.parametrize(
    "cmd",
    [
        "info", "trees", "vcf", "nodes", "edges", "sites",
        "mutations", "migrations", "individuals", "populations", "provenances",
    ],
)
def test_tree_sequence_argument_optional(self, cmd):
    """
    The positional tree sequence argument is optional for every loading
    subcommand: when it is left out the parsed value is None (which selects
    stdin), and when a path is given that path string is stored.
    """
    parser = cli.get_tskit_parser()

    without_path = parser.parse_args([cmd])
    assert without_path.tree_sequence is None

    with_path = parser.parse_args([cmd, "test.trees"])
    assert with_path.tree_sequence == "test.trees"

def test_info_default_values(self):
parser = cli.get_tskit_parser()
cmd = "info"
Expand Down Expand Up @@ -560,6 +584,16 @@ def test_vcf(self):
assert len(stderr) == 0
self.verify_vcf(stdout)

def test_vcf_stdin(self):
    """
    With the path argument omitted, ``vcf`` reads the tree sequence from
    stdin and produces verifiable VCF output with nothing on stderr.
    """
    # sys.stdin has to be replaced by a genuinely open binary file: the
    # low-level loader needs a real file descriptor, so an in-memory
    # stand-in such as io.BytesIO will not do.
    with open(self._tree_sequence_file, "rb") as src, mock.patch("sys.stdin", src):
        stdout, stderr = capture_output(cli.tskit_main, ["vcf", "-0"])
    assert len(stderr) == 0
    self.verify_vcf(stdout)

def verify_info(self, ts, output_info):
assert str(ts) == output_info

Expand Down Expand Up @@ -642,3 +676,134 @@ def test_migrations(self):

# Delegates to self.verify with "provenances". verify() is defined outside this
# view -- presumably it checks the CLI output for that subcommand; confirm there.
def test_provenances(self):
self.verify("provenances")


@pytest.fixture(scope="module")
def treeseq_file(tmp_path_factory):
    """
    Simulate a tree sequence, dump it to a temporary file and return the
    file's path as a string.

    The tree sequence contains migrations, mutations and individuals so that
    every loading subcommand has something to output.
    """
    two_populations = [msprime.PopulationConfiguration(5) for _ in range(2)]
    simulated = msprime.simulate(
        population_configurations=two_populations,
        migration_matrix=[[0, 1], [1, 0]],
        record_migrations=True,
        length=1,
        recombination_rate=2,
        mutation_rate=2,
        random_seed=1,
    )
    # The migrations subcommand would print nothing without recorded migrations.
    assert simulated.num_migrations > 0

    with_individuals = tsutil.insert_random_ploidy_individuals(
        simulated, samples_only=True
    )
    out_path = tmp_path_factory.mktemp("tsk_cli_stdin") / "stdin.trees"
    with_individuals.dump(out_path)
    return str(out_path)


# Argument vectors for the subcommands that load a tree sequence, one list per
# subcommand, each including any extra flags it needs to produce output. The
# input path is deliberately left off: TestStdin.test_stdin_matches_file
# appends it for the file-based run and omits it for the stdin run.
# "vcf" is given "-0" (appears to be the allow-position-zero flag -- confirm
# against the parser).
STDIN_SUBCOMMANDS = [
["info"],
["trees"],
["vcf", "-0"],
["nodes"],
["edges"],
["sites"],
["mutations"],
["migrations"],
["individuals"],
["populations"],
["provenances"],
]


class TestStdin:
    """
    For every loading subcommand, check that omitting the path argument and
    reading the tree sequence from stdin gives exactly the output obtained by
    loading the same tree sequence from a file.
    """

    @pytest.mark.parametrize("subcommand", STDIN_SUBCOMMANDS)
    def test_stdin_matches_file(self, treeseq_file, subcommand):
        # Reference run: the path is passed as the positional argument.
        file_stdout, file_stderr = capture_output(
            cli.tskit_main, subcommand + [treeseq_file]
        )

        # Run under test: no path, with sys.stdin swapped for the open file.
        with open(treeseq_file, "rb") as ts_file, mock.patch("sys.stdin", ts_file):
            stdin_stdout, stdin_stderr = capture_output(cli.tskit_main, subcommand)

        assert file_stderr == ""
        assert stdin_stderr == ""
        # Guard against a vacuous pass where both runs print nothing.
        assert len(file_stdout) > 0
        assert stdin_stdout == file_stdout


class TestStdinErrors:
    """
    Check that failures while loading from stdin are reported as clean
    "Load error: ..." messages passed to sys.exit.
    """

    def run_info_stdin(self, path):
        """
        Run ``info`` with sys.stdin replaced by the file at ``path`` and
        return the first positional argument given to the patched sys.exit.
        """
        # sys.exit is patched to raise TestException so that the exit is
        # observable here instead of terminating the test process.
        exit_patch = mock.patch("sys.exit", side_effect=TestException)
        with exit_patch as mocked_exit:
            with open(path, "rb") as stream, mock.patch("sys.stdin", stream):
                with pytest.raises(TestException):
                    capture_output(cli.tskit_main, ["info"])
        return mocked_exit.call_args[0][0]

    def test_empty_stdin(self, tmp_path):
        empty = tmp_path / "empty.trees"
        empty.write_bytes(b"")
        message = self.run_info_stdin(empty)
        assert message == "Load error: End of file"

    def test_garbage_stdin(self, tmp_path):
        garbage = tmp_path / "garbage.trees"
        garbage.write_bytes(b"not a tree sequence at all")
        message = self.run_info_stdin(garbage)
        assert message.startswith("Load error: File not in kastore format")

    def test_truncated_stdin(self, tmp_path, treeseq_file):
        # Keep only the first 100 bytes of a valid tree sequence file.
        with open(treeseq_file, "rb") as whole:
            head = whole.read(100)
        truncated = tmp_path / "truncated.trees"
        truncated.write_bytes(head)
        message = self.run_info_stdin(truncated)
        assert message.startswith("Load error: File not in kastore format")


class TestStdinSubprocess:
    """
    End-to-end checks that pipe the tree sequence into a child process over a
    real OS pipe. The in-process tests mock sys.stdin with a seekable file;
    these cover the genuine non-seekable stdin path instead.
    """

    def run_cli(self, args, input_bytes):
        """
        Run ``python -m tskit`` with ``args`` in a child process, feeding it
        ``input_bytes`` on stdin, and return the completed process with its
        stdout and stderr captured as bytes.
        """
        # Commands that print unicode (e.g. the box-drawing characters from
        # "info") would fail when stdout is a pipe on platforms defaulting to
        # a non-UTF-8 codec (e.g. Windows), so force UTF-8 in the child.
        child_env = dict(os.environ)
        child_env["PYTHONIOENCODING"] = "utf-8"
        command = [sys.executable, "-m", "tskit"] + list(args)
        return subprocess.run(
            command,
            input=input_bytes,
            capture_output=True,
            env=child_env,
            check=False,
        )

    @pytest.mark.parametrize("subcommand", [["info"], ["vcf", "-0"], ["nodes"]])
    def test_stdin_pipe_matches_file(self, treeseq_file, subcommand):
        with open(treeseq_file, "rb") as ts_file:
            payload = ts_file.read()

        piped = self.run_cli(subcommand, payload)
        from_file = self.run_cli(subcommand + [treeseq_file], b"")

        assert piped.returncode == 0
        assert piped.stderr == b""
        assert len(piped.stdout) > 0
        assert piped.stdout == from_file.stdout

    def test_empty_pipe(self):
        completed = self.run_cli(["info"], b"")
        assert completed.returncode != 0
        assert b"End of file" in completed.stderr

    def test_garbage_pipe(self):
        completed = self.run_cli(["info"], b"not a tree sequence")
        assert completed.returncode != 0
        assert b"not in kastore format" in completed.stderr
13 changes: 10 additions & 3 deletions python/tskit/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# MIT License
#
# Copyright (c) 2018-2025 Tskit Developers
# Copyright (c) 2018-2026 Tskit Developers
# Copyright (c) 2015-2018 University of Oxford
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down Expand Up @@ -45,9 +45,11 @@ def sys_exit(message):


def load_tree_sequence(path):
    """
    Load and return the tree sequence at ``path``, or read it from standard
    input when ``path`` is None.

    An OSError, EOFError or tskit.FileFormatError raised while loading is
    not propagated; it is reported through sys_exit as
    ``"Load error: <message>"``.
    """
    if path is None:
        # No path was given on the command line, so fall back to stdin.
        path = sys.stdin
    try:
        return tskit.load(path)
    except (OSError, EOFError, tskit.FileFormatError) as e:
        # Besides OSError, EOFError and FileFormatError are handled so that
        # empty or malformed input gives a clean message, not a traceback.
        sys_exit(f"Load error: {e}")


Expand Down Expand Up @@ -134,7 +136,12 @@ def run_vcf(args):


def add_tree_sequence_argument(parser):
    """
    Register the optional positional ``tree_sequence`` argument on ``parser``.

    With ``nargs="?"`` the argument may be omitted, in which case the parsed
    value is None; otherwise it is the path string given on the command line.
    """
    parser.add_argument(
        "tree_sequence",
        help="The tskit tree sequence file. If not provided, read from stdin.",
        default=None,
        nargs="?",
    )


def add_precision_argument(parser):
Expand Down
Loading