From c242612d90da21c33f68dcbc19076b3698a1b620 Mon Sep 17 00:00:00 2001 From: Badr Ezzir Date: Thu, 25 Jun 2026 13:24:49 +0100 Subject: [PATCH] ENG-55756 Fix data race in `CloneForSummary` by adding mutex protection for `Filters` accesses --- view/state.go | 6 ++-- view/state_race_test.go | 77 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 view/state_race_test.go diff --git a/view/state.go b/view/state.go index 370b9d38..83215e48 100644 --- a/view/state.go +++ b/view/state.go @@ -152,8 +152,8 @@ func (s *Statelet) IgnoreRead() { s.Ignore = true } -// CloneForSummary creates a lock-safe copy of Statelet state for summary/meta work. -// It intentionally does not copy mutex or lock-owner bookkeeping. +// CloneForSummary returns a copy of the Statelet for summary/meta work. +// Filters is read under filtersMu to avoid racing with AppendFilters. func (s *Statelet) CloneForSummary() *Statelet { if s == nil { return NewStatelet() @@ -179,9 +179,11 @@ func (s *Statelet) CloneForSummary() *Statelet { ret._columnNames = map[string]bool{} } + s.filtersMu.Lock() if len(s.Filters) > 0 { ret.Filters = append(predicate.Filters(nil), s.Filters...) } + s.filtersMu.Unlock() if len(s.Fields) > 0 { ret.Fields = append([]string(nil), s.Fields...) diff --git a/view/state_race_test.go b/view/state_race_test.go new file mode 100644 index 00000000..4caf41f6 --- /dev/null +++ b/view/state_race_test.go @@ -0,0 +1,77 @@ +package view + +import ( + "os" + "sync" + "testing" + + "github.com/viant/datly/view/state/predicate" +) + +// Reproduces the prod CloneForSummary segfault: the summary goroutine clones the +// Statelet (reads Filters) while the object-query goroutine grows Filters via +// AppendFilters. Reading Filters without filtersMu is a data race; a torn slice +// header (new len + nil ptr) faults at addr=0x0. + +const summaryCloneRaceIterations = 5000 + +// startAppendFiltersWriter runs the writer side: AppendFilters as called from +// service/reader/sql.go, repeatedly growing Filters to force reallocation. +func startAppendFiltersWriter(wg *sync.WaitGroup, statelet *Statelet) { + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < summaryCloneRaceIterations; i++ { + statelet.AppendFilters(predicate.Filters{&predicate.Filter{Name: "accountID"}}) + } + }() +} + +// Fixed CloneForSummary vs AppendFilters: passes under -race. +func TestStatelet_SummaryClone_Fixed_NoRace(t *testing.T) { + statelet := NewStatelet() + + var wg sync.WaitGroup + startAppendFiltersWriter(&wg, statelet) + + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < summaryCloneRaceIterations; i++ { + _ = statelet.CloneForSummary() + } + }() + + wg.Wait() +} + +// Pre-fix unlocked read vs AppendFilters: fails under -race (the prod bug). +// Skipped by default; set REPRODUCE_STATELET_RACE=1 to run. +func TestStatelet_SummaryClone_Unsafe_ReproducesRace(t *testing.T) { + if os.Getenv("REPRODUCE_STATELET_RACE") == "" { + t.Skip("set REPRODUCE_STATELET_RACE=1 to run the intentional data-race reproduction") + } + + statelet := NewStatelet() + + var wg sync.WaitGroup + startAppendFiltersWriter(&wg, statelet) + + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < summaryCloneRaceIterations; i++ { + _ = unsafeCloneFilters(statelet) + } + }() + + wg.Wait() +} + +// unsafeCloneFilters reads Filters without filtersMu, mirroring the pre-fix bug. +func unsafeCloneFilters(s *Statelet) predicate.Filters { + if len(s.Filters) == 0 { + return nil + } + return append(predicate.Filters(nil), s.Filters...) +}