Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -999,6 +999,20 @@ func (h *Handler) VirtToPhy(virtFrame, phyFrame []byte) (int, bool) {
return 0, false
}

// Bound the inner packet so the ciphertext + AEAD tag fit within phyFrame.
// Seal appends ptLen+Overhead() bytes after the Geneve header; an oversized
// inner packet would otherwise overflow phyFrame (when its capacity exceeds
// its length) or force Seal to silently reallocate onto the heap, so the
// ciphertext would not land in the frame udp.Encode/the caller transmits.
// Drop instead (APO-667).
if hdrLen+len(ipPacket)+txCipher.Overhead() > len(payload) {
slog.Debug("Dropping oversized inner packet for encap",
slog.Int("innerLen", len(ipPacket)),
slog.Int("payloadLen", len(payload)))
vnet.Stats.TXErrors.Add(1)
return 0, false
}

encryptedFrameLen := len(txCipher.Seal(payload[hdrLen:hdrLen], nonce, ipPacket, payload[:hdrLen]))

// Underlay source selection.
Expand Down
16 changes: 16 additions & 0 deletions inplace_transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,22 @@ func (h *Handler) VirtToPhyInPlace(buf []byte, off, length int) (int, int, bool)
// and the 16-byte tag lands in the tailroom immediately after. The AAD is
// the Geneve header we just marshalled directly in front.
ptLen := len(ipPacket)

// Bound the inner packet so the ciphertext + AEAD tag cannot be sealed past
// the end of buf. Seal writes ptLen+Overhead() bytes starting at ipStart; an
// oversized inner packet would otherwise overflow into the adjacent UMEM
// frame (when the caller's buf capacity spans it) or force Seal to silently
// reallocate onto the heap — leaving plaintext in the frame the descriptor
// still points at. Drop instead (APO-667).
if ipStart+ptLen+txCipher.Overhead() > len(buf) {
slog.Debug("Dropping oversized inner packet for in-place encap",
slog.Int("innerLen", ptLen),
slog.Int("ipStart", ipStart),
slog.Int("bufLen", len(buf)))
vnet.Stats.TXErrors.Add(1)
return dropWindowOffset, 0, false
}

aad := buf[geneveStart : geneveStart+hdrLen]
encryptedFrameLen := len(txCipher.Seal(buf[ipStart:ipStart], nonce, buf[ipStart:ipStart+ptLen], aad))

Expand Down
132 changes: 132 additions & 0 deletions oversized_inner_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
package icx

import (
"net/netip"
"testing"

"github.com/stretchr/testify/require"
"gvisor.dev/gvisor/pkg/tcpip/header"
)

// These tests are the APO-667 regression: the encap transforms must bound the inner
// IP packet so the ciphertext + AEAD tag cannot be sealed past the end of the
// destination buffer. The realistic overflow is by up to the tag length — an inner
// packet that nearly fills the frame leaves < tagLen bytes of tailroom, so the GCM
// tag spills past the frame edge. Each test gives the transform a buffer whose
// backing array continues past its length (the way a UMEM frame slice's backing
// array continues into the adjacent frame, and the shape an external EngineXfrm /
// vtep caller can pass) and plants a canary in that tail; the canary must remain
// untouched, i.e. the oversized packet must be dropped before Seal, not sealed over
// the neighbour.

// buildSizedInnerIPv4 builds a raw IPv4+UDP packet whose total length is exactly n
// bytes. Src/dst sit inside the test route prefixes so the encap trie lookup
// succeeds and execution reaches the seal-bound guard.
func buildSizedInnerIPv4(t *testing.T, src, dst netip.Addr, n int) []byte {
t.Helper()
require.GreaterOrEqual(t, n, header.IPv4MinimumSize+header.UDPMinimumSize,
"requested inner packet smaller than an IPv4+UDP header")
payload := make([]byte, n-header.IPv4MinimumSize-header.UDPMinimumSize)
pkt := buildInnerIPv4Packet(src, dst, payload)
require.Len(t, pkt, n)
return pkt
}

// TestVirtToPhyInPlaceDropsOversizedInner covers the UMEM (in-place) encap path.
func TestVirtToPhyInPlaceDropsOversizedInner(t *testing.T) {
env := newInplaceEnv(t, inplaceTestCase{layer3: true, payloadLen: 1})
tagLen := env.vnet.txCipher.Load().Overhead()

const (
frameLen = 2048
canaryLen = 64
off = 96 // realistic RX headroom (>= minInPlaceHeadroom)
)

backing := make([]byte, frameLen+canaryLen)
for i := frameLen; i < len(backing); i++ {
backing[i] = 0xAB
}
buf := backing[:frameLen] // len == frameLen, cap == frameLen+canaryLen

// ptLen is chosen so the inner packet still fits within the logical frame
// (off+ptLen <= frameLen) but the tag does not (off+ptLen+tagLen > frameLen).
ptLen := frameLen - off - tagLen + 8
require.LessOrEqual(t, off+ptLen, frameLen, "inner packet itself must fit in the frame")
require.Greater(t, off+ptLen+tagLen, frameLen, "tag must overflow the frame edge")

inner := buildSizedInnerIPv4(t, env.innerSrc, env.innerDst, ptLen)
copy(buf[off:], inner)

before := env.vnet.Stats.TXErrors.Load()
gotOff, gotLen, handled := env.h.VirtToPhyInPlace(buf, off, len(inner))

require.False(t, handled)
require.Equal(t, 0, gotLen, "oversized inner packet must be dropped")
require.Equal(t, dropWindowOffset, gotOff)
require.Equal(t, before+1, env.vnet.Stats.TXErrors.Load(), "drop must count as a TX error")

for i := frameLen; i < len(backing); i++ {
require.Equalf(t, byte(0xAB), backing[i],
"byte %d past the frame was overwritten — Seal overflowed into the adjacent UMEM frame", i)
}
}

// TestVirtToPhyInPlaceAcceptsMaxSizedInner pins the boundary: the largest inner
// packet whose ciphertext+tag exactly reaches the frame edge must still encapsulate
// (the guard rejects only strictly-past-the-edge, no off-by-one).
func TestVirtToPhyInPlaceAcceptsMaxSizedInner(t *testing.T) {
env := newInplaceEnv(t, inplaceTestCase{layer3: true, payloadLen: 1})
tagLen := env.vnet.txCipher.Load().Overhead()

const (
frameLen = 2048
off = 96
)
maxPtLen := frameLen - off - tagLen // off+maxPtLen+tagLen == frameLen exactly

buf := make([]byte, frameLen)
inner := buildSizedInnerIPv4(t, env.innerSrc, env.innerDst, maxPtLen)
copy(buf[off:], inner)

gotOff, gotLen, handled := env.h.VirtToPhyInPlace(buf, off, len(inner))
require.False(t, handled)
require.Greater(t, gotLen, 0, "the max-sized inner packet must still encapsulate")
require.GreaterOrEqual(t, gotOff, 0)
require.LessOrEqual(t, gotOff+gotLen, frameLen, "output must stay within the frame")
}

// TestVirtToPhyDropsOversizedInner covers the two-buffer (copy) encap path.
func TestVirtToPhyDropsOversizedInner(t *testing.T) {
env := newInplaceEnv(t, inplaceTestCase{layer3: true, payloadLen: 1})
tagLen := env.vnet.txCipher.Load().Overhead()

const (
phyLen = 2048
canaryLen = 64
)
payloadOff := header.EthernetMinimumSize + header.IPv4MinimumSize + header.UDPMinimumSize // IPv4 underlay

backing := make([]byte, phyLen+canaryLen)
for i := phyLen; i < len(backing); i++ {
backing[i] = 0xCD
}
phyFrame := backing[:phyLen] // len == phyLen, cap == phyLen+canaryLen

// Size the inner packet so hdr(32)+ptLen+tag just exceeds the available payload
// room (phyLen-payloadOff), forcing the tag past the frame edge.
ptLen := phyLen - payloadOff - geneveHdrLen - tagLen + 8
inner := buildSizedInnerIPv4(t, env.innerSrc, env.innerDst, ptLen)

before := env.vnet.Stats.TXErrors.Load()
gotLen, handled := env.h.VirtToPhy(inner, phyFrame)

require.False(t, handled)
require.Equal(t, 0, gotLen, "oversized inner packet must be dropped")
require.Equal(t, before+1, env.vnet.Stats.TXErrors.Load(), "drop must count as a TX error")

for i := phyLen; i < len(backing); i++ {
require.Equalf(t, byte(0xCD), backing[i],
"byte %d past phyFrame was overwritten — Seal overflowed the destination buffer", i)
}
}
6 changes: 6 additions & 0 deletions util.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ func MTU(pathMTU int) int {
// Calculate Geneve header length with our full set of options.
func headerLength() int {
hdr := geneve.Header{
// NumOptions drives MarshalBinary's option loop; without it the two
// options below are silently skipped and the header marshals to its
// 8-byte base, undercounting encap overhead by 24 bytes and inflating
// the advertised MTU (APO-665). Must match the fixed 2-option header
// the datapath emits (geneveHdrLen = 32).
NumOptions: 2,
Options: [geneve.MaxOptions]geneve.Option{
{
Class: geneve.ClassExperimental,
Expand Down
37 changes: 37 additions & 0 deletions util_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package icx

import (
"crypto/aes"
"testing"

"gvisor.dev/gvisor/pkg/tcpip/header"
)

// TestHeaderLengthCountsAllOptions is the APO-665 regression: headerLength() must
// marshal the full 2-option Geneve header (geneveHdrLen = 32 bytes), not the 8-byte
// base. The bug was a missing NumOptions on the header literal, which made
// MarshalBinary skip both options and report only the base length — undercounting
// encapsulation overhead by 24 bytes and inflating the MTU advertised by MTU().
func TestHeaderLengthCountsAllOptions(t *testing.T) {
got := headerLength()
if got != geneveHdrLen {
t.Fatalf("headerLength() = %d, want %d (the fixed 2-option header); "+
"encap overhead is undercounted by %d bytes", got, geneveHdrLen, geneveHdrLen-got)
}
}

// TestMTUDeductsFullEncapOverhead asserts MTU() subtracts the complete outer
// encapsulation stack — including the full 32-byte Geneve header — and rounds down
// to a whole AES block. Before the APO-665 fix the Geneve term was 8 instead of 32,
// so MTU() over-advertised by 24 bytes and the inner stack could emit packets whose
// encapsulated size exceeded the path MTU (fragmentation / black-holing).
func TestMTUDeductsFullEncapOverhead(t *testing.T) {
const pathMTU = 1500
overhead := header.IPv6MinimumSize + header.UDPMinimumSize + geneveHdrLen + aes.BlockSize
want := pathMTU - overhead
want -= want % aes.BlockSize

if got := MTU(pathMTU); got != want {
t.Fatalf("MTU(%d) = %d, want %d (deducting %d bytes of overhead)", pathMTU, got, want, overhead)
}
}
Loading