Update dependencies

2025-04-09 01:00:12 +01:00
parent f0641ffd6e
commit 5a9cfc022c
882 changed files with 68930 additions and 24201 deletions
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/addressable_endpoint_state.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/addressable_endpoint_state.go
@@ -41,10 +41,12 @@ type AddressableEndpointState struct {
 	// AddressableEndpointState.mu
 	//   addressState.mu
 	mu addressableEndpointStateRWMutex `state:"nosave"`
+	// TODO(b/361075310): Enable s/r for the below fields.
+	//
 	// +checklocks:mu
-	endpoints map[tcpip.Address]*addressState
+	endpoints map[tcpip.Address]*addressState `state:"nosave"`
 	// +checklocks:mu
-	primary []*addressState
+	primary []*addressState `state:"nosave"`
 }

 // AddressableEndpointStateOptions contains options used to configure an
@@ -736,8 +738,6 @@ func (a *AddressableEndpointState) Cleanup() {
 var _ AddressEndpoint = (*addressState)(nil)

 // addressState holds state for an address.
-//
-// +stateify savable
 type addressState struct {
 	addressableEndpointState *AddressableEndpointState
 	addr                     tcpip.AddressWithPrefix
@@ -748,7 +748,7 @@ type addressState struct {
 	//
 	// AddressableEndpointState.mu
 	//   addressState.mu
-	mu   addressStateRWMutex `state:"nosave"`
+	mu   addressStateRWMutex
 	refs addressStateRefs
 	// checklocks:mu
 	kind AddressKind
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bridge.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bridge.go
@@ -22,11 +22,28 @@ import (

 var _ NetworkLinkEndpoint = (*BridgeEndpoint)(nil)

+// +stateify savable
 type bridgePort struct {
 	bridge *BridgeEndpoint
 	nic    *nic
 }

+// BridgeFDBKey is the MAC address of a device which a bridge port is associated with.
+type BridgeFDBKey tcpip.LinkAddress
+
+// BridgeFDBEntry consists of all metadata for an FDB record.
+type BridgeFDBEntry struct {
+	port *bridgePort
+}
+
+// PortLinkAddress returns the MAC address of the device bound to the bridge port, or "" if the entry has no port.
+func (e BridgeFDBEntry) PortLinkAddress() tcpip.LinkAddress {
+	if e.port == nil {
+		return ""
+	}
+	return e.port.nic.LinkAddress()
+}
+
 // ParseHeader implements stack.LinkEndpoint.
 func (p *bridgePort) ParseHeader(pkt *PacketBuffer) bool {
 	_, ok := pkt.LinkHeader().Consume(header.EthernetMinimumSize)
@@ -36,23 +53,49 @@ func (p *bridgePort) ParseHeader(pkt *PacketBuffer) bool {
 // DeliverNetworkPacket implements stack.NetworkDispatcher.
 func (p *bridgePort) DeliverNetworkPacket(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
 	bridge := p.bridge
+	eth := header.Ethernet(pkt.LinkHeader().Slice())
+	updateFDB := false
 	bridge.mu.RLock()
-
-	// Send the packet to all other ports.
-	for _, port := range bridge.ports {
-		if p == port {
-			continue
+	// Learn the source MAC address: if it is not multicast and has
+	// no FDB entry yet, record (after the read lock is released) that
+	// it is reachable through the port this packet arrived on.
+	// Network packets that are sent to the learned MAC address
+	// will be forwarded to the bridge port that is stored in
+	// the FDB table.
+	sourceAddress := eth.SourceAddress()
+	if _, hasSourceFDB := bridge.fdbTable[BridgeFDBKey(sourceAddress)]; !header.IsMulticastEthernetAddress(sourceAddress) && !hasSourceFDB {
+		updateFDB = true
+	}
+	if entry, exist := bridge.fdbTable[BridgeFDBKey(eth.DestinationAddress())]; !exist {
+		// When no FDB entry is found, send the packet to all ports.
+		for _, port := range bridge.ports {
+			if p == port {
+				continue
+			}
+			newPkt := NewPacketBuffer(PacketBufferOptions{
+				ReserveHeaderBytes: int(port.nic.MaxHeaderLength()),
+				Payload:            pkt.ToBuffer(),
+			})
+			port.nic.writeRawPacket(newPkt)
+			newPkt.DecRef()
 		}
+	} else if entry.port != p {
+		destPort := entry.port
 		newPkt := NewPacketBuffer(PacketBufferOptions{
-			ReserveHeaderBytes: int(port.nic.MaxHeaderLength()),
+			ReserveHeaderBytes: int(destPort.nic.MaxHeaderLength()),
 			Payload:            pkt.ToBuffer(),
 		})
-		port.nic.writeRawPacket(newPkt)
+		destPort.nic.writeRawPacket(newPkt)
 		newPkt.DecRef()
 	}

 	d := bridge.dispatcher
 	bridge.mu.RUnlock()
+	if updateFDB {
+		bridge.mu.Lock()
+		bridge.addFDBEntryLocked(eth.SourceAddress(), p, 0)
+		bridge.mu.Unlock()
+	}
 	if d != nil {
 		// The dispatcher may acquire Stack.mu in DeliverNetworkPacket(), which is
 		// ordered above bridge.mu. So call DeliverNetworkPacket() without holding
@@ -71,12 +114,15 @@ func NewBridgeEndpoint(mtu uint32) *BridgeEndpoint {
 		addr: tcpip.GetRandMacAddr(),
 	}
 	b.ports = make(map[tcpip.NICID]*bridgePort)
+	b.fdbTable = make(map[BridgeFDBKey]BridgeFDBEntry)
 	return b
 }

 // BridgeEndpoint is a bridge endpoint.
+//
+// +stateify savable
 type BridgeEndpoint struct {
-	mu bridgeRWMutex
+	mu bridgeRWMutex `state:"nosave"`
 	// +checklocks:mu
 	ports map[tcpip.NICID]*bridgePort
 	// +checklocks:mu
@@ -86,7 +132,9 @@ type BridgeEndpoint struct {
 	// +checklocks:mu
 	attached bool
 	// +checklocks:mu
-	mtu             uint32
+	mtu uint32
+	// +checklocks:mu
+	fdbTable        map[BridgeFDBKey]BridgeFDBEntry
 	maxHeaderLength atomicbitops.Uint32
 }

@@ -140,6 +188,12 @@ func (b *BridgeEndpoint) DelNIC(nic *nic) tcpip.Error {
 	b.mu.Lock()
 	defer b.mu.Unlock()

+	port := b.ports[nic.id]
+	for k, e := range b.fdbTable {
+		if e.port == port {
+			delete(b.fdbTable, k)
+		}
+	}
 	delete(b.ports, nic.id)
 	nic.NetworkLinkEndpoint.Attach(nic)
 	return nil
@@ -169,8 +223,8 @@ func (b *BridgeEndpoint) MaxHeaderLength() uint16 {

 // LinkAddress implements stack.LinkEndpoint.LinkAddress.
 func (b *BridgeEndpoint) LinkAddress() tcpip.LinkAddress {
-	b.mu.Lock()
-	defer b.mu.Unlock()
+	b.mu.RLock()
+	defer b.mu.RUnlock()
 	return b.addr
 }

@@ -195,6 +249,7 @@ func (b *BridgeEndpoint) Attach(dispatcher NetworkDispatcher) {
 	}
 	b.dispatcher = dispatcher
 	b.ports = make(map[tcpip.NICID]*bridgePort)
+	b.fdbTable = make(map[BridgeFDBKey]BridgeFDBEntry)
 }

 // IsAttached implements stack.LinkEndpoint.IsAttached.
@@ -227,3 +282,25 @@ func (b *BridgeEndpoint) Close() {}

 // SetOnCloseAction implements stack.LinkEndpoint.Close.
 func (b *BridgeEndpoint) SetOnCloseAction(func()) {}
+
+// addFDBEntryLocked adds a new FDB entry by learning. The learning happens
+// when a packet is received by a bridge port; the bridge will use that port
+// for future deliveries to the NIC device.
+// The addr is the key used when looking up the entry.
+//
+// +checklocks:b.mu
+func (b *BridgeEndpoint) addFDBEntryLocked(addr tcpip.LinkAddress, source *bridgePort, flags uint64) bool {
+	// TODO(b/376924093): limit bridge FDB size.
+	b.fdbTable[BridgeFDBKey(addr)] = BridgeFDBEntry{
+		port: source,
+	}
+	return true
+}
+
+// FindFDBEntry finds the FDB entry for the given address. If it doesn't exist,
+// it will return an empty entry.
+func (b *BridgeEndpoint) FindFDBEntry(addr tcpip.LinkAddress) BridgeFDBEntry {
+	b.mu.RLock()
+	defer b.mu.RUnlock()
+	return b.fdbTable[BridgeFDBKey(addr)]
+}
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/gro/gro.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/gro/gro.go
@@ -24,17 +24,16 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )

-// TODO(b/256037250): Enable by default.
-// TODO(b/256037250): We parse headers here. We should save those headers in
-// PacketBuffers so they don't have to be re-parsed later.
-// TODO(b/256037250): I still see the occasional SACK block in the zero-loss
-// benchmark, which should not happen.
-// TODO(b/256037250): Some dispatchers, e.g. XDP and RecvMmsg, can receive
-// multiple packets at a time. Even if the GRO interval is 0, there is an
-// opportunity for coalescing.
-// TODO(b/256037250): We're doing some header parsing here, which presents the
-// opportunity to skip it later.
-// TODO(b/256037250): Can we pass a packet list up the stack too?
+// There is room for improvement to the GRO engine:
+//   - We should save the headers parsed here in
+//     PacketBuffers so they don't have to be re-parsed later.
+//   - We still see the occasional SACK block in the zero-loss
+//     benchmark, which should not happen.
+//   - Some dispatchers, e.g. XDP and RecvMmsg, can receive
+//     multiple packets at a time. Even if the GRO interval is 0, there is an
+//     opportunity for coalescing.
+//   - We could pass a packet list up the stack to reduce traversals up the
+//     stack.

 const (
 	// groNBuckets is the number of GRO buckets.
@@ -50,6 +49,8 @@ const (
 )

 // A groBucket holds packets that are undergoing GRO.
+//
+// +stateify savable
 type groBucket struct {
 	// count is the number of packets in the bucket.
 	count int
@@ -265,6 +266,8 @@ func (gb *groBucket) found(gd *GRO, groPkt *groPacket, flushGROPkt bool, pkt *st

 // A groPacket is packet undergoing GRO. It may be several packets coalesced
 // together.
+//
+// +stateify savable
 type groPacket struct {
 	// groPacketEntry is an intrusive list.
 	groPacketEntry
@@ -303,6 +306,8 @@ func (pk *groPacket) payloadSize() int {
 }

 // GRO coalesces incoming packets to increase throughput.
+//
+// +stateify savable
 type GRO struct {
 	enabled bool
 	buckets [groNBuckets]groBucket
@@ -444,6 +449,7 @@ func (gd *GRO) dispatch6(pkt *stack.PacketBuffer) {
 		case header.IPv6HopByHopOptionsExtHdr:
 		case header.IPv6RoutingExtHdr:
 		case header.IPv6DestinationOptionsExtHdr:
+		case header.IPv6ExperimentExtHdr:
 		default:
 			// This is either a TCP header or something we can't handle.
 			ipHdrSize = int(it.HeaderOffset())
@@ -508,8 +514,7 @@ func (gd *GRO) dispatch6(pkt *stack.PacketBuffer) {
 }

 func (gd *GRO) bucketForPacket4(ipHdr header.IPv4, tcpHdr header.TCP) int {
-	// TODO(b/256037250): Use jenkins or checksum. Write a test to print
-	// distribution.
+	// It would be better to use a Jenkins hash or a checksum.
 	var sum int
 	srcAddr := ipHdr.SourceAddress()
 	for _, val := range srcAddr.AsSlice() {
@@ -525,8 +530,7 @@ func (gd *GRO) bucketForPacket4(ipHdr header.IPv4, tcpHdr header.TCP) int {
 }

 func (gd *GRO) bucketForPacket6(ipHdr header.IPv6, tcpHdr header.TCP) int {
-	// TODO(b/256037250): Use jenkins or checksum. Write a test to print
-	// distribution.
+	// It would be better to use a Jenkins hash or a checksum.
 	var sum int
 	srcAddr := ipHdr.SourceAddress()
 	for _, val := range srcAddr.AsSlice() {
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/gro/gro_state_autogen.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/gro/gro_state_autogen.go
@@ -8,6 +8,111 @@ import (
 	"gvisor.dev/gvisor/pkg/state"
 )

+func (gb *groBucket) StateTypeName() string {
+	return "pkg/tcpip/stack/gro.groBucket"
+}
+
+func (gb *groBucket) StateFields() []string {
+	return []string{
+		"count",
+		"packets",
+		"packetsPrealloc",
+		"allocIdxs",
+	}
+}
+
+func (gb *groBucket) beforeSave() {}
+
+// +checklocksignore
+func (gb *groBucket) StateSave(stateSinkObject state.Sink) {
+	gb.beforeSave()
+	stateSinkObject.Save(0, &gb.count)
+	stateSinkObject.Save(1, &gb.packets)
+	stateSinkObject.Save(2, &gb.packetsPrealloc)
+	stateSinkObject.Save(3, &gb.allocIdxs)
+}
+
+func (gb *groBucket) afterLoad(context.Context) {}
+
+// +checklocksignore
+func (gb *groBucket) StateLoad(ctx context.Context, stateSourceObject state.Source) {
+	stateSourceObject.Load(0, &gb.count)
+	stateSourceObject.Load(1, &gb.packets)
+	stateSourceObject.Load(2, &gb.packetsPrealloc)
+	stateSourceObject.Load(3, &gb.allocIdxs)
+}
+
+func (pk *groPacket) StateTypeName() string {
+	return "pkg/tcpip/stack/gro.groPacket"
+}
+
+func (pk *groPacket) StateFields() []string {
+	return []string{
+		"groPacketEntry",
+		"pkt",
+		"ipHdr",
+		"tcpHdr",
+		"initialLength",
+		"idx",
+	}
+}
+
+func (pk *groPacket) beforeSave() {}
+
+// +checklocksignore
+func (pk *groPacket) StateSave(stateSinkObject state.Sink) {
+	pk.beforeSave()
+	stateSinkObject.Save(0, &pk.groPacketEntry)
+	stateSinkObject.Save(1, &pk.pkt)
+	stateSinkObject.Save(2, &pk.ipHdr)
+	stateSinkObject.Save(3, &pk.tcpHdr)
+	stateSinkObject.Save(4, &pk.initialLength)
+	stateSinkObject.Save(5, &pk.idx)
+}
+
+func (pk *groPacket) afterLoad(context.Context) {}
+
+// +checklocksignore
+func (pk *groPacket) StateLoad(ctx context.Context, stateSourceObject state.Source) {
+	stateSourceObject.Load(0, &pk.groPacketEntry)
+	stateSourceObject.Load(1, &pk.pkt)
+	stateSourceObject.Load(2, &pk.ipHdr)
+	stateSourceObject.Load(3, &pk.tcpHdr)
+	stateSourceObject.Load(4, &pk.initialLength)
+	stateSourceObject.Load(5, &pk.idx)
+}
+
+func (gd *GRO) StateTypeName() string {
+	return "pkg/tcpip/stack/gro.GRO"
+}
+
+func (gd *GRO) StateFields() []string {
+	return []string{
+		"enabled",
+		"buckets",
+		"Dispatcher",
+	}
+}
+
+func (gd *GRO) beforeSave() {}
+
+// +checklocksignore
+func (gd *GRO) StateSave(stateSinkObject state.Sink) {
+	gd.beforeSave()
+	stateSinkObject.Save(0, &gd.enabled)
+	stateSinkObject.Save(1, &gd.buckets)
+	stateSinkObject.Save(2, &gd.Dispatcher)
+}
+
+func (gd *GRO) afterLoad(context.Context) {}
+
+// +checklocksignore
+func (gd *GRO) StateLoad(ctx context.Context, stateSourceObject state.Source) {
+	stateSourceObject.Load(0, &gd.enabled)
+	stateSourceObject.Load(1, &gd.buckets)
+	stateSourceObject.Load(2, &gd.Dispatcher)
+}
+
 func (l *groPacketList) StateTypeName() string {
 	return "pkg/tcpip/stack/gro.groPacketList"
 }
@@ -65,6 +170,9 @@ func (e *groPacketEntry) StateLoad(ctx context.Context, stateSourceObject state.
 }

 func init() {
+	state.Register((*groBucket)(nil))
+	state.Register((*groPacket)(nil))
+	state.Register((*GRO)(nil))
 	state.Register((*groPacketList)(nil))
 	state.Register((*groPacketEntry)(nil))
 }
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables.go
@@ -335,9 +335,9 @@ func (it *IPTables) shouldSkipOrPopulateTables(tables []checkTable, pkt *PacketB
 // This is called in the hot path even when iptables are disabled, so we ensure
 // that it does not allocate. Note that called functions (e.g.
 // getConnAndUpdate) can allocate.
-// TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add.
+// +checkescape
 func (it *IPTables) CheckPrerouting(pkt *PacketBuffer, addressEP AddressableEndpoint, inNicName string) bool {
-	tables := [...]checkTable{
+	tables := [...]checkTable{ // escapes: on arm this causes an allocation.
 		{
 			fn:      check,
 			tableID: MangleID,
@@ -373,9 +373,9 @@ func (it *IPTables) CheckPrerouting(pkt *PacketBuffer, addressEP AddressableEndp
 // This is called in the hot path even when iptables are disabled, so we ensure
 // that it does not allocate. Note that called functions (e.g.
 // getConnAndUpdate) can allocate.
-// TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add.
+// +checkescape
 func (it *IPTables) CheckInput(pkt *PacketBuffer, inNicName string) bool {
-	tables := [...]checkTable{
+	tables := [...]checkTable{ // escapes: on arm this causes an allocation.
 		{
 			fn:      checkNAT,
 			tableID: NATID,
@@ -413,9 +413,9 @@ func (it *IPTables) CheckInput(pkt *PacketBuffer, inNicName string) bool {
 // This is called in the hot path even when iptables are disabled, so we ensure
 // that it does not allocate. Note that called functions (e.g.
 // getConnAndUpdate) can allocate.
-// TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add.
+// +checkescape
 func (it *IPTables) CheckForward(pkt *PacketBuffer, inNicName, outNicName string) bool {
-	tables := [...]checkTable{
+	tables := [...]checkTable{ // escapes: on arm this causes an allocation.
 		{
 			fn:      check,
 			tableID: FilterID,
@@ -445,9 +445,9 @@ func (it *IPTables) CheckForward(pkt *PacketBuffer, inNicName, outNicName string
 // This is called in the hot path even when iptables are disabled, so we ensure
 // that it does not allocate. Note that called functions (e.g.
 // getConnAndUpdate) can allocate.
-// TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add.
+// +checkescape
 func (it *IPTables) CheckOutput(pkt *PacketBuffer, r *Route, outNicName string) bool {
-	tables := [...]checkTable{
+	tables := [...]checkTable{ // escapes: on arm this causes an allocation.
 		{
 			fn:      check,
 			tableID: MangleID,
@@ -489,9 +489,9 @@ func (it *IPTables) CheckOutput(pkt *PacketBuffer, r *Route, outNicName string)
 // This is called in the hot path even when iptables are disabled, so we ensure
 // that it does not allocate. Note that called functions (e.g.
 // getConnAndUpdate) can allocate.
-// TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add.
+// +checkescape
 func (it *IPTables) CheckPostrouting(pkt *PacketBuffer, r *Route, addressEP AddressableEndpoint, outNicName string) bool {
-	tables := [...]checkTable{
+	tables := [...]checkTable{ // escapes: on arm this causes an allocation.
 		{
 			fn:      check,
 			tableID: MangleID,
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_entry.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_entry.go
@@ -29,6 +29,8 @@ const (
 )

 // NeighborEntry describes a neighboring device in the local network.
+//
+// +stateify savable
 type NeighborEntry struct {
 	Addr      tcpip.Address
 	LinkAddr  tcpip.LinkAddress
@@ -76,17 +78,38 @@ const (
 	Unreachable
 )

+// +stateify savable
 type timer struct {
 	// done indicates to the timer that the timer was stopped.
 	done *bool

-	timer tcpip.Timer
+	timer tcpip.Timer `state:"nosave"`
+}
+
+// +stateify savable
+type neighborEntryMu struct {
+	neighborEntryRWMutex `state:"nosave"`
+
+	neigh NeighborEntry
+
+	// done is closed when address resolution is complete. It is nil iff s is
+	// incomplete and resolution is not yet in progress.
+	done chan struct{} `state:"nosave"`
+
+	// onResolve is called with the result of address resolution.
+	onResolve []func(LinkResolutionResult) `state:"nosave"`
+
+	isRouter bool
+
+	timer timer
 }

 // neighborEntry implements a neighbor entry's individual node behavior, as per
 // RFC 4861 section 7.3.3. Neighbor Unreachability Detection operates in
 // parallel with the sending of packets to a neighbor, necessitating the
 // entry's lock to be acquired for all operations.
+//
+// +stateify savable
 type neighborEntry struct {
 	neighborEntryEntry

@@ -95,22 +118,7 @@ type neighborEntry struct {
 	// nudState points to the Neighbor Unreachability Detection configuration.
 	nudState *NUDState

-	mu struct {
-		neighborEntryRWMutex
-
-		neigh NeighborEntry
-
-		// done is closed when address resolution is complete. It is nil iff s is
-		// incomplete and resolution is not yet in progress.
-		done chan struct{}
-
-		// onResolve is called with the result of address resolution.
-		onResolve []func(LinkResolutionResult)
-
-		isRouter bool
-
-		timer timer
-	}
+	mu neighborEntryMu
 }

 // newNeighborEntry creates a neighbor cache entry starting at the default
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nic.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nic.go
@@ -90,6 +90,10 @@ type nic struct {

 	// Primary is the main controlling interface in a bonded setup.
 	Primary *nic
+
+	// experimentIPOptionEnabled indicates whether the NIC supports the
+	// experiment IP option.
+	experimentIPOptionEnabled bool
 }

 // makeNICStats initializes the NIC statistics and associates them to the global
@@ -103,7 +107,7 @@ func makeNICStats(global tcpip.NICStats) sharedStats {

 // +stateify savable
 type packetEndpointList struct {
-	mu packetEndpointListRWMutex
+	mu packetEndpointListRWMutex `state:"nosave"`

 	// eps is protected by mu, but the contained PacketEndpoint values are not.
 	//
@@ -188,6 +192,7 @@ func newNIC(stack *Stack, id tcpip.NICID, ep LinkEndpoint, opts NICOptions) *nic
 		duplicateAddressDetectors: make(map[tcpip.NetworkProtocolNumber]DuplicateAddressDetector),
 		qDisc:                     qDisc,
 		deliverLinkPackets:        opts.DeliverLinkPackets,
+		experimentIPOptionEnabled: opts.EnableExperimentIPOption,
 	}
 	nic.linkResQueue.init(nic)

@@ -1095,6 +1100,12 @@ func (n *nic) multicastForwarding(protocol tcpip.NetworkProtocolNumber) (bool, t
 	return ep.MulticastForwarding(), nil
 }

+// GetExperimentIPOptionEnabled returns whether the NIC is responsible for
+// passing the experiment IP option.
+func (n *nic) GetExperimentIPOptionEnabled() bool {
+	return n.experimentIPOptionEnabled
+}
+
 // CoordinatorNIC represents NetworkLinkEndpoint that can join multiple network devices.
 type CoordinatorNIC interface {
 	// AddNIC adds the specified NIC device.
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_buffer.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_buffer.go
@@ -381,6 +381,7 @@ func (pk *PacketBuffer) Clone() *PacketBuffer {
 	newPk.Hash = pk.Hash
 	newPk.Owner = pk.Owner
 	newPk.GSOOptions = pk.GSOOptions
+	newPk.EgressRoute = pk.EgressRoute
 	newPk.NetworkProtocolNumber = pk.NetworkProtocolNumber
 	newPk.dnatDone = pk.dnatDone
 	newPk.snatDone = pk.snatDone
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/pending_packets.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/pending_packets.go
@@ -33,9 +33,8 @@ type pendingPacket struct {
 	pkt       *PacketBuffer
 }

-// +stateify savable
 type packetsPendingLinkResolutionMu struct {
-	packetsPendingLinkResolutionMutex `state:"nosave"`
+	packetsPendingLinkResolutionMutex

 	// The packets to send once the resolver completes.
 	//
@@ -56,7 +55,7 @@ type packetsPendingLinkResolutionMu struct {
 // +stateify savable
 type packetsPendingLinkResolution struct {
 	nic *nic
-	mu  packetsPendingLinkResolutionMu
+	mu  packetsPendingLinkResolutionMu `state:"nosave"`
 }

 func (f *packetsPendingLinkResolution) incrementOutgoingPacketErrors(pkt *PacketBuffer) {
@@ -150,7 +149,7 @@ func (f *packetsPendingLinkResolution) enqueue(r *Route, pkt *PacketBuffer) tcpi
 	packets, ok := f.mu.packets[ch]
 	packets = append(packets, pendingPacket{
 		routeInfo: routeInfo,
-		pkt:       pkt.IncRef(),
+		pkt:       pkt.Clone(),
 	})

 	if len(packets) > maxPendingPacketsPerResolution {
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/registration.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/registration.go
@@ -162,7 +162,7 @@ type PacketEndpoint interface {
 	// match the endpoint.
 	//
 	// Implementers should treat packet as immutable and should copy it
-	// before before modification.
+	// before modification.
 	//
 	// linkHeader may have a length of 0, in which case the PacketEndpoint
 	// should construct its own ethernet header for applications.
@@ -171,6 +171,67 @@ type PacketEndpoint interface {
 	HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
 }

+// MappablePacketEndpoint is a packet endpoint that supports forwarding its
+// packets to a PacketMMapEndpoint.
+type MappablePacketEndpoint interface {
+	PacketEndpoint
+
+	// GetPacketMMapOpts returns the options for initializing a PacketMMapEndpoint
+	// for this endpoint.
+	GetPacketMMapOpts(req *tcpip.TpacketReq, isRx bool) PacketMMapOpts
+
+	// SetPacketMMapEndpoint sets the PacketMMapEndpoint for this endpoint. All
+	// packets received by this endpoint will be forwarded to the provided
+	// PacketMMapEndpoint.
+	SetPacketMMapEndpoint(ep PacketMMapEndpoint)
+
+	// GetPacketMMapEndpoint returns the PacketMMapEndpoint for this endpoint or
+	// nil if there is none.
+	GetPacketMMapEndpoint() PacketMMapEndpoint
+
+	// HandlePacketMMapCopy is a function that is called when a packet received is
+	// too large for the buffer size specified for the memory mapped endpoint. In
+	// this case, the packet is copied and passed to the original packet endpoint.
+	HandlePacketMMapCopy(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
+}
+
+// PacketMMapOpts are the options for initializing a PacketMMapEndpoint.
+//
+// +stateify savable
+type PacketMMapOpts struct {
+	Req            *tcpip.TpacketReq
+	IsRx           bool
+	Cooked         bool
+	Stack          *Stack
+	Stats          *tcpip.TransportEndpointStats
+	Wq             *waiter.Queue
+	NICID          tcpip.NICID
+	NetProto       tcpip.NetworkProtocolNumber
+	PacketEndpoint MappablePacketEndpoint
+}
+
+// PacketMMapEndpoint is the interface implemented by endpoints to handle memory
+// mapped packets over the packet transport protocol (PACKET_MMAP).
+type PacketMMapEndpoint interface {
+	// HandlePacket is called by the stack when new packets arrive that
+	// match the endpoint.
+	//
+	// Implementers should treat packet as immutable and should copy it
+	// before modification.
+	//
+	// linkHeader may have a length of 0, in which case the PacketEndpoint
+	// should construct its own ethernet header for applications.
+	//
+	// HandlePacket may modify pkt.
+	HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
+
+	// Close releases any resources associated with the endpoint.
+	Close()
+
+	// Readiness returns the events that the endpoint is ready for.
+	Readiness(mask waiter.EventMask) waiter.EventMask
+}
+
 // UnknownDestinationPacketDisposition enumerates the possible return values from
 // HandleUnknownDestinationPacket().
 type UnknownDestinationPacketDisposition int
@@ -244,6 +305,9 @@ type TransportProtocol interface {
 	// previously paused by Pause.
 	Resume()

+	// Restore starts any protocol level background workers during restore.
+	Restore()
+
 	// Parse sets pkt.TransportHeader and trims pkt.Data appropriately. It does
 	// neither and returns false if pkt.Data is too small, i.e. pkt.Data.Size() <
 	// MinimumPacketSize()
@@ -319,6 +383,10 @@ type NetworkHeaderParams struct {

 	// DF indicates whether the DF bit should be set.
 	DF bool
+
+	// ExperimentOptionValue is a 16 bit value that is set for the IP experiment
+	// option headers if it is not zero.
+	ExperimentOptionValue uint16
 }

 // GroupAddressableEndpoint is an endpoint that supports group addressing.
@@ -1142,7 +1210,7 @@ type NetworkLinkEndpoint interface {
 	// Close is called when the endpoint is removed from a stack.
 	Close()

-	// SetOnCloseAction sets the action that will be exected before closing the
+	// SetOnCloseAction sets the action that will be executed before closing the
 	// endpoint. It is used to destroy a network device when its endpoint
 	// is closed. Endpoints that are closed only after destroying their
 	// network devices can implement this method as no-op.
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/save_restore.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/save_restore.go
@@ -0,0 +1,29 @@
+// Copyright 2024 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stack
+
+import (
+	"context"
+	"math/rand"
+	"time"
+
+	cryptorand "gvisor.dev/gvisor/pkg/rand"
+)
+
+// afterLoad is invoked by stateify. It re-creates the insecure and secure RNGs.
+func (s *Stack) afterLoad(context.Context) {
+	s.insecureRNG = rand.New(rand.NewSource(time.Now().UnixNano()))
+	s.secureRNG = cryptorand.RNGFrom(cryptorand.Reader)
+}
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack.go
@@ -20,11 +20,11 @@
 package stack

 import (
+	"context"
 	"encoding/binary"
 	"fmt"
 	"io"
 	"math/rand"
-	"sync/atomic"
 	"time"

 	"golang.org/x/time/rate"
@@ -90,16 +90,16 @@ type Stack struct {

 	// routeTable is a list of routes sorted by prefix length, longest (most specific) first.
 	// +checklocks:routeMu
-	routeTable tcpip.RouteList
+	routeTable tcpip.RouteList `state:"nosave"`

 	mu stackRWMutex `state:"nosave"`
 	// +checklocks:mu
-	nics map[tcpip.NICID]*nic
+	nics map[tcpip.NICID]*nic `state:"nosave"`
 	// +checklocks:mu
 	defaultForwardingEnabled map[tcpip.NetworkProtocolNumber]struct{}

 	// nicIDGen is used to generate NIC IDs.
-	nicIDGen atomicbitops.Int32
+	nicIDGen atomicbitops.Int32 `state:"nosave"`

 	// cleanupEndpointsMu protects cleanupEndpoints.
 	cleanupEndpointsMu cleanupEndpointsMutex `state:"nosave"`
@@ -108,11 +108,6 @@ type Stack struct {

 	*ports.PortManager

-	// If not nil, then any new endpoints will have this probe function
-	// invoked everytime they receive a TCP segment.
-	// TODO(b/341946753): Restore them when netstack is savable.
-	tcpProbeFunc atomic.Value `state:"nosave"` // TCPProbeFunc
-
 	// clock is used to generate user-visible times.
 	clock tcpip.Clock

@@ -150,11 +145,9 @@ type Stack struct {
 	// randomGenerator is an injectable pseudo random generator that can be
 	// used when a random number is required. It must not be used in
 	// security-sensitive contexts.
-	// TODO(b/341946753): Restore them when netstack is savable.
 	insecureRNG *rand.Rand `state:"nosave"`

 	// secureRNG is a cryptographically secure random number generator.
-	// TODO(b/341946753): Restore them when netstack is savable.
 	secureRNG cryptorand.RNG `state:"nosave"`

 	// sendBufferSize holds the min/default/max send buffer sizes for
@@ -180,6 +173,9 @@ type Stack struct {
 	// tsOffsetSecret is the secret key for generating timestamp offsets
 	// initialized at stack startup.
 	tsOffsetSecret uint32
+
+	// saveRestoreEnabled indicates whether the stack is saved and restored.
+	saveRestoreEnabled bool
 }

 // NetworkProtocolFactory instantiates a network protocol.
@@ -779,23 +775,27 @@ func (s *Stack) addRouteLocked(route *tcpip.Route) {
 	s.routeTable.PushBack(route)
 }

-// RemoveRoutes removes matching routes from the route table.
-func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) {
+// RemoveRoutes removes matching routes from the route table and
+// returns the number of routes that were removed.
+func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) int {
 	s.routeMu.Lock()
 	defer s.routeMu.Unlock()

-	s.removeRoutesLocked(match)
+	return s.removeRoutesLocked(match)
 }

 // +checklocks:s.routeMu
-func (s *Stack) removeRoutesLocked(match func(tcpip.Route) bool) {
+func (s *Stack) removeRoutesLocked(match func(tcpip.Route) bool) int {
+	count := 0
 	for route := s.routeTable.Front(); route != nil; {
 		next := route.Next()
 		if match(*route) {
 			s.routeTable.Remove(route)
+			count++
 		}
 		route = next
 	}
+	return count
 }

 // ReplaceRoute replaces the route in the routing table which matchse
@@ -878,6 +878,10 @@ type NICOptions struct {
 	// DeliverLinkPackets specifies whether the NIC is responsible for
 	// delivering raw packets to packet sockets.
 	DeliverLinkPackets bool
+
+	// EnableExperimentIPOption specifies whether the NIC is responsible for
+	// passing the experiment IP option.
+	EnableExperimentIPOption bool
 }

 // GetNICByID return a network device associated with the specified ID.
@@ -1049,7 +1053,10 @@ func (s *Stack) SetNICCoordinator(id tcpip.NICID, mid tcpip.NICID) tcpip.Error {
 	if !ok {
 		return &tcpip.ErrUnknownNICID{}
 	}
-
+	// Setting a coordinator for a coordinator NIC is not allowed.
+	if _, ok := nic.NetworkLinkEndpoint.(CoordinatorNIC); ok {
+		return &tcpip.ErrNoSuchFile{}
+	}
 	m, ok := s.nics[mid]
 	if !ok {
 		return &tcpip.ErrUnknownNICID{}
@@ -1959,6 +1966,36 @@ func (s *Stack) Pause() {
 	}
 }

+func (s *Stack) getNICs() map[tcpip.NICID]*nic {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	nics := s.nics
+	return nics
+}
+
+// ReplaceConfig replaces the route table, NICs and tables of the loaded stack s with those of st.
+func (s *Stack) ReplaceConfig(st *Stack) {
+	if st == nil {
+		panic("stack.Stack cannot be nil when netstack s/r is enabled")
+	}
+
+	// Update route table.
+	s.SetRouteTable(st.GetRouteTable())
+
+	// Update NICs.
+	nics := st.getNICs()
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.nics = make(map[tcpip.NICID]*nic)
+	for id, nic := range nics {
+		nic.stack = s
+		s.nics[id] = nic
+		_ = s.NextNICID()
+	}
+	s.tables = st.tables
+}
+
 // Restore restarts the stack after a restore. This must be called after the
 // entire system has been restored.
 func (s *Stack) Restore() {
@@ -1967,13 +2004,18 @@ func (s *Stack) Restore() {
 	s.mu.Lock()
 	eps := s.restoredEndpoints
 	s.restoredEndpoints = nil
+	saveRestoreEnabled := s.saveRestoreEnabled
 	s.mu.Unlock()
 	for _, e := range eps {
 		e.Restore(s)
 	}
 	// Now resume any protocol level background workers.
 	for _, p := range s.transportProtocols {
-		p.proto.Resume()
+		if saveRestoreEnabled {
+			p.proto.Restore()
+		} else {
+			p.proto.Resume()
+		}
 	}
 }

@@ -2102,41 +2144,6 @@ func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) Tra
 	return nil
 }

-// AddTCPProbe installs a probe function that will be invoked on every segment
-// received by a given TCP endpoint. The probe function is passed a copy of the
-// TCP endpoint state before and after processing of the segment.
-//
-// NOTE: TCPProbe is added only to endpoints created after this call. Endpoints
-// created prior to this call will not call the probe function.
-//
-// Further, installing two different probes back to back can result in some
-// endpoints calling the first one and some the second one. There is no
-// guarantee provided on which probe will be invoked. Ideally this should only
-// be called once per stack.
-func (s *Stack) AddTCPProbe(probe TCPProbeFunc) {
-	s.tcpProbeFunc.Store(probe)
-}
-
-// GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil
-// otherwise.
-func (s *Stack) GetTCPProbe() TCPProbeFunc {
-	p := s.tcpProbeFunc.Load()
-	if p == nil {
-		return nil
-	}
-	return p.(TCPProbeFunc)
-}
-
-// RemoveTCPProbe removes an installed TCP probe.
-//
-// NOTE: This only ensures that endpoints created after this call do not
-// have a probe attached. Endpoints already created will continue to invoke
-// TCP probe.
-func (s *Stack) RemoveTCPProbe() {
-	// This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics.
-	s.tcpProbeFunc.Store(TCPProbeFunc(nil))
-}
-
 // JoinGroup joins the given multicast group on the given NIC.
 func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error {
 	s.mu.RLock()
@@ -2399,3 +2406,32 @@ func (s *Stack) SetNICStack(id tcpip.NICID, peer *Stack) (tcpip.NICID, tcpip.Err
 	id = tcpip.NICID(peer.NextNICID())
 	return id, peer.CreateNICWithOptions(id, ne, NICOptions{Name: nic.Name()})
 }
+
+// EnableSaveRestore marks the saveRestoreEnabled to true.
+func (s *Stack) EnableSaveRestore() {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.saveRestoreEnabled = true
+}
+
+// IsSaveRestoreEnabled returns true if save restore is enabled for the stack.
+func (s *Stack) IsSaveRestoreEnabled() bool {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	return s.saveRestoreEnabled
+}
+
+// contextID is this package's type for context.Context.Value keys.
+type contextID int
+
+const (
+	// CtxRestoreStack is a Context.Value key for the stack to be used in restore.
+	CtxRestoreStack contextID = iota
+)
+
+// RestoreStackFromContext returns the stack to be used during restore.
+func RestoreStackFromContext(ctx context.Context) *Stack {
+	return ctx.Value(CtxRestoreStack).(*Stack)
+}
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack_state_autogen.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack_state_autogen.go
--- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/tcp.go
+++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/tcp.go
@@ -1,494 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package stack
-
-import (
-	"context"
-	"time"
-
-	"gvisor.dev/gvisor/pkg/atomicbitops"
-	"gvisor.dev/gvisor/pkg/tcpip"
-	"gvisor.dev/gvisor/pkg/tcpip/header"
-	"gvisor.dev/gvisor/pkg/tcpip/internal/tcp"
-	"gvisor.dev/gvisor/pkg/tcpip/seqnum"
-)
-
-// contextID is this package's type for context.Context.Value keys.
-type contextID int
-
-const (
-	// CtxRestoreStack is a Context.Value key for the stack to be used in restore.
-	CtxRestoreStack contextID = iota
-)
-
-// RestoreStackFromContext returns the stack to be used during restore.
-func RestoreStackFromContext(ctx context.Context) *Stack {
-	return ctx.Value(CtxRestoreStack).(*Stack)
-}
-
-// TCPProbeFunc is the expected function type for a TCP probe function to be
-// passed to stack.AddTCPProbe.
-type TCPProbeFunc func(s *TCPEndpointState)
-
-// TCPCubicState is used to hold a copy of the internal cubic state when the
-// TCPProbeFunc is invoked.
-//
-// +stateify savable
-type TCPCubicState struct {
-	// WLastMax is the previous wMax value.
-	WLastMax float64
-
-	// WMax is the value of the congestion window at the time of the last
-	// congestion event.
-	WMax float64
-
-	// T is the time when the current congestion avoidance was entered.
-	T tcpip.MonotonicTime
-
-	// TimeSinceLastCongestion denotes the time since the current
-	// congestion avoidance was entered.
-	TimeSinceLastCongestion time.Duration
-
-	// C is the cubic constant as specified in RFC8312, page 11.
-	C float64
-
-	// K is the time period (in seconds) that the above function takes to
-	// increase the current window size to WMax if there are no further
-	// congestion events and is calculated using the following equation:
-	//
-	// K = cubic_root(WMax*(1-beta_cubic)/C) (Eq. 2, page 5)
-	K float64
-
-	// Beta is the CUBIC multiplication decrease factor. That is, when a
-	// congestion event is detected, CUBIC reduces its cwnd to
-	// WC(0)=WMax*beta_cubic.
-	Beta float64
-
-	// WC is window computed by CUBIC at time TimeSinceLastCongestion. It's
-	// calculated using the formula:
-	//
-	//  WC(TimeSinceLastCongestion) = C*(t-K)^3 + WMax (Eq. 1)
-	WC float64
-
-	// WEst is the window computed by CUBIC at time
-	// TimeSinceLastCongestion+RTT i.e WC(TimeSinceLastCongestion+RTT).
-	WEst float64
-
-	// EndSeq is the sequence number that, when cumulatively ACK'd, ends the
-	// HyStart round.
-	EndSeq seqnum.Value
-
-	// CurrRTT is the minimum round-trip time from the current round.
-	CurrRTT time.Duration
-
-	// LastRTT is the minimum round-trip time from the previous round.
-	LastRTT time.Duration
-
-	// SampleCount is the number of samples from the current round.
-	SampleCount uint
-
-	// LastAck is the time we received the most recent ACK (or start of round if
-	// more recent).
-	LastAck tcpip.MonotonicTime
-
-	// RoundStart is the time we started the most recent HyStart round.
-	RoundStart tcpip.MonotonicTime
-}
-
-// TCPRACKState is used to hold a copy of the internal RACK state when the
-// TCPProbeFunc is invoked.
-//
-// +stateify savable
-type TCPRACKState struct {
-	// XmitTime is the transmission timestamp of the most recent
-	// acknowledged segment.
-	XmitTime tcpip.MonotonicTime
-
-	// EndSequence is the ending TCP sequence number of the most recent
-	// acknowledged segment.
-	EndSequence seqnum.Value
-
-	// FACK is the highest selectively or cumulatively acknowledged
-	// sequence.
-	FACK seqnum.Value
-
-	// RTT is the round trip time of the most recently delivered packet on
-	// the connection (either cumulatively acknowledged or selectively
-	// acknowledged) that was not marked invalid as a possible spurious
-	// retransmission.
-	RTT time.Duration
-
-	// Reord is true iff reordering has been detected on this connection.
-	Reord bool
-
-	// DSACKSeen is true iff the connection has seen a DSACK.
-	DSACKSeen bool
-
-	// ReoWnd is the reordering window time used for recording packet
-	// transmission times. It is used to defer the moment at which RACK
-	// marks a packet lost.
-	ReoWnd time.Duration
-
-	// ReoWndIncr is the multiplier applied to adjust reorder window.
-	ReoWndIncr uint8
-
-	// ReoWndPersist is the number of loss recoveries before resetting
-	// reorder window.
-	ReoWndPersist int8
-
-	// RTTSeq is the SND.NXT when RTT is updated.
-	RTTSeq seqnum.Value
-}
-
-// TCPEndpointID is the unique 4 tuple that identifies a given endpoint.
-//
-// +stateify savable
-type TCPEndpointID struct {
-	// LocalPort is the local port associated with the endpoint.
-	LocalPort uint16
-
-	// LocalAddress is the local [network layer] address associated with
-	// the endpoint.
-	LocalAddress tcpip.Address
-
-	// RemotePort is the remote port associated with the endpoint.
-	RemotePort uint16
-
-	// RemoteAddress it the remote [network layer] address associated with
-	// the endpoint.
-	RemoteAddress tcpip.Address
-}
-
-// TCPFastRecoveryState holds a copy of the internal fast recovery state of a
-// TCP endpoint.
-//
-// +stateify savable
-type TCPFastRecoveryState struct {
-	// Active if true indicates the endpoint is in fast recovery. The
-	// following fields are only meaningful when Active is true.
-	Active bool
-
-	// First is the first unacknowledged sequence number being recovered.
-	First seqnum.Value
-
-	// Last is the 'recover' sequence number that indicates the point at
-	// which we should exit recovery barring any timeouts etc.
-	Last seqnum.Value
-
-	// MaxCwnd is the maximum value we are permitted to grow the congestion
-	// window during recovery. This is set at the time we enter recovery.
-	// It exists to avoid attacks where the receiver intentionally sends
-	// duplicate acks to artificially inflate the sender's cwnd.
-	MaxCwnd int
-
-	// HighRxt is the highest sequence number which has been retransmitted
-	// during the current loss recovery phase.  See: RFC 6675 Section 2 for
-	// details.
-	HighRxt seqnum.Value
-
-	// RescueRxt is the highest sequence number which has been
-	// optimistically retransmitted to prevent stalling of the ACK clock
-	// when there is loss at the end of the window and no new data is
-	// available for transmission.  See: RFC 6675 Section 2 for details.
-	RescueRxt seqnum.Value
-}
-
-// TCPReceiverState holds a copy of the internal state of the receiver for a
-// given TCP endpoint.
-//
-// +stateify savable
-type TCPReceiverState struct {
-	// RcvNxt is the TCP variable RCV.NXT.
-	RcvNxt seqnum.Value
-
-	// RcvAcc is one beyond the last acceptable sequence number. That is,
-	// the "largest" sequence value that the receiver has announced to its
-	// peer that it's willing to accept. This may be different than RcvNxt
-	// + (last advertised receive window) if the receive window is reduced;
-	// in that case we have to reduce the window as we receive more data
-	// instead of shrinking it.
-	RcvAcc seqnum.Value
-
-	// RcvWndScale is the window scaling to use for inbound segments.
-	RcvWndScale uint8
-
-	// PendingBufUsed is the number of bytes pending in the receive queue.
-	PendingBufUsed int
-}
-
-// TCPRTTState holds a copy of information about the endpoint's round trip
-// time.
-//
-// +stateify savable
-type TCPRTTState struct {
-	// SRTT is the smoothed round trip time defined in section 2 of RFC
-	// 6298.
-	SRTT time.Duration
-
-	// RTTVar is the round-trip time variation as defined in section 2 of
-	// RFC 6298.
-	RTTVar time.Duration
-
-	// SRTTInited if true indicates that a valid RTT measurement has been
-	// completed.
-	SRTTInited bool
-}
-
-// TCPSenderState holds a copy of the internal state of the sender for a given
-// TCP Endpoint.
-//
-// +stateify savable
-type TCPSenderState struct {
-	// LastSendTime is the timestamp at which we sent the last segment.
-	LastSendTime tcpip.MonotonicTime
-
-	// DupAckCount is the number of Duplicate ACKs received. It is used for
-	// fast retransmit.
-	DupAckCount int
-
-	// SndCwnd is the size of the sending congestion window in packets.
-	SndCwnd int
-
-	// Ssthresh is the threshold between slow start and congestion
-	// avoidance.
-	Ssthresh int
-
-	// SndCAAckCount is the number of packets acknowledged during
-	// congestion avoidance. When enough packets have been ack'd (typically
-	// cwnd packets), the congestion window is incremented by one.
-	SndCAAckCount int
-
-	// Outstanding is the number of packets that have been sent but not yet
-	// acknowledged.
-	Outstanding int
-
-	// SackedOut is the number of packets which have been selectively
-	// acked.
-	SackedOut int
-
-	// SndWnd is the send window size in bytes.
-	SndWnd seqnum.Size
-
-	// SndUna is the next unacknowledged sequence number.
-	SndUna seqnum.Value
-
-	// SndNxt is the sequence number of the next segment to be sent.
-	SndNxt seqnum.Value
-
-	// RTTMeasureSeqNum is the sequence number being used for the latest
-	// RTT measurement.
-	RTTMeasureSeqNum seqnum.Value
-
-	// RTTMeasureTime is the time when the RTTMeasureSeqNum was sent.
-	RTTMeasureTime tcpip.MonotonicTime
-
-	// Closed indicates that the caller has closed the endpoint for
-	// sending.
-	Closed bool
-
-	// RTO is the retransmit timeout as defined in section of 2 of RFC
-	// 6298.
-	RTO time.Duration
-
-	// RTTState holds information about the endpoint's round trip time.
-	RTTState TCPRTTState
-
-	// MaxPayloadSize is the maximum size of the payload of a given
-	// segment.  It is initialized on demand.
-	MaxPayloadSize int
-
-	// SndWndScale is the number of bits to shift left when reading the
-	// send window size from a segment.
-	SndWndScale uint8
-
-	// MaxSentAck is the highest acknowledgement number sent till now.
-	MaxSentAck seqnum.Value
-
-	// FastRecovery holds the fast recovery state for the endpoint.
-	FastRecovery TCPFastRecoveryState
-
-	// Cubic holds the state related to CUBIC congestion control.
-	Cubic TCPCubicState
-
-	// RACKState holds the state related to RACK loss detection algorithm.
-	RACKState TCPRACKState
-
-	// RetransmitTS records the timestamp used to detect spurious recovery.
-	RetransmitTS uint32
-
-	// SpuriousRecovery indicates if the sender entered recovery spuriously.
-	SpuriousRecovery bool
-}
-
-// TCPSACKInfo holds TCP SACK related information for a given TCP endpoint.
-//
-// +stateify savable
-type TCPSACKInfo struct {
-	// Blocks is the list of SACK Blocks that identify the out of order
-	// segments held by a given TCP endpoint.
-	Blocks []header.SACKBlock
-
-	// ReceivedBlocks are the SACK blocks received by this endpoint from
-	// the peer endpoint.
-	ReceivedBlocks []header.SACKBlock
-
-	// MaxSACKED is the highest sequence number that has been SACKED by the
-	// peer.
-	MaxSACKED seqnum.Value
-}
-
-// RcvBufAutoTuneParams holds state related to TCP receive buffer auto-tuning.
-//
-// +stateify savable
-type RcvBufAutoTuneParams struct {
-	// MeasureTime is the time at which the current measurement was
-	// started.
-	MeasureTime tcpip.MonotonicTime
-
-	// CopiedBytes is the number of bytes copied to user space since this
-	// measure began.
-	CopiedBytes int
-
-	// PrevCopiedBytes is the number of bytes copied to userspace in the
-	// previous RTT period.
-	PrevCopiedBytes int
-
-	// RcvBufSize is the auto tuned receive buffer size.
-	RcvBufSize int
-
-	// RTT is the smoothed RTT as measured by observing the time between
-	// when a byte is first acknowledged and the receipt of data that is at
-	// least one window beyond the sequence number that was acknowledged.
-	RTT time.Duration
-
-	// RTTVar is the "round-trip time variation" as defined in section 2 of
-	// RFC6298.
-	RTTVar time.Duration
-
-	// RTTMeasureSeqNumber is the highest acceptable sequence number at the
-	// time this RTT measurement period began.
-	RTTMeasureSeqNumber seqnum.Value
-
-	// RTTMeasureTime is the absolute time at which the current RTT
-	// measurement period began.
-	RTTMeasureTime tcpip.MonotonicTime
-
-	// Disabled is true if an explicit receive buffer is set for the
-	// endpoint.
-	Disabled bool
-}
-
-// TCPRcvBufState contains information about the state of an endpoint's receive
-// socket buffer.
-//
-// +stateify savable
-type TCPRcvBufState struct {
-	// RcvBufUsed is the amount of bytes actually held in the receive
-	// socket buffer for the endpoint.
-	RcvBufUsed int
-
-	// RcvBufAutoTuneParams is used to hold state variables to compute the
-	// auto tuned receive buffer size.
-	RcvAutoParams RcvBufAutoTuneParams
-
-	// RcvClosed if true, indicates the endpoint has been closed for
-	// reading.
-	RcvClosed bool
-}
-
-// TCPSndBufState contains information about the state of an endpoint's send
-// socket buffer.
-//
-// +stateify savable
-type TCPSndBufState struct {
-	// SndBufSize is the size of the socket send buffer.
-	SndBufSize int
-
-	// SndBufUsed is the number of bytes held in the socket send buffer.
-	SndBufUsed int
-
-	// SndClosed indicates that the endpoint has been closed for sends.
-	SndClosed bool
-
-	// PacketTooBigCount is used to notify the main protocol routine how
-	// many times a "packet too big" control packet is received.
-	PacketTooBigCount int
-
-	// SndMTU is the smallest MTU seen in the control packets received.
-	SndMTU int
-
-	// AutoTuneSndBufDisabled indicates that the auto tuning of send buffer
-	// is disabled.
-	AutoTuneSndBufDisabled atomicbitops.Uint32
-}
-
-// TCPEndpointStateInner contains the members of TCPEndpointState used directly
-// (that is, not within another containing struct) within the endpoint's
-// internal implementation.
-//
-// +stateify savable
-type TCPEndpointStateInner struct {
-	// TSOffset is a randomized offset added to the value of the TSVal
-	// field in the timestamp option.
-	TSOffset tcp.TSOffset
-
-	// SACKPermitted is set to true if the peer sends the TCPSACKPermitted
-	// option in the SYN/SYN-ACK.
-	SACKPermitted bool
-
-	// SendTSOk is used to indicate when the TS Option has been negotiated.
-	// When sendTSOk is true every non-RST segment should carry a TS as per
-	// RFC7323#section-1.1.
-	SendTSOk bool
-
-	// RecentTS is the timestamp that should be sent in the TSEcr field of
-	// the timestamp for future segments sent by the endpoint. This field
-	// is updated if required when a new segment is received by this
-	// endpoint.
-	RecentTS uint32
-}
-
-// TCPEndpointState is a copy of the internal state of a TCP endpoint.
-//
-// +stateify savable
-type TCPEndpointState struct {
-	// TCPEndpointStateInner contains the members of TCPEndpointState used
-	// by the endpoint's internal implementation.
-	TCPEndpointStateInner
-
-	// ID is a copy of the TransportEndpointID for the endpoint.
-	ID TCPEndpointID
-
-	// SegTime denotes the absolute time when this segment was received.
-	SegTime tcpip.MonotonicTime
-
-	// RcvBufState contains information about the state of the endpoint's
-	// receive socket buffer.
-	RcvBufState TCPRcvBufState
-
-	// SndBufState contains information about the state of the endpoint's
-	// send socket buffer.
-	SndBufState TCPSndBufState
-
-	// SACK holds TCP SACK related information for this endpoint.
-	SACK TCPSACKInfo
-
-	// Receiver holds variables related to the TCP receiver for the
-	// endpoint.
-	Receiver TCPReceiverState
-
-	// Sender holds state related to the TCP Sender for the endpoint.
-	Sender TCPSenderState
-}