Update

2026-02-19 10:07:43 +00:00
parent 007438e372
commit 6e637ecf77
1763 changed files with 60820 additions and 279516 deletions
--- a/vendor/tailscale.com/wgengine/magicsock/batching_conn.go
+++ b/vendor/tailscale.com/wgengine/magicsock/batching_conn.go
@@ -1,25 +0,0 @@
-// Copyright (c) Tailscale Inc & AUTHORS
-// SPDX-License-Identifier: BSD-3-Clause
-
-package magicsock
-
-import (
-	"net/netip"
-
-	"golang.org/x/net/ipv4"
-	"golang.org/x/net/ipv6"
-	"tailscale.com/types/nettype"
-)
-
-var (
-	// This acts as a compile-time check for our usage of ipv6.Message in
-	// batchingConn for both IPv6 and IPv4 operations.
-	_ ipv6.Message = ipv4.Message{}
-)
-
-// batchingConn is a nettype.PacketConn that provides batched i/o.
-type batchingConn interface {
-	nettype.PacketConn
-	ReadBatch(msgs []ipv6.Message, flags int) (n int, err error)
-	WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error
-}
--- a/vendor/tailscale.com/wgengine/magicsock/batching_conn_default.go
+++ b/vendor/tailscale.com/wgengine/magicsock/batching_conn_default.go
@@ -1,14 +0,0 @@
-// Copyright (c) Tailscale Inc & AUTHORS
-// SPDX-License-Identifier: BSD-3-Clause
-
-//go:build !linux
-
-package magicsock
-
-import (
-	"tailscale.com/types/nettype"
-)
-
-func tryUpgradeToBatchingConn(pconn nettype.PacketConn, _ string, _ int) nettype.PacketConn {
-	return pconn
-}
--- a/vendor/tailscale.com/wgengine/magicsock/batching_conn_linux.go
+++ b/vendor/tailscale.com/wgengine/magicsock/batching_conn_linux.go
@@ -1,424 +0,0 @@
-// Copyright (c) Tailscale Inc & AUTHORS
-// SPDX-License-Identifier: BSD-3-Clause
-
-package magicsock
-
-import (
-	"encoding/binary"
-	"errors"
-	"fmt"
-	"net"
-	"net/netip"
-	"runtime"
-	"strings"
-	"sync"
-	"sync/atomic"
-	"syscall"
-	"time"
-	"unsafe"
-
-	"golang.org/x/net/ipv4"
-	"golang.org/x/net/ipv6"
-	"golang.org/x/sys/unix"
-	"tailscale.com/hostinfo"
-	"tailscale.com/net/neterror"
-	"tailscale.com/types/nettype"
-)
-
-// xnetBatchReaderWriter defines the batching i/o methods of
-// golang.org/x/net/ipv4.PacketConn (and ipv6.PacketConn).
-// TODO(jwhited): This should eventually be replaced with the standard library
-// implementation of https://github.com/golang/go/issues/45886
-type xnetBatchReaderWriter interface {
-	xnetBatchReader
-	xnetBatchWriter
-}
-
-type xnetBatchReader interface {
-	ReadBatch([]ipv6.Message, int) (int, error)
-}
-
-type xnetBatchWriter interface {
-	WriteBatch([]ipv6.Message, int) (int, error)
-}
-
-// linuxBatchingConn is a UDP socket that provides batched i/o. It implements
-// batchingConn.
-type linuxBatchingConn struct {
-	pc                    nettype.PacketConn
-	xpc                   xnetBatchReaderWriter
-	rxOffload             bool                                  // supports UDP GRO or similar
-	txOffload             atomic.Bool                           // supports UDP GSO or similar
-	setGSOSizeInControl   func(control *[]byte, gsoSize uint16) // typically setGSOSizeInControl(); swappable for testing
-	getGSOSizeFromControl func(control []byte) (int, error)     // typically getGSOSizeFromControl(); swappable for testing
-	sendBatchPool         sync.Pool
-}
-
-func (c *linuxBatchingConn) ReadFromUDPAddrPort(p []byte) (n int, addr netip.AddrPort, err error) {
-	if c.rxOffload {
-		// UDP_GRO is opt-in on Linux via setsockopt(). Once enabled you may
-		// receive a "monster datagram" from any read call. The ReadFrom() API
-		// does not support passing the GSO size and is unsafe to use in such a
-		// case. Other platforms may vary in behavior, but we go with the most
-		// conservative approach to prevent this from becoming a footgun in the
-		// future.
-		return 0, netip.AddrPort{}, errors.New("rx UDP offload is enabled on this socket, single packet reads are unavailable")
-	}
-	return c.pc.ReadFromUDPAddrPort(p)
-}
-
-func (c *linuxBatchingConn) SetDeadline(t time.Time) error {
-	return c.pc.SetDeadline(t)
-}
-
-func (c *linuxBatchingConn) SetReadDeadline(t time.Time) error {
-	return c.pc.SetReadDeadline(t)
-}
-
-func (c *linuxBatchingConn) SetWriteDeadline(t time.Time) error {
-	return c.pc.SetWriteDeadline(t)
-}
-
-const (
-	// This was initially established for Linux, but may split out to
-	// GOOS-specific values later. It originates as UDP_MAX_SEGMENTS in the
-	// kernel's TX path, and UDP_GRO_CNT_MAX for RX.
-	udpSegmentMaxDatagrams = 64
-)
-
-const (
-	// Exceeding these values results in EMSGSIZE.
-	maxIPv4PayloadLen = 1<<16 - 1 - 20 - 8
-	maxIPv6PayloadLen = 1<<16 - 1 - 8
-)
-
-// coalesceMessages iterates msgs, coalescing them where possible while
-// maintaining datagram order. All msgs have their Addr field set to addr.
-func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, buffs [][]byte, msgs []ipv6.Message) int {
-	var (
-		base     = -1 // index of msg we are currently coalescing into
-		gsoSize  int  // segmentation size of msgs[base]
-		dgramCnt int  // number of dgrams coalesced into msgs[base]
-		endBatch bool // tracking flag to start a new batch on next iteration of buffs
-	)
-	maxPayloadLen := maxIPv4PayloadLen
-	if addr.IP.To4() == nil {
-		maxPayloadLen = maxIPv6PayloadLen
-	}
-	for i, buff := range buffs {
-		if i > 0 {
-			msgLen := len(buff)
-			baseLenBefore := len(msgs[base].Buffers[0])
-			freeBaseCap := cap(msgs[base].Buffers[0]) - baseLenBefore
-			if msgLen+baseLenBefore <= maxPayloadLen &&
-				msgLen <= gsoSize &&
-				msgLen <= freeBaseCap &&
-				dgramCnt < udpSegmentMaxDatagrams &&
-				!endBatch {
-				msgs[base].Buffers[0] = append(msgs[base].Buffers[0], make([]byte, msgLen)...)
-				copy(msgs[base].Buffers[0][baseLenBefore:], buff)
-				if i == len(buffs)-1 {
-					c.setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
-				}
-				dgramCnt++
-				if msgLen < gsoSize {
-					// A smaller than gsoSize packet on the tail is legal, but
-					// it must end the batch.
-					endBatch = true
-				}
-				continue
-			}
-		}
-		if dgramCnt > 1 {
-			c.setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
-		}
-		// Reset prior to incrementing base since we are preparing to start a
-		// new potential batch.
-		endBatch = false
-		base++
-		gsoSize = len(buff)
-		msgs[base].OOB = msgs[base].OOB[:0]
-		msgs[base].Buffers[0] = buff
-		msgs[base].Addr = addr
-		dgramCnt = 1
-	}
-	return base + 1
-}
-
-type sendBatch struct {
-	msgs []ipv6.Message
-	ua   *net.UDPAddr
-}
-
-func (c *linuxBatchingConn) getSendBatch() *sendBatch {
-	batch := c.sendBatchPool.Get().(*sendBatch)
-	return batch
-}
-
-func (c *linuxBatchingConn) putSendBatch(batch *sendBatch) {
-	for i := range batch.msgs {
-		batch.msgs[i] = ipv6.Message{Buffers: batch.msgs[i].Buffers, OOB: batch.msgs[i].OOB}
-	}
-	c.sendBatchPool.Put(batch)
-}
-
-func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error {
-	batch := c.getSendBatch()
-	defer c.putSendBatch(batch)
-	if addr.Addr().Is6() {
-		as16 := addr.Addr().As16()
-		copy(batch.ua.IP, as16[:])
-		batch.ua.IP = batch.ua.IP[:16]
-	} else {
-		as4 := addr.Addr().As4()
-		copy(batch.ua.IP, as4[:])
-		batch.ua.IP = batch.ua.IP[:4]
-	}
-	batch.ua.Port = int(addr.Port())
-	var (
-		n       int
-		retried bool
-	)
-retry:
-	if c.txOffload.Load() {
-		n = c.coalesceMessages(batch.ua, buffs, batch.msgs)
-	} else {
-		for i := range buffs {
-			batch.msgs[i].Buffers[0] = buffs[i]
-			batch.msgs[i].Addr = batch.ua
-			batch.msgs[i].OOB = batch.msgs[i].OOB[:0]
-		}
-		n = len(buffs)
-	}
-
-	err := c.writeBatch(batch.msgs[:n])
-	if err != nil && c.txOffload.Load() && neterror.ShouldDisableUDPGSO(err) {
-		c.txOffload.Store(false)
-		retried = true
-		goto retry
-	}
-	if retried {
-		return neterror.ErrUDPGSODisabled{OnLaddr: c.pc.LocalAddr().String(), RetryErr: err}
-	}
-	return err
-}
-
-func (c *linuxBatchingConn) SyscallConn() (syscall.RawConn, error) {
-	sc, ok := c.pc.(syscall.Conn)
-	if !ok {
-		return nil, errUnsupportedConnType
-	}
-	return sc.SyscallConn()
-}
-
-func (c *linuxBatchingConn) writeBatch(msgs []ipv6.Message) error {
-	var head int
-	for {
-		n, err := c.xpc.WriteBatch(msgs[head:], 0)
-		if err != nil || n == len(msgs[head:]) {
-			// Returning the number of packets written would require
-			// unraveling individual msg len and gso size during a coalesced
-			// write. The top of the call stack disregards partial success,
-			// so keep this simple for now.
-			return err
-		}
-		head += n
-	}
-}
-
-// splitCoalescedMessages splits coalesced messages from the tail of dst
-// beginning at index 'firstMsgAt' into the head of the same slice. It reports
-// the number of elements to evaluate in msgs for nonzero len (msgs[i].N). An
-// error is returned if a socket control message cannot be parsed or a split
-// operation would overflow msgs.
-func (c *linuxBatchingConn) splitCoalescedMessages(msgs []ipv6.Message, firstMsgAt int) (n int, err error) {
-	for i := firstMsgAt; i < len(msgs); i++ {
-		msg := &msgs[i]
-		if msg.N == 0 {
-			return n, err
-		}
-		var (
-			gsoSize    int
-			start      int
-			end        = msg.N
-			numToSplit = 1
-		)
-		gsoSize, err = c.getGSOSizeFromControl(msg.OOB[:msg.NN])
-		if err != nil {
-			return n, err
-		}
-		if gsoSize > 0 {
-			numToSplit = (msg.N + gsoSize - 1) / gsoSize
-			end = gsoSize
-		}
-		for j := 0; j < numToSplit; j++ {
-			if n > i {
-				return n, errors.New("splitting coalesced packet resulted in overflow")
-			}
-			copied := copy(msgs[n].Buffers[0], msg.Buffers[0][start:end])
-			msgs[n].N = copied
-			msgs[n].Addr = msg.Addr
-			start = end
-			end += gsoSize
-			if end > msg.N {
-				end = msg.N
-			}
-			n++
-		}
-		if i != n-1 {
-			// It is legal for bytes to move within msg.Buffers[0] as a result
-			// of splitting, so we only zero the source msg len when it is not
-			// the destination of the last split operation above.
-			msg.N = 0
-		}
-	}
-	return n, nil
-}
-
-func (c *linuxBatchingConn) ReadBatch(msgs []ipv6.Message, flags int) (n int, err error) {
-	if !c.rxOffload || len(msgs) < 2 {
-		return c.xpc.ReadBatch(msgs, flags)
-	}
-	// Read into the tail of msgs, split into the head.
-	readAt := len(msgs) - 2
-	numRead, err := c.xpc.ReadBatch(msgs[readAt:], 0)
-	if err != nil || numRead == 0 {
-		return 0, err
-	}
-	return c.splitCoalescedMessages(msgs, readAt)
-}
-
-func (c *linuxBatchingConn) LocalAddr() net.Addr {
-	return c.pc.LocalAddr().(*net.UDPAddr)
-}
-
-func (c *linuxBatchingConn) WriteToUDPAddrPort(b []byte, addr netip.AddrPort) (int, error) {
-	return c.pc.WriteToUDPAddrPort(b, addr)
-}
-
-func (c *linuxBatchingConn) Close() error {
-	return c.pc.Close()
-}
-
-// tryEnableUDPOffload attempts to enable the UDP_GRO socket option on pconn,
-// and returns two booleans indicating TX and RX UDP offload support.
-func tryEnableUDPOffload(pconn nettype.PacketConn) (hasTX bool, hasRX bool) {
-	if c, ok := pconn.(*net.UDPConn); ok {
-		rc, err := c.SyscallConn()
-		if err != nil {
-			return
-		}
-		err = rc.Control(func(fd uintptr) {
-			_, errSyscall := syscall.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT)
-			hasTX = errSyscall == nil
-			errSyscall = syscall.SetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO, 1)
-			hasRX = errSyscall == nil
-		})
-		if err != nil {
-			return false, false
-		}
-	}
-	return hasTX, hasRX
-}
-
-// getGSOSizeFromControl returns the GSO size found in control. If no GSO size
-// is found or the len(control) < unix.SizeofCmsghdr, this function returns 0.
-// A non-nil error will be returned if len(control) > unix.SizeofCmsghdr but
-// its contents cannot be parsed as a socket control message.
-func getGSOSizeFromControl(control []byte) (int, error) {
-	var (
-		hdr  unix.Cmsghdr
-		data []byte
-		rem  = control
-		err  error
-	)
-
-	for len(rem) > unix.SizeofCmsghdr {
-		hdr, data, rem, err = unix.ParseOneSocketControlMessage(control)
-		if err != nil {
-			return 0, fmt.Errorf("error parsing socket control message: %w", err)
-		}
-		if hdr.Level == unix.SOL_UDP && hdr.Type == unix.UDP_GRO && len(data) >= 2 {
-			return int(binary.NativeEndian.Uint16(data[:2])), nil
-		}
-	}
-	return 0, nil
-}
-
-// setGSOSizeInControl sets a socket control message in control containing
-// gsoSize. If len(control) < controlMessageSize control's len will be set to 0.
-func setGSOSizeInControl(control *[]byte, gsoSize uint16) {
-	*control = (*control)[:0]
-	if cap(*control) < int(unsafe.Sizeof(unix.Cmsghdr{})) {
-		return
-	}
-	if cap(*control) < controlMessageSize {
-		return
-	}
-	*control = (*control)[:cap(*control)]
-	hdr := (*unix.Cmsghdr)(unsafe.Pointer(&(*control)[0]))
-	hdr.Level = unix.SOL_UDP
-	hdr.Type = unix.UDP_SEGMENT
-	hdr.SetLen(unix.CmsgLen(2))
-	binary.NativeEndian.PutUint16((*control)[unix.SizeofCmsghdr:], gsoSize)
-	*control = (*control)[:unix.CmsgSpace(2)]
-}
-
-// tryUpgradeToBatchingConn probes the capabilities of the OS and pconn, and
-// upgrades pconn to a *linuxBatchingConn if appropriate.
-func tryUpgradeToBatchingConn(pconn nettype.PacketConn, network string, batchSize int) nettype.PacketConn {
-	if runtime.GOOS != "linux" {
-		// Exclude Android.
-		return pconn
-	}
-	if network != "udp4" && network != "udp6" {
-		return pconn
-	}
-	if strings.HasPrefix(hostinfo.GetOSVersion(), "2.") {
-		// recvmmsg/sendmmsg were added in 2.6.33, but we support down to
-		// 2.6.32 for old NAS devices. See https://github.com/tailscale/tailscale/issues/6807.
-		// As a cheap heuristic: if the Linux kernel starts with "2", just
-		// consider it too old for mmsg. Nobody who cares about performance runs
-		// such ancient kernels. UDP offload was added much later, so no
-		// upgrades are available.
-		return pconn
-	}
-	uc, ok := pconn.(*net.UDPConn)
-	if !ok {
-		return pconn
-	}
-	b := &linuxBatchingConn{
-		pc:                    pconn,
-		getGSOSizeFromControl: getGSOSizeFromControl,
-		setGSOSizeInControl:   setGSOSizeInControl,
-		sendBatchPool: sync.Pool{
-			New: func() any {
-				ua := &net.UDPAddr{
-					IP: make([]byte, 16),
-				}
-				msgs := make([]ipv6.Message, batchSize)
-				for i := range msgs {
-					msgs[i].Buffers = make([][]byte, 1)
-					msgs[i].Addr = ua
-					msgs[i].OOB = make([]byte, controlMessageSize)
-				}
-				return &sendBatch{
-					ua:   ua,
-					msgs: msgs,
-				}
-			},
-		},
-	}
-	switch network {
-	case "udp4":
-		b.xpc = ipv4.NewPacketConn(uc)
-	case "udp6":
-		b.xpc = ipv6.NewPacketConn(uc)
-	default:
-		panic("bogus network")
-	}
-	var txOffload bool
-	txOffload, b.rxOffload = tryEnableUDPOffload(uc)
-	b.txOffload.Store(txOffload)
-	return b
-}
--- a/vendor/tailscale.com/wgengine/magicsock/blockforever_conn.go
+++ b/vendor/tailscale.com/wgengine/magicsock/blockforever_conn.go
@@ -10,11 +10,13 @@ import (
 	"sync"
 	"syscall"
 	"time"
+
+	"tailscale.com/syncs"
 )

 // blockForeverConn is a net.PacketConn whose reads block until it is closed.
 type blockForeverConn struct {
-	mu     sync.Mutex
+	mu     syncs.Mutex
 	cond   *sync.Cond
 	closed bool
 }
--- a/vendor/tailscale.com/wgengine/magicsock/cloudinfo.go
+++ b/vendor/tailscale.com/wgengine/magicsock/cloudinfo.go
@@ -1,182 +0,0 @@
-// Copyright (c) Tailscale Inc & AUTHORS
-// SPDX-License-Identifier: BSD-3-Clause
-
-//go:build !(ios || android || js)
-
-package magicsock
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"io"
-	"net"
-	"net/http"
-	"net/netip"
-	"slices"
-	"strings"
-	"time"
-
-	"tailscale.com/types/logger"
-	"tailscale.com/util/cloudenv"
-)
-
-const maxCloudInfoWait = 2 * time.Second
-
-type cloudInfo struct {
-	client http.Client
-	logf   logger.Logf
-
-	// The following parameters are fixed for the lifetime of the cloudInfo
-	// object, but are used for testing.
-	cloud    cloudenv.Cloud
-	endpoint string
-}
-
-func newCloudInfo(logf logger.Logf) *cloudInfo {
-	tr := &http.Transport{
-		DisableKeepAlives: true,
-		Dial: (&net.Dialer{
-			Timeout: maxCloudInfoWait,
-		}).Dial,
-	}
-
-	return &cloudInfo{
-		client:   http.Client{Transport: tr},
-		logf:     logf,
-		cloud:    cloudenv.Get(),
-		endpoint: "http://" + cloudenv.CommonNonRoutableMetadataIP,
-	}
-}
-
-// GetPublicIPs returns any public IPs attached to the current cloud instance,
-// if the tailscaled process is running in a known cloud and there are any such
-// IPs present.
-func (ci *cloudInfo) GetPublicIPs(ctx context.Context) ([]netip.Addr, error) {
-	switch ci.cloud {
-	case cloudenv.AWS:
-		ret, err := ci.getAWS(ctx)
-		ci.logf("[v1] cloudinfo.GetPublicIPs: AWS: %v, %v", ret, err)
-		return ret, err
-	}
-
-	return nil, nil
-}
-
-// getAWSMetadata makes a request to the AWS metadata service at the given
-// path, authenticating with the provided IMDSv2 token. The returned metadata
-// is split by newline and returned as a slice.
-func (ci *cloudInfo) getAWSMetadata(ctx context.Context, token, path string) ([]string, error) {
-	req, err := http.NewRequestWithContext(ctx, "GET", ci.endpoint+path, nil)
-	if err != nil {
-		return nil, fmt.Errorf("creating request to %q: %w", path, err)
-	}
-	req.Header.Set("X-aws-ec2-metadata-token", token)
-
-	resp, err := ci.client.Do(req)
-	if err != nil {
-		return nil, fmt.Errorf("making request to metadata service %q: %w", path, err)
-	}
-	defer resp.Body.Close()
-
-	switch resp.StatusCode {
-	case http.StatusOK:
-		// Good
-	case http.StatusNotFound:
-		// Nothing found, but this isn't an error; just return
-		return nil, nil
-	default:
-		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
-	}
-
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return nil, fmt.Errorf("reading response body for %q: %w", path, err)
-	}
-
-	return strings.Split(strings.TrimSpace(string(body)), "\n"), nil
-}
-
-// getAWS returns all public IPv4 and IPv6 addresses present in the AWS instance metadata.
-func (ci *cloudInfo) getAWS(ctx context.Context) ([]netip.Addr, error) {
-	ctx, cancel := context.WithTimeout(ctx, maxCloudInfoWait)
-	defer cancel()
-
-	// Get a token so we can query the metadata service.
-	req, err := http.NewRequestWithContext(ctx, "PUT", ci.endpoint+"/latest/api/token", nil)
-	if err != nil {
-		return nil, fmt.Errorf("creating token request: %w", err)
-	}
-	req.Header.Set("X-Aws-Ec2-Metadata-Token-Ttl-Seconds", "10")
-
-	resp, err := ci.client.Do(req)
-	if err != nil {
-		return nil, fmt.Errorf("making token request to metadata service: %w", err)
-	}
-	body, err := io.ReadAll(resp.Body)
-	resp.Body.Close()
-	if err != nil {
-		return nil, fmt.Errorf("reading token response body: %w", err)
-	}
-	token := string(body)
-
-	server := resp.Header.Get("Server")
-	if server != "EC2ws" {
-		return nil, fmt.Errorf("unexpected server header: %q", server)
-	}
-
-	// Iterate over all interfaces and get their public IP addresses, both IPv4 and IPv6.
-	macAddrs, err := ci.getAWSMetadata(ctx, token, "/latest/meta-data/network/interfaces/macs/")
-	if err != nil {
-		return nil, fmt.Errorf("getting interface MAC addresses: %w", err)
-	}
-
-	var (
-		addrs []netip.Addr
-		errs  []error
-	)
-
-	addAddr := func(addr string) {
-		ip, err := netip.ParseAddr(addr)
-		if err != nil {
-			errs = append(errs, fmt.Errorf("parsing IP address %q: %w", addr, err))
-			return
-		}
-		addrs = append(addrs, ip)
-	}
-	for _, mac := range macAddrs {
-		ips, err := ci.getAWSMetadata(ctx, token, "/latest/meta-data/network/interfaces/macs/"+mac+"/public-ipv4s")
-		if err != nil {
-			errs = append(errs, fmt.Errorf("getting IPv4 addresses for %q: %w", mac, err))
-			continue
-		}
-
-		for _, ip := range ips {
-			addAddr(ip)
-		}
-
-		// Try querying for IPv6 addresses.
-		ips, err = ci.getAWSMetadata(ctx, token, "/latest/meta-data/network/interfaces/macs/"+mac+"/ipv6s")
-		if err != nil {
-			errs = append(errs, fmt.Errorf("getting IPv6 addresses for %q: %w", mac, err))
-			continue
-		}
-		for _, ip := range ips {
-			addAddr(ip)
-		}
-	}
-
-	// Sort the returned addresses for determinism.
-	slices.SortFunc(addrs, func(a, b netip.Addr) int {
-		return a.Compare(b)
-	})
-
-	// Preferentially return any addresses we found, even if there were errors.
-	if len(addrs) > 0 {
-		return addrs, nil
-	}
-	if len(errs) > 0 {
-		return nil, fmt.Errorf("getting IP addresses: %w", errors.Join(errs...))
-	}
-	return nil, nil
-}
--- a/vendor/tailscale.com/wgengine/magicsock/cloudinfo_nocloud.go
+++ b/vendor/tailscale.com/wgengine/magicsock/cloudinfo_nocloud.go
@@ -1,23 +0,0 @@
-// Copyright (c) Tailscale Inc & AUTHORS
-// SPDX-License-Identifier: BSD-3-Clause
-
-//go:build ios || android || js
-
-package magicsock
-
-import (
-	"context"
-	"net/netip"
-
-	"tailscale.com/types/logger"
-)
-
-type cloudInfo struct{}
-
-func newCloudInfo(_ logger.Logf) *cloudInfo {
-	return &cloudInfo{}
-}
-
-func (ci *cloudInfo) GetPublicIPs(_ context.Context) ([]netip.Addr, error) {
-	return nil, nil
-}
--- a/vendor/tailscale.com/wgengine/magicsock/debughttp.go
+++ b/vendor/tailscale.com/wgengine/magicsock/debughttp.go
@@ -13,6 +13,8 @@ import (
 	"strings"
 	"time"

+	"tailscale.com/feature"
+	"tailscale.com/feature/buildfeatures"
 	"tailscale.com/tailcfg"
 	"tailscale.com/tstime/mono"
 	"tailscale.com/types/key"
@@ -24,6 +26,11 @@ import (
 // /debug/magicsock) or via peerapi to a peer that's owned by the same
 // user (so they can e.g. inspect their phones).
 func (c *Conn) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
+	if !buildfeatures.HasDebug {
+		http.Error(w, feature.ErrUnavailable.Error(), http.StatusNotImplemented)
+		return
+	}
+
 	c.mu.Lock()
 	defer c.mu.Unlock()

@@ -72,18 +79,18 @@ func (c *Conn) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
 	fmt.Fprintf(w, "<h2 id=ipport><a href=#ipport>#</a> ip:port to endpoint</h2><ul>")
 	{
 		type kv struct {
-			ipp netip.AddrPort
-			pi  *peerInfo
+			addr epAddr
+			pi   *peerInfo
 		}
-		ent := make([]kv, 0, len(c.peerMap.byIPPort))
-		for k, v := range c.peerMap.byIPPort {
+		ent := make([]kv, 0, len(c.peerMap.byEpAddr))
+		for k, v := range c.peerMap.byEpAddr {
 			ent = append(ent, kv{k, v})
 		}
-		sort.Slice(ent, func(i, j int) bool { return ipPortLess(ent[i].ipp, ent[j].ipp) })
+		sort.Slice(ent, func(i, j int) bool { return epAddrLess(ent[i].addr, ent[j].addr) })
 		for _, e := range ent {
 			ep := e.pi.ep
 			shortStr := ep.publicKey.ShortString()
-			fmt.Fprintf(w, "<li>%v: <a href='#%v'>%v</a></li>\n", e.ipp, strings.Trim(shortStr, "[]"), shortStr)
+			fmt.Fprintf(w, "<li>%v: <a href='#%v'>%v</a></li>\n", e.addr, strings.Trim(shortStr, "[]"), shortStr)
 		}

 	}
@@ -148,11 +155,11 @@ func printEndpointHTML(w io.Writer, ep *endpoint) {
 	for ipp := range ep.endpointState {
 		eps = append(eps, ipp)
 	}
-	sort.Slice(eps, func(i, j int) bool { return ipPortLess(eps[i], eps[j]) })
+	sort.Slice(eps, func(i, j int) bool { return addrPortLess(eps[i], eps[j]) })
 	io.WriteString(w, "<p>Endpoints:</p><ul>")
 	for _, ipp := range eps {
 		s := ep.endpointState[ipp]
-		if ipp == ep.bestAddr.AddrPort {
+		if ipp == ep.bestAddr.ap && !ep.bestAddr.vni.IsSet() {
 			fmt.Fprintf(w, "<li><b>%s</b>: (best)<ul>", ipp)
 		} else {
 			fmt.Fprintf(w, "<li>%s: ...<ul>", ipp)
@@ -196,9 +203,19 @@ func peerDebugName(p tailcfg.NodeView) string {
 	return p.Hostinfo().Hostname()
 }

-func ipPortLess(a, b netip.AddrPort) bool {
+func addrPortLess(a, b netip.AddrPort) bool {
 	if v := a.Addr().Compare(b.Addr()); v != 0 {
 		return v < 0
 	}
 	return a.Port() < b.Port()
 }
+
+func epAddrLess(a, b epAddr) bool {
+	if v := a.ap.Addr().Compare(b.ap.Addr()); v != 0 {
+		return v < 0
+	}
+	if a.ap.Port() == b.ap.Port() {
+		return a.vni.Get() < b.vni.Get()
+	}
+	return a.ap.Port() < b.ap.Port()
+}
--- a/vendor/tailscale.com/wgengine/magicsock/debugknobs.go
+++ b/vendor/tailscale.com/wgengine/magicsock/debugknobs.go
@@ -62,6 +62,9 @@ var (
 	//
 	//lint:ignore U1000 used on Linux/Darwin only
 	debugPMTUD = envknob.RegisterBool("TS_DEBUG_PMTUD")
+	// debugNeverDirectUDP disables the use of direct UDP connections, forcing
+	// all peer communication over DERP or peer relay.
+	debugNeverDirectUDP = envknob.RegisterBool("TS_DEBUG_NEVER_DIRECT_UDP")
 	// Hey you! Adding a new debugknob? Make sure to stub it out in the
 	// debugknobs_stubs.go file too.
 )
--- a/vendor/tailscale.com/wgengine/magicsock/debugknobs_stubs.go
+++ b/vendor/tailscale.com/wgengine/magicsock/debugknobs_stubs.go
@@ -31,3 +31,4 @@ func debugRingBufferMaxSizeBytes() int { return 0 }
 func inTest() bool                     { return false }
 func debugPeerMap() bool               { return false }
 func pretendpoints() []netip.AddrPort  { return []netip.AddrPort{} }
+func debugNeverDirectUDP() bool        { return false }
--- a/vendor/tailscale.com/wgengine/magicsock/derp.go
+++ b/vendor/tailscale.com/wgengine/magicsock/derp.go
@@ -11,9 +11,7 @@ import (
 	"net"
 	"net/netip"
 	"reflect"
-	"runtime"
 	"slices"
-	"sync"
 	"time"
 	"unsafe"

@@ -21,7 +19,6 @@ import (
 	"tailscale.com/derp"
 	"tailscale.com/derp/derphttp"
 	"tailscale.com/health"
-	"tailscale.com/logtail/backoff"
 	"tailscale.com/net/dnscache"
 	"tailscale.com/net/netcheck"
 	"tailscale.com/net/tsaddr"
@@ -30,9 +27,9 @@ import (
 	"tailscale.com/tstime/mono"
 	"tailscale.com/types/key"
 	"tailscale.com/types/logger"
+	"tailscale.com/util/backoff"
 	"tailscale.com/util/mak"
 	"tailscale.com/util/rands"
-	"tailscale.com/util/sysresources"
 	"tailscale.com/util/testenv"
 )

@@ -94,7 +91,7 @@ func (c *Conn) fallbackDERPRegionForPeer(peer key.NodePublic) (regionID int) {
 type activeDerp struct {
 	c       *derphttp.Client
 	cancel  context.CancelFunc
-	writeCh chan<- derpWriteRequest
+	writeCh chan derpWriteRequest
 	// lastWrite is the time of the last request for its write
 	// channel (currently even if there was no write).
 	// It is always non-nil and initialized to a non-zero Time.
@@ -219,17 +216,28 @@ func (c *Conn) derpRegionCodeLocked(regionID int) string {
 	return ""
 }

+// setHomeDERPGaugeLocked updates the home DERP gauge metric.
+//
+// c.mu must be held.
+func (c *Conn) setHomeDERPGaugeLocked(derpNum int) {
+	if c.homeDERPGauge != nil {
+		c.homeDERPGauge.Set(float64(derpNum))
+	}
+}
+
 // c.mu must NOT be held.
 func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 	if !c.wantDerpLocked() {
 		c.myDerp = 0
+		c.setHomeDERPGaugeLocked(0)
 		c.health.SetMagicSockDERPHome(0, c.homeless)
 		return false
 	}
 	if c.homeless {
 		c.myDerp = 0
+		c.setHomeDERPGaugeLocked(0)
 		c.health.SetMagicSockDERPHome(0, c.homeless)
 		return false
 	}
@@ -241,6 +249,7 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
 		metricDERPHomeChange.Add(1)
 	}
 	c.myDerp = derpNum
+	c.setHomeDERPGaugeLocked(derpNum)
 	c.health.SetMagicSockDERPHome(derpNum, c.homeless)

 	if c.privateKey.IsZero() {
@@ -282,59 +291,20 @@ func (c *Conn) goDerpConnect(regionID int) {
 	go c.derpWriteChanForRegion(regionID, key.NodePublic{})
 }

-var (
-	bufferedDerpWrites     int
-	bufferedDerpWritesOnce sync.Once
-)
-
-// bufferedDerpWritesBeforeDrop returns how many packets writes can be queued
-// up the DERP client to write on the wire before we start dropping.
-func bufferedDerpWritesBeforeDrop() int {
-	// For mobile devices, always return the previous minimum value of 32;
-	// we can do this outside the sync.Once to avoid that overhead.
-	if runtime.GOOS == "ios" || runtime.GOOS == "android" {
-		return 32
-	}
-
-	bufferedDerpWritesOnce.Do(func() {
-		// Some rough sizing: for the previous fixed value of 32, the
-		// total consumed memory can be:
-		// = numDerpRegions * messages/region * sizeof(message)
-		//
-		// For sake of this calculation, assume 100 DERP regions; at
-		// time of writing (2023-04-03), we have 24.
-		//
-		// A reasonable upper bound for the worst-case average size of
-		// a message is a *disco.CallMeMaybe message with 16 endpoints;
-		// since sizeof(netip.AddrPort) = 32, that's 512 bytes. Thus:
-		// = 100 * 32 * 512
-		// = 1638400 (1.6MiB)
-		//
-		// On a reasonably-small node with 4GiB of memory that's
-		// connected to each region and handling a lot of load, 1.6MiB
-		// is about 0.04% of the total system memory.
-		//
-		// For sake of this calculation, then, let's double that memory
-		// usage to 0.08% and scale based on total system memory.
-		//
-		// For a 16GiB Linux box, this should buffer just over 256
-		// messages.
-		systemMemory := sysresources.TotalMemory()
-		memoryUsable := float64(systemMemory) * 0.0008
-
-		const (
-			theoreticalDERPRegions  = 100
-			messageMaximumSizeBytes = 512
-		)
-		bufferedDerpWrites = int(memoryUsable / (theoreticalDERPRegions * messageMaximumSizeBytes))
-
-		// Never drop below the previous minimum value.
-		if bufferedDerpWrites < 32 {
-			bufferedDerpWrites = 32
-		}
-	})
-	return bufferedDerpWrites
-}
+// derpWriteQueueDepth is the depth of the in-process write queue to a single
+// DERP region. DERP connections are TCP, and so the actual write queue depth is
+// substantially larger than this suggests - often scaling into megabytes
+// depending on dynamic TCP parameters and platform TCP tuning. This queue is
+// excess of the TCP buffer depth, which means it's almost pure buffer bloat,
+// and does not want to be deep - if there are key situations where a node can't
+// keep up, either the TCP link to DERP is too slow, or there is a
+// synchronization issue in the write path, fixes should be focused on those
+// paths, rather than extending this queue.
+// TODO(raggi): make this even shorter, ideally this should be a fairly direct
+// line into a socket TCP buffer. The challenge at present is that connect and
+// reconnect are in the write path and we don't want to block other write
+// operations on those.
+const derpWriteQueueDepth = 32

 // derpWriteChanForRegion returns a channel to which to send DERP packet write
 // requests. It creates a new DERP connection to regionID if necessary.
@@ -344,7 +314,7 @@ func bufferedDerpWritesBeforeDrop() int {
 //
 // It returns nil if the network is down, the Conn is closed, or the regionID is
 // not known.
-func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan<- derpWriteRequest {
+func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan derpWriteRequest {
 	if c.networkDown() {
 		return nil
 	}
@@ -429,7 +399,7 @@ func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan<-
 	dc.DNSCache = dnscache.Get()

 	ctx, cancel := context.WithCancel(c.connCtx)
-	ch := make(chan derpWriteRequest, bufferedDerpWritesBeforeDrop())
+	ch := make(chan derpWriteRequest, derpWriteQueueDepth)

 	ad.c = dc
 	ad.writeCh = ch
@@ -740,8 +710,11 @@ func (c *Conn) processDERPReadResult(dm derpReadResult, b []byte) (n int, ep *en
 		return 0, nil
 	}

-	ipp := netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))
-	if c.handleDiscoMessage(b[:n], ipp, dm.src, discoRXPathDERP) {
+	srcAddr := epAddr{ap: netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))}
+	pt, isGeneveEncap := packetLooksLike(b[:n])
+	if pt == packetLooksLikeDisco &&
+		!isGeneveEncap { // We should never receive Geneve-encapsulated disco over DERP.
+		c.handleDiscoMessage(b[:n], srcAddr, false, dm.src, discoRXPathDERP)
 		return 0, nil
 	}

@@ -755,9 +728,9 @@ func (c *Conn) processDERPReadResult(dm derpReadResult, b []byte) (n int, ep *en
 		return 0, nil
 	}

-	ep.noteRecvActivity(ipp, mono.Now())
-	if stats := c.stats.Load(); stats != nil {
-		stats.UpdateRxPhysical(ep.nodeAddr, ipp, 1, dm.n)
+	ep.noteRecvActivity(srcAddr, mono.Now())
+	if update := c.connCounter.Load(); update != nil {
+		update(0, netip.AddrPortFrom(ep.nodeAddr, 0), srcAddr.ap, 1, dm.n, true)
 	}

 	c.metrics.inboundPacketsDERPTotal.Add(1)
@@ -875,7 +848,6 @@ func (c *Conn) maybeCloseDERPsOnRebind(okayLocalIPs []netip.Prefix) {
 			c.closeOrReconnectDERPLocked(regionID, "rebind-default-route-change")
 			continue
 		}
-		regionID := regionID
 		dc := ad.c
 		go func() {
 			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
--- a/vendor/tailscale.com/wgengine/magicsock/disco_atomic.go
+++ b/vendor/tailscale.com/wgengine/magicsock/disco_atomic.go
@@ -0,0 +1,58 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+package magicsock
+
+import (
+	"sync/atomic"
+
+	"tailscale.com/types/key"
+)
+
+type discoKeyPair struct {
+	private key.DiscoPrivate
+	public  key.DiscoPublic
+	short   string // public.ShortString()
+}
+
+// discoAtomic is an atomic container for a disco private key, public key, and
+// the public key's ShortString. The private and public keys are always kept
+// synchronized.
+//
+// The zero value is not ready for use; the accessors dereference a nil pointer until [discoAtomic.Set] has stored a key.
+type discoAtomic struct {
+	pair atomic.Pointer[discoKeyPair]
+}
+
+// Pair returns the private and public keys together atomically.
+// Code that needs both the private and public keys synchronized should
+// use Pair instead of calling Private and Public separately.
+func (dk *discoAtomic) Pair() (key.DiscoPrivate, key.DiscoPublic) {
+	p := dk.pair.Load()
+	return p.private, p.public
+}
+
+// Private returns the private key.
+func (dk *discoAtomic) Private() key.DiscoPrivate {
+	return dk.pair.Load().private
+}
+
+// Public returns the public key.
+func (dk *discoAtomic) Public() key.DiscoPublic {
+	return dk.pair.Load().public
+}
+
+// Short returns the short string of the public key (see [key.DiscoPublic.ShortString]).
+func (dk *discoAtomic) Short() string {
+	return dk.pair.Load().short
+}
+
+// Set updates the private key (and the cached public key and short string).
+func (dk *discoAtomic) Set(private key.DiscoPrivate) {
+	public := private.Public()
+	dk.pair.Store(&discoKeyPair{
+		private: private,
+		public:  public,
+		short:   public.ShortString(),
+	})
+}
--- a/vendor/tailscale.com/wgengine/magicsock/discopingpurpose_string.go
+++ b/vendor/tailscale.com/wgengine/magicsock/discopingpurpose_string.go
@@ -22,8 +22,9 @@ const _discoPingPurpose_name = "DiscoveryHeartbeatCLIHeartbeatForUDPLifetime"
 var _discoPingPurpose_index = [...]uint8{0, 9, 18, 21, 44}

 func (i discoPingPurpose) String() string {
-	if i < 0 || i >= discoPingPurpose(len(_discoPingPurpose_index)-1) {
+	idx := int(i) - 0
+	if i < 0 || idx >= len(_discoPingPurpose_index)-1 {
 		return "discoPingPurpose(" + strconv.FormatInt(int64(i), 10) + ")"
 	}
-	return _discoPingPurpose_name[_discoPingPurpose_index[i]:_discoPingPurpose_index[i+1]]
+	return _discoPingPurpose_name[_discoPingPurpose_index[idx]:_discoPingPurpose_index[idx+1]]
 }
--- a/vendor/tailscale.com/wgengine/magicsock/endpoint.go
+++ b/vendor/tailscale.com/wgengine/magicsock/endpoint.go
@@ -17,7 +17,6 @@ import (
 	"reflect"
 	"runtime"
 	"slices"
-	"sync"
 	"sync/atomic"
 	"time"

@@ -25,14 +24,16 @@ import (
 	"golang.org/x/net/ipv6"
 	"tailscale.com/disco"
 	"tailscale.com/ipn/ipnstate"
+	"tailscale.com/net/packet"
 	"tailscale.com/net/stun"
 	"tailscale.com/net/tstun"
+	"tailscale.com/syncs"
 	"tailscale.com/tailcfg"
 	"tailscale.com/tstime/mono"
 	"tailscale.com/types/key"
 	"tailscale.com/types/logger"
 	"tailscale.com/util/mak"
-	"tailscale.com/util/ringbuffer"
+	"tailscale.com/util/ringlog"
 	"tailscale.com/util/slicesx"
 )

@@ -59,7 +60,7 @@ type endpoint struct {
 	lastRecvWG            mono.Time // last time there were incoming packets from this peer destined for wireguard-go (e.g. not disco)
 	lastRecvUDPAny        mono.Time // last time there were incoming UDP packets from this peer of any kind
 	numStopAndResetAtomic int64
-	debugUpdates          *ringbuffer.RingBuffer[EndpointChange]
+	debugUpdates          *ringlog.RingLog[EndpointChange]

 	// These fields are initialized once and never modified.
 	c            *Conn
@@ -72,19 +73,20 @@ type endpoint struct {
 	disco atomic.Pointer[endpointDisco] // if the peer supports disco, the key and short string

 	// mu protects all following fields.
-	mu sync.Mutex // Lock ordering: Conn.mu, then endpoint.mu
+	mu syncs.Mutex // Lock ordering: Conn.mu, then endpoint.mu

-	heartBeatTimer *time.Timer    // nil when idle
-	lastSendExt    mono.Time      // last time there were outgoing packets sent to this peer from an external trigger (e.g. wireguard-go or disco pingCLI)
-	lastSendAny    mono.Time      // last time there were outgoing packets sent this peer from any trigger, internal or external to magicsock
-	lastFullPing   mono.Time      // last time we pinged all disco or wireguard only endpoints
-	derpAddr       netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)
+	heartBeatTimer            *time.Timer    // nil when idle
+	lastSendExt               mono.Time      // last time there were outgoing packets sent to this peer from an external trigger (e.g. wireguard-go or disco pingCLI)
+	lastSendAny               mono.Time      // last time there were outgoing packets sent to this peer from any trigger, internal or external to magicsock
+	lastFullPing              mono.Time      // last time we pinged all disco or wireguard only endpoints
+	lastUDPRelayPathDiscovery mono.Time      // last time we ran UDP relay path discovery
+	derpAddr                  netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)

 	bestAddr           addrQuality // best non-DERP path; zero if none; mutate via setBestAddrLocked()
 	bestAddrAt         mono.Time   // time best address re-confirmed
 	trustBestAddrUntil mono.Time   // time when bestAddr expires
 	sentPing           map[stun.TxID]sentPing
-	endpointState      map[netip.AddrPort]*endpointState
+	endpointState      map[netip.AddrPort]*endpointState // netip.AddrPort type for key (instead of [epAddr]) as [endpointState] is irrelevant for Geneve-encapsulated paths
 	isCallMeMaybeEP    map[netip.AddrPort]bool

 	// The following fields are related to the new "silent disco"
@@ -95,10 +97,40 @@ type endpoint struct {

 	expired         bool // whether the node has expired
 	isWireguardOnly bool // whether the endpoint is WireGuard only
+	relayCapable    bool // whether the node is capable of speaking via a [tailscale.com/net/udprelay.Server]
+}
+
+// udpRelayEndpointReady determines whether the given relay [addrQuality] should
+// be installed as de.bestAddr. It is only called by [relayManager] once it has
+// determined maybeBest is functional via [disco.Pong] reception.
+func (de *endpoint) udpRelayEndpointReady(maybeBest addrQuality) {
+	de.mu.Lock()
+	defer de.mu.Unlock()
+	now := mono.Now()
+	curBestAddrTrusted := now.Before(de.trustBestAddrUntil)
+	sameRelayServer := de.bestAddr.vni.IsSet() && maybeBest.relayServerDisco.Compare(de.bestAddr.relayServerDisco) == 0
+
+	if !curBestAddrTrusted ||
+		sameRelayServer ||
+		betterAddr(maybeBest, de.bestAddr) {
+		// We must set maybeBest as de.bestAddr if:
+		//   1. de.bestAddr is untrusted. betterAddr does not consider
+		//      time-based trust.
+		//   2. maybeBest & de.bestAddr are on the same relay. If the maybeBest
+		//      handshake happened to use a different source address/transport,
+		//      the relay will drop packets from the 'old' de.bestAddr's.
+		//   3. maybeBest is a 'betterAddr'.
+		//
+		// TODO(jwhited): add observability around !curBestAddrTrusted and sameRelayServer
+		// TODO(jwhited): collapse path change logging with endpoint.handlePongConnLocked()
+		de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v", de.publicKey.ShortString(), de.discoShort(), maybeBest.epAddr, maybeBest.wireMTU)
+		de.setBestAddrLocked(maybeBest)
+		de.trustBestAddrUntil = now.Add(trustUDPAddrDuration)
+	}
 }

 func (de *endpoint) setBestAddrLocked(v addrQuality) {
-	if v.AddrPort != de.bestAddr.AddrPort {
+	if v.epAddr != de.bestAddr.epAddr {
 		de.probeUDPLifetime.resetCycleEndpointLocked()
 	}
 	de.bestAddr = v
@@ -134,11 +166,11 @@ type probeUDPLifetime struct {
 	// timeout cliff in the future.
 	timer *time.Timer

-	// bestAddr contains the endpoint.bestAddr.AddrPort at the time a cycle was
+	// bestAddr contains the endpoint.bestAddr.epAddr at the time a cycle was
 	// scheduled to start. A probing cycle is 1:1 with the current
-	// endpoint.bestAddr.AddrPort in the interest of simplicity. When
-	// endpoint.bestAddr.AddrPort changes, any active probing cycle will reset.
-	bestAddr netip.AddrPort
+	// endpoint.bestAddr.epAddr in the interest of simplicity. When
+	// endpoint.bestAddr.epAddr changes, any active probing cycle will reset.
+	bestAddr epAddr
 	// cycleStartedAt contains the time at which the first cliff
 	// (ProbeUDPLifetimeConfig.Cliffs[0]) was pinged for the current/last cycle.
 	cycleStartedAt time.Time
@@ -190,7 +222,7 @@ func (p *probeUDPLifetime) resetCycleEndpointLocked() {
 	}
 	p.cycleActive = false
 	p.currentCliff = 0
-	p.bestAddr = netip.AddrPort{}
+	p.bestAddr = epAddr{}
 }

 // ProbeUDPLifetimeConfig represents the configuration for probing UDP path
@@ -333,7 +365,7 @@ type endpointDisco struct {
 }

 type sentPing struct {
-	to      netip.AddrPort
+	to      epAddr
 	at      mono.Time
 	timer   *time.Timer // timeout timer
 	purpose discoPingPurpose
@@ -445,7 +477,8 @@ func (de *endpoint) deleteEndpointLocked(why string, ep netip.AddrPort) {
 		From: ep,
 	})
 	delete(de.endpointState, ep)
-	if de.bestAddr.AddrPort == ep {
+	asEpAddr := epAddr{ap: ep}
+	if de.bestAddr.epAddr == asEpAddr {
 		de.debugUpdates.Add(EndpointChange{
 			When: time.Now(),
 			What: "deleteEndpointLocked-bestAddr-" + why,
@@ -467,11 +500,12 @@ func (de *endpoint) initFakeUDPAddr() {
 }

 // noteRecvActivity records receive activity on de, and invokes
-// Conn.noteRecvActivity no more than once every 10s.
-func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
+// Conn.noteRecvActivity no more than once every 10s, returning true if it
+// was called, otherwise false.
+func (de *endpoint) noteRecvActivity(src epAddr, now mono.Time) bool {
 	if de.isWireguardOnly {
 		de.mu.Lock()
-		de.bestAddr.AddrPort = ipp
+		de.bestAddr.ap = src.ap
 		de.bestAddrAt = now
 		de.trustBestAddrUntil = now.Add(5 * time.Second)
 		de.mu.Unlock()
@@ -481,7 +515,7 @@ func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
 		// kick off discovery disco pings every trustUDPAddrDuration and mirror
 		// to DERP.
 		de.mu.Lock()
-		if de.heartbeatDisabled && de.bestAddr.AddrPort == ipp {
+		if de.heartbeatDisabled && de.bestAddr.epAddr == src {
 			de.trustBestAddrUntil = now.Add(trustUDPAddrDuration)
 		}
 		de.mu.Unlock()
@@ -492,10 +526,12 @@ func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
 		de.lastRecvWG.StoreAtomic(now)

 		if de.c.noteRecvActivity == nil {
-			return
+			return false
 		}
 		de.c.noteRecvActivity(de.publicKey)
+		return true
 	}
+	return false
 }

 func (de *endpoint) discoShort() string {
@@ -529,10 +565,10 @@ func (de *endpoint) DstToBytes() []byte  { return packIPPort(de.fakeWGAddr) }
 // de.mu must be held.
 //
 // TODO(val): Rewrite the addrFor*Locked() variations to share code.
-func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr, derpAddr netip.AddrPort, sendWGPing bool) {
-	udpAddr = de.bestAddr.AddrPort
+func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr epAddr, derpAddr netip.AddrPort, sendWGPing bool) {
+	udpAddr = de.bestAddr.epAddr

-	if udpAddr.IsValid() && !now.After(de.trustBestAddrUntil) {
+	if udpAddr.ap.IsValid() && !now.After(de.trustBestAddrUntil) {
 		return udpAddr, netip.AddrPort{}, false
 	}

@@ -551,12 +587,12 @@ func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr, derpAddr netip.Ad
 // addrForWireGuardSendLocked returns the address that should be used for
 // sending the next packet. If a packet has never or not recently been sent to
 // the endpoint, then a randomly selected address for the endpoint is returned,
-// as well as a bool indiciating that WireGuard discovery pings should be started.
+// as well as a bool indicating that WireGuard discovery pings should be started.
 // If the addresses have latency information available, then the address with the
 // best latency is used.
 //
 // de.mu must be held.
-func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.AddrPort, shouldPing bool) {
+func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr epAddr, shouldPing bool) {
 	if len(de.endpointState) == 0 {
 		de.c.logf("magicsock: addrForSendWireguardLocked: [unexpected] no candidates available for endpoint")
 		return udpAddr, false
@@ -580,22 +616,22 @@ func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.Add
 				// TODO(catzkorn): Consider a small increase in latency to use
 				// IPv6 in comparison to IPv4, when possible.
 				lowestLatency = latency
-				udpAddr = ipp
+				udpAddr.ap = ipp
 			}
 		}
 	}
 	needPing := len(de.endpointState) > 1 && now.Sub(oldestPing) > wireguardPingInterval

-	if !udpAddr.IsValid() {
+	if !udpAddr.ap.IsValid() {
 		candidates := slicesx.MapKeys(de.endpointState)

 		// Randomly select an address to use until we retrieve latency information
 		// and give it a short trustBestAddrUntil time so we avoid flapping between
 		// addresses while waiting on latency information to be populated.
-		udpAddr = candidates[rand.IntN(len(candidates))]
+		udpAddr.ap = candidates[rand.IntN(len(candidates))]
 	}

-	de.bestAddr.AddrPort = udpAddr
+	de.bestAddr.epAddr = epAddr{ap: udpAddr.ap}
 	// Only extend trustBestAddrUntil by one second to avoid packet
 	// reordering and/or CPU usage from random selection during the first
 	// second. We should receive a response due to a WireGuard handshake in
@@ -613,18 +649,18 @@ func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.Add
 // both of the returned UDP address and DERP address may be non-zero.
 //
 // de.mu must be held.
-func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, derpAddr netip.AddrPort) {
+func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr epAddr, derpAddr netip.AddrPort) {
 	if size == 0 {
 		udpAddr, derpAddr, _ = de.addrForSendLocked(now)
 		return
 	}

-	udpAddr = de.bestAddr.AddrPort
+	udpAddr = de.bestAddr.epAddr
 	pathMTU := de.bestAddr.wireMTU
-	requestedMTU := pingSizeToPktLen(size, udpAddr.Addr().Is6())
+	requestedMTU := pingSizeToPktLen(size, udpAddr)
 	mtuOk := requestedMTU <= pathMTU

-	if udpAddr.IsValid() && mtuOk {
+	if udpAddr.ap.IsValid() && mtuOk {
 		if !now.After(de.trustBestAddrUntil) {
 			return udpAddr, netip.AddrPort{}
 		}
@@ -637,7 +673,7 @@ func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, der
 	// for the packet. Return a zero-value udpAddr to signal that we should
 	// keep probing the path MTU to all addresses for this endpoint, and a
 	// valid DERP addr to signal that we should also send via DERP.
-	return netip.AddrPort{}, de.derpAddr
+	return epAddr{}, de.derpAddr
 }

 // maybeProbeUDPLifetimeLocked returns an afterInactivityFor duration and true
@@ -648,7 +684,7 @@ func (de *endpoint) maybeProbeUDPLifetimeLocked() (afterInactivityFor time.Durat
 	if p == nil {
 		return afterInactivityFor, false
 	}
-	if !de.bestAddr.IsValid() {
+	if !de.bestAddr.ap.IsValid() {
 		return afterInactivityFor, false
 	}
 	epDisco := de.disco.Load()
@@ -661,7 +697,7 @@ func (de *endpoint) maybeProbeUDPLifetimeLocked() (afterInactivityFor time.Durat
 	// shuffling probing probability where the local node ends up with a large
 	// key value lexicographically relative to the other nodes it tends to
 	// communicate with. If de's disco key changes, the cycle will reset.
-	if de.c.discoPublic.Compare(epDisco.key) >= 0 {
+	if de.c.discoAtomic.Public().Compare(epDisco.key) >= 0 {
 		// lower disco pub key node probes higher
 		return afterInactivityFor, false
 	}
@@ -700,7 +736,7 @@ func (de *endpoint) scheduleHeartbeatForLifetimeLocked(after time.Duration, via
 	}
 	de.c.dlogf("[v1] magicsock: disco: scheduling UDP lifetime probe for cliff=%v via=%v to %v (%v)",
 		p.currentCliffDurationEndpointLocked(), via, de.publicKey.ShortString(), de.discoShort())
-	p.bestAddr = de.bestAddr.AddrPort
+	p.bestAddr = de.bestAddr.epAddr
 	p.timer = time.AfterFunc(after, de.heartbeatForLifetime)
 	if via == heartbeatForLifetimeViaSelf {
 		metricUDPLifetimeCliffsRescheduled.Add(1)
@@ -728,7 +764,7 @@ func (de *endpoint) heartbeatForLifetime() {
 		return
 	}
 	p.timer = nil
-	if !p.bestAddr.IsValid() || de.bestAddr.AddrPort != p.bestAddr {
+	if !p.bestAddr.ap.IsValid() || de.bestAddr.epAddr != p.bestAddr {
 		// best path changed
 		p.resetCycleEndpointLocked()
 		return
@@ -760,7 +796,7 @@ func (de *endpoint) heartbeatForLifetime() {
 	}
 	de.c.dlogf("[v1] magicsock: disco: sending disco ping for UDP lifetime probe cliff=%v to %v (%v)",
 		p.currentCliffDurationEndpointLocked(), de.publicKey.ShortString(), de.discoShort())
-	de.startDiscoPingLocked(de.bestAddr.AddrPort, mono.Now(), pingHeartbeatForUDPLifetime, 0, nil)
+	de.startDiscoPingLocked(de.bestAddr.epAddr, mono.Now(), pingHeartbeatForUDPLifetime, 0, nil)
 }

 // heartbeat is called every heartbeatInterval to keep the best UDP path alive,
@@ -818,8 +854,8 @@ func (de *endpoint) heartbeat() {
 	}

 	udpAddr, _, _ := de.addrForSendLocked(now)
-	if udpAddr.IsValid() {
-		// We have a preferred path. Ping that every 2 seconds.
+	if udpAddr.ap.IsValid() {
+		// We have a preferred path. Ping that every 'heartbeatInterval'.
 		de.startDiscoPingLocked(udpAddr, now, pingHeartbeat, 0, nil)
 	}

@@ -827,6 +863,10 @@ func (de *endpoint) heartbeat() {
 		de.sendDiscoPingsLocked(now, true)
 	}

+	if de.wantUDPRelayPathDiscoveryLocked(now) {
+		de.discoverUDPRelayPathsLocked(now)
+	}
+
 	de.heartBeatTimer = time.AfterFunc(heartbeatInterval, de.heartbeat)
 }

@@ -837,6 +877,53 @@ func (de *endpoint) setHeartbeatDisabled(v bool) {
 	de.heartbeatDisabled = v
 }

+// discoverUDPRelayPathsLocked starts UDP relay path discovery.
+func (de *endpoint) discoverUDPRelayPathsLocked(now mono.Time) {
+	de.lastUDPRelayPathDiscovery = now
+	lastBest := de.bestAddr
+	lastBestIsTrusted := mono.Now().Before(de.trustBestAddrUntil)
+	de.c.relayManager.startUDPRelayPathDiscoveryFor(de, lastBest, lastBestIsTrusted)
+}
+
+// wantUDPRelayPathDiscoveryLocked reports whether we should kick off UDP relay
+// path discovery.
+func (de *endpoint) wantUDPRelayPathDiscoveryLocked(now mono.Time) bool {
+	if runtime.GOOS == "js" {
+		return false
+	}
+	if !de.c.hasPeerRelayServers.Load() {
+		// Changes in this value between its access and a call to
+		// [endpoint.discoverUDPRelayPathsLocked] are fine, we will eventually
+		// do the "right" thing during future path discovery. The worst case is
+		// we suppress path discovery for the current cycle, or we unnecessarily
+		// call into [relayManager] and do some wasted work.
+		return false
+	}
+	if !de.relayCapable {
+		return false
+	}
+	if de.bestAddr.isDirect() && now.Before(de.trustBestAddrUntil) {
+		return false
+	}
+	if !de.lastUDPRelayPathDiscovery.IsZero() && now.Sub(de.lastUDPRelayPathDiscovery) < discoverUDPRelayPathsInterval {
+		return false
+	}
+	// TODO(jwhited): consider applying 'goodEnoughLatency' suppression here,
+	//  but not until we have a strategy for triggering CallMeMaybeVia regularly
+	//  and/or enabling inbound packets to act as a UDP relay path discovery
+	//  trigger, otherwise clients without relay servers may fall off a UDP
+	//  relay path and never come back. They are dependent on the remote side
+	//  regularly TX'ing CallMeMaybeVia, which currently only happens as part
+	//  of full UDP relay path discovery.
+	if now.After(de.trustBestAddrUntil) {
+		return true
+	}
+	if !de.lastUDPRelayPathDiscovery.IsZero() && now.Sub(de.lastUDPRelayPathDiscovery) >= upgradeUDPRelayInterval {
+		return true
+	}
+	return false
+}
+
 // wantFullPingLocked reports whether we should ping to all our peers looking for
 // a better path.
 //
@@ -845,7 +932,7 @@ func (de *endpoint) wantFullPingLocked(now mono.Time) bool {
 	if runtime.GOOS == "js" {
 		return false
 	}
-	if !de.bestAddr.IsValid() || de.lastFullPing.IsZero() {
+	if !de.bestAddr.isDirect() || de.lastFullPing.IsZero() {
 		return true
 	}
 	if now.After(de.trustBestAddrUntil) {
@@ -854,7 +941,7 @@ func (de *endpoint) wantFullPingLocked(now mono.Time) bool {
 	if de.bestAddr.latency <= goodEnoughLatency {
 		return false
 	}
-	if now.Sub(de.lastFullPing) >= upgradeInterval {
+	if now.Sub(de.lastFullPing) >= upgradeUDPDirectInterval {
 		return true
 	}
 	return false
@@ -905,17 +992,38 @@ func (de *endpoint) discoPing(res *ipnstate.PingResult, size int, cb func(*ipnst
 	udpAddr, derpAddr := de.addrForPingSizeLocked(now, size)

 	if derpAddr.IsValid() {
-		de.startDiscoPingLocked(derpAddr, now, pingCLI, size, resCB)
+		de.startDiscoPingLocked(epAddr{ap: derpAddr}, now, pingCLI, size, resCB)
 	}
-	if udpAddr.IsValid() && now.Before(de.trustBestAddrUntil) {
-		// Already have an active session, so just ping the address we're using.
-		// Otherwise "tailscale ping" results to a node on the local network
-		// can look like they're bouncing between, say 10.0.0.0/9 and the peer's
-		// IPv6 address, both 1ms away, and it's random who replies first.
+
+	switch {
+	case udpAddr.ap.IsValid() && now.Before(de.trustBestAddrUntil):
+		// We have a "trusted" direct OR peer relay address, ping it.
 		de.startDiscoPingLocked(udpAddr, now, pingCLI, size, resCB)
-	} else {
+		if !udpAddr.vni.IsSet() {
+			// If the path is direct we do not want to fallthrough to pinging
+			// all candidate direct paths, otherwise "tailscale ping" results to
+			// a node on the local network can look like they're bouncing
+			// between, say 10.0.0.0/8 and the peer's IPv6 address, both 1ms
+			// away, and it's random who replies first. cb() is called with the
+			// first reply, vs background path discovery that is subject to
+			// betterAddr() comparison and hysteresis.
+			break
+		}
+		// If the trusted path is via a peer relay we want to fallthrough in
+		// order to also try all candidate direct paths.
+		fallthrough
+	default:
+		// Ping all candidate direct paths and start peer relay path discovery,
+		// if appropriate. This work overlaps with what [de.heartbeat] will
+		// periodically fire when it calls [de.sendDiscoPingsLocked] and
+		// [de.discoverUDPRelayPathsLocked], but a user-initiated [pingCLI] is
+		// a "do it now" operation that should not be subject to
+		// [heartbeatInterval] tick or [discoPingInterval] rate-limiting.
 		for ep := range de.endpointState {
-			de.startDiscoPingLocked(ep, now, pingCLI, size, resCB)
+			de.startDiscoPingLocked(epAddr{ap: ep}, now, pingCLI, size, resCB)
+		}
+		if de.wantUDPRelayPathDiscoveryLocked(now) {
+			de.discoverUDPRelayPathsLocked(now)
 		}
 	}
 }
@@ -926,7 +1034,7 @@ var (
 	errPingTooBig  = errors.New("ping size too big")
 )

-func (de *endpoint) send(buffs [][]byte) error {
+func (de *endpoint) send(buffs [][]byte, offset int) error {
 	de.mu.Lock()
 	if de.expired {
 		de.mu.Unlock()
@@ -940,14 +1048,17 @@ func (de *endpoint) send(buffs [][]byte) error {
 		if startWGPing {
 			de.sendWireGuardOnlyPingsLocked(now)
 		}
-	} else if !udpAddr.IsValid() || now.After(de.trustBestAddrUntil) {
+	} else if !udpAddr.isDirect() || now.After(de.trustBestAddrUntil) {
 		de.sendDiscoPingsLocked(now, true)
+		if de.wantUDPRelayPathDiscoveryLocked(now) {
+			de.discoverUDPRelayPathsLocked(now)
+		}
 	}
 	de.noteTxActivityExtTriggerLocked(now)
 	de.lastSendAny = now
 	de.mu.Unlock()

-	if !udpAddr.IsValid() && !derpAddr.IsValid() {
+	if !udpAddr.ap.IsValid() && !derpAddr.IsValid() {
 		// Make a last ditch effort to see if we have a DERP route for them. If
 		// they contacted us over DERP and we don't know their UDP endpoints or
 		// their DERP home, we can at least assume they're reachable over the
@@ -959,8 +1070,8 @@ func (de *endpoint) send(buffs [][]byte) error {
 		}
 	}
 	var err error
-	if udpAddr.IsValid() {
-		_, err = de.c.sendUDPBatch(udpAddr, buffs)
+	if udpAddr.ap.IsValid() {
+		_, err = de.c.sendUDPBatch(udpAddr, buffs, offset)

 		// If the error is known to indicate that the endpoint is no longer
 		// usable, clear the endpoint statistics so that the next send will
@@ -971,37 +1082,49 @@ func (de *endpoint) send(buffs [][]byte) error {

 		var txBytes int
 		for _, b := range buffs {
-			txBytes += len(b)
+			txBytes += len(b[offset:])
 		}

 		switch {
-		case udpAddr.Addr().Is4():
-			de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs)))
-			de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes))
-		case udpAddr.Addr().Is6():
-			de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs)))
-			de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes))
+		case udpAddr.ap.Addr().Is4():
+			if udpAddr.vni.IsSet() {
+				de.c.metrics.outboundPacketsPeerRelayIPv4Total.Add(int64(len(buffs)))
+				de.c.metrics.outboundBytesPeerRelayIPv4Total.Add(int64(txBytes))
+			} else {
+				de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs)))
+				de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes))
+			}
+		case udpAddr.ap.Addr().Is6():
+			if udpAddr.vni.IsSet() {
+				de.c.metrics.outboundPacketsPeerRelayIPv6Total.Add(int64(len(buffs)))
+				de.c.metrics.outboundBytesPeerRelayIPv6Total.Add(int64(txBytes))
+			} else {
+				de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs)))
+				de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes))
+			}
 		}

 		// TODO(raggi): needs updating for accuracy, as in error conditions we may have partial sends.
-		if stats := de.c.stats.Load(); err == nil && stats != nil {
-			stats.UpdateTxPhysical(de.nodeAddr, udpAddr, len(buffs), txBytes)
+		if update := de.c.connCounter.Load(); err == nil && update != nil {
+			update(0, netip.AddrPortFrom(de.nodeAddr, 0), udpAddr.ap, len(buffs), txBytes, false)
 		}
 	}
 	if derpAddr.IsValid() {
 		allOk := true
 		var txBytes int
 		for _, buff := range buffs {
+			buff = buff[offset:]
 			const isDisco = false
-			ok, _ := de.c.sendAddr(derpAddr, de.publicKey, buff, isDisco)
+			const isGeneveEncap = false
+			ok, _ := de.c.sendAddr(derpAddr, de.publicKey, buff, isDisco, isGeneveEncap)
 			txBytes += len(buff)
 			if !ok {
 				allOk = false
 			}
 		}

-		if stats := de.c.stats.Load(); stats != nil {
-			stats.UpdateTxPhysical(de.nodeAddr, derpAddr, len(buffs), txBytes)
+		if update := de.c.connCounter.Load(); update != nil {
+			update(0, netip.AddrPortFrom(de.nodeAddr, 0), derpAddr, len(buffs), txBytes, false)
 		}
 		if allOk {
 			return nil
@@ -1053,7 +1176,12 @@ func (de *endpoint) discoPingTimeout(txid stun.TxID) {
 	if !ok {
 		return
 	}
-	if debugDisco() || !de.bestAddr.IsValid() || mono.Now().After(de.trustBestAddrUntil) {
+	bestUntrusted := mono.Now().After(de.trustBestAddrUntil)
+	if sp.to == de.bestAddr.epAddr && sp.to.vni.IsSet() && bestUntrusted {
+		// TODO(jwhited): consider applying this to direct UDP paths as well
+		de.clearBestAddrLocked()
+	}
+	if debugDisco() || !de.bestAddr.ap.IsValid() || bestUntrusted {
 		de.c.dlogf("[v1] magicsock: disco: timeout waiting for pong %x from %v (%v, %v)", txid[:6], sp.to, de.publicKey.ShortString(), de.discoShort())
 	}
 	de.removeSentDiscoPingLocked(txid, sp, discoPingTimedOut)
@@ -1107,7 +1235,7 @@ const discoPingSize = len(disco.Magic) + key.DiscoPublicRawLen + disco.NonceLen
 //
 // The caller should use de.discoKey as the discoKey argument.
 // It is passed in so that sendDiscoPing doesn't need to lock de.mu.
-func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
+func (de *endpoint) sendDiscoPing(ep epAddr, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
 	size = min(size, MaxDiscoPingSize)
 	padding := max(size-discoPingSize, 0)

@@ -1123,7 +1251,7 @@ func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, t

 	if size != 0 {
 		metricSentDiscoPeerMTUProbes.Add(1)
-		metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep.Addr().Is6())))
+		metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep)))
 	}
 }

@@ -1154,16 +1282,20 @@ const (
 // if non-nil, means that a caller external to the magicsock package internals
 // is interested in the result (such as a CLI "tailscale ping" or a c2n ping
 // request, etc)
-func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose, size int, resCB *pingResultAndCallback) {
+func (de *endpoint) startDiscoPingLocked(ep epAddr, now mono.Time, purpose discoPingPurpose, size int, resCB *pingResultAndCallback) {
 	if runtime.GOOS == "js" {
 		return
 	}
+	if debugNeverDirectUDP() && !ep.vni.IsSet() && ep.ap.Addr() != tailcfg.DerpMagicIPAddr {
+		return
+	}
 	epDisco := de.disco.Load()
 	if epDisco == nil {
 		return
 	}
-	if purpose != pingCLI {
-		st, ok := de.endpointState[ep]
+	if purpose != pingCLI &&
+		!ep.vni.IsSet() { // de.endpointState is only relevant for direct/non-vni epAddr's
+		st, ok := de.endpointState[ep.ap]
 		if !ok {
 			// Shouldn't happen. But don't ping an endpoint that's
 			// not active for us.
@@ -1180,11 +1312,11 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo
 	// Default to sending a single ping of the specified size
 	sizes := []int{size}
 	if de.c.PeerMTUEnabled() {
-		isDerp := ep.Addr() == tailcfg.DerpMagicIPAddr
+		isDerp := ep.ap.Addr() == tailcfg.DerpMagicIPAddr
 		if !isDerp && ((purpose == pingDiscovery) || (purpose == pingCLI && size == 0)) {
 			de.c.dlogf("[v1] magicsock: starting MTU probe")
 			sizes = mtuProbePingSizesV4
-			if ep.Addr().Is6() {
+			if ep.ap.Addr().Is6() {
 				sizes = mtuProbePingSizesV6
 			}
 		}
@@ -1239,7 +1371,7 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
 			de.c.dlogf("[v1] magicsock: disco: send, starting discovery for %v (%v)", de.publicKey.ShortString(), de.discoShort())
 		}

-		de.startDiscoPingLocked(ep, now, pingDiscovery, 0, nil)
+		de.startDiscoPingLocked(epAddr{ap: ep}, now, pingDiscovery, 0, nil)
 	}
 	derpAddr := de.derpAddr
 	if sentAny && sendCallMeMaybe && derpAddr.IsValid() {
@@ -1253,7 +1385,7 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
 }

 // sendWireGuardOnlyPingsLocked evaluates all available addresses for
-// a WireGuard only endpoint and initates an ICMP ping for useable
+// a WireGuard only endpoint and initiates an ICMP ping for useable
 // addresses.
 func (de *endpoint) sendWireGuardOnlyPingsLocked(now mono.Time) {
 	if runtime.GOOS == "js" {
@@ -1390,6 +1522,8 @@ func (de *endpoint) updateFromNode(n tailcfg.NodeView, heartbeatDisabled bool, p
 	}

 	de.setEndpointsLocked(n.Endpoints())
+
+	de.relayCapable = capVerIsRelayCapable(n.Cap())
 }

 func (de *endpoint) setEndpointsLocked(eps interface {
@@ -1472,7 +1606,7 @@ func (de *endpoint) addCandidateEndpoint(ep netip.AddrPort, forRxPingTxID stun.T
 			}
 		}
 		size2 := len(de.endpointState)
-		de.c.dlogf("[v1] magicsock: disco: addCandidateEndpoint pruned %v candidate set from %v to %v entries", size, size2)
+		de.c.dlogf("[v1] magicsock: disco: addCandidateEndpoint pruned %v (%s) candidate set from %v to %v entries", de.discoShort(), de.publicKey.ShortString(), size, size2)
 	}
 	return false
 }
@@ -1487,17 +1621,19 @@ func (de *endpoint) clearBestAddrLocked() {
 	de.trustBestAddrUntil = 0
 }

-// noteBadEndpoint marks ipp as a bad endpoint that would need to be
+// noteBadEndpoint marks udpAddr as a bad endpoint that would need to be
 // re-evaluated before future use, this should be called for example if a send
-// to ipp fails due to a host unreachable error or similar.
-func (de *endpoint) noteBadEndpoint(ipp netip.AddrPort) {
+// to udpAddr fails due to a host unreachable error or similar.
+func (de *endpoint) noteBadEndpoint(udpAddr epAddr) {
 	de.mu.Lock()
 	defer de.mu.Unlock()

 	de.clearBestAddrLocked()

-	if st, ok := de.endpointState[ipp]; ok {
-		st.clear()
+	if !udpAddr.vni.IsSet() {
+		if st, ok := de.endpointState[udpAddr.ap]; ok {
+			st.clear()
+		}
 	}
 }

@@ -1517,17 +1653,20 @@ func (de *endpoint) noteConnectivityChange() {

 // pingSizeToPktLen calculates the minimum path MTU that would permit
 // a disco ping message of length size to reach its target at
-// addr. size is the length of the entire disco message including
+// udpAddr. size is the length of the entire disco message including
 // disco headers. If size is zero, assume it is the safe wire MTU.
-func pingSizeToPktLen(size int, is6 bool) tstun.WireMTU {
+func pingSizeToPktLen(size int, udpAddr epAddr) tstun.WireMTU {
 	if size == 0 {
 		return tstun.SafeWireMTU()
 	}
 	headerLen := ipv4.HeaderLen
-	if is6 {
+	if udpAddr.ap.Addr().Is6() {
 		headerLen = ipv6.HeaderLen
 	}
 	headerLen += 8 // UDP header length
+	if udpAddr.vni.IsSet() {
+		headerLen += packet.GeneveFixedHeaderLength
+	}
 	return tstun.WireMTU(size + headerLen)
 }

@@ -1554,11 +1693,11 @@ func pktLenToPingSize(mtu tstun.WireMTU, is6 bool) int {
 // It should be called with the Conn.mu held.
 //
 // It reports whether m.TxID corresponds to a ping that this endpoint sent.
-func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip.AddrPort) (knownTxID bool) {
+func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAddr) (knownTxID bool) {
 	de.mu.Lock()
 	defer de.mu.Unlock()

-	isDerp := src.Addr() == tailcfg.DerpMagicIPAddr
+	isDerp := src.ap.Addr() == tailcfg.DerpMagicIPAddr

 	sp, ok := de.sentPing[m.TxID]
 	if !ok {
@@ -1568,7 +1707,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
 	knownTxID = true // for naked returns below
 	de.removeSentDiscoPingLocked(m.TxID, sp, discoPongReceived)

-	pktLen := int(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))
+	pktLen := int(pingSizeToPktLen(sp.size, src))
 	if sp.size != 0 {
 		m := getPeerMTUsProbedMetric(tstun.WireMTU(pktLen))
 		m.Add(1)
@@ -1580,25 +1719,27 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
 	now := mono.Now()
 	latency := now.Sub(sp.at)

-	if !isDerp {
-		st, ok := de.endpointState[sp.to]
+	if !isDerp && !src.vni.IsSet() {
+		// Note: we check vni.IsSet() because relay [epAddr]s are not stored in
+		// endpointState; they are either de.bestAddr or not.
+		st, ok := de.endpointState[sp.to.ap]
 		if !ok {
 			// This is no longer an endpoint we care about.
 			return
 		}

-		de.c.peerMap.setNodeKeyForIPPort(src, de.publicKey)
+		de.c.peerMap.setNodeKeyForEpAddr(src, de.publicKey)

 		st.addPongReplyLocked(pongReply{
 			latency: latency,
 			pongAt:  now,
-			from:    src,
+			from:    src.ap,
 			pongSrc: m.Src,
 		})
 	}

 	if sp.purpose != pingHeartbeat && sp.purpose != pingHeartbeatForUDPLifetime {
-		de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v)  got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pktLen, m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
+		de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v)  got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoAtomic.Short(), de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pktLen, m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
 			if sp.to != src {
 				fmt.Fprintf(bw, " ping.to=%v", sp.to)
 			}
@@ -1616,21 +1757,30 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
 	// Promote this pong response to our current best address if it's lower latency.
 	// TODO(bradfitz): decide how latency vs. preference order affects decision
 	if !isDerp {
-		thisPong := addrQuality{sp.to, latency, tstun.WireMTU(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))}
+		thisPong := addrQuality{
+			epAddr:  sp.to,
+			latency: latency,
+			wireMTU: pingSizeToPktLen(sp.size, sp.to),
+		}
+		// TODO(jwhited): consider checking de.trustBestAddrUntil as well. If
+		//  de.bestAddr is untrusted we may want to clear it, otherwise we could
+		//  get stuck with a forever untrusted bestAddr that blackholes, since
+		//  we don't clear direct UDP paths on disco ping timeout (see
+		//  discoPingTimeout).
 		if betterAddr(thisPong, de.bestAddr) {
 			de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v tx=%x", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU, m.TxID[:6])
 			de.debugUpdates.Add(EndpointChange{
 				When: time.Now(),
-				What: "handlePingLocked-bestAddr-update",
+				What: "handlePongConnLocked-bestAddr-update",
 				From: de.bestAddr,
 				To:   thisPong,
 			})
 			de.setBestAddrLocked(thisPong)
 		}
-		if de.bestAddr.AddrPort == thisPong.AddrPort {
+		if de.bestAddr.epAddr == thisPong.epAddr {
 			de.debugUpdates.Add(EndpointChange{
 				When: time.Now(),
-				What: "handlePingLocked-bestAddr-latency",
+				What: "handlePongConnLocked-bestAddr-latency",
 				From: de.bestAddr,
 				To:   thisPong,
 			})
@@ -1642,20 +1792,43 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
 	return
 }

-// addrQuality is an IPPort with an associated latency and path mtu.
+// epAddr is a [netip.AddrPort] with an optional Geneve header (RFC8926)
+// [packet.VirtualNetworkID].
+type epAddr struct {
+	ap  netip.AddrPort          // if ap.Addr() == tailcfg.DerpMagicIPAddr then vni is never set
+	vni packet.VirtualNetworkID // vni.IsSet() indicates if this [epAddr] involves a Geneve header
+}
+
+// isDirect returns true if e.ap is valid and not tailcfg.DerpMagicIPAddr,
+// and a VNI is not set.
+func (e epAddr) isDirect() bool {
+	return e.ap.IsValid() && e.ap.Addr() != tailcfg.DerpMagicIPAddr && !e.vni.IsSet()
+}
+
+func (e epAddr) String() string {
+	if !e.vni.IsSet() {
+		return e.ap.String()
+	}
+	return fmt.Sprintf("%v:vni:%d", e.ap.String(), e.vni.Get())
+}
+
+// addrQuality is an [epAddr], an optional [key.DiscoPublic] if a relay server
+// is associated, a round-trip latency measurement, and path mtu.
 type addrQuality struct {
-	netip.AddrPort
-	latency time.Duration
-	wireMTU tstun.WireMTU
+	epAddr
+	relayServerDisco key.DiscoPublic // only relevant if epAddr.vni.IsSet(), otherwise zero value
+	latency          time.Duration
+	wireMTU          tstun.WireMTU
 }

 func (a addrQuality) String() string {
-	return fmt.Sprintf("%v@%v+%v", a.AddrPort, a.latency, a.wireMTU)
+	// TODO(jwhited): consider including relayServerDisco
+	return fmt.Sprintf("%v@%v+%v", a.epAddr, a.latency, a.wireMTU)
 }

 // betterAddr reports whether a is a better addr to use than b.
 func betterAddr(a, b addrQuality) bool {
-	if a.AddrPort == b.AddrPort {
+	if a.epAddr == b.epAddr {
 		if a.wireMTU > b.wireMTU {
 			// TODO(val): Think harder about the case of lower
 			// latency and smaller or unknown MTU, and higher
@@ -1666,10 +1839,19 @@ func betterAddr(a, b addrQuality) bool {
 		}
 		return false
 	}
-	if !b.IsValid() {
+	if !b.ap.IsValid() {
 		return true
 	}
-	if !a.IsValid() {
+	if !a.ap.IsValid() {
+		return false
+	}
+
+	// Geneve-encapsulated paths (UDP relay servers) are lower preference than
+	// non-encapsulated paths.
+	if !a.vni.IsSet() && b.vni.IsSet() {
+		return true
+	}
+	if a.vni.IsSet() && !b.vni.IsSet() {
 		return false
 	}

@@ -1693,27 +1875,27 @@ func betterAddr(a, b addrQuality) bool {
 	// addresses, and prefer link-local unicast addresses over other types
 	// of private IP addresses since it's definitionally more likely that
 	// they'll be on the same network segment than a general private IP.
-	if a.Addr().IsLoopback() {
+	if a.ap.Addr().IsLoopback() {
 		aPoints += 50
-	} else if a.Addr().IsLinkLocalUnicast() {
+	} else if a.ap.Addr().IsLinkLocalUnicast() {
 		aPoints += 30
-	} else if a.Addr().IsPrivate() {
+	} else if a.ap.Addr().IsPrivate() {
 		aPoints += 20
 	}
-	if b.Addr().IsLoopback() {
+	if b.ap.Addr().IsLoopback() {
 		bPoints += 50
-	} else if b.Addr().IsLinkLocalUnicast() {
+	} else if b.ap.Addr().IsLinkLocalUnicast() {
 		bPoints += 30
-	} else if b.Addr().IsPrivate() {
+	} else if b.ap.Addr().IsPrivate() {
 		bPoints += 20
 	}

 	// Prefer IPv6 for being a bit more robust, as long as
 	// the latencies are roughly equivalent.
-	if a.Addr().Is6() {
+	if a.ap.Addr().Is6() {
 		aPoints += 10
 	}
-	if b.Addr().Is6() {
+	if b.ap.Addr().Is6() {
 		bPoints += 10
 	}

@@ -1797,7 +1979,25 @@ func (de *endpoint) handleCallMeMaybe(m *disco.CallMeMaybe) {
 	for _, st := range de.endpointState {
 		st.lastPing = 0
 	}
-	de.sendDiscoPingsLocked(mono.Now(), false)
+	monoNow := mono.Now()
+	de.sendDiscoPingsLocked(monoNow, false)
+
+	// This hook is required to trigger peer relay path discovery around
+	// disco "tailscale ping" initiated by de. We may be configured with peer
+	// relay servers that differ from de.
+	//
+	// The only other peer relay path discovery hook is in [endpoint.heartbeat],
+	// which is kicked off around outbound WireGuard packet flow, or if you are
+	// the "tailscale ping" initiator. Disco "tailscale ping" does not propagate
+	// into wireguard-go.
+	//
+	// We choose not to hook this around disco ping reception since peer relay
+	// path discovery can also trigger disco ping transmission, which *could*
+	// lead to an infinite loop of peer relay path discovery between two peers,
+	// absent intended triggers.
+	if de.wantUDPRelayPathDiscoveryLocked(monoNow) {
+		de.discoverUDPRelayPathsLocked(monoNow)
+	}
 }

 func (de *endpoint) populatePeerStatus(ps *ipnstate.PeerStatus) {
@@ -1814,8 +2014,12 @@ func (de *endpoint) populatePeerStatus(ps *ipnstate.PeerStatus) {
 	ps.LastWrite = de.lastSendExt.WallTime()
 	ps.Active = now.Sub(de.lastSendExt) < sessionActiveTimeout

-	if udpAddr, derpAddr, _ := de.addrForSendLocked(now); udpAddr.IsValid() && !derpAddr.IsValid() {
-		ps.CurAddr = udpAddr.String()
+	if udpAddr, derpAddr, _ := de.addrForSendLocked(now); udpAddr.ap.IsValid() && !derpAddr.IsValid() {
+		if udpAddr.vni.IsSet() {
+			ps.PeerRelay = udpAddr.String()
+		} else {
+			ps.CurAddr = udpAddr.String()
+		}
 	}
 }

@@ -1863,14 +2067,22 @@ func (de *endpoint) resetLocked() {
 		}
 	}
 	de.probeUDPLifetime.resetCycleEndpointLocked()
+	de.c.relayManager.stopWork(de)
 }

 func (de *endpoint) numStopAndReset() int64 {
 	return atomic.LoadInt64(&de.numStopAndResetAtomic)
 }

+// setDERPHome sets the provided regionID as home for de. Calls to setDERPHome
+// must never run concurrently with [Conn.updateRelayServersSet], otherwise
+// [candidatePeerRelay] DERP home changes may be missed from the perspective of
+// [relayManager].
 func (de *endpoint) setDERPHome(regionID uint16) {
 	de.mu.Lock()
 	defer de.mu.Unlock()
 	de.derpAddr = netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))
+	if de.c.hasPeerRelayServers.Load() {
+		de.c.relayManager.handleDERPHomeChange(de.publicKey, regionID)
+	}
 }
--- a/vendor/tailscale.com/wgengine/magicsock/endpoint_tracker.go
+++ b/vendor/tailscale.com/wgengine/magicsock/endpoint_tracker.go
@@ -6,9 +6,9 @@ package magicsock
 import (
 	"net/netip"
 	"slices"
-	"sync"
 	"time"

+	"tailscale.com/syncs"
 	"tailscale.com/tailcfg"
 	"tailscale.com/tempfork/heap"
 	"tailscale.com/util/mak"
@@ -107,7 +107,7 @@ func (eh endpointHeap) Min() *endpointTrackerEntry {
 //
 // See tailscale/tailscale#7877 for more information.
 type endpointTracker struct {
-	mu        sync.Mutex
+	mu        syncs.Mutex
 	endpoints map[netip.Addr]*endpointHeap
 }

--- a/vendor/tailscale.com/wgengine/magicsock/magicsock.go
+++ b/vendor/tailscale.com/wgengine/magicsock/magicsock.go
--- a/vendor/tailscale.com/wgengine/magicsock/magicsock_default.go
+++ b/vendor/tailscale.com/wgengine/magicsock/magicsock_default.go
@@ -1,7 +1,7 @@
 // Copyright (c) Tailscale Inc & AUTHORS
 // SPDX-License-Identifier: BSD-3-Clause

-//go:build !linux
+//go:build !linux || ts_omit_listenrawdisco

 package magicsock

@@ -9,19 +9,8 @@ import (
 	"errors"
 	"fmt"
 	"io"
-
-	"tailscale.com/types/logger"
-	"tailscale.com/types/nettype"
 )

 func (c *Conn) listenRawDisco(family string) (io.Closer, error) {
 	return nil, fmt.Errorf("raw disco listening not supported on this OS: %w", errors.ErrUnsupported)
 }
-
-func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
-	portableTrySetSocketBuffer(pconn, logf)
-}
-
-const (
-	controlMessageSize = 0
-)
--- a/vendor/tailscale.com/wgengine/magicsock/magicsock_linux.go
+++ b/vendor/tailscale.com/wgengine/magicsock/magicsock_linux.go
@@ -1,6 +1,8 @@
 // Copyright (c) Tailscale Inc & AUTHORS
 // SPDX-License-Identifier: BSD-3-Clause

+//go:build linux && !ts_omit_listenrawdisco
+
 package magicsock

 import (
@@ -13,7 +15,6 @@ import (
 	"net"
 	"net/netip"
 	"strings"
-	"syscall"
 	"time"

 	"github.com/mdlayher/socket"
@@ -28,7 +29,6 @@ import (
 	"tailscale.com/types/ipproto"
 	"tailscale.com/types/key"
 	"tailscale.com/types/logger"
-	"tailscale.com/types/nettype"
 )

 const (
@@ -66,10 +66,10 @@ var (
 		// fragmented, and we don't want to handle reassembly.
 		bpf.LoadAbsolute{Off: 6, Size: 2},
 		// More Fragments bit set means this is part of a fragmented packet.
-		bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x2000, SkipTrue: 7, SkipFalse: 0},
+		bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x2000, SkipTrue: 8, SkipFalse: 0},
 		// Non-zero fragment offset with MF=0 means this is the last
 		// fragment of packet.
-		bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x1fff, SkipTrue: 6, SkipFalse: 0},
+		bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x1fff, SkipTrue: 7, SkipFalse: 0},

 		// Load IP header length into X register.
 		bpf.LoadMemShift{Off: 0},
@@ -453,7 +453,13 @@ func (c *Conn) receiveDisco(pc *socket.Conn, isIPV6 bool) {
 			metricRecvDiscoPacketIPv4.Add(1)
 		}

-		c.handleDiscoMessage(payload, srcAddr, key.NodePublic{}, discoRXPathRawSocket)
+		pt, isGeneveEncap := packetLooksLike(payload)
+		if pt == packetLooksLikeDisco && !isGeneveEncap {
+			// The BPF program matching on disco does not currently support
+			// Geneve encapsulation. isGeneveEncap should never be true if
+			// payload is disco.
+			c.handleDiscoMessage(payload, epAddr{ap: srcAddr}, false, key.NodePublic{}, discoRXPathRawSocket)
+		}
 	}
 }

@@ -483,38 +489,3 @@ func printSockaddr(sa unix.Sockaddr) string {
 		return fmt.Sprintf("unknown(%T)", sa)
 	}
 }
-
-// trySetSocketBuffer attempts to set SO_SNDBUFFORCE and SO_RECVBUFFORCE which
-// can overcome the limit of net.core.{r,w}mem_max, but require CAP_NET_ADMIN.
-// It falls back to the portable implementation if that fails, which may be
-// silently capped to net.core.{r,w}mem_max.
-func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
-	if c, ok := pconn.(*net.UDPConn); ok {
-		var errRcv, errSnd error
-		rc, err := c.SyscallConn()
-		if err == nil {
-			rc.Control(func(fd uintptr) {
-				errRcv = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, socketBufferSize)
-				if errRcv != nil {
-					logf("magicsock: [warning] failed to force-set UDP read buffer size to %d: %v; using kernel default values (impacts throughput only)", socketBufferSize, errRcv)
-				}
-				errSnd = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_SNDBUFFORCE, socketBufferSize)
-				if errSnd != nil {
-					logf("magicsock: [warning] failed to force-set UDP write buffer size to %d: %v; using kernel default values (impacts throughput only)", socketBufferSize, errSnd)
-				}
-			})
-		}
-
-		if err != nil || errRcv != nil || errSnd != nil {
-			portableTrySetSocketBuffer(pconn, logf)
-		}
-	}
-}
-
-var controlMessageSize = -1 // bomb if used for allocation before init
-
-func init() {
-	// controlMessageSize is set to hold a UDP_GRO or UDP_SEGMENT control
-	// message. These contain a single uint16 of data.
-	controlMessageSize = unix.CmsgSpace(2)
-}
--- a/vendor/tailscale.com/wgengine/magicsock/magicsock_notwindows.go
+++ b/vendor/tailscale.com/wgengine/magicsock/magicsock_notwindows.go
@@ -1,13 +0,0 @@
-// Copyright (c) Tailscale Inc & AUTHORS
-// SPDX-License-Identifier: BSD-3-Clause
-
-//go:build !windows
-
-package magicsock
-
-import (
-	"tailscale.com/types/logger"
-	"tailscale.com/types/nettype"
-)
-
-func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) {}
--- a/vendor/tailscale.com/wgengine/magicsock/magicsock_windows.go
+++ b/vendor/tailscale.com/wgengine/magicsock/magicsock_windows.go
@@ -1,58 +0,0 @@
-// Copyright (c) Tailscale Inc & AUTHORS
-// SPDX-License-Identifier: BSD-3-Clause
-
-//go:build windows
-
-package magicsock
-
-import (
-	"net"
-	"unsafe"
-
-	"golang.org/x/sys/windows"
-	"tailscale.com/types/logger"
-	"tailscale.com/types/nettype"
-)
-
-func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) {
-	c, ok := pconn.(*net.UDPConn)
-	if !ok {
-		// not a UDP connection; nothing to do
-		return
-	}
-
-	sysConn, err := c.SyscallConn()
-	if err != nil {
-		logf("trySetUDPSocketOptions: getting SyscallConn failed: %v", err)
-		return
-	}
-
-	// Similar to https://github.com/golang/go/issues/5834 (which involved
-	// WSAECONNRESET), Windows can return a WSAENETRESET error, even on UDP
-	// reads. Disable this.
-	const SIO_UDP_NETRESET = windows.IOC_IN | windows.IOC_VENDOR | 15
-
-	var ioctlErr error
-	err = sysConn.Control(func(fd uintptr) {
-		ret := uint32(0)
-		flag := uint32(0)
-		size := uint32(unsafe.Sizeof(flag))
-		ioctlErr = windows.WSAIoctl(
-			windows.Handle(fd),
-			SIO_UDP_NETRESET,               // iocc
-			(*byte)(unsafe.Pointer(&flag)), // inbuf
-			size,                           // cbif
-			nil,                            // outbuf
-			0,                              // cbob
-			&ret,                           // cbbr
-			nil,                            // overlapped
-			0,                              // completionRoutine
-		)
-	})
-	if ioctlErr != nil {
-		logf("trySetUDPSocketOptions: could not set SIO_UDP_NETRESET: %v", ioctlErr)
-	}
-	if err != nil {
-		logf("trySetUDPSocketOptions: SyscallConn.Control failed: %v", err)
-	}
-}
--- a/vendor/tailscale.com/wgengine/magicsock/peermap.go
+++ b/vendor/tailscale.com/wgengine/magicsock/peermap.go
@@ -4,8 +4,6 @@
 package magicsock

 import (
-	"net/netip"
-
 	"tailscale.com/tailcfg"
 	"tailscale.com/types/key"
 	"tailscale.com/util/set"
@@ -15,17 +13,17 @@ import (
 // peer.
 type peerInfo struct {
 	ep *endpoint // always non-nil.
-	// ipPorts is an inverted version of peerMap.byIPPort (below), so
+	// epAddrs is an inverted version of peerMap.byEpAddr (below), so
 	// that when we're deleting this node, we can rapidly find out the
-	// keys that need deleting from peerMap.byIPPort without having to
-	// iterate over every IPPort known for any peer.
-	ipPorts set.Set[netip.AddrPort]
+	// keys that need deleting from peerMap.byEpAddr without having to
+	// iterate over every epAddr known for any peer.
+	epAddrs set.Set[epAddr]
 }

 func newPeerInfo(ep *endpoint) *peerInfo {
 	return &peerInfo{
 		ep:      ep,
-		ipPorts: set.Set[netip.AddrPort]{},
+		epAddrs: set.Set[epAddr]{},
 	}
 }

@@ -35,9 +33,21 @@ func newPeerInfo(ep *endpoint) *peerInfo {
 // It doesn't do any locking; all access must be done with Conn.mu held.
 type peerMap struct {
 	byNodeKey map[key.NodePublic]*peerInfo
-	byIPPort  map[netip.AddrPort]*peerInfo
+	byEpAddr  map[epAddr]*peerInfo
 	byNodeID  map[tailcfg.NodeID]*peerInfo

+	// relayEpAddrByNodeKey ensures we only hold a single relay
+	// [epAddr] (vni.IsSet()) for a given node key in byEpAddr, vs letting them
+	// grow unbounded. Relay [epAddr]s are dynamically created by
+	// [relayManager] during path discovery, and are only useful to track in
+	// peerMap so long as they are the endpoint.bestAddr. [relayManager] handles
+	// all creation and initial probing responsibilities otherwise, and it does
+	// not depend on [peerMap].
+	//
+	// Note: This doesn't address unbounded growth of non-relay epAddrs in
+	// byEpAddr. That issue is being tracked in http://go/corp/29422.
+	relayEpAddrByNodeKey map[key.NodePublic]epAddr
+
 	// nodesOfDisco contains the set of nodes that are using a
 	// DiscoKey. Usually those sets will be just one node.
 	nodesOfDisco map[key.DiscoPublic]set.Set[key.NodePublic]
@@ -45,10 +55,11 @@ type peerMap struct {

 func newPeerMap() peerMap {
 	return peerMap{
-		byNodeKey:    map[key.NodePublic]*peerInfo{},
-		byIPPort:     map[netip.AddrPort]*peerInfo{},
-		byNodeID:     map[tailcfg.NodeID]*peerInfo{},
-		nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{},
+		byNodeKey:            map[key.NodePublic]*peerInfo{},
+		byEpAddr:             map[epAddr]*peerInfo{},
+		byNodeID:             map[tailcfg.NodeID]*peerInfo{},
+		relayEpAddrByNodeKey: map[key.NodePublic]epAddr{},
+		nodesOfDisco:         map[key.DiscoPublic]set.Set[key.NodePublic]{},
 	}
 }

@@ -88,10 +99,10 @@ func (m *peerMap) endpointForNodeID(nodeID tailcfg.NodeID) (ep *endpoint, ok boo
 	return nil, false
 }

-// endpointForIPPort returns the endpoint for the peer we
-// believe to be at ipp, or nil if we don't know of any such peer.
-func (m *peerMap) endpointForIPPort(ipp netip.AddrPort) (ep *endpoint, ok bool) {
-	if info, ok := m.byIPPort[ipp]; ok {
+// endpointForEpAddr returns the endpoint for the peer we
+// believe to be at addr, or nil if we don't know of any such peer.
+func (m *peerMap) endpointForEpAddr(addr epAddr) (ep *endpoint, ok bool) {
+	if info, ok := m.byEpAddr[addr]; ok {
 		return info.ep, true
 	}
 	return nil, false
@@ -148,10 +159,10 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
 		// TODO(raggi,catzkorn): this could mean that if a "isWireguardOnly"
 		// peer has, say, 192.168.0.2 and so does a tailscale peer, the
 		// wireguard one will win. That may not be the outcome that we want -
-		// perhaps we should prefer bestAddr.AddrPort if it is set?
+		// perhaps we should prefer bestAddr.epAddr.ap if it is set?
 		// see tailscale/tailscale#7994
 		for ipp := range ep.endpointState {
-			m.setNodeKeyForIPPort(ipp, ep.publicKey)
+			m.setNodeKeyForEpAddr(epAddr{ap: ipp}, ep.publicKey)
 		}
 		return
 	}
@@ -163,20 +174,31 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
 	discoSet.Add(ep.publicKey)
 }

-// setNodeKeyForIPPort makes future peer lookups by ipp return the
+// setNodeKeyForEpAddr makes future peer lookups by addr return the
 // same endpoint as a lookup by nk.
 //
-// This should only be called with a fully verified mapping of ipp to
+// This should only be called with a fully verified mapping of addr to
 // nk, because calling this function defines the endpoint we hand to
-// WireGuard for packets received from ipp.
-func (m *peerMap) setNodeKeyForIPPort(ipp netip.AddrPort, nk key.NodePublic) {
-	if pi := m.byIPPort[ipp]; pi != nil {
-		delete(pi.ipPorts, ipp)
-		delete(m.byIPPort, ipp)
+// WireGuard for packets received from addr.
+func (m *peerMap) setNodeKeyForEpAddr(addr epAddr, nk key.NodePublic) {
+	if pi := m.byEpAddr[addr]; pi != nil {
+		delete(pi.epAddrs, addr)
+		delete(m.byEpAddr, addr)
+		if addr.vni.IsSet() {
+			delete(m.relayEpAddrByNodeKey, pi.ep.publicKey)
+		}
 	}
 	if pi, ok := m.byNodeKey[nk]; ok {
-		pi.ipPorts.Add(ipp)
-		m.byIPPort[ipp] = pi
+		if addr.vni.IsSet() {
+			relay, ok := m.relayEpAddrByNodeKey[nk]
+			if ok {
+				delete(pi.epAddrs, relay)
+				delete(m.byEpAddr, relay)
+			}
+			m.relayEpAddrByNodeKey[nk] = addr
+		}
+		pi.epAddrs.Add(addr)
+		m.byEpAddr[addr] = pi
 	}
 }

@@ -203,7 +225,8 @@ func (m *peerMap) deleteEndpoint(ep *endpoint) {
 		// Unexpected. But no logger plumbed here to log so.
 		return
 	}
-	for ip := range pi.ipPorts {
-		delete(m.byIPPort, ip)
+	for ip := range pi.epAddrs {
+		delete(m.byEpAddr, ip)
 	}
+	delete(m.relayEpAddrByNodeKey, ep.publicKey)
 }
--- a/vendor/tailscale.com/wgengine/magicsock/rebinding_conn.go
+++ b/vendor/tailscale.com/wgengine/magicsock/rebinding_conn.go
@@ -5,14 +5,17 @@ package magicsock

 import (
 	"errors"
+	"fmt"
 	"net"
 	"net/netip"
-	"sync"
 	"sync/atomic"
 	"syscall"

 	"golang.org/x/net/ipv6"
+	"tailscale.com/net/batching"
 	"tailscale.com/net/netaddr"
+	"tailscale.com/net/packet"
+	"tailscale.com/syncs"
 	"tailscale.com/types/nettype"
 )

@@ -28,7 +31,7 @@ type RebindingUDPConn struct {
 	// Neither is expected to be nil, sockets are bound on creation.
 	pconnAtomic atomic.Pointer[nettype.PacketConn]

-	mu    sync.Mutex // held while changing pconn (and pconnAtomic)
+	mu    syncs.Mutex // held while changing pconn (and pconnAtomic)
 	pconn nettype.PacketConn
 	port  uint16
 }
@@ -40,7 +43,7 @@ type RebindingUDPConn struct {
 // disrupting surrounding code that assumes nettype.PacketConn is a
 // *net.UDPConn.
 func (c *RebindingUDPConn) setConnLocked(p nettype.PacketConn, network string, batchSize int) {
-	upc := tryUpgradeToBatchingConn(p, network, batchSize)
+	upc := batching.TryUpgradeToConn(p, network, batchSize)
 	c.pconn = upc
 	c.pconnAtomic.Store(&upc)
 	c.port = uint16(c.localAddrLocked().Port)
@@ -70,21 +73,39 @@ func (c *RebindingUDPConn) ReadFromUDPAddrPort(b []byte) (int, netip.AddrPort, e
 	return c.readFromWithInitPconn(*c.pconnAtomic.Load(), b)
 }

-// WriteBatchTo writes buffs to addr.
-func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error {
+// WriteWireGuardBatchTo writes buffs to addr. It serves primarily as an alias
+// for [batching.Conn.WriteBatchTo], with fallback to single packet operations
+// if c.pconn is not a [batching.Conn].
+//
+// WriteWireGuardBatchTo assumes buffs are WireGuard packets, which is notable
+// for Geneve encapsulation: Geneve protocol is set to [packet.GeneveProtocolWireGuard],
+// and the control bit is left unset.
+func (c *RebindingUDPConn) WriteWireGuardBatchTo(buffs [][]byte, addr epAddr, offset int) error {
+	if offset != packet.GeneveFixedHeaderLength {
+		return fmt.Errorf("RebindingUDPConn.WriteWireGuardBatchTo: [unexpected] offset (%d) != Geneve header length (%d)", offset, packet.GeneveFixedHeaderLength)
+	}
+	gh := packet.GeneveHeader{
+		Protocol: packet.GeneveProtocolWireGuard,
+		VNI:      addr.vni,
+	}
 	for {
 		pconn := *c.pconnAtomic.Load()
-		b, ok := pconn.(batchingConn)
+		b, ok := pconn.(batching.Conn)
 		if !ok {
 			for _, buf := range buffs {
-				_, err := c.writeToUDPAddrPortWithInitPconn(pconn, buf, addr)
+				if gh.VNI.IsSet() {
+					gh.Encode(buf)
+				} else {
+					buf = buf[offset:]
+				}
+				_, err := c.writeToUDPAddrPortWithInitPconn(pconn, buf, addr.ap)
 				if err != nil {
 					return err
 				}
 			}
 			return nil
 		}
-		err := b.WriteBatchTo(buffs, addr)
+		err := b.WriteBatchTo(buffs, addr.ap, gh, offset)
 		if err != nil {
 			if pconn != c.currentConn() {
 				continue
@@ -95,13 +116,12 @@ func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) err
 	}
 }

-// ReadBatch reads messages from c into msgs. It returns the number of messages
-// the caller should evaluate for nonzero len, as a zero len message may fall
-// on either side of a nonzero.
+// ReadBatch is an alias for [batching.Conn.ReadBatch] with fallback to single
+// packet operations if c.pconn is not a [batching.Conn].
 func (c *RebindingUDPConn) ReadBatch(msgs []ipv6.Message, flags int) (int, error) {
 	for {
 		pconn := *c.pconnAtomic.Load()
-		b, ok := pconn.(batchingConn)
+		b, ok := pconn.(batching.Conn)
 		if !ok {
 			n, ap, err := c.readFromWithInitPconn(pconn, msgs[0].Buffers[0])
 			if err == nil {
--- a/vendor/tailscale.com/wgengine/magicsock/relaymanager.go
+++ b/vendor/tailscale.com/wgengine/magicsock/relaymanager.go