This commit is contained in:
2026-02-19 10:07:43 +00:00
parent 007438e372
commit 6e637ecf77
1763 changed files with 60820 additions and 279516 deletions

View File

@@ -1,25 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"net/netip"
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"tailscale.com/types/nettype"
)
var (
// This acts as a compile-time check for our usage of ipv6.Message in
// batchingConn for both IPv6 and IPv4 operations: the declaration below
// only compiles while an ipv4.Message value is assignable to an
// ipv6.Message variable.
_ ipv6.Message = ipv4.Message{}
)
// batchingConn is a nettype.PacketConn that provides batched i/o.
type batchingConn interface {
nettype.PacketConn
// ReadBatch reads into msgs, passing flags through to the implementation,
// and reports a count n and any error.
// NOTE(review): n is presumably the number of leading elements of msgs the
// caller should inspect; confirm against the implementations.
ReadBatch(msgs []ipv6.Message, flags int) (n int, err error)
// WriteBatchTo writes the packets in buffs to addr.
WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error
}

View File

@@ -1,14 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !linux
package magicsock
import (
"tailscale.com/types/nettype"
)
// tryUpgradeToBatchingConn is the variant built under the !linux constraint.
// It performs no upgrade: pconn is returned unchanged and the network and
// batch size arguments are ignored.
func tryUpgradeToBatchingConn(pconn nettype.PacketConn, _ string, _ int) nettype.PacketConn {
return pconn
}

View File

@@ -1,424 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"encoding/binary"
"errors"
"fmt"
"net"
"net/netip"
"runtime"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"unsafe"
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"golang.org/x/sys/unix"
"tailscale.com/hostinfo"
"tailscale.com/net/neterror"
"tailscale.com/types/nettype"
)
// xnetBatchReaderWriter defines the batching i/o methods of
// golang.org/x/net/ipv4.PacketConn (and ipv6.PacketConn).
// TODO(jwhited): This should eventually be replaced with the standard library
// implementation of https://github.com/golang/go/issues/45886
type xnetBatchReaderWriter interface {
xnetBatchReader
xnetBatchWriter
}
// xnetBatchReader is the read half of xnetBatchReaderWriter.
type xnetBatchReader interface {
ReadBatch([]ipv6.Message, int) (int, error)
}
// xnetBatchWriter is the write half of xnetBatchReaderWriter.
type xnetBatchWriter interface {
WriteBatch([]ipv6.Message, int) (int, error)
}
// linuxBatchingConn is a UDP socket that provides batched i/o. It implements
// batchingConn.
type linuxBatchingConn struct {
// pc is the wrapped conn; the non-batched methods (deadlines, LocalAddr,
// WriteToUDPAddrPort, Close) delegate to it.
pc nettype.PacketConn
// xpc performs the batched reads and writes.
xpc xnetBatchReaderWriter
rxOffload bool // supports UDP GRO or similar
txOffload atomic.Bool // supports UDP GSO or similar
setGSOSizeInControl func(control *[]byte, gsoSize uint16) // typically setGSOSizeInControl(); swappable for testing
getGSOSizeFromControl func(control []byte) (int, error) // typically getGSOSizeFromControl(); swappable for testing
// sendBatchPool holds reusable *sendBatch values used by WriteBatchTo.
sendBatchPool sync.Pool
}
// ReadFromUDPAddrPort reads a single packet from the wrapped conn. It is only
// usable while rx UDP offload is disabled; otherwise it returns an error
// without touching the socket.
func (c *linuxBatchingConn) ReadFromUDPAddrPort(p []byte) (n int, addr netip.AddrPort, err error) {
	if !c.rxOffload {
		return c.pc.ReadFromUDPAddrPort(p)
	}
	// UDP_GRO is opt-in on Linux via setsockopt(). Once it is enabled, any
	// read call may yield a "monster datagram". The ReadFrom() API has no way
	// to convey the GSO size, so using it would be unsafe. Other platforms
	// may behave differently, but the most conservative choice is made here
	// so that this cannot turn into a footgun later.
	return 0, netip.AddrPort{}, errors.New("rx UDP offload is enabled on this socket, single packet reads are unavailable")
}
// SetDeadline forwards t to the wrapped conn's SetDeadline.
func (c *linuxBatchingConn) SetDeadline(t time.Time) error {
return c.pc.SetDeadline(t)
}
// SetReadDeadline forwards t to the wrapped conn's SetReadDeadline.
func (c *linuxBatchingConn) SetReadDeadline(t time.Time) error {
return c.pc.SetReadDeadline(t)
}
// SetWriteDeadline forwards t to the wrapped conn's SetWriteDeadline.
func (c *linuxBatchingConn) SetWriteDeadline(t time.Time) error {
return c.pc.SetWriteDeadline(t)
}
const (
// This was initially established for Linux, but may split out to
// GOOS-specific values later. It originates as UDP_MAX_SEGMENTS in the
// kernel's TX path, and UDP_GRO_CNT_MAX for RX.
udpSegmentMaxDatagrams = 64
)
const (
// Exceeding these values results in EMSGSIZE.
//
// Both start from the 16-bit maximum of 65535 (1<<16 - 1).
// NOTE(review): the subtracted 20 and 8 are presumably the minimum IPv4
// header and the UDP header sizes respectively; confirm.
maxIPv4PayloadLen = 1<<16 - 1 - 20 - 8
maxIPv6PayloadLen = 1<<16 - 1 - 8
)
// coalesceMessages iterates msgs, coalescing them where possible while
// maintaining datagram order. All msgs have their Addr field set to addr.
//
// Each element of buffs is either appended to the message currently being
// built (msgs[base]) or becomes the first buffer of the next message. The
// return value is the number of leading elements of msgs that were populated;
// it is 0 when buffs is empty.
//
// Coalesced bytes are appended into the spare capacity of the first buffer of
// each batch, so that buffer's backing array is written to.
func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, buffs [][]byte, msgs []ipv6.Message) int {
var (
base = -1 // index of msg we are currently coalescing into
gsoSize int // segmentation size of msgs[base]
dgramCnt int // number of dgrams coalesced into msgs[base]
endBatch bool // tracking flag to start a new batch on next iteration of buffs
)
// The payload limit depends on the address family of addr.
maxPayloadLen := maxIPv4PayloadLen
if addr.IP.To4() == nil {
maxPayloadLen = maxIPv6PayloadLen
}
for i, buff := range buffs {
// The first buff always starts a new message, so coalescing is only
// considered from the second buff onwards.
if i > 0 {
msgLen := len(buff)
baseLenBefore := len(msgs[base].Buffers[0])
freeBaseCap := cap(msgs[base].Buffers[0]) - baseLenBefore
// Coalesce only if the combined payload stays within the limit, buff is
// no larger than the segment size, it fits in the base buffer's spare
// capacity, the datagram count limit is not reached, and a short
// segment has not already terminated this batch.
if msgLen+baseLenBefore <= maxPayloadLen &&
msgLen <= gsoSize &&
msgLen <= freeBaseCap &&
dgramCnt < udpSegmentMaxDatagrams &&
!endBatch {
// Extend the base buffer by msgLen bytes, then copy buff into the
// newly exposed region.
msgs[base].Buffers[0] = append(msgs[base].Buffers[0], make([]byte, msgLen)...)
copy(msgs[base].Buffers[0][baseLenBefore:], buff)
// No later iteration will finalize this message, so write its
// control data now.
if i == len(buffs)-1 {
c.setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
}
dgramCnt++
if msgLen < gsoSize {
// A smaller than gsoSize packet on the tail is legal, but
// it must end the batch.
endBatch = true
}
continue
}
}
// Finalize the previous message: control data is only written when it
// actually holds more than one datagram.
if dgramCnt > 1 {
c.setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
}
// Reset prior to incrementing base since we are preparing to start a
// new potential batch.
endBatch = false
base++
gsoSize = len(buff)
msgs[base].OOB = msgs[base].OOB[:0]
msgs[base].Buffers[0] = buff
msgs[base].Addr = addr
dgramCnt = 1
}
return base + 1
}
// sendBatch is the reusable scratch state for one WriteBatchTo call.
type sendBatch struct {
// msgs are the messages handed to the batched write.
msgs []ipv6.Message
// ua is the destination address, filled in by WriteBatchTo on each call.
ua *net.UDPAddr
}
// getSendBatch takes a *sendBatch from the pool.
func (c *linuxBatchingConn) getSendBatch() *sendBatch {
	return c.sendBatchPool.Get().(*sendBatch)
}
// putSendBatch resets every message in batch, keeping only its Buffers and OOB
// slices and zeroing all other fields, then returns batch to the pool.
func (c *linuxBatchingConn) putSendBatch(batch *sendBatch) {
	for i := range batch.msgs {
		m := &batch.msgs[i]
		*m = ipv6.Message{Buffers: m.Buffers, OOB: m.OOB}
	}
	c.sendBatchPool.Put(batch)
}
// WriteBatchTo writes the packets in buffs to addr using a pooled sendBatch.
//
// When tx offload is enabled the packets are first coalesced. If the write
// then fails with an error for which neterror.ShouldDisableUDPGSO reports
// true, tx offload is switched off for this conn and the write is retried
// once without coalescing. In that case the result is always a
// neterror.ErrUDPGSODisabled, carrying the retry's error (possibly nil) in
// RetryErr.
func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error {
	batch := c.getSendBatch()
	defer c.putSendBatch(batch)
	// Reslice to the target length before copying. copy is bounded by
	// len(dst), so copying first would truncate a 16-byte address if the
	// pooled slice had been left at length 4 by an earlier call. The backing
	// array always has capacity 16.
	if addr.Addr().Is6() {
		as16 := addr.Addr().As16()
		batch.ua.IP = batch.ua.IP[:16]
		copy(batch.ua.IP, as16[:])
	} else {
		as4 := addr.Addr().As4()
		batch.ua.IP = batch.ua.IP[:4]
		copy(batch.ua.IP, as4[:])
	}
	batch.ua.Port = int(addr.Port())
	var (
		n       int
		retried bool
	)
retry:
	if c.txOffload.Load() {
		n = c.coalesceMessages(batch.ua, buffs, batch.msgs)
	} else {
		for i := range buffs {
			batch.msgs[i].Buffers[0] = buffs[i]
			batch.msgs[i].Addr = batch.ua
			batch.msgs[i].OOB = batch.msgs[i].OOB[:0]
		}
		n = len(buffs)
	}
	err := c.writeBatch(batch.msgs[:n])
	if err != nil && c.txOffload.Load() && neterror.ShouldDisableUDPGSO(err) {
		// txOffload is now false, so this branch cannot be taken again and the
		// retry happens at most once.
		c.txOffload.Store(false)
		retried = true
		goto retry
	}
	if retried {
		return neterror.ErrUDPGSODisabled{OnLaddr: c.pc.LocalAddr().String(), RetryErr: err}
	}
	return err
}
// SyscallConn returns the raw conn of the wrapped conn when it implements
// syscall.Conn, and errUnsupportedConnType otherwise.
func (c *linuxBatchingConn) SyscallConn() (syscall.RawConn, error) {
	if sc, ok := c.pc.(syscall.Conn); ok {
		return sc.SyscallConn()
	}
	return nil, errUnsupportedConnType
}
// writeBatch calls WriteBatch repeatedly until all of msgs has been accepted
// or an error occurs, resuming after however many messages the previous call
// reported as written.
func (c *linuxBatchingConn) writeBatch(msgs []ipv6.Message) error {
	for pending := msgs; ; {
		sent, err := c.xpc.WriteBatch(pending, 0)
		if err != nil || sent == len(pending) {
			// Reporting how many packets were written would mean unpicking
			// each msg's length and gso size for coalesced writes. Callers
			// at the top of the stack ignore partial success, so this stays
			// simple for now.
			return err
		}
		pending = pending[sent:]
	}
}
// splitCoalescedMessages splits coalesced messages from the tail of dst
// beginning at index 'firstMsgAt' into the head of the same slice. It reports
// the number of elements to evaluate in msgs for nonzero len (msgs[i].N). An
// error is returned if a socket control message cannot be parsed or a split
// operation would overflow msgs.
//
// Processing stops at the first source message whose N is zero.
func (c *linuxBatchingConn) splitCoalescedMessages(msgs []ipv6.Message, firstMsgAt int) (n int, err error) {
for i := firstMsgAt; i < len(msgs); i++ {
msg := &msgs[i]
if msg.N == 0 {
return n, err
}
var (
gsoSize int
start int
end = msg.N
numToSplit = 1
)
// Only the first NN bytes of OOB are passed to the parser.
gsoSize, err = c.getGSOSizeFromControl(msg.OOB[:msg.NN])
if err != nil {
return n, err
}
// A zero gsoSize leaves the defaults in place, so the message is copied
// as one piece. Otherwise split into ceil(N/gsoSize) segments.
if gsoSize > 0 {
numToSplit = (msg.N + gsoSize - 1) / gsoSize
end = gsoSize
}
for j := 0; j < numToSplit; j++ {
// The destination index n must not pass the source index i, or
// unread source data would be overwritten.
if n > i {
return n, errors.New("splitting coalesced packet resulted in overflow")
}
copied := copy(msgs[n].Buffers[0], msg.Buffers[0][start:end])
msgs[n].N = copied
msgs[n].Addr = msg.Addr
// Advance to the next segment, clamping the final one to msg.N.
start = end
end += gsoSize
if end > msg.N {
end = msg.N
}
n++
}
if i != n-1 {
// It is legal for bytes to move within msg.Buffers[0] as a result
// of splitting, so we only zero the source msg len when it is not
// the destination of the last split operation above.
msg.N = 0
}
}
return n, nil
}
// ReadBatch reads a batch of messages into msgs.
//
// With rx offload enabled and at least two msgs available, the read targets
// the last two elements of msgs and the results are then split into the head
// of msgs; on this path the underlying read is issued with flags 0 rather
// than the caller's flags. In every other case the call is passed straight
// through with the caller's flags.
func (c *linuxBatchingConn) ReadBatch(msgs []ipv6.Message, flags int) (n int, err error) {
	if c.rxOffload && len(msgs) >= 2 {
		// Read into the tail of msgs, split into the head.
		tailAt := len(msgs) - 2
		got, readErr := c.xpc.ReadBatch(msgs[tailAt:], 0)
		if readErr != nil || got == 0 {
			return 0, readErr
		}
		return c.splitCoalescedMessages(msgs, tailAt)
	}
	return c.xpc.ReadBatch(msgs, flags)
}
// LocalAddr returns the wrapped conn's local address. The unchecked type
// assertion panics if that address is not a *net.UDPAddr.
func (c *linuxBatchingConn) LocalAddr() net.Addr {
return c.pc.LocalAddr().(*net.UDPAddr)
}
// WriteToUDPAddrPort forwards a single, non-batched write of b to addr to the
// wrapped conn.
func (c *linuxBatchingConn) WriteToUDPAddrPort(b []byte, addr netip.AddrPort) (int, error) {
return c.pc.WriteToUDPAddrPort(b, addr)
}
// Close closes the wrapped conn.
func (c *linuxBatchingConn) Close() error {
return c.pc.Close()
}
// tryEnableUDPOffload probes pconn for UDP offload support and reports TX and
// RX support as two booleans. TX is reported when the UDP_SEGMENT option can
// be read with getsockopt; RX is reported when UDP_GRO is successfully enabled
// with setsockopt. Both are false if pconn is not a *net.UDPConn or its raw
// conn cannot be obtained or controlled.
func tryEnableUDPOffload(pconn nettype.PacketConn) (hasTX bool, hasRX bool) {
	uc, isUDP := pconn.(*net.UDPConn)
	if !isUDP {
		return false, false
	}
	rc, err := uc.SyscallConn()
	if err != nil {
		return false, false
	}
	var tx, rx bool
	ctrlErr := rc.Control(func(fd uintptr) {
		_, getErr := syscall.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT)
		tx = getErr == nil
		setErr := syscall.SetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO, 1)
		rx = setErr == nil
	})
	if ctrlErr != nil {
		return false, false
	}
	return tx, rx
}
// getGSOSizeFromControl returns the GSO size found in control. If no GSO size
// is found or the len(control) < unix.SizeofCmsghdr, this function returns 0.
// A non-nil error will be returned if len(control) > unix.SizeofCmsghdr but
// its contents cannot be parsed as a socket control message.
//
// The size is taken from the first two bytes, in native byte order, of the
// first control message with level SOL_UDP and type UDP_GRO.
func getGSOSizeFromControl(control []byte) (int, error) {
	var (
		hdr  unix.Cmsghdr
		data []byte
		rem  = control
		err  error
	)
	for len(rem) > unix.SizeofCmsghdr {
		// Parse from rem, not control, so that each iteration consumes the
		// next message. Re-parsing control would return the same first
		// message and the same remainder every time, and the loop would never
		// end when that message is not UDP_GRO and more data follows it.
		hdr, data, rem, err = unix.ParseOneSocketControlMessage(rem)
		if err != nil {
			return 0, fmt.Errorf("error parsing socket control message: %w", err)
		}
		if hdr.Level == unix.SOL_UDP && hdr.Type == unix.UDP_GRO && len(data) >= 2 {
			return int(binary.NativeEndian.Uint16(data[:2])), nil
		}
	}
	return 0, nil
}
// setGSOSizeInControl writes a single UDP_SEGMENT socket control message
// carrying gsoSize into control and sets control's length to cover it. If
// control's capacity is smaller than a Cmsghdr or than controlMessageSize,
// control is left with length 0 and nothing is written.
func setGSOSizeInControl(control *[]byte, gsoSize uint16) {
	buf := *control
	*control = buf[:0]
	if cap(buf) < int(unsafe.Sizeof(unix.Cmsghdr{})) || cap(buf) < controlMessageSize {
		return
	}
	// Expose the whole backing array so the header and payload can be written
	// in place.
	buf = buf[:cap(buf)]
	hdr := (*unix.Cmsghdr)(unsafe.Pointer(&buf[0]))
	hdr.Level = unix.SOL_UDP
	hdr.Type = unix.UDP_SEGMENT
	hdr.SetLen(unix.CmsgLen(2))
	binary.NativeEndian.PutUint16(buf[unix.SizeofCmsghdr:], gsoSize)
	*control = buf[:unix.CmsgSpace(2)]
}
// tryUpgradeToBatchingConn inspects the OS and pconn and, when batching is
// usable, wraps pconn in a *linuxBatchingConn. Otherwise pconn is returned
// as-is. batchSize is the number of messages allocated per pooled send batch.
func tryUpgradeToBatchingConn(pconn nettype.PacketConn, network string, batchSize int) nettype.PacketConn {
	if runtime.GOOS != "linux" {
		// Exclude Android.
		return pconn
	}
	switch network {
	case "udp4", "udp6":
	default:
		return pconn
	}
	if strings.HasPrefix(hostinfo.GetOSVersion(), "2.") {
		// recvmmsg/sendmmsg arrived in 2.6.33, yet 2.6.32 is still supported
		// for old NAS devices; see
		// https://github.com/tailscale/tailscale/issues/6807.
		// Cheap heuristic: a kernel version starting with "2" is treated as
		// too old for mmsg. Anyone who cares about performance is not on such
		// an ancient kernel, and UDP offload came much later, so there is
		// nothing to upgrade to.
		return pconn
	}
	uc, isUDP := pconn.(*net.UDPConn)
	if !isUDP {
		return pconn
	}

	// newSendBatch builds a batch whose messages all share one destination
	// address value, with one buffer slot and control space per message.
	newSendBatch := func() any {
		ua := &net.UDPAddr{IP: make([]byte, 16)}
		msgs := make([]ipv6.Message, batchSize)
		for i := range msgs {
			msgs[i] = ipv6.Message{
				Buffers: make([][]byte, 1),
				OOB:     make([]byte, controlMessageSize),
				Addr:    ua,
			}
		}
		return &sendBatch{ua: ua, msgs: msgs}
	}

	b := &linuxBatchingConn{
		pc:                    pconn,
		getGSOSizeFromControl: getGSOSizeFromControl,
		setGSOSizeInControl:   setGSOSizeInControl,
		sendBatchPool:         sync.Pool{New: newSendBatch},
	}
	switch network {
	case "udp4":
		b.xpc = ipv4.NewPacketConn(uc)
	case "udp6":
		b.xpc = ipv6.NewPacketConn(uc)
	default:
		panic("bogus network")
	}
	tx, rx := tryEnableUDPOffload(uc)
	b.rxOffload = rx
	b.txOffload.Store(tx)
	return b
}

View File

@@ -10,11 +10,13 @@ import (
"sync"
"syscall"
"time"
"tailscale.com/syncs"
)
// blockForeverConn is a net.PacketConn whose reads block until it is closed.
type blockForeverConn struct {
mu sync.Mutex
mu syncs.Mutex
cond *sync.Cond
closed bool
}

View File

@@ -1,182 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !(ios || android || js)
package magicsock
import (
"context"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/netip"
"slices"
"strings"
"time"
"tailscale.com/types/logger"
"tailscale.com/util/cloudenv"
)
const maxCloudInfoWait = 2 * time.Second
// cloudInfo queries a cloud provider's instance metadata service for the
// public IPs attached to the current instance.
type cloudInfo struct {
// client issues the metadata HTTP requests.
client http.Client
logf logger.Logf
// The following parameters are fixed for the lifetime of the cloudInfo
// object, but are used for testing.
cloud cloudenv.Cloud
// endpoint is the URL prefix that request paths are appended to.
endpoint string
}
// newCloudInfo returns a cloudInfo for the cloud reported by cloudenv.Get,
// pointed at the common non-routable metadata IP and logging to logf.
//
// The HTTP transport disables keep-alives and bounds each dial by
// maxCloudInfoWait.
func newCloudInfo(logf logger.Logf) *cloudInfo {
	tr := &http.Transport{
		DisableKeepAlives: true,
		// DialContext replaces the deprecated Dial field; it lets the
		// transport abandon a dial as soon as the request's context is done.
		DialContext: (&net.Dialer{
			Timeout: maxCloudInfoWait,
		}).DialContext,
	}
	return &cloudInfo{
		client:   http.Client{Transport: tr},
		logf:     logf,
		cloud:    cloudenv.Get(),
		endpoint: "http://" + cloudenv.CommonNonRoutableMetadataIP,
	}
}
// GetPublicIPs returns the public IPs attached to the current cloud instance
// when tailscaled is running in a known cloud and any such IPs exist. Only AWS
// is queried; for every other cloud the result is nil with no error.
func (ci *cloudInfo) GetPublicIPs(ctx context.Context) ([]netip.Addr, error) {
	if ci.cloud != cloudenv.AWS {
		return nil, nil
	}
	addrs, err := ci.getAWS(ctx)
	ci.logf("[v1] cloudinfo.GetPublicIPs: AWS: %v, %v", addrs, err)
	return addrs, err
}
// getAWSMetadata makes a request to the AWS metadata service at the given
// path, authenticating with the provided IMDSv2 token. The returned metadata
// is split by newline and returned as a slice.
//
// A 404 response, or a 200 response whose body is empty after trimming
// whitespace, yields a nil slice and a nil error. Any other non-200 status is
// reported as an error.
func (ci *cloudInfo) getAWSMetadata(ctx context.Context, token, path string) ([]string, error) {
	req, err := http.NewRequestWithContext(ctx, "GET", ci.endpoint+path, nil)
	if err != nil {
		return nil, fmt.Errorf("creating request to %q: %w", path, err)
	}
	req.Header.Set("X-aws-ec2-metadata-token", token)
	resp, err := ci.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("making request to metadata service %q: %w", path, err)
	}
	defer resp.Body.Close()
	switch resp.StatusCode {
	case http.StatusOK:
		// Good
	case http.StatusNotFound:
		// Nothing found, but this isn't an error; just return
		return nil, nil
	default:
		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading response body for %q: %w", path, err)
	}
	trimmed := strings.TrimSpace(string(body))
	if trimmed == "" {
		// strings.Split("", "\n") returns a one-element slice holding the
		// empty string, which callers would then treat as a real entry.
		// An empty body is reported the same way as "nothing found".
		return nil, nil
	}
	return strings.Split(trimmed, "\n"), nil
}
// getAWS returns all public IPv4 and IPv6 addresses present in the AWS
// instance metadata, sorted. The whole exchange is bounded by
// maxCloudInfoWait. Errors hit while querying individual interfaces or
// parsing addresses are only reported when no address at all was found.
func (ci *cloudInfo) getAWS(ctx context.Context) ([]netip.Addr, error) {
	const macsPath = "/latest/meta-data/network/interfaces/macs/"

	ctx, cancel := context.WithTimeout(ctx, maxCloudInfoWait)
	defer cancel()

	// A token is required before the metadata service can be queried.
	tokenReq, err := http.NewRequestWithContext(ctx, "PUT", ci.endpoint+"/latest/api/token", nil)
	if err != nil {
		return nil, fmt.Errorf("creating token request: %w", err)
	}
	tokenReq.Header.Set("X-Aws-Ec2-Metadata-Token-Ttl-Seconds", "10")
	tokenResp, err := ci.client.Do(tokenReq)
	if err != nil {
		return nil, fmt.Errorf("making token request to metadata service: %w", err)
	}
	tokenBody, err := io.ReadAll(tokenResp.Body)
	tokenResp.Body.Close()
	if err != nil {
		return nil, fmt.Errorf("reading token response body: %w", err)
	}
	if server := tokenResp.Header.Get("Server"); server != "EC2ws" {
		return nil, fmt.Errorf("unexpected server header: %q", server)
	}
	token := string(tokenBody)

	// Walk every interface and gather its public addresses, IPv4 and IPv6.
	macAddrs, err := ci.getAWSMetadata(ctx, token, macsPath)
	if err != nil {
		return nil, fmt.Errorf("getting interface MAC addresses: %w", err)
	}

	var (
		addrs []netip.Addr
		errs  []error
	)
	// collect parses each entry and files it under addrs or errs.
	collect := func(raw []string) {
		for _, s := range raw {
			ip, err := netip.ParseAddr(s)
			if err != nil {
				errs = append(errs, fmt.Errorf("parsing IP address %q: %w", s, err))
				continue
			}
			addrs = append(addrs, ip)
		}
	}
	for _, mac := range macAddrs {
		v4, err := ci.getAWSMetadata(ctx, token, macsPath+mac+"/public-ipv4s")
		if err != nil {
			// A failed IPv4 lookup also skips the IPv6 lookup for this MAC.
			errs = append(errs, fmt.Errorf("getting IPv4 addresses for %q: %w", mac, err))
			continue
		}
		collect(v4)

		v6, err := ci.getAWSMetadata(ctx, token, macsPath+mac+"/ipv6s")
		if err != nil {
			errs = append(errs, fmt.Errorf("getting IPv6 addresses for %q: %w", mac, err))
			continue
		}
		collect(v6)
	}

	// Sorting keeps the output deterministic.
	slices.SortFunc(addrs, netip.Addr.Compare)

	switch {
	case len(addrs) > 0:
		// Prefer returning whatever was found, even if some lookups failed.
		return addrs, nil
	case len(errs) > 0:
		return nil, fmt.Errorf("getting IP addresses: %w", errors.Join(errs...))
	}
	return nil, nil
}

View File

@@ -1,23 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ios || android || js
package magicsock
import (
"context"
"net/netip"
"tailscale.com/types/logger"
)
// cloudInfo is the empty variant built under the ios || android || js
// constraint; it carries no state.
type cloudInfo struct{}
// newCloudInfo returns an empty cloudInfo; the log function is ignored.
func newCloudInfo(_ logger.Logf) *cloudInfo {
return &cloudInfo{}
}
// GetPublicIPs always returns no addresses and no error in this build.
func (ci *cloudInfo) GetPublicIPs(_ context.Context) ([]netip.Addr, error) {
return nil, nil
}

View File

@@ -13,6 +13,8 @@ import (
"strings"
"time"
"tailscale.com/feature"
"tailscale.com/feature/buildfeatures"
"tailscale.com/tailcfg"
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
@@ -24,6 +26,11 @@ import (
// /debug/magicsock) or via peerapi to a peer that's owned by the same
// user (so they can e.g. inspect their phones).
func (c *Conn) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
if !buildfeatures.HasDebug {
http.Error(w, feature.ErrUnavailable.Error(), http.StatusNotImplemented)
return
}
c.mu.Lock()
defer c.mu.Unlock()
@@ -72,18 +79,18 @@ func (c *Conn) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "<h2 id=ipport><a href=#ipport>#</a> ip:port to endpoint</h2><ul>")
{
type kv struct {
ipp netip.AddrPort
pi *peerInfo
addr epAddr
pi *peerInfo
}
ent := make([]kv, 0, len(c.peerMap.byIPPort))
for k, v := range c.peerMap.byIPPort {
ent := make([]kv, 0, len(c.peerMap.byEpAddr))
for k, v := range c.peerMap.byEpAddr {
ent = append(ent, kv{k, v})
}
sort.Slice(ent, func(i, j int) bool { return ipPortLess(ent[i].ipp, ent[j].ipp) })
sort.Slice(ent, func(i, j int) bool { return epAddrLess(ent[i].addr, ent[j].addr) })
for _, e := range ent {
ep := e.pi.ep
shortStr := ep.publicKey.ShortString()
fmt.Fprintf(w, "<li>%v: <a href='#%v'>%v</a></li>\n", e.ipp, strings.Trim(shortStr, "[]"), shortStr)
fmt.Fprintf(w, "<li>%v: <a href='#%v'>%v</a></li>\n", e.addr, strings.Trim(shortStr, "[]"), shortStr)
}
}
@@ -148,11 +155,11 @@ func printEndpointHTML(w io.Writer, ep *endpoint) {
for ipp := range ep.endpointState {
eps = append(eps, ipp)
}
sort.Slice(eps, func(i, j int) bool { return ipPortLess(eps[i], eps[j]) })
sort.Slice(eps, func(i, j int) bool { return addrPortLess(eps[i], eps[j]) })
io.WriteString(w, "<p>Endpoints:</p><ul>")
for _, ipp := range eps {
s := ep.endpointState[ipp]
if ipp == ep.bestAddr.AddrPort {
if ipp == ep.bestAddr.ap && !ep.bestAddr.vni.IsSet() {
fmt.Fprintf(w, "<li><b>%s</b>: (best)<ul>", ipp)
} else {
fmt.Fprintf(w, "<li>%s: ...<ul>", ipp)
@@ -196,9 +203,19 @@ func peerDebugName(p tailcfg.NodeView) string {
return p.Hostinfo().Hostname()
}
func ipPortLess(a, b netip.AddrPort) bool {
// addrPortLess reports whether a sorts before b, ordering by address first
// and by port when the addresses are equal.
func addrPortLess(a, b netip.AddrPort) bool {
	switch c := a.Addr().Compare(b.Addr()); {
	case c < 0:
		return true
	case c > 0:
		return false
	}
	return a.Port() < b.Port()
}
// epAddrLess reports whether a sorts before b, ordering by address, then by
// port, then by the value returned from vni.Get().
func epAddrLess(a, b epAddr) bool {
	if c := a.ap.Addr().Compare(b.ap.Addr()); c != 0 {
		return c < 0
	}
	if pa, pb := a.ap.Port(), b.ap.Port(); pa != pb {
		return pa < pb
	}
	return a.vni.Get() < b.vni.Get()
}

View File

@@ -62,6 +62,9 @@ var (
//
//lint:ignore U1000 used on Linux/Darwin only
debugPMTUD = envknob.RegisterBool("TS_DEBUG_PMTUD")
// debugNeverDirectUDP disables the use of direct UDP connections, forcing
// all peer communication over DERP or peer relay.
debugNeverDirectUDP = envknob.RegisterBool("TS_DEBUG_NEVER_DIRECT_UDP")
// Hey you! Adding a new debugknob? Make sure to stub it out in the
// debugknobs_stubs.go file too.
)

View File

@@ -31,3 +31,4 @@ func debugRingBufferMaxSizeBytes() int { return 0 }
// inTest is a stub that always reports false.
func inTest() bool { return false }
// debugPeerMap is a stub that always reports false.
func debugPeerMap() bool { return false }
// pretendpoints is a stub that always returns an empty, non-nil slice.
func pretendpoints() []netip.AddrPort { return []netip.AddrPort{} }
// debugNeverDirectUDP is a stub that always reports false.
func debugNeverDirectUDP() bool { return false }

View File

@@ -11,9 +11,7 @@ import (
"net"
"net/netip"
"reflect"
"runtime"
"slices"
"sync"
"time"
"unsafe"
@@ -21,7 +19,6 @@ import (
"tailscale.com/derp"
"tailscale.com/derp/derphttp"
"tailscale.com/health"
"tailscale.com/logtail/backoff"
"tailscale.com/net/dnscache"
"tailscale.com/net/netcheck"
"tailscale.com/net/tsaddr"
@@ -30,9 +27,9 @@ import (
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/util/backoff"
"tailscale.com/util/mak"
"tailscale.com/util/rands"
"tailscale.com/util/sysresources"
"tailscale.com/util/testenv"
)
@@ -94,7 +91,7 @@ func (c *Conn) fallbackDERPRegionForPeer(peer key.NodePublic) (regionID int) {
type activeDerp struct {
c *derphttp.Client
cancel context.CancelFunc
writeCh chan<- derpWriteRequest
writeCh chan derpWriteRequest
// lastWrite is the time of the last request for its write
// channel (currently even if there was no write).
// It is always non-nil and initialized to a non-zero Time.
@@ -219,17 +216,28 @@ func (c *Conn) derpRegionCodeLocked(regionID int) string {
return ""
}
// setHomeDERPGaugeLocked records derpNum in the home DERP gauge metric. It is
// a no-op when no gauge is configured.
//
// c.mu must be held.
func (c *Conn) setHomeDERPGaugeLocked(derpNum int) {
	if c.homeDERPGauge == nil {
		return
	}
	c.homeDERPGauge.Set(float64(derpNum))
}
// c.mu must NOT be held.
func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
c.mu.Lock()
defer c.mu.Unlock()
if !c.wantDerpLocked() {
c.myDerp = 0
c.setHomeDERPGaugeLocked(0)
c.health.SetMagicSockDERPHome(0, c.homeless)
return false
}
if c.homeless {
c.myDerp = 0
c.setHomeDERPGaugeLocked(0)
c.health.SetMagicSockDERPHome(0, c.homeless)
return false
}
@@ -241,6 +249,7 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
metricDERPHomeChange.Add(1)
}
c.myDerp = derpNum
c.setHomeDERPGaugeLocked(derpNum)
c.health.SetMagicSockDERPHome(derpNum, c.homeless)
if c.privateKey.IsZero() {
@@ -282,59 +291,20 @@ func (c *Conn) goDerpConnect(regionID int) {
go c.derpWriteChanForRegion(regionID, key.NodePublic{})
}
// bufferedDerpWrites caches the result computed by
// bufferedDerpWritesBeforeDrop; bufferedDerpWritesOnce guards its one-time
// initialization.
var (
	bufferedDerpWrites     int
	bufferedDerpWritesOnce sync.Once
)

// bufferedDerpWritesBeforeDrop reports how many packet writes may be queued
// for the DERP client to put on the wire before dropping starts.
func bufferedDerpWritesBeforeDrop() int {
	// minBufferedWrites is the previous fixed value, kept as a floor.
	const minBufferedWrites = 32

	// Mobile devices always get the floor. This is checked outside the
	// sync.Once to avoid that overhead.
	switch runtime.GOOS {
	case "ios", "android":
		return minBufferedWrites
	}

	bufferedDerpWritesOnce.Do(func() {
		// Rough sizing. With the previous fixed value of 32, total memory
		// consumed can reach:
		//     numDerpRegions * messages/region * sizeof(message)
		//
		// Assume 100 DERP regions for this estimate; at time of writing
		// (2023-04-03) there are 24.
		//
		// A reasonable upper bound on the worst-case average message size is
		// a *disco.CallMeMaybe with 16 endpoints; with
		// sizeof(netip.AddrPort) = 32 that is 512 bytes, giving:
		//     100 * 32 * 512 = 1638400 (1.6MiB)
		//
		// On a reasonably small node with 4GiB of memory that is connected to
		// every region and under heavy load, 1.6MiB is about 0.04% of system
		// memory. Double that to 0.08% and scale with total system memory;
		// a 16GiB Linux box should then buffer just over 256 messages.
		const (
			theoreticalDERPRegions  = 100
			messageMaximumSizeBytes = 512
		)
		memoryUsable := float64(sysresources.TotalMemory()) * 0.0008
		scaled := int(memoryUsable / (theoreticalDERPRegions * messageMaximumSizeBytes))

		// Never go below the previous minimum.
		bufferedDerpWrites = max(minBufferedWrites, scaled)
	})
	return bufferedDerpWrites
}
// derpWriteQueueDepth is the depth of the in-process write queue to a single
// DERP region. DERP connections are TCP, and so the actual write queue depth is
// substantially larger than this suggests - often scaling into megabytes
// depending on dynamic TCP parameters and platform TCP tuning. This queue is
// in excess of the TCP buffer depth, which means it's almost pure buffer bloat,
// and does not want to be deep - if there are key situations where a node can't
// keep up, either the TCP link to DERP is too slow, or there is a
// synchronization issue in the write path, fixes should be focused on those
// paths, rather than extending this queue.
// TODO(raggi): make this even shorter, ideally this should be a fairly direct
// line into a socket TCP buffer. The challenge at present is that connect and
// reconnect are in the write path and we don't want to block other write
// operations on those.
const derpWriteQueueDepth = 32
// derpWriteChanForRegion returns a channel to which to send DERP packet write
// requests. It creates a new DERP connection to regionID if necessary.
@@ -344,7 +314,7 @@ func bufferedDerpWritesBeforeDrop() int {
//
// It returns nil if the network is down, the Conn is closed, or the regionID is
// not known.
func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan<- derpWriteRequest {
func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan derpWriteRequest {
if c.networkDown() {
return nil
}
@@ -429,7 +399,7 @@ func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan<-
dc.DNSCache = dnscache.Get()
ctx, cancel := context.WithCancel(c.connCtx)
ch := make(chan derpWriteRequest, bufferedDerpWritesBeforeDrop())
ch := make(chan derpWriteRequest, derpWriteQueueDepth)
ad.c = dc
ad.writeCh = ch
@@ -740,8 +710,11 @@ func (c *Conn) processDERPReadResult(dm derpReadResult, b []byte) (n int, ep *en
return 0, nil
}
ipp := netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))
if c.handleDiscoMessage(b[:n], ipp, dm.src, discoRXPathDERP) {
srcAddr := epAddr{ap: netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))}
pt, isGeneveEncap := packetLooksLike(b[:n])
if pt == packetLooksLikeDisco &&
!isGeneveEncap { // We should never receive Geneve-encapsulated disco over DERP.
c.handleDiscoMessage(b[:n], srcAddr, false, dm.src, discoRXPathDERP)
return 0, nil
}
@@ -755,9 +728,9 @@ func (c *Conn) processDERPReadResult(dm derpReadResult, b []byte) (n int, ep *en
return 0, nil
}
ep.noteRecvActivity(ipp, mono.Now())
if stats := c.stats.Load(); stats != nil {
stats.UpdateRxPhysical(ep.nodeAddr, ipp, 1, dm.n)
ep.noteRecvActivity(srcAddr, mono.Now())
if update := c.connCounter.Load(); update != nil {
update(0, netip.AddrPortFrom(ep.nodeAddr, 0), srcAddr.ap, 1, dm.n, true)
}
c.metrics.inboundPacketsDERPTotal.Add(1)
@@ -875,7 +848,6 @@ func (c *Conn) maybeCloseDERPsOnRebind(okayLocalIPs []netip.Prefix) {
c.closeOrReconnectDERPLocked(regionID, "rebind-default-route-change")
continue
}
regionID := regionID
dc := ad.c
go func() {
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)

View File

@@ -0,0 +1,58 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"sync/atomic"
"tailscale.com/types/key"
)
// discoKeyPair bundles a disco private key with values derived from it, so
// that all three can be published together through a single pointer.
type discoKeyPair struct {
private key.DiscoPrivate
public key.DiscoPublic
short string // public.ShortString()
}
// discoAtomic is an atomic container for a disco private key, public key, and
// the public key's ShortString. The private and public keys are always kept
// synchronized.
//
// The zero value is not ready for use. Use [discoAtomic.Set] to provide a
// usable value; the accessors dereference the stored pointer without a nil
// check.
type discoAtomic struct {
pair atomic.Pointer[discoKeyPair]
}
// Pair returns the private and public keys together atomically.
// Code that needs both the private and public keys synchronized should
// use Pair instead of calling Private and Public separately.
//
// Both values come from a single load of the stored pair.
func (dk *discoAtomic) Pair() (key.DiscoPrivate, key.DiscoPublic) {
p := dk.pair.Load()
return p.private, p.public
}
// Private returns the private key from the currently stored pair.
func (dk *discoAtomic) Private() key.DiscoPrivate {
return dk.pair.Load().private
}
// Public returns the public key from the currently stored pair.
func (dk *discoAtomic) Public() key.DiscoPublic {
return dk.pair.Load().public
}
// Short returns the short string of the public key (see
// [key.DiscoPublic.ShortString]), as cached by Set.
func (dk *discoAtomic) Short() string {
return dk.pair.Load().short
}
// Set stores private along with the public key and short string derived from
// it. All three are published together in a single atomic store.
func (dk *discoAtomic) Set(private key.DiscoPrivate) {
	pair := &discoKeyPair{
		private: private,
		public:  private.Public(),
	}
	pair.short = pair.public.ShortString()
	dk.pair.Store(pair)
}

View File

@@ -22,8 +22,9 @@ const _discoPingPurpose_name = "DiscoveryHeartbeatCLIHeartbeatForUDPLifetime"
var _discoPingPurpose_index = [...]uint8{0, 9, 18, 21, 44}
func (i discoPingPurpose) String() string {
if i < 0 || i >= discoPingPurpose(len(_discoPingPurpose_index)-1) {
idx := int(i) - 0
if i < 0 || idx >= len(_discoPingPurpose_index)-1 {
return "discoPingPurpose(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _discoPingPurpose_name[_discoPingPurpose_index[i]:_discoPingPurpose_index[i+1]]
return _discoPingPurpose_name[_discoPingPurpose_index[idx]:_discoPingPurpose_index[idx+1]]
}

View File

@@ -17,7 +17,6 @@ import (
"reflect"
"runtime"
"slices"
"sync"
"sync/atomic"
"time"
@@ -25,14 +24,16 @@ import (
"golang.org/x/net/ipv6"
"tailscale.com/disco"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/packet"
"tailscale.com/net/stun"
"tailscale.com/net/tstun"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/util/mak"
"tailscale.com/util/ringbuffer"
"tailscale.com/util/ringlog"
"tailscale.com/util/slicesx"
)
@@ -59,7 +60,7 @@ type endpoint struct {
lastRecvWG mono.Time // last time there were incoming packets from this peer destined for wireguard-go (e.g. not disco)
lastRecvUDPAny mono.Time // last time there were incoming UDP packets from this peer of any kind
numStopAndResetAtomic int64
debugUpdates *ringbuffer.RingBuffer[EndpointChange]
debugUpdates *ringlog.RingLog[EndpointChange]
// These fields are initialized once and never modified.
c *Conn
@@ -72,19 +73,20 @@ type endpoint struct {
disco atomic.Pointer[endpointDisco] // if the peer supports disco, the key and short string
// mu protects all following fields.
mu sync.Mutex // Lock ordering: Conn.mu, then endpoint.mu
mu syncs.Mutex // Lock ordering: Conn.mu, then endpoint.mu
heartBeatTimer *time.Timer // nil when idle
lastSendExt mono.Time // last time there were outgoing packets sent to this peer from an external trigger (e.g. wireguard-go or disco pingCLI)
lastSendAny mono.Time // last time there were outgoing packets sent this peer from any trigger, internal or external to magicsock
lastFullPing mono.Time // last time we pinged all disco or wireguard only endpoints
derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)
heartBeatTimer *time.Timer // nil when idle
lastSendExt mono.Time // last time there were outgoing packets sent to this peer from an external trigger (e.g. wireguard-go or disco pingCLI)
lastSendAny mono.Time // last time there were outgoing packets sent this peer from any trigger, internal or external to magicsock
lastFullPing mono.Time // last time we pinged all disco or wireguard only endpoints
lastUDPRelayPathDiscovery mono.Time // last time we ran UDP relay path discovery
derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)
bestAddr addrQuality // best non-DERP path; zero if none; mutate via setBestAddrLocked()
bestAddrAt mono.Time // time best address re-confirmed
trustBestAddrUntil mono.Time // time when bestAddr expires
sentPing map[stun.TxID]sentPing
endpointState map[netip.AddrPort]*endpointState
endpointState map[netip.AddrPort]*endpointState // netip.AddrPort type for key (instead of [epAddr]) as [endpointState] is irrelevant for Geneve-encapsulated paths
isCallMeMaybeEP map[netip.AddrPort]bool
// The following fields are related to the new "silent disco"
@@ -95,10 +97,40 @@ type endpoint struct {
expired bool // whether the node has expired
isWireguardOnly bool // whether the endpoint is WireGuard only
relayCapable bool // whether the node is capable of speaking via a [tailscale.com/net/udprelay.Server]
}
// udpRelayEndpointReady decides whether the relay [addrQuality] maybeBest
// should be installed as de.bestAddr. It is only called by [relayManager] once
// it has determined maybeBest is functional via [disco.Pong] reception.
//
// maybeBest becomes de.bestAddr, trusted for trustUDPAddrDuration, if any of
// the following holds; otherwise de is left unchanged:
//  1. de.bestAddr is untrusted. betterAddr does not consider time-based
//     trust, so this is checked separately.
//  2. maybeBest and de.bestAddr are on the same relay server. If the
//     maybeBest handshake happened to use a different source
//     address/transport, the relay will drop packets sent from the old
//     de.bestAddr.
//  3. maybeBest is a betterAddr than de.bestAddr.
func (de *endpoint) udpRelayEndpointReady(maybeBest addrQuality) {
	de.mu.Lock()
	defer de.mu.Unlock()

	now := mono.Now()
	bestUntrusted := !now.Before(de.trustBestAddrUntil)
	onSameRelay := de.bestAddr.vni.IsSet() &&
		maybeBest.relayServerDisco.Compare(de.bestAddr.relayServerDisco) == 0
	if !(bestUntrusted || onSameRelay || betterAddr(maybeBest, de.bestAddr)) {
		return
	}

	// TODO(jwhited): add observability around !curBestAddrTrusted and sameRelayServer
	// TODO(jwhited): collapse path change logging with endpoint.handlePongConnLocked()
	de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v", de.publicKey.ShortString(), de.discoShort(), maybeBest.epAddr, maybeBest.wireMTU)
	de.setBestAddrLocked(maybeBest)
	de.trustBestAddrUntil = now.Add(trustUDPAddrDuration)
}
func (de *endpoint) setBestAddrLocked(v addrQuality) {
if v.AddrPort != de.bestAddr.AddrPort {
if v.epAddr != de.bestAddr.epAddr {
de.probeUDPLifetime.resetCycleEndpointLocked()
}
de.bestAddr = v
@@ -134,11 +166,11 @@ type probeUDPLifetime struct {
// timeout cliff in the future.
timer *time.Timer
// bestAddr contains the endpoint.bestAddr.AddrPort at the time a cycle was
// bestAddr contains the endpoint.bestAddr.epAddr at the time a cycle was
// scheduled to start. A probing cycle is 1:1 with the current
// endpoint.bestAddr.AddrPort in the interest of simplicity. When
// endpoint.bestAddr.AddrPort changes, any active probing cycle will reset.
bestAddr netip.AddrPort
// endpoint.bestAddr.epAddr in the interest of simplicity. When
// endpoint.bestAddr.epAddr changes, any active probing cycle will reset.
bestAddr epAddr
// cycleStartedAt contains the time at which the first cliff
// (ProbeUDPLifetimeConfig.Cliffs[0]) was pinged for the current/last cycle.
cycleStartedAt time.Time
@@ -190,7 +222,7 @@ func (p *probeUDPLifetime) resetCycleEndpointLocked() {
}
p.cycleActive = false
p.currentCliff = 0
p.bestAddr = netip.AddrPort{}
p.bestAddr = epAddr{}
}
// ProbeUDPLifetimeConfig represents the configuration for probing UDP path
@@ -333,7 +365,7 @@ type endpointDisco struct {
}
type sentPing struct {
to netip.AddrPort
to epAddr
at mono.Time
timer *time.Timer // timeout timer
purpose discoPingPurpose
@@ -445,7 +477,8 @@ func (de *endpoint) deleteEndpointLocked(why string, ep netip.AddrPort) {
From: ep,
})
delete(de.endpointState, ep)
if de.bestAddr.AddrPort == ep {
asEpAddr := epAddr{ap: ep}
if de.bestAddr.epAddr == asEpAddr {
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
What: "deleteEndpointLocked-bestAddr-" + why,
@@ -467,11 +500,12 @@ func (de *endpoint) initFakeUDPAddr() {
}
// noteRecvActivity records receive activity on de, and invokes
// Conn.noteRecvActivity no more than once every 10s.
func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
// Conn.noteRecvActivity no more than once every 10s, returning true if it
// was called, otherwise false.
func (de *endpoint) noteRecvActivity(src epAddr, now mono.Time) bool {
if de.isWireguardOnly {
de.mu.Lock()
de.bestAddr.AddrPort = ipp
de.bestAddr.ap = src.ap
de.bestAddrAt = now
de.trustBestAddrUntil = now.Add(5 * time.Second)
de.mu.Unlock()
@@ -481,7 +515,7 @@ func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
// kick off discovery disco pings every trustUDPAddrDuration and mirror
// to DERP.
de.mu.Lock()
if de.heartbeatDisabled && de.bestAddr.AddrPort == ipp {
if de.heartbeatDisabled && de.bestAddr.epAddr == src {
de.trustBestAddrUntil = now.Add(trustUDPAddrDuration)
}
de.mu.Unlock()
@@ -492,10 +526,12 @@ func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
de.lastRecvWG.StoreAtomic(now)
if de.c.noteRecvActivity == nil {
return
return false
}
de.c.noteRecvActivity(de.publicKey)
return true
}
return false
}
func (de *endpoint) discoShort() string {
@@ -529,10 +565,10 @@ func (de *endpoint) DstToBytes() []byte { return packIPPort(de.fakeWGAddr) }
// de.mu must be held.
//
// TODO(val): Rewrite the addrFor*Locked() variations to share code.
func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr, derpAddr netip.AddrPort, sendWGPing bool) {
udpAddr = de.bestAddr.AddrPort
func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr epAddr, derpAddr netip.AddrPort, sendWGPing bool) {
udpAddr = de.bestAddr.epAddr
if udpAddr.IsValid() && !now.After(de.trustBestAddrUntil) {
if udpAddr.ap.IsValid() && !now.After(de.trustBestAddrUntil) {
return udpAddr, netip.AddrPort{}, false
}
@@ -551,12 +587,12 @@ func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr, derpAddr netip.Ad
// addrForWireGuardSendLocked returns the address that should be used for
// sending the next packet. If a packet has never or not recently been sent to
// the endpoint, then a randomly selected address for the endpoint is returned,
// as well as a bool indiciating that WireGuard discovery pings should be started.
// as well as a bool indicating that WireGuard discovery pings should be started.
// If the addresses have latency information available, then the address with the
// best latency is used.
//
// de.mu must be held.
func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.AddrPort, shouldPing bool) {
func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr epAddr, shouldPing bool) {
if len(de.endpointState) == 0 {
de.c.logf("magicsock: addrForSendWireguardLocked: [unexpected] no candidates available for endpoint")
return udpAddr, false
@@ -580,22 +616,22 @@ func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.Add
// TODO(catzkorn): Consider a small increase in latency to use
// IPv6 in comparison to IPv4, when possible.
lowestLatency = latency
udpAddr = ipp
udpAddr.ap = ipp
}
}
}
needPing := len(de.endpointState) > 1 && now.Sub(oldestPing) > wireguardPingInterval
if !udpAddr.IsValid() {
if !udpAddr.ap.IsValid() {
candidates := slicesx.MapKeys(de.endpointState)
// Randomly select an address to use until we retrieve latency information
// and give it a short trustBestAddrUntil time so we avoid flapping between
// addresses while waiting on latency information to be populated.
udpAddr = candidates[rand.IntN(len(candidates))]
udpAddr.ap = candidates[rand.IntN(len(candidates))]
}
de.bestAddr.AddrPort = udpAddr
de.bestAddr.epAddr = epAddr{ap: udpAddr.ap}
// Only extend trustBestAddrUntil by one second to avoid packet
// reordering and/or CPU usage from random selection during the first
// second. We should receive a response due to a WireGuard handshake in
@@ -613,18 +649,18 @@ func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.Add
// both of the returned UDP address and DERP address may be non-zero.
//
// de.mu must be held.
func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, derpAddr netip.AddrPort) {
func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr epAddr, derpAddr netip.AddrPort) {
if size == 0 {
udpAddr, derpAddr, _ = de.addrForSendLocked(now)
return
}
udpAddr = de.bestAddr.AddrPort
udpAddr = de.bestAddr.epAddr
pathMTU := de.bestAddr.wireMTU
requestedMTU := pingSizeToPktLen(size, udpAddr.Addr().Is6())
requestedMTU := pingSizeToPktLen(size, udpAddr)
mtuOk := requestedMTU <= pathMTU
if udpAddr.IsValid() && mtuOk {
if udpAddr.ap.IsValid() && mtuOk {
if !now.After(de.trustBestAddrUntil) {
return udpAddr, netip.AddrPort{}
}
@@ -637,7 +673,7 @@ func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, der
// for the packet. Return a zero-value udpAddr to signal that we should
// keep probing the path MTU to all addresses for this endpoint, and a
// valid DERP addr to signal that we should also send via DERP.
return netip.AddrPort{}, de.derpAddr
return epAddr{}, de.derpAddr
}
// maybeProbeUDPLifetimeLocked returns an afterInactivityFor duration and true
@@ -648,7 +684,7 @@ func (de *endpoint) maybeProbeUDPLifetimeLocked() (afterInactivityFor time.Durat
if p == nil {
return afterInactivityFor, false
}
if !de.bestAddr.IsValid() {
if !de.bestAddr.ap.IsValid() {
return afterInactivityFor, false
}
epDisco := de.disco.Load()
@@ -661,7 +697,7 @@ func (de *endpoint) maybeProbeUDPLifetimeLocked() (afterInactivityFor time.Durat
// shuffling probing probability where the local node ends up with a large
// key value lexicographically relative to the other nodes it tends to
// communicate with. If de's disco key changes, the cycle will reset.
if de.c.discoPublic.Compare(epDisco.key) >= 0 {
if de.c.discoAtomic.Public().Compare(epDisco.key) >= 0 {
// lower disco pub key node probes higher
return afterInactivityFor, false
}
@@ -700,7 +736,7 @@ func (de *endpoint) scheduleHeartbeatForLifetimeLocked(after time.Duration, via
}
de.c.dlogf("[v1] magicsock: disco: scheduling UDP lifetime probe for cliff=%v via=%v to %v (%v)",
p.currentCliffDurationEndpointLocked(), via, de.publicKey.ShortString(), de.discoShort())
p.bestAddr = de.bestAddr.AddrPort
p.bestAddr = de.bestAddr.epAddr
p.timer = time.AfterFunc(after, de.heartbeatForLifetime)
if via == heartbeatForLifetimeViaSelf {
metricUDPLifetimeCliffsRescheduled.Add(1)
@@ -728,7 +764,7 @@ func (de *endpoint) heartbeatForLifetime() {
return
}
p.timer = nil
if !p.bestAddr.IsValid() || de.bestAddr.AddrPort != p.bestAddr {
if !p.bestAddr.ap.IsValid() || de.bestAddr.epAddr != p.bestAddr {
// best path changed
p.resetCycleEndpointLocked()
return
@@ -760,7 +796,7 @@ func (de *endpoint) heartbeatForLifetime() {
}
de.c.dlogf("[v1] magicsock: disco: sending disco ping for UDP lifetime probe cliff=%v to %v (%v)",
p.currentCliffDurationEndpointLocked(), de.publicKey.ShortString(), de.discoShort())
de.startDiscoPingLocked(de.bestAddr.AddrPort, mono.Now(), pingHeartbeatForUDPLifetime, 0, nil)
de.startDiscoPingLocked(de.bestAddr.epAddr, mono.Now(), pingHeartbeatForUDPLifetime, 0, nil)
}
// heartbeat is called every heartbeatInterval to keep the best UDP path alive,
@@ -818,8 +854,8 @@ func (de *endpoint) heartbeat() {
}
udpAddr, _, _ := de.addrForSendLocked(now)
if udpAddr.IsValid() {
// We have a preferred path. Ping that every 2 seconds.
if udpAddr.ap.IsValid() {
// We have a preferred path. Ping that every 'heartbeatInterval'.
de.startDiscoPingLocked(udpAddr, now, pingHeartbeat, 0, nil)
}
@@ -827,6 +863,10 @@ func (de *endpoint) heartbeat() {
de.sendDiscoPingsLocked(now, true)
}
if de.wantUDPRelayPathDiscoveryLocked(now) {
de.discoverUDPRelayPathsLocked(now)
}
de.heartBeatTimer = time.AfterFunc(heartbeatInterval, de.heartbeat)
}
@@ -837,6 +877,53 @@ func (de *endpoint) setHeartbeatDisabled(v bool) {
de.heartbeatDisabled = v
}
// discoverUDPRelayPathsLocked starts UDP relay path discovery for de via
// de.c.relayManager, and records now as the time discovery last ran.
//
// The relay manager is handed the current de.bestAddr together with whether
// that address is still trusted. Trust is evaluated against the
// caller-supplied now rather than a fresh clock reading, so it agrees with
// the decision [endpoint.wantUDPRelayPathDiscoveryLocked] made for the same
// instant.
//
// de.mu must be held.
func (de *endpoint) discoverUDPRelayPathsLocked(now mono.Time) {
	de.lastUDPRelayPathDiscovery = now
	lastBest := de.bestAddr
	lastBestIsTrusted := now.Before(de.trustBestAddrUntil)
	de.c.relayManager.startUDPRelayPathDiscoveryFor(de, lastBest, lastBestIsTrusted)
}
// wantUDPRelayPathDiscoveryLocked reports whether UDP relay path discovery
// should be kicked off for de at time now.
//
// It reports false on js, when the Conn has no peer relay servers, when the
// peer is not relay capable, when a direct bestAddr is still trusted, or when
// discovery last ran less than discoverUDPRelayPathsInterval ago. Past those
// gates it reports true if trust in bestAddr has lapsed, or if a previous
// discovery ran at least upgradeUDPRelayInterval ago.
func (de *endpoint) wantUDPRelayPathDiscoveryLocked(now mono.Time) bool {
	switch {
	case runtime.GOOS == "js":
		return false
	case !de.c.hasPeerRelayServers.Load():
		// Changes in this value between its access and a call to
		// [endpoint.discoverUDPRelayPathsLocked] are fine, we will eventually
		// do the "right" thing during future path discovery. The worst case is
		// we suppress path discovery for the current cycle, or we unnecessarily
		// call into [relayManager] and do some wasted work.
		return false
	case !de.relayCapable:
		return false
	case de.bestAddr.isDirect() && now.Before(de.trustBestAddrUntil):
		return false
	}

	hasRunBefore := !de.lastUDPRelayPathDiscovery.IsZero()
	if hasRunBefore && now.Sub(de.lastUDPRelayPathDiscovery) < discoverUDPRelayPathsInterval {
		// Rate-limit: discovery ran too recently.
		return false
	}
	// TODO(jwhited): consider applying 'goodEnoughLatency' suppression here,
	// but not until we have a strategy for triggering CallMeMaybeVia regularly
	// and/or enabling inbound packets to act as a UDP relay path discovery
	// trigger, otherwise clients without relay servers may fall off a UDP
	// relay path and never come back. They are dependent on the remote side
	// regularly TX'ing CallMeMaybeVia, which currently only happens as part
	// of full UDP relay path discovery.
	if now.After(de.trustBestAddrUntil) {
		return true
	}
	return hasRunBefore && now.Sub(de.lastUDPRelayPathDiscovery) >= upgradeUDPRelayInterval
}
// wantFullPingLocked reports whether we should ping to all our peers looking for
// a better path.
//
@@ -845,7 +932,7 @@ func (de *endpoint) wantFullPingLocked(now mono.Time) bool {
if runtime.GOOS == "js" {
return false
}
if !de.bestAddr.IsValid() || de.lastFullPing.IsZero() {
if !de.bestAddr.isDirect() || de.lastFullPing.IsZero() {
return true
}
if now.After(de.trustBestAddrUntil) {
@@ -854,7 +941,7 @@ func (de *endpoint) wantFullPingLocked(now mono.Time) bool {
if de.bestAddr.latency <= goodEnoughLatency {
return false
}
if now.Sub(de.lastFullPing) >= upgradeInterval {
if now.Sub(de.lastFullPing) >= upgradeUDPDirectInterval {
return true
}
return false
@@ -905,17 +992,38 @@ func (de *endpoint) discoPing(res *ipnstate.PingResult, size int, cb func(*ipnst
udpAddr, derpAddr := de.addrForPingSizeLocked(now, size)
if derpAddr.IsValid() {
de.startDiscoPingLocked(derpAddr, now, pingCLI, size, resCB)
de.startDiscoPingLocked(epAddr{ap: derpAddr}, now, pingCLI, size, resCB)
}
if udpAddr.IsValid() && now.Before(de.trustBestAddrUntil) {
// Already have an active session, so just ping the address we're using.
// Otherwise "tailscale ping" results to a node on the local network
// can look like they're bouncing between, say 10.0.0.0/9 and the peer's
// IPv6 address, both 1ms away, and it's random who replies first.
switch {
case udpAddr.ap.IsValid() && now.Before(de.trustBestAddrUntil):
// We have a "trusted" direct OR peer relay address, ping it.
de.startDiscoPingLocked(udpAddr, now, pingCLI, size, resCB)
} else {
if !udpAddr.vni.IsSet() {
// If the path is direct we do not want to fallthrough to pinging
// all candidate direct paths, otherwise "tailscale ping" results to
// a node on the local network can look like they're bouncing
// between, say 10.0.0.0/8 and the peer's IPv6 address, both 1ms
// away, and it's random who replies first. cb() is called with the
// first reply, vs background path discovery that is subject to
// betterAddr() comparison and hysteresis
break
}
// If the trusted path is via a peer relay we want to fallthrough in
// order to also try all candidate direct paths.
fallthrough
default:
// Ping all candidate direct paths and start peer relay path discovery,
// if appropriate. This work overlaps with what [de.heartbeat] will
// periodically fire when it calls [de.sendDiscoPingsLocked] and
// [de.discoverUDPRelayPathsLocked], but a user-initiated [pingCLI] is
// a "do it now" operation that should not be subject to
// [heartbeatInterval] tick or [discoPingInterval] rate-limiting.
for ep := range de.endpointState {
de.startDiscoPingLocked(ep, now, pingCLI, size, resCB)
de.startDiscoPingLocked(epAddr{ap: ep}, now, pingCLI, size, resCB)
}
if de.wantUDPRelayPathDiscoveryLocked(now) {
de.discoverUDPRelayPathsLocked(now)
}
}
}
@@ -926,7 +1034,7 @@ var (
errPingTooBig = errors.New("ping size too big")
)
func (de *endpoint) send(buffs [][]byte) error {
func (de *endpoint) send(buffs [][]byte, offset int) error {
de.mu.Lock()
if de.expired {
de.mu.Unlock()
@@ -940,14 +1048,17 @@ func (de *endpoint) send(buffs [][]byte) error {
if startWGPing {
de.sendWireGuardOnlyPingsLocked(now)
}
} else if !udpAddr.IsValid() || now.After(de.trustBestAddrUntil) {
} else if !udpAddr.isDirect() || now.After(de.trustBestAddrUntil) {
de.sendDiscoPingsLocked(now, true)
if de.wantUDPRelayPathDiscoveryLocked(now) {
de.discoverUDPRelayPathsLocked(now)
}
}
de.noteTxActivityExtTriggerLocked(now)
de.lastSendAny = now
de.mu.Unlock()
if !udpAddr.IsValid() && !derpAddr.IsValid() {
if !udpAddr.ap.IsValid() && !derpAddr.IsValid() {
// Make a last ditch effort to see if we have a DERP route for them. If
// they contacted us over DERP and we don't know their UDP endpoints or
// their DERP home, we can at least assume they're reachable over the
@@ -959,8 +1070,8 @@ func (de *endpoint) send(buffs [][]byte) error {
}
}
var err error
if udpAddr.IsValid() {
_, err = de.c.sendUDPBatch(udpAddr, buffs)
if udpAddr.ap.IsValid() {
_, err = de.c.sendUDPBatch(udpAddr, buffs, offset)
// If the error is known to indicate that the endpoint is no longer
// usable, clear the endpoint statistics so that the next send will
@@ -971,37 +1082,49 @@ func (de *endpoint) send(buffs [][]byte) error {
var txBytes int
for _, b := range buffs {
txBytes += len(b)
txBytes += len(b[offset:])
}
switch {
case udpAddr.Addr().Is4():
de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes))
case udpAddr.Addr().Is6():
de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes))
case udpAddr.ap.Addr().Is4():
if udpAddr.vni.IsSet() {
de.c.metrics.outboundPacketsPeerRelayIPv4Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesPeerRelayIPv4Total.Add(int64(txBytes))
} else {
de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes))
}
case udpAddr.ap.Addr().Is6():
if udpAddr.vni.IsSet() {
de.c.metrics.outboundPacketsPeerRelayIPv6Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesPeerRelayIPv6Total.Add(int64(txBytes))
} else {
de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes))
}
}
// TODO(raggi): needs updating for accuracy, as in error conditions we may have partial sends.
if stats := de.c.stats.Load(); err == nil && stats != nil {
stats.UpdateTxPhysical(de.nodeAddr, udpAddr, len(buffs), txBytes)
if update := de.c.connCounter.Load(); err == nil && update != nil {
update(0, netip.AddrPortFrom(de.nodeAddr, 0), udpAddr.ap, len(buffs), txBytes, false)
}
}
if derpAddr.IsValid() {
allOk := true
var txBytes int
for _, buff := range buffs {
buff = buff[offset:]
const isDisco = false
ok, _ := de.c.sendAddr(derpAddr, de.publicKey, buff, isDisco)
const isGeneveEncap = false
ok, _ := de.c.sendAddr(derpAddr, de.publicKey, buff, isDisco, isGeneveEncap)
txBytes += len(buff)
if !ok {
allOk = false
}
}
if stats := de.c.stats.Load(); stats != nil {
stats.UpdateTxPhysical(de.nodeAddr, derpAddr, len(buffs), txBytes)
if update := de.c.connCounter.Load(); update != nil {
update(0, netip.AddrPortFrom(de.nodeAddr, 0), derpAddr, len(buffs), txBytes, false)
}
if allOk {
return nil
@@ -1053,7 +1176,12 @@ func (de *endpoint) discoPingTimeout(txid stun.TxID) {
if !ok {
return
}
if debugDisco() || !de.bestAddr.IsValid() || mono.Now().After(de.trustBestAddrUntil) {
bestUntrusted := mono.Now().After(de.trustBestAddrUntil)
if sp.to == de.bestAddr.epAddr && sp.to.vni.IsSet() && bestUntrusted {
// TODO(jwhited): consider applying this to direct UDP paths as well
de.clearBestAddrLocked()
}
if debugDisco() || !de.bestAddr.ap.IsValid() || bestUntrusted {
de.c.dlogf("[v1] magicsock: disco: timeout waiting for pong %x from %v (%v, %v)", txid[:6], sp.to, de.publicKey.ShortString(), de.discoShort())
}
de.removeSentDiscoPingLocked(txid, sp, discoPingTimedOut)
@@ -1107,7 +1235,7 @@ const discoPingSize = len(disco.Magic) + key.DiscoPublicRawLen + disco.NonceLen
//
// The caller should use de.discoKey as the discoKey argument.
// It is passed in so that sendDiscoPing doesn't need to lock de.mu.
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
func (de *endpoint) sendDiscoPing(ep epAddr, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
size = min(size, MaxDiscoPingSize)
padding := max(size-discoPingSize, 0)
@@ -1123,7 +1251,7 @@ func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, t
if size != 0 {
metricSentDiscoPeerMTUProbes.Add(1)
metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep.Addr().Is6())))
metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep)))
}
}
@@ -1154,16 +1282,20 @@ const (
// if non-nil, means that a caller external to the magicsock package internals
// is interested in the result (such as a CLI "tailscale ping" or a c2n ping
// request, etc)
func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose, size int, resCB *pingResultAndCallback) {
func (de *endpoint) startDiscoPingLocked(ep epAddr, now mono.Time, purpose discoPingPurpose, size int, resCB *pingResultAndCallback) {
if runtime.GOOS == "js" {
return
}
if debugNeverDirectUDP() && !ep.vni.IsSet() && ep.ap.Addr() != tailcfg.DerpMagicIPAddr {
return
}
epDisco := de.disco.Load()
if epDisco == nil {
return
}
if purpose != pingCLI {
st, ok := de.endpointState[ep]
if purpose != pingCLI &&
!ep.vni.IsSet() { // de.endpointState is only relevant for direct/non-vni epAddr's
st, ok := de.endpointState[ep.ap]
if !ok {
// Shouldn't happen. But don't ping an endpoint that's
// not active for us.
@@ -1180,11 +1312,11 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo
// Default to sending a single ping of the specified size
sizes := []int{size}
if de.c.PeerMTUEnabled() {
isDerp := ep.Addr() == tailcfg.DerpMagicIPAddr
isDerp := ep.ap.Addr() == tailcfg.DerpMagicIPAddr
if !isDerp && ((purpose == pingDiscovery) || (purpose == pingCLI && size == 0)) {
de.c.dlogf("[v1] magicsock: starting MTU probe")
sizes = mtuProbePingSizesV4
if ep.Addr().Is6() {
if ep.ap.Addr().Is6() {
sizes = mtuProbePingSizesV6
}
}
@@ -1239,7 +1371,7 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
de.c.dlogf("[v1] magicsock: disco: send, starting discovery for %v (%v)", de.publicKey.ShortString(), de.discoShort())
}
de.startDiscoPingLocked(ep, now, pingDiscovery, 0, nil)
de.startDiscoPingLocked(epAddr{ap: ep}, now, pingDiscovery, 0, nil)
}
derpAddr := de.derpAddr
if sentAny && sendCallMeMaybe && derpAddr.IsValid() {
@@ -1253,7 +1385,7 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
}
// sendWireGuardOnlyPingsLocked evaluates all available addresses for
// a WireGuard only endpoint and initates an ICMP ping for useable
// a WireGuard only endpoint and initiates an ICMP ping for useable
// addresses.
func (de *endpoint) sendWireGuardOnlyPingsLocked(now mono.Time) {
if runtime.GOOS == "js" {
@@ -1390,6 +1522,8 @@ func (de *endpoint) updateFromNode(n tailcfg.NodeView, heartbeatDisabled bool, p
}
de.setEndpointsLocked(n.Endpoints())
de.relayCapable = capVerIsRelayCapable(n.Cap())
}
func (de *endpoint) setEndpointsLocked(eps interface {
@@ -1472,7 +1606,7 @@ func (de *endpoint) addCandidateEndpoint(ep netip.AddrPort, forRxPingTxID stun.T
}
}
size2 := len(de.endpointState)
de.c.dlogf("[v1] magicsock: disco: addCandidateEndpoint pruned %v candidate set from %v to %v entries", size, size2)
de.c.dlogf("[v1] magicsock: disco: addCandidateEndpoint pruned %v (%s) candidate set from %v to %v entries", de.discoShort(), de.publicKey.ShortString(), size, size2)
}
return false
}
@@ -1487,17 +1621,19 @@ func (de *endpoint) clearBestAddrLocked() {
de.trustBestAddrUntil = 0
}
// noteBadEndpoint marks ipp as a bad endpoint that would need to be
// noteBadEndpoint marks udpAddr as a bad endpoint that would need to be
// re-evaluated before future use, this should be called for example if a send
// to ipp fails due to a host unreachable error or similar.
func (de *endpoint) noteBadEndpoint(ipp netip.AddrPort) {
// to udpAddr fails due to a host unreachable error or similar.
func (de *endpoint) noteBadEndpoint(udpAddr epAddr) {
de.mu.Lock()
defer de.mu.Unlock()
de.clearBestAddrLocked()
if st, ok := de.endpointState[ipp]; ok {
st.clear()
if !udpAddr.vni.IsSet() {
if st, ok := de.endpointState[udpAddr.ap]; ok {
st.clear()
}
}
}
@@ -1517,17 +1653,20 @@ func (de *endpoint) noteConnectivityChange() {
// pingSizeToPktLen calculates the minimum path MTU that would permit
// a disco ping message of length size to reach its target at
// addr. size is the length of the entire disco message including
// udpAddr. size is the length of the entire disco message including
// disco headers. If size is zero, assume it is the safe wire MTU.
func pingSizeToPktLen(size int, is6 bool) tstun.WireMTU {
func pingSizeToPktLen(size int, udpAddr epAddr) tstun.WireMTU {
if size == 0 {
return tstun.SafeWireMTU()
}
headerLen := ipv4.HeaderLen
if is6 {
if udpAddr.ap.Addr().Is6() {
headerLen = ipv6.HeaderLen
}
headerLen += 8 // UDP header length
if udpAddr.vni.IsSet() {
headerLen += packet.GeneveFixedHeaderLength
}
return tstun.WireMTU(size + headerLen)
}
@@ -1554,11 +1693,11 @@ func pktLenToPingSize(mtu tstun.WireMTU, is6 bool) int {
// It should be called with the Conn.mu held.
//
// It reports whether m.TxID corresponds to a ping that this endpoint sent.
func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip.AddrPort) (knownTxID bool) {
func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAddr) (knownTxID bool) {
de.mu.Lock()
defer de.mu.Unlock()
isDerp := src.Addr() == tailcfg.DerpMagicIPAddr
isDerp := src.ap.Addr() == tailcfg.DerpMagicIPAddr
sp, ok := de.sentPing[m.TxID]
if !ok {
@@ -1568,7 +1707,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
knownTxID = true // for naked returns below
de.removeSentDiscoPingLocked(m.TxID, sp, discoPongReceived)
pktLen := int(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))
pktLen := int(pingSizeToPktLen(sp.size, src))
if sp.size != 0 {
m := getPeerMTUsProbedMetric(tstun.WireMTU(pktLen))
m.Add(1)
@@ -1580,25 +1719,27 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
now := mono.Now()
latency := now.Sub(sp.at)
if !isDerp {
st, ok := de.endpointState[sp.to]
if !isDerp && !src.vni.IsSet() {
// Note: we check vni.IsSet() as relay [epAddr]'s are not stored in
// endpointState, they are either de.bestAddr or not.
st, ok := de.endpointState[sp.to.ap]
if !ok {
// This is no longer an endpoint we care about.
return
}
de.c.peerMap.setNodeKeyForIPPort(src, de.publicKey)
de.c.peerMap.setNodeKeyForEpAddr(src, de.publicKey)
st.addPongReplyLocked(pongReply{
latency: latency,
pongAt: now,
from: src,
from: src.ap,
pongSrc: m.Src,
})
}
if sp.purpose != pingHeartbeat && sp.purpose != pingHeartbeatForUDPLifetime {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pktLen, m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoAtomic.Short(), de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pktLen, m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
if sp.to != src {
fmt.Fprintf(bw, " ping.to=%v", sp.to)
}
@@ -1616,21 +1757,30 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
// Promote this pong response to our current best address if it's lower latency.
// TODO(bradfitz): decide how latency vs. preference order affects decision
if !isDerp {
thisPong := addrQuality{sp.to, latency, tstun.WireMTU(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))}
thisPong := addrQuality{
epAddr: sp.to,
latency: latency,
wireMTU: pingSizeToPktLen(sp.size, sp.to),
}
// TODO(jwhited): consider checking de.trustBestAddrUntil as well. If
// de.bestAddr is untrusted we may want to clear it, otherwise we could
// get stuck with a forever untrusted bestAddr that blackholes, since
// we don't clear direct UDP paths on disco ping timeout (see
// discoPingTimeout).
if betterAddr(thisPong, de.bestAddr) {
de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v tx=%x", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU, m.TxID[:6])
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
What: "handlePingLocked-bestAddr-update",
What: "handlePongConnLocked-bestAddr-update",
From: de.bestAddr,
To: thisPong,
})
de.setBestAddrLocked(thisPong)
}
if de.bestAddr.AddrPort == thisPong.AddrPort {
if de.bestAddr.epAddr == thisPong.epAddr {
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
What: "handlePingLocked-bestAddr-latency",
What: "handlePongConnLocked-bestAddr-latency",
From: de.bestAddr,
To: thisPong,
})
@@ -1642,20 +1792,43 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
return
}
// addrQuality is an IPPort with an associated latency and path mtu.
// epAddr is a [netip.AddrPort] with an optional Geneve header (RFC8926)
// [packet.VirtualNetworkID]. When no VNI is set, an epAddr describes a plain
// UDP address; when a VNI is set, it describes a Geneve-encapsulated path.
// epAddr values are compared with == elsewhere in this file, so two values
// with the same ap but different vni are distinct.
type epAddr struct {
ap netip.AddrPort // if ap.Addr() == tailcfg.DerpMagicIPAddr then vni is never set
vni packet.VirtualNetworkID // vni.IsSet() indicates if this [epAddr] involves a Geneve header
}
// isDirect reports whether e is a direct UDP path: e.ap is valid, its address
// is not tailcfg.DerpMagicIPAddr, and no VNI is set.
func (e epAddr) isDirect() bool {
	if e.vni.IsSet() || !e.ap.IsValid() {
		// Geneve-encapsulated or zero-value addresses are never direct.
		return false
	}
	return e.ap.Addr() != tailcfg.DerpMagicIPAddr
}
// String formats e as its [netip.AddrPort] string, with ":vni:<n>" appended
// when a VNI is set.
func (e epAddr) String() string {
	if e.vni.IsSet() {
		return fmt.Sprintf("%v:vni:%d", e.ap.String(), e.vni.Get())
	}
	return e.ap.String()
}
// addrQuality is an [epAddr], an optional [key.DiscoPublic] if a relay server
// is associated, a round-trip latency measurement, and path mtu.
type addrQuality struct {
netip.AddrPort
latency time.Duration
wireMTU tstun.WireMTU
epAddr
relayServerDisco key.DiscoPublic // only relevant if epAddr.vni.isSet(), otherwise zero value
latency time.Duration
wireMTU tstun.WireMTU
}
func (a addrQuality) String() string {
return fmt.Sprintf("%v@%v+%v", a.AddrPort, a.latency, a.wireMTU)
// TODO(jwhited): consider including relayServerDisco
return fmt.Sprintf("%v@%v+%v", a.epAddr, a.latency, a.wireMTU)
}
// betterAddr reports whether a is a better addr to use than b.
func betterAddr(a, b addrQuality) bool {
if a.AddrPort == b.AddrPort {
if a.epAddr == b.epAddr {
if a.wireMTU > b.wireMTU {
// TODO(val): Think harder about the case of lower
// latency and smaller or unknown MTU, and higher
@@ -1666,10 +1839,19 @@ func betterAddr(a, b addrQuality) bool {
}
return false
}
if !b.IsValid() {
if !b.ap.IsValid() {
return true
}
if !a.IsValid() {
if !a.ap.IsValid() {
return false
}
// Geneve-encapsulated paths (UDP relay servers) are lower preference than
// non-encapsulated paths.
if !a.vni.IsSet() && b.vni.IsSet() {
return true
}
if a.vni.IsSet() && !b.vni.IsSet() {
return false
}
@@ -1693,27 +1875,27 @@ func betterAddr(a, b addrQuality) bool {
// addresses, and prefer link-local unicast addresses over other types
// of private IP addresses since it's definitionally more likely that
// they'll be on the same network segment than a general private IP.
if a.Addr().IsLoopback() {
if a.ap.Addr().IsLoopback() {
aPoints += 50
} else if a.Addr().IsLinkLocalUnicast() {
} else if a.ap.Addr().IsLinkLocalUnicast() {
aPoints += 30
} else if a.Addr().IsPrivate() {
} else if a.ap.Addr().IsPrivate() {
aPoints += 20
}
if b.Addr().IsLoopback() {
if b.ap.Addr().IsLoopback() {
bPoints += 50
} else if b.Addr().IsLinkLocalUnicast() {
} else if b.ap.Addr().IsLinkLocalUnicast() {
bPoints += 30
} else if b.Addr().IsPrivate() {
} else if b.ap.Addr().IsPrivate() {
bPoints += 20
}
// Prefer IPv6 for being a bit more robust, as long as
// the latencies are roughly equivalent.
if a.Addr().Is6() {
if a.ap.Addr().Is6() {
aPoints += 10
}
if b.Addr().Is6() {
if b.ap.Addr().Is6() {
bPoints += 10
}
@@ -1797,7 +1979,25 @@ func (de *endpoint) handleCallMeMaybe(m *disco.CallMeMaybe) {
for _, st := range de.endpointState {
st.lastPing = 0
}
de.sendDiscoPingsLocked(mono.Now(), false)
monoNow := mono.Now()
de.sendDiscoPingsLocked(monoNow, false)
// This hook is required to trigger peer relay path discovery around
// disco "tailscale ping" initiated by de. We may be configured with peer
// relay servers that differ from de.
//
// The only other peer relay path discovery hook is in [endpoint.heartbeat],
// which is kicked off around outbound WireGuard packet flow, or if you are
// the "tailscale ping" initiator. Disco "tailscale ping" does not propagate
// into wireguard-go.
//
// We choose not to hook this around disco ping reception since peer relay
// path discovery can also trigger disco ping transmission, which *could*
// lead to an infinite loop of peer relay path discovery between two peers,
// absent intended triggers.
if de.wantUDPRelayPathDiscoveryLocked(monoNow) {
de.discoverUDPRelayPathsLocked(monoNow)
}
}
func (de *endpoint) populatePeerStatus(ps *ipnstate.PeerStatus) {
@@ -1814,8 +2014,12 @@ func (de *endpoint) populatePeerStatus(ps *ipnstate.PeerStatus) {
ps.LastWrite = de.lastSendExt.WallTime()
ps.Active = now.Sub(de.lastSendExt) < sessionActiveTimeout
if udpAddr, derpAddr, _ := de.addrForSendLocked(now); udpAddr.IsValid() && !derpAddr.IsValid() {
ps.CurAddr = udpAddr.String()
if udpAddr, derpAddr, _ := de.addrForSendLocked(now); udpAddr.ap.IsValid() && !derpAddr.IsValid() {
if udpAddr.vni.IsSet() {
ps.PeerRelay = udpAddr.String()
} else {
ps.CurAddr = udpAddr.String()
}
}
}
@@ -1863,14 +2067,22 @@ func (de *endpoint) resetLocked() {
}
}
de.probeUDPLifetime.resetCycleEndpointLocked()
de.c.relayManager.stopWork(de)
}
// numStopAndReset returns the current value of de.numStopAndResetAtomic,
// read with an atomic load.
func (de *endpoint) numStopAndReset() int64 {
	count := atomic.LoadInt64(&de.numStopAndResetAtomic)
	return count
}
// setDERPHome records regionID as the DERP home region of de by pointing
// de.derpAddr at the DERP magic IP with regionID as the port. When the Conn
// reports that peer relay servers exist, the relay manager is also told
// about the change.
//
// Callers must never invoke setDERPHome concurrently with
// [Conn.updateRelayServersSet]; otherwise [candidatePeerRelay] DERP home
// changes may be missed from the perspective of [relayManager].
func (de *endpoint) setDERPHome(regionID uint16) {
	home := netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, regionID)

	de.mu.Lock()
	defer de.mu.Unlock()

	de.derpAddr = home
	if !de.c.hasPeerRelayServers.Load() {
		return
	}
	de.c.relayManager.handleDERPHomeChange(de.publicKey, regionID)
}

View File

@@ -6,9 +6,9 @@ package magicsock
import (
"net/netip"
"slices"
"sync"
"time"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/tempfork/heap"
"tailscale.com/util/mak"
@@ -107,7 +107,7 @@ func (eh endpointHeap) Min() *endpointTrackerEntry {
//
// See tailscale/tailscale#7877 for more information.
type endpointTracker struct {
mu sync.Mutex
mu syncs.Mutex
endpoints map[netip.Addr]*endpointHeap
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !linux
//go:build !linux || ts_omit_listenrawdisco
package magicsock
@@ -9,19 +9,8 @@ import (
"errors"
"fmt"
"io"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
// listenRawDisco is the stub used when raw disco listening is not compiled
// in. It ignores family and always returns a nil io.Closer together with an
// error that wraps errors.ErrUnsupported.
func (c *Conn) listenRawDisco(family string) (io.Closer, error) {
	err := fmt.Errorf("raw disco listening not supported on this OS: %w", errors.ErrUnsupported)
	return nil, err
}
// trySetSocketBuffer adjusts the socket buffers of pconn. In this build it
// has no OS-specific behavior and simply delegates to
// portableTrySetSocketBuffer, passing logf through for diagnostics.
func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
	portableTrySetSocketBuffer(pconn, logf)
}
const (
controlMessageSize = 0
)

View File

@@ -1,6 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build linux && !ts_omit_listenrawdisco
package magicsock
import (
@@ -13,7 +15,6 @@ import (
"net"
"net/netip"
"strings"
"syscall"
"time"
"github.com/mdlayher/socket"
@@ -28,7 +29,6 @@ import (
"tailscale.com/types/ipproto"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
const (
@@ -66,10 +66,10 @@ var (
// fragmented, and we don't want to handle reassembly.
bpf.LoadAbsolute{Off: 6, Size: 2},
// More Fragments bit set means this is part of a fragmented packet.
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x2000, SkipTrue: 7, SkipFalse: 0},
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x2000, SkipTrue: 8, SkipFalse: 0},
// Non-zero fragment offset with MF=0 means this is the last
// fragment of packet.
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x1fff, SkipTrue: 6, SkipFalse: 0},
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x1fff, SkipTrue: 7, SkipFalse: 0},
// Load IP header length into X register.
bpf.LoadMemShift{Off: 0},
@@ -453,7 +453,13 @@ func (c *Conn) receiveDisco(pc *socket.Conn, isIPV6 bool) {
metricRecvDiscoPacketIPv4.Add(1)
}
c.handleDiscoMessage(payload, srcAddr, key.NodePublic{}, discoRXPathRawSocket)
pt, isGeneveEncap := packetLooksLike(payload)
if pt == packetLooksLikeDisco && !isGeneveEncap {
// The BPF program matching on disco does not currently support
// Geneve encapsulation. isGeneveEncap should not return true if
// payload is disco.
c.handleDiscoMessage(payload, epAddr{ap: srcAddr}, false, key.NodePublic{}, discoRXPathRawSocket)
}
}
}
@@ -483,38 +489,3 @@ func printSockaddr(sa unix.Sockaddr) string {
return fmt.Sprintf("unknown(%T)", sa)
}
}
// trySetSocketBuffer attempts to set SO_SNDBUFFORCE and SO_RCVBUFFORCE which
// can overcome the limit of net.core.{r,w}mem_max, but require CAP_NET_ADMIN.
// It falls back to the portable implementation if that fails, which may be
// silently capped to net.core.{r,w}mem_max.
//
// pconn values that are not a *net.UDPConn are left untouched. Failures are
// reported through logf only; nothing is returned to the caller.
func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
	c, ok := pconn.(*net.UDPConn)
	if !ok {
		return
	}

	var errRcv, errSnd error
	rc, err := c.SyscallConn()
	if err == nil {
		// Keep Control's own error: if it fails, the callback below did not
		// set errRcv/errSnd, and without this assignment the portable
		// fallback would be skipped even though nothing was configured.
		err = rc.Control(func(fd uintptr) {
			errRcv = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, socketBufferSize)
			if errRcv != nil {
				logf("magicsock: [warning] failed to force-set UDP read buffer size to %d: %v; using kernel default values (impacts throughput only)", socketBufferSize, errRcv)
			}
			errSnd = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_SNDBUFFORCE, socketBufferSize)
			if errSnd != nil {
				logf("magicsock: [warning] failed to force-set UDP write buffer size to %d: %v; using kernel default values (impacts throughput only)", socketBufferSize, errSnd)
			}
		})
	}

	// Any failure along the forced path falls back to the portable setter.
	if err != nil || errRcv != nil || errSnd != nil {
		portableTrySetSocketBuffer(pconn, logf)
	}
}
var controlMessageSize = -1 // bomb if used for allocation before init
func init() {
// controlMessageSize is set to hold a UDP_GRO or UDP_SEGMENT control
// message. These contain a single uint16 of data.
controlMessageSize = unix.CmsgSpace(2)
}

View File

@@ -1,13 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !windows
package magicsock
import (
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
// trySetUDPSocketOptions is a no-op in this build (the file is constrained to
// !windows): both pconn and logf are ignored.
func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) {}

View File

@@ -1,58 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build windows
package magicsock
import (
"net"
"unsafe"
"golang.org/x/sys/windows"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
// trySetUDPSocketOptions issues the SIO_UDP_NETRESET ioctl with a zero flag
// on pconn's underlying socket. It does nothing unless pconn is a
// *net.UDPConn. Failures are reported through logf and are otherwise
// non-fatal.
func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) {
	udpConn, isUDP := pconn.(*net.UDPConn)
	if !isUDP {
		// not a UDP connection; nothing to do
		return
	}

	rawConn, err := udpConn.SyscallConn()
	if err != nil {
		logf("trySetUDPSocketOptions: getting SyscallConn failed: %v", err)
		return
	}

	// Similar to https://github.com/golang/go/issues/5834 (which involved
	// WSAECONNRESET), Windows can return a WSAENETRESET error, even on UDP
	// reads. Disable this.
	const SIO_UDP_NETRESET = windows.IOC_IN | windows.IOC_VENDOR | 15

	var ioctlErr error
	disableNetReset := func(fd uintptr) {
		var (
			bytesReturned uint32
			flag          uint32 // zero value is passed as the ioctl input
		)
		ioctlErr = windows.WSAIoctl(
			windows.Handle(fd),
			SIO_UDP_NETRESET,               // iocc
			(*byte)(unsafe.Pointer(&flag)), // inbuf
			uint32(unsafe.Sizeof(flag)),    // cbif
			nil,                            // outbuf
			0,                              // cbob
			&bytesReturned,                 // cbbr
			nil,                            // overlapped
			0,                              // completionRoutine
		)
	}
	controlErr := rawConn.Control(disableNetReset)

	// Report the ioctl result first, then any failure of Control itself.
	if ioctlErr != nil {
		logf("trySetUDPSocketOptions: could not set SIO_UDP_NETRESET: %v", ioctlErr)
	}
	if controlErr != nil {
		logf("trySetUDPSocketOptions: SyscallConn.Control failed: %v", controlErr)
	}
}

View File

@@ -4,8 +4,6 @@
package magicsock
import (
"net/netip"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/util/set"
@@ -15,17 +13,17 @@ import (
// peer.
type peerInfo struct {
ep *endpoint // always non-nil.
// ipPorts is an inverted version of peerMap.byIPPort (below), so
// epAddrs is an inverted version of peerMap.byEpAddr (below), so
// that when we're deleting this node, we can rapidly find out the
// keys that need deleting from peerMap.byIPPort without having to
// iterate over every IPPort known for any peer.
ipPorts set.Set[netip.AddrPort]
// keys that need deleting from peerMap.byEpAddr without having to
// iterate over every epAddr known for any peer.
epAddrs set.Set[epAddr]
}
func newPeerInfo(ep *endpoint) *peerInfo {
return &peerInfo{
ep: ep,
ipPorts: set.Set[netip.AddrPort]{},
epAddrs: set.Set[epAddr]{},
}
}
@@ -35,9 +33,21 @@ func newPeerInfo(ep *endpoint) *peerInfo {
// It doesn't do any locking; all access must be done with Conn.mu held.
type peerMap struct {
byNodeKey map[key.NodePublic]*peerInfo
byIPPort map[netip.AddrPort]*peerInfo
byEpAddr map[epAddr]*peerInfo
byNodeID map[tailcfg.NodeID]*peerInfo
// relayEpAddrByNodeKey ensures we only hold a single relay
// [epAddr] (vni.isSet()) for a given node key in byEpAddr, vs letting them
// grow unbounded. Relay [epAddr]'s are dynamically created by
// [relayManager] during path discovery, and are only useful to track in
// peerMap so long as they are the endpoint.bestAddr. [relayManager] handles
// all creation and initial probing responsibilities otherwise, and it does
// not depend on [peerMap].
//
// Note: This doesn't address unbounded growth of non-relay epAddr's in
// byEpAddr. That issue is being tracked in http://go/corp/29422.
relayEpAddrByNodeKey map[key.NodePublic]epAddr
// nodesOfDisco contains the set of nodes that are using a
// DiscoKey. Usually those sets will be just one node.
nodesOfDisco map[key.DiscoPublic]set.Set[key.NodePublic]
@@ -45,10 +55,11 @@ type peerMap struct {
func newPeerMap() peerMap {
return peerMap{
byNodeKey: map[key.NodePublic]*peerInfo{},
byIPPort: map[netip.AddrPort]*peerInfo{},
byNodeID: map[tailcfg.NodeID]*peerInfo{},
nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{},
byNodeKey: map[key.NodePublic]*peerInfo{},
byEpAddr: map[epAddr]*peerInfo{},
byNodeID: map[tailcfg.NodeID]*peerInfo{},
relayEpAddrByNodeKey: map[key.NodePublic]epAddr{},
nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{},
}
}
@@ -88,10 +99,10 @@ func (m *peerMap) endpointForNodeID(nodeID tailcfg.NodeID) (ep *endpoint, ok boo
return nil, false
}
// endpointForIPPort returns the endpoint for the peer we
// believe to be at ipp, or nil if we don't know of any such peer.
func (m *peerMap) endpointForIPPort(ipp netip.AddrPort) (ep *endpoint, ok bool) {
if info, ok := m.byIPPort[ipp]; ok {
// endpointForEpAddr returns the endpoint for the peer we
// believe to be at addr, or nil if we don't know of any such peer.
func (m *peerMap) endpointForEpAddr(addr epAddr) (ep *endpoint, ok bool) {
if info, ok := m.byEpAddr[addr]; ok {
return info.ep, true
}
return nil, false
@@ -148,10 +159,10 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
// TODO(raggi,catzkorn): this could mean that if a "isWireguardOnly"
// peer has, say, 192.168.0.2 and so does a tailscale peer, the
// wireguard one will win. That may not be the outcome that we want -
// perhaps we should prefer bestAddr.AddrPort if it is set?
// perhaps we should prefer bestAddr.epAddr.ap if it is set?
// see tailscale/tailscale#7994
for ipp := range ep.endpointState {
m.setNodeKeyForIPPort(ipp, ep.publicKey)
m.setNodeKeyForEpAddr(epAddr{ap: ipp}, ep.publicKey)
}
return
}
@@ -163,20 +174,31 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
discoSet.Add(ep.publicKey)
}
// setNodeKeyForIPPort makes future peer lookups by ipp return the
// setNodeKeyForEpAddr makes future peer lookups by addr return the
// same endpoint as a lookup by nk.
//
// This should only be called with a fully verified mapping of ipp to
// This should only be called with a fully verified mapping of addr to
// nk, because calling this function defines the endpoint we hand to
// WireGuard for packets received from ipp.
func (m *peerMap) setNodeKeyForIPPort(ipp netip.AddrPort, nk key.NodePublic) {
if pi := m.byIPPort[ipp]; pi != nil {
delete(pi.ipPorts, ipp)
delete(m.byIPPort, ipp)
// WireGuard for packets received from addr.
func (m *peerMap) setNodeKeyForEpAddr(addr epAddr, nk key.NodePublic) {
if pi := m.byEpAddr[addr]; pi != nil {
delete(pi.epAddrs, addr)
delete(m.byEpAddr, addr)
if addr.vni.IsSet() {
delete(m.relayEpAddrByNodeKey, pi.ep.publicKey)
}
}
if pi, ok := m.byNodeKey[nk]; ok {
pi.ipPorts.Add(ipp)
m.byIPPort[ipp] = pi
if addr.vni.IsSet() {
relay, ok := m.relayEpAddrByNodeKey[nk]
if ok {
delete(pi.epAddrs, relay)
delete(m.byEpAddr, relay)
}
m.relayEpAddrByNodeKey[nk] = addr
}
pi.epAddrs.Add(addr)
m.byEpAddr[addr] = pi
}
}
@@ -203,7 +225,8 @@ func (m *peerMap) deleteEndpoint(ep *endpoint) {
// Unexpected. But no logger plumbed here to log so.
return
}
for ip := range pi.ipPorts {
delete(m.byIPPort, ip)
for ip := range pi.epAddrs {
delete(m.byEpAddr, ip)
}
delete(m.relayEpAddrByNodeKey, ep.publicKey)
}

View File

@@ -5,14 +5,17 @@ package magicsock
import (
"errors"
"fmt"
"net"
"net/netip"
"sync"
"sync/atomic"
"syscall"
"golang.org/x/net/ipv6"
"tailscale.com/net/batching"
"tailscale.com/net/netaddr"
"tailscale.com/net/packet"
"tailscale.com/syncs"
"tailscale.com/types/nettype"
)
@@ -28,7 +31,7 @@ type RebindingUDPConn struct {
// Neither is expected to be nil, sockets are bound on creation.
pconnAtomic atomic.Pointer[nettype.PacketConn]
mu sync.Mutex // held while changing pconn (and pconnAtomic)
mu syncs.Mutex // held while changing pconn (and pconnAtomic)
pconn nettype.PacketConn
port uint16
}
@@ -40,7 +43,7 @@ type RebindingUDPConn struct {
// disrupting surrounding code that assumes nettype.PacketConn is a
// *net.UDPConn.
func (c *RebindingUDPConn) setConnLocked(p nettype.PacketConn, network string, batchSize int) {
upc := tryUpgradeToBatchingConn(p, network, batchSize)
upc := batching.TryUpgradeToConn(p, network, batchSize)
c.pconn = upc
c.pconnAtomic.Store(&upc)
c.port = uint16(c.localAddrLocked().Port)
@@ -70,21 +73,39 @@ func (c *RebindingUDPConn) ReadFromUDPAddrPort(b []byte) (int, netip.AddrPort, e
return c.readFromWithInitPconn(*c.pconnAtomic.Load(), b)
}
// WriteBatchTo writes buffs to addr.
func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error {
// WriteWireGuardBatchTo writes buffs to addr. It serves primarily as an alias
// for [batching.Conn.WriteBatchTo], with fallback to single packet operations
// if c.pconn is not a [batching.Conn].
//
// WriteWireGuardBatchTo assumes buffs are WireGuard packets, which is notable
// for Geneve encapsulation: Geneve protocol is set to [packet.GeneveProtocolWireGuard],
// and the control bit is left unset.
func (c *RebindingUDPConn) WriteWireGuardBatchTo(buffs [][]byte, addr epAddr, offset int) error {
if offset != packet.GeneveFixedHeaderLength {
return fmt.Errorf("RebindingUDPConn.WriteWireGuardBatchTo: [unexpected] offset (%d) != Geneve header length (%d)", offset, packet.GeneveFixedHeaderLength)
}
gh := packet.GeneveHeader{
Protocol: packet.GeneveProtocolWireGuard,
VNI: addr.vni,
}
for {
pconn := *c.pconnAtomic.Load()
b, ok := pconn.(batchingConn)
b, ok := pconn.(batching.Conn)
if !ok {
for _, buf := range buffs {
_, err := c.writeToUDPAddrPortWithInitPconn(pconn, buf, addr)
if gh.VNI.IsSet() {
gh.Encode(buf)
} else {
buf = buf[offset:]
}
_, err := c.writeToUDPAddrPortWithInitPconn(pconn, buf, addr.ap)
if err != nil {
return err
}
}
return nil
}
err := b.WriteBatchTo(buffs, addr)
err := b.WriteBatchTo(buffs, addr.ap, gh, offset)
if err != nil {
if pconn != c.currentConn() {
continue
@@ -95,13 +116,12 @@ func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) err
}
}
// ReadBatch reads messages from c into msgs. It returns the number of messages
// the caller should evaluate for nonzero len, as a zero len message may fall
// on either side of a nonzero.
// ReadBatch is an alias for [batching.Conn.ReadBatch] with fallback to single
// packet operations if c.pconn is not a [batching.Conn].
func (c *RebindingUDPConn) ReadBatch(msgs []ipv6.Message, flags int) (int, error) {
for {
pconn := *c.pconnAtomic.Load()
b, ok := pconn.(batchingConn)
b, ok := pconn.(batching.Conn)
if !ok {
n, ap, err := c.readFromWithInitPconn(pconn, msgs[0].Buffers[0])
if err == nil {

1071
vendor/tailscale.com/wgengine/magicsock/relaymanager.go generated vendored Normal file

File diff suppressed because it is too large Load Diff