Update dependencies

This commit is contained in:
bluepython508
2024-11-01 17:33:34 +00:00
parent 033ac0b400
commit 5cdfab398d
3596 changed files with 1033483 additions and 259 deletions

View File

@@ -0,0 +1,25 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"net/netip"
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"tailscale.com/types/nettype"
)
var (
// This acts as a compile-time check for our usage of ipv6.Message in
// batchingConn for both IPv6 and IPv4 operations.
_ ipv6.Message = ipv4.Message{}
)
// batchingConn is a nettype.PacketConn that provides batched i/o.
type batchingConn interface {
nettype.PacketConn
ReadBatch(msgs []ipv6.Message, flags int) (n int, err error)
WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error
}

View File

@@ -0,0 +1,14 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !linux
package magicsock
import (
"tailscale.com/types/nettype"
)
func tryUpgradeToBatchingConn(pconn nettype.PacketConn, _ string, _ int) nettype.PacketConn {
return pconn
}

View File

@@ -0,0 +1,424 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"encoding/binary"
"errors"
"fmt"
"net"
"net/netip"
"runtime"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"unsafe"
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"golang.org/x/sys/unix"
"tailscale.com/hostinfo"
"tailscale.com/net/neterror"
"tailscale.com/types/nettype"
)
// xnetBatchReaderWriter defines the batching i/o methods of
// golang.org/x/net/ipv4.PacketConn (and ipv6.PacketConn).
// TODO(jwhited): This should eventually be replaced with the standard library
// implementation of https://github.com/golang/go/issues/45886
type xnetBatchReaderWriter interface {
xnetBatchReader
xnetBatchWriter
}
type xnetBatchReader interface {
ReadBatch([]ipv6.Message, int) (int, error)
}
type xnetBatchWriter interface {
WriteBatch([]ipv6.Message, int) (int, error)
}
// linuxBatchingConn is a UDP socket that provides batched i/o. It implements
// batchingConn.
type linuxBatchingConn struct {
pc nettype.PacketConn
xpc xnetBatchReaderWriter
rxOffload bool // supports UDP GRO or similar
txOffload atomic.Bool // supports UDP GSO or similar
setGSOSizeInControl func(control *[]byte, gsoSize uint16) // typically setGSOSizeInControl(); swappable for testing
getGSOSizeFromControl func(control []byte) (int, error) // typically getGSOSizeFromControl(); swappable for testing
sendBatchPool sync.Pool
}
func (c *linuxBatchingConn) ReadFromUDPAddrPort(p []byte) (n int, addr netip.AddrPort, err error) {
if c.rxOffload {
// UDP_GRO is opt-in on Linux via setsockopt(). Once enabled you may
// receive a "monster datagram" from any read call. The ReadFrom() API
// does not support passing the GSO size and is unsafe to use in such a
// case. Other platforms may vary in behavior, but we go with the most
// conservative approach to prevent this from becoming a footgun in the
// future.
return 0, netip.AddrPort{}, errors.New("rx UDP offload is enabled on this socket, single packet reads are unavailable")
}
return c.pc.ReadFromUDPAddrPort(p)
}
func (c *linuxBatchingConn) SetDeadline(t time.Time) error {
return c.pc.SetDeadline(t)
}
func (c *linuxBatchingConn) SetReadDeadline(t time.Time) error {
return c.pc.SetReadDeadline(t)
}
func (c *linuxBatchingConn) SetWriteDeadline(t time.Time) error {
return c.pc.SetWriteDeadline(t)
}
const (
// This was initially established for Linux, but may split out to
// GOOS-specific values later. It originates as UDP_MAX_SEGMENTS in the
// kernel's TX path, and UDP_GRO_CNT_MAX for RX.
udpSegmentMaxDatagrams = 64
)
const (
// Exceeding these values results in EMSGSIZE.
maxIPv4PayloadLen = 1<<16 - 1 - 20 - 8
maxIPv6PayloadLen = 1<<16 - 1 - 8
)
// coalesceMessages iterates msgs, coalescing them where possible while
// maintaining datagram order. All msgs have their Addr field set to addr.
func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, buffs [][]byte, msgs []ipv6.Message) int {
var (
base = -1 // index of msg we are currently coalescing into
gsoSize int // segmentation size of msgs[base]
dgramCnt int // number of dgrams coalesced into msgs[base]
endBatch bool // tracking flag to start a new batch on next iteration of buffs
)
maxPayloadLen := maxIPv4PayloadLen
if addr.IP.To4() == nil {
maxPayloadLen = maxIPv6PayloadLen
}
for i, buff := range buffs {
if i > 0 {
msgLen := len(buff)
baseLenBefore := len(msgs[base].Buffers[0])
freeBaseCap := cap(msgs[base].Buffers[0]) - baseLenBefore
if msgLen+baseLenBefore <= maxPayloadLen &&
msgLen <= gsoSize &&
msgLen <= freeBaseCap &&
dgramCnt < udpSegmentMaxDatagrams &&
!endBatch {
msgs[base].Buffers[0] = append(msgs[base].Buffers[0], make([]byte, msgLen)...)
copy(msgs[base].Buffers[0][baseLenBefore:], buff)
if i == len(buffs)-1 {
c.setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
}
dgramCnt++
if msgLen < gsoSize {
// A smaller than gsoSize packet on the tail is legal, but
// it must end the batch.
endBatch = true
}
continue
}
}
if dgramCnt > 1 {
c.setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
}
// Reset prior to incrementing base since we are preparing to start a
// new potential batch.
endBatch = false
base++
gsoSize = len(buff)
msgs[base].OOB = msgs[base].OOB[:0]
msgs[base].Buffers[0] = buff
msgs[base].Addr = addr
dgramCnt = 1
}
return base + 1
}
type sendBatch struct {
msgs []ipv6.Message
ua *net.UDPAddr
}
func (c *linuxBatchingConn) getSendBatch() *sendBatch {
batch := c.sendBatchPool.Get().(*sendBatch)
return batch
}
func (c *linuxBatchingConn) putSendBatch(batch *sendBatch) {
for i := range batch.msgs {
batch.msgs[i] = ipv6.Message{Buffers: batch.msgs[i].Buffers, OOB: batch.msgs[i].OOB}
}
c.sendBatchPool.Put(batch)
}
func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error {
batch := c.getSendBatch()
defer c.putSendBatch(batch)
if addr.Addr().Is6() {
as16 := addr.Addr().As16()
copy(batch.ua.IP, as16[:])
batch.ua.IP = batch.ua.IP[:16]
} else {
as4 := addr.Addr().As4()
copy(batch.ua.IP, as4[:])
batch.ua.IP = batch.ua.IP[:4]
}
batch.ua.Port = int(addr.Port())
var (
n int
retried bool
)
retry:
if c.txOffload.Load() {
n = c.coalesceMessages(batch.ua, buffs, batch.msgs)
} else {
for i := range buffs {
batch.msgs[i].Buffers[0] = buffs[i]
batch.msgs[i].Addr = batch.ua
batch.msgs[i].OOB = batch.msgs[i].OOB[:0]
}
n = len(buffs)
}
err := c.writeBatch(batch.msgs[:n])
if err != nil && c.txOffload.Load() && neterror.ShouldDisableUDPGSO(err) {
c.txOffload.Store(false)
retried = true
goto retry
}
if retried {
return neterror.ErrUDPGSODisabled{OnLaddr: c.pc.LocalAddr().String(), RetryErr: err}
}
return err
}
func (c *linuxBatchingConn) SyscallConn() (syscall.RawConn, error) {
sc, ok := c.pc.(syscall.Conn)
if !ok {
return nil, errUnsupportedConnType
}
return sc.SyscallConn()
}
func (c *linuxBatchingConn) writeBatch(msgs []ipv6.Message) error {
var head int
for {
n, err := c.xpc.WriteBatch(msgs[head:], 0)
if err != nil || n == len(msgs[head:]) {
// Returning the number of packets written would require
// unraveling individual msg len and gso size during a coalesced
// write. The top of the call stack disregards partial success,
// so keep this simple for now.
return err
}
head += n
}
}
// splitCoalescedMessages splits coalesced messages from the tail of dst
// beginning at index 'firstMsgAt' into the head of the same slice. It reports
// the number of elements to evaluate in msgs for nonzero len (msgs[i].N). An
// error is returned if a socket control message cannot be parsed or a split
// operation would overflow msgs.
func (c *linuxBatchingConn) splitCoalescedMessages(msgs []ipv6.Message, firstMsgAt int) (n int, err error) {
for i := firstMsgAt; i < len(msgs); i++ {
msg := &msgs[i]
if msg.N == 0 {
return n, err
}
var (
gsoSize int
start int
end = msg.N
numToSplit = 1
)
gsoSize, err = c.getGSOSizeFromControl(msg.OOB[:msg.NN])
if err != nil {
return n, err
}
if gsoSize > 0 {
numToSplit = (msg.N + gsoSize - 1) / gsoSize
end = gsoSize
}
for j := 0; j < numToSplit; j++ {
if n > i {
return n, errors.New("splitting coalesced packet resulted in overflow")
}
copied := copy(msgs[n].Buffers[0], msg.Buffers[0][start:end])
msgs[n].N = copied
msgs[n].Addr = msg.Addr
start = end
end += gsoSize
if end > msg.N {
end = msg.N
}
n++
}
if i != n-1 {
// It is legal for bytes to move within msg.Buffers[0] as a result
// of splitting, so we only zero the source msg len when it is not
// the destination of the last split operation above.
msg.N = 0
}
}
return n, nil
}
func (c *linuxBatchingConn) ReadBatch(msgs []ipv6.Message, flags int) (n int, err error) {
if !c.rxOffload || len(msgs) < 2 {
return c.xpc.ReadBatch(msgs, flags)
}
// Read into the tail of msgs, split into the head.
readAt := len(msgs) - 2
numRead, err := c.xpc.ReadBatch(msgs[readAt:], 0)
if err != nil || numRead == 0 {
return 0, err
}
return c.splitCoalescedMessages(msgs, readAt)
}
func (c *linuxBatchingConn) LocalAddr() net.Addr {
return c.pc.LocalAddr().(*net.UDPAddr)
}
func (c *linuxBatchingConn) WriteToUDPAddrPort(b []byte, addr netip.AddrPort) (int, error) {
return c.pc.WriteToUDPAddrPort(b, addr)
}
func (c *linuxBatchingConn) Close() error {
return c.pc.Close()
}
// tryEnableUDPOffload attempts to enable the UDP_GRO socket option on pconn,
// and returns two booleans indicating TX and RX UDP offload support.
func tryEnableUDPOffload(pconn nettype.PacketConn) (hasTX bool, hasRX bool) {
if c, ok := pconn.(*net.UDPConn); ok {
rc, err := c.SyscallConn()
if err != nil {
return
}
err = rc.Control(func(fd uintptr) {
_, errSyscall := syscall.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT)
hasTX = errSyscall == nil
errSyscall = syscall.SetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO, 1)
hasRX = errSyscall == nil
})
if err != nil {
return false, false
}
}
return hasTX, hasRX
}
// getGSOSizeFromControl returns the GSO size found in control. If no GSO size
// is found or the len(control) < unix.SizeofCmsghdr, this function returns 0.
// A non-nil error will be returned if len(control) > unix.SizeofCmsghdr but
// its contents cannot be parsed as a socket control message.
func getGSOSizeFromControl(control []byte) (int, error) {
var (
hdr unix.Cmsghdr
data []byte
rem = control
err error
)
for len(rem) > unix.SizeofCmsghdr {
hdr, data, rem, err = unix.ParseOneSocketControlMessage(control)
if err != nil {
return 0, fmt.Errorf("error parsing socket control message: %w", err)
}
if hdr.Level == unix.SOL_UDP && hdr.Type == unix.UDP_GRO && len(data) >= 2 {
return int(binary.NativeEndian.Uint16(data[:2])), nil
}
}
return 0, nil
}
// setGSOSizeInControl sets a socket control message in control containing
// gsoSize. If len(control) < controlMessageSize control's len will be set to 0.
func setGSOSizeInControl(control *[]byte, gsoSize uint16) {
*control = (*control)[:0]
if cap(*control) < int(unsafe.Sizeof(unix.Cmsghdr{})) {
return
}
if cap(*control) < controlMessageSize {
return
}
*control = (*control)[:cap(*control)]
hdr := (*unix.Cmsghdr)(unsafe.Pointer(&(*control)[0]))
hdr.Level = unix.SOL_UDP
hdr.Type = unix.UDP_SEGMENT
hdr.SetLen(unix.CmsgLen(2))
binary.NativeEndian.PutUint16((*control)[unix.SizeofCmsghdr:], gsoSize)
*control = (*control)[:unix.CmsgSpace(2)]
}
// tryUpgradeToBatchingConn probes the capabilities of the OS and pconn, and
// upgrades pconn to a *linuxBatchingConn if appropriate.
func tryUpgradeToBatchingConn(pconn nettype.PacketConn, network string, batchSize int) nettype.PacketConn {
if runtime.GOOS != "linux" {
// Exclude Android.
return pconn
}
if network != "udp4" && network != "udp6" {
return pconn
}
if strings.HasPrefix(hostinfo.GetOSVersion(), "2.") {
// recvmmsg/sendmmsg were added in 2.6.33, but we support down to
// 2.6.32 for old NAS devices. See https://github.com/tailscale/tailscale/issues/6807.
// As a cheap heuristic: if the Linux kernel starts with "2", just
// consider it too old for mmsg. Nobody who cares about performance runs
// such ancient kernels. UDP offload was added much later, so no
// upgrades are available.
return pconn
}
uc, ok := pconn.(*net.UDPConn)
if !ok {
return pconn
}
b := &linuxBatchingConn{
pc: pconn,
getGSOSizeFromControl: getGSOSizeFromControl,
setGSOSizeInControl: setGSOSizeInControl,
sendBatchPool: sync.Pool{
New: func() any {
ua := &net.UDPAddr{
IP: make([]byte, 16),
}
msgs := make([]ipv6.Message, batchSize)
for i := range msgs {
msgs[i].Buffers = make([][]byte, 1)
msgs[i].Addr = ua
msgs[i].OOB = make([]byte, controlMessageSize)
}
return &sendBatch{
ua: ua,
msgs: msgs,
}
},
},
}
switch network {
case "udp4":
b.xpc = ipv4.NewPacketConn(uc)
case "udp6":
b.xpc = ipv6.NewPacketConn(uc)
default:
panic("bogus network")
}
var txOffload bool
txOffload, b.rxOffload = tryEnableUDPOffload(uc)
b.txOffload.Store(txOffload)
return b
}

View File

@@ -0,0 +1,55 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"errors"
"net"
"net/netip"
"sync"
"syscall"
"time"
)
// blockForeverConn is a net.PacketConn whose reads block until it is closed.
type blockForeverConn struct {
mu sync.Mutex
cond *sync.Cond
closed bool
}
func (c *blockForeverConn) ReadFromUDPAddrPort(p []byte) (n int, addr netip.AddrPort, err error) {
c.mu.Lock()
for !c.closed {
c.cond.Wait()
}
c.mu.Unlock()
return 0, netip.AddrPort{}, net.ErrClosed
}
func (c *blockForeverConn) WriteToUDPAddrPort(p []byte, addr netip.AddrPort) (int, error) {
// Silently drop writes.
return len(p), nil
}
func (c *blockForeverConn) LocalAddr() net.Addr {
// Return a *net.UDPAddr because lots of code assumes that it will.
return new(net.UDPAddr)
}
func (c *blockForeverConn) Close() error {
c.mu.Lock()
defer c.mu.Unlock()
if c.closed {
return net.ErrClosed
}
c.closed = true
c.cond.Broadcast()
return nil
}
func (c *blockForeverConn) SetDeadline(t time.Time) error { return errors.New("unimplemented") }
func (c *blockForeverConn) SetReadDeadline(t time.Time) error { return errors.New("unimplemented") }
func (c *blockForeverConn) SetWriteDeadline(t time.Time) error { return errors.New("unimplemented") }
func (c *blockForeverConn) SyscallConn() (syscall.RawConn, error) { return nil, errUnsupportedConnType }

182
vendor/tailscale.com/wgengine/magicsock/cloudinfo.go generated vendored Normal file
View File

@@ -0,0 +1,182 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !(ios || android || js)
package magicsock
import (
"context"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/netip"
"slices"
"strings"
"time"
"tailscale.com/types/logger"
"tailscale.com/util/cloudenv"
)
const maxCloudInfoWait = 2 * time.Second
type cloudInfo struct {
client http.Client
logf logger.Logf
// The following parameters are fixed for the lifetime of the cloudInfo
// object, but are used for testing.
cloud cloudenv.Cloud
endpoint string
}
func newCloudInfo(logf logger.Logf) *cloudInfo {
tr := &http.Transport{
DisableKeepAlives: true,
Dial: (&net.Dialer{
Timeout: maxCloudInfoWait,
}).Dial,
}
return &cloudInfo{
client: http.Client{Transport: tr},
logf: logf,
cloud: cloudenv.Get(),
endpoint: "http://" + cloudenv.CommonNonRoutableMetadataIP,
}
}
// GetPublicIPs returns any public IPs attached to the current cloud instance,
// if the tailscaled process is running in a known cloud and there are any such
// IPs present.
func (ci *cloudInfo) GetPublicIPs(ctx context.Context) ([]netip.Addr, error) {
switch ci.cloud {
case cloudenv.AWS:
ret, err := ci.getAWS(ctx)
ci.logf("[v1] cloudinfo.GetPublicIPs: AWS: %v, %v", ret, err)
return ret, err
}
return nil, nil
}
// getAWSMetadata makes a request to the AWS metadata service at the given
// path, authenticating with the provided IMDSv2 token. The returned metadata
// is split by newline and returned as a slice.
func (ci *cloudInfo) getAWSMetadata(ctx context.Context, token, path string) ([]string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", ci.endpoint+path, nil)
if err != nil {
return nil, fmt.Errorf("creating request to %q: %w", path, err)
}
req.Header.Set("X-aws-ec2-metadata-token", token)
resp, err := ci.client.Do(req)
if err != nil {
return nil, fmt.Errorf("making request to metadata service %q: %w", path, err)
}
defer resp.Body.Close()
switch resp.StatusCode {
case http.StatusOK:
// Good
case http.StatusNotFound:
// Nothing found, but this isn't an error; just return
return nil, nil
default:
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("reading response body for %q: %w", path, err)
}
return strings.Split(strings.TrimSpace(string(body)), "\n"), nil
}
// getAWS returns all public IPv4 and IPv6 addresses present in the AWS instance metadata.
func (ci *cloudInfo) getAWS(ctx context.Context) ([]netip.Addr, error) {
ctx, cancel := context.WithTimeout(ctx, maxCloudInfoWait)
defer cancel()
// Get a token so we can query the metadata service.
req, err := http.NewRequestWithContext(ctx, "PUT", ci.endpoint+"/latest/api/token", nil)
if err != nil {
return nil, fmt.Errorf("creating token request: %w", err)
}
req.Header.Set("X-Aws-Ec2-Metadata-Token-Ttl-Seconds", "10")
resp, err := ci.client.Do(req)
if err != nil {
return nil, fmt.Errorf("making token request to metadata service: %w", err)
}
body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
return nil, fmt.Errorf("reading token response body: %w", err)
}
token := string(body)
server := resp.Header.Get("Server")
if server != "EC2ws" {
return nil, fmt.Errorf("unexpected server header: %q", server)
}
// Iterate over all interfaces and get their public IP addresses, both IPv4 and IPv6.
macAddrs, err := ci.getAWSMetadata(ctx, token, "/latest/meta-data/network/interfaces/macs/")
if err != nil {
return nil, fmt.Errorf("getting interface MAC addresses: %w", err)
}
var (
addrs []netip.Addr
errs []error
)
addAddr := func(addr string) {
ip, err := netip.ParseAddr(addr)
if err != nil {
errs = append(errs, fmt.Errorf("parsing IP address %q: %w", addr, err))
return
}
addrs = append(addrs, ip)
}
for _, mac := range macAddrs {
ips, err := ci.getAWSMetadata(ctx, token, "/latest/meta-data/network/interfaces/macs/"+mac+"/public-ipv4s")
if err != nil {
errs = append(errs, fmt.Errorf("getting IPv4 addresses for %q: %w", mac, err))
continue
}
for _, ip := range ips {
addAddr(ip)
}
// Try querying for IPv6 addresses.
ips, err = ci.getAWSMetadata(ctx, token, "/latest/meta-data/network/interfaces/macs/"+mac+"/ipv6s")
if err != nil {
errs = append(errs, fmt.Errorf("getting IPv6 addresses for %q: %w", mac, err))
continue
}
for _, ip := range ips {
addAddr(ip)
}
}
// Sort the returned addresses for determinism.
slices.SortFunc(addrs, func(a, b netip.Addr) int {
return a.Compare(b)
})
// Preferentially return any addresses we found, even if there were errors.
if len(addrs) > 0 {
return addrs, nil
}
if len(errs) > 0 {
return nil, fmt.Errorf("getting IP addresses: %w", errors.Join(errs...))
}
return nil, nil
}

View File

@@ -0,0 +1,23 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ios || android || js
package magicsock
import (
"context"
"net/netip"
"tailscale.com/types/logger"
)
type cloudInfo struct{}
func newCloudInfo(_ logger.Logf) *cloudInfo {
return &cloudInfo{}
}
func (ci *cloudInfo) GetPublicIPs(_ context.Context) ([]netip.Addr, error) {
return nil, nil
}

205
vendor/tailscale.com/wgengine/magicsock/debughttp.go generated vendored Normal file
View File

@@ -0,0 +1,205 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"fmt"
"html"
"io"
"net/http"
"net/netip"
"sort"
"strings"
"time"
"tailscale.com/tailcfg"
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
)
// ServeHTTPDebug serves an HTML representation of the innards of c for debugging.
//
// It's accessible either from tailscaled's debug port (at
// /debug/magicsock) or via peerapi to a peer that's owned by the same
// user (so they can e.g. inspect their phones).
func (c *Conn) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
c.mu.Lock()
defer c.mu.Unlock()
now := time.Now()
w.Header().Set("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h1>magicsock</h1>")
fmt.Fprintf(w, "<h2 id=derp><a href=#derp>#</a> DERP</h2><ul>")
if c.derpMap != nil {
type D struct {
regionID int
lastWrite time.Time
createTime time.Time
}
ent := make([]D, 0, len(c.activeDerp))
for rid, ad := range c.activeDerp {
ent = append(ent, D{
regionID: rid,
lastWrite: *ad.lastWrite,
createTime: ad.createTime,
})
}
sort.Slice(ent, func(i, j int) bool {
return ent[i].regionID < ent[j].regionID
})
for _, e := range ent {
r, ok := c.derpMap.Regions[e.regionID]
if !ok {
continue
}
home := ""
if e.regionID == c.myDerp {
home = "🏠"
}
fmt.Fprintf(w, "<li>%s %d - %v: created %v ago, write %v ago</li>\n",
home, e.regionID, html.EscapeString(r.RegionCode),
now.Sub(e.createTime).Round(time.Second),
now.Sub(e.lastWrite).Round(time.Second),
)
}
}
fmt.Fprintf(w, "</ul>\n")
fmt.Fprintf(w, "<h2 id=ipport><a href=#ipport>#</a> ip:port to endpoint</h2><ul>")
{
type kv struct {
ipp netip.AddrPort
pi *peerInfo
}
ent := make([]kv, 0, len(c.peerMap.byIPPort))
for k, v := range c.peerMap.byIPPort {
ent = append(ent, kv{k, v})
}
sort.Slice(ent, func(i, j int) bool { return ipPortLess(ent[i].ipp, ent[j].ipp) })
for _, e := range ent {
ep := e.pi.ep
shortStr := ep.publicKey.ShortString()
fmt.Fprintf(w, "<li>%v: <a href='#%v'>%v</a></li>\n", e.ipp, strings.Trim(shortStr, "[]"), shortStr)
}
}
fmt.Fprintf(w, "</ul>\n")
fmt.Fprintf(w, "<h2 id=bykey><a href=#bykey>#</a> endpoints by key</h2>")
{
type kv struct {
pub key.NodePublic
pi *peerInfo
}
ent := make([]kv, 0, len(c.peerMap.byNodeKey))
for k, v := range c.peerMap.byNodeKey {
ent = append(ent, kv{k, v})
}
sort.Slice(ent, func(i, j int) bool { return ent[i].pub.Less(ent[j].pub) })
peers := map[key.NodePublic]tailcfg.NodeView{}
for i := range c.peers.Len() {
p := c.peers.At(i)
peers[p.Key()] = p
}
for _, e := range ent {
ep := e.pi.ep
shortStr := e.pub.ShortString()
name := peerDebugName(peers[e.pub])
fmt.Fprintf(w, "<h3 id=%v><a href='#%v'>%v</a> - %s</h3>\n",
strings.Trim(shortStr, "[]"),
strings.Trim(shortStr, "[]"),
shortStr,
html.EscapeString(name))
printEndpointHTML(w, ep)
}
}
}
func printEndpointHTML(w io.Writer, ep *endpoint) {
lastRecv := ep.lastRecvWG.LoadAtomic()
ep.mu.Lock()
defer ep.mu.Unlock()
if ep.lastSendExt == 0 && lastRecv == 0 {
return // no activity ever
}
now := time.Now()
mnow := mono.Now()
fmtMono := func(m mono.Time) string {
if m == 0 {
return "-"
}
return mnow.Sub(m).Round(time.Millisecond).String()
}
fmt.Fprintf(w, "<p>Best: <b>%+v</b>, %v ago (for %v)</p>\n", ep.bestAddr, fmtMono(ep.bestAddrAt), ep.trustBestAddrUntil.Sub(mnow).Round(time.Millisecond))
fmt.Fprintf(w, "<p>heartbeating: %v</p>\n", ep.heartBeatTimer != nil)
fmt.Fprintf(w, "<p>lastSend: %v ago</p>\n", fmtMono(ep.lastSendExt))
fmt.Fprintf(w, "<p>lastFullPing: %v ago</p>\n", fmtMono(ep.lastFullPing))
eps := make([]netip.AddrPort, 0, len(ep.endpointState))
for ipp := range ep.endpointState {
eps = append(eps, ipp)
}
sort.Slice(eps, func(i, j int) bool { return ipPortLess(eps[i], eps[j]) })
io.WriteString(w, "<p>Endpoints:</p><ul>")
for _, ipp := range eps {
s := ep.endpointState[ipp]
if ipp == ep.bestAddr.AddrPort {
fmt.Fprintf(w, "<li><b>%s</b>: (best)<ul>", ipp)
} else {
fmt.Fprintf(w, "<li>%s: ...<ul>", ipp)
}
fmt.Fprintf(w, "<li>lastPing: %v ago</li>\n", fmtMono(s.lastPing))
if s.lastGotPing.IsZero() {
fmt.Fprintf(w, "<li>disco-learned-at: -</li>\n")
} else {
fmt.Fprintf(w, "<li>disco-learned-at: %v ago</li>\n", now.Sub(s.lastGotPing).Round(time.Second))
}
fmt.Fprintf(w, "<li>callMeMaybeTime: %v</li>\n", s.callMeMaybeTime)
for i := range s.recentPongs {
if i == 5 {
break
}
pos := (int(s.recentPong) - i) % len(s.recentPongs)
// If s.recentPongs wraps around pos will be negative, so start
// again from the end of the slice.
if pos < 0 {
pos += len(s.recentPongs)
}
pr := s.recentPongs[pos]
fmt.Fprintf(w, "<li>pong %v ago: in %v, from %v src %v</li>\n",
fmtMono(pr.pongAt), pr.latency.Round(time.Millisecond/10),
pr.from, pr.pongSrc)
}
fmt.Fprintf(w, "</ul></li>\n")
}
io.WriteString(w, "</ul>")
}
func peerDebugName(p tailcfg.NodeView) string {
if !p.Valid() {
return ""
}
n := p.Name()
if base, _, ok := strings.Cut(n, "."); ok {
return base
}
return p.Hostinfo().Hostname()
}
func ipPortLess(a, b netip.AddrPort) bool {
if v := a.Addr().Compare(b.Addr()); v != 0 {
return v < 0
}
return a.Port() < b.Port()
}

97
vendor/tailscale.com/wgengine/magicsock/debugknobs.go generated vendored Normal file
View File

@@ -0,0 +1,97 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !ios && !js
package magicsock
import (
"log"
"net/netip"
"strings"
"sync"
"tailscale.com/envknob"
)
// Various debugging and experimental tweakables, set by environment
// variable.
var (
// debugDisco prints verbose logs of active discovery events as
// they happen.
debugDisco = envknob.RegisterBool("TS_DEBUG_DISCO")
// debugPeerMap prints verbose logs of changes to the peermap.
debugPeerMap = envknob.RegisterBool("TS_DEBUG_MAGICSOCK_PEERMAP")
// debugOmitLocalAddresses removes all local interface addresses
// from magicsock's discovered local endpoints. Used in some tests.
debugOmitLocalAddresses = envknob.RegisterBool("TS_DEBUG_OMIT_LOCAL_ADDRS")
// logDerpVerbose logs all received DERP packets, including their
// full payload.
logDerpVerbose = envknob.RegisterBool("TS_DEBUG_DERP")
// debugReSTUNStopOnIdle unconditionally enables the "shut down
// STUN if magicsock is idle" behavior that normally only triggers
// on mobile devices, lowers the shutdown interval, and logs more
// verbosely about idle measurements.
debugReSTUNStopOnIdle = envknob.RegisterBool("TS_DEBUG_RESTUN_STOP_ON_IDLE")
// debugAlwaysDERP disables the use of UDP, forcing all peer communication over DERP.
debugAlwaysDERP = envknob.RegisterBool("TS_DEBUG_ALWAYS_USE_DERP")
// debugDERPAddr sets the derp address manually, overriding the DERP map from control.
debugUseDERPAddr = envknob.RegisterString("TS_DEBUG_USE_DERP_ADDR")
// debugDERPUseHTTP tells clients to connect to DERP via HTTP on port 3340 instead of
// HTTPS on 443.
debugUseDERPHTTP = envknob.RegisterBool("TS_DEBUG_USE_DERP_HTTP")
// debugEnableSilentDisco disables the use of heartbeatTimer on the endpoint struct
// and attempts to handle disco silently. See issue #540 for details.
debugEnableSilentDisco = envknob.RegisterBool("TS_DEBUG_ENABLE_SILENT_DISCO")
// debugSendCallMeUnknownPeer sends a CallMeMaybe to a non-existent destination every
// time we send a real CallMeMaybe to test the PeerGoneNotHere logic.
debugSendCallMeUnknownPeer = envknob.RegisterBool("TS_DEBUG_SEND_CALLME_UNKNOWN_PEER")
// debugBindSocket prints extra debugging about socket rebinding in magicsock.
debugBindSocket = envknob.RegisterBool("TS_DEBUG_MAGICSOCK_BIND_SOCKET")
// debugRingBufferMaxSizeBytes overrides the default size of the endpoint
// history ringbuffer.
debugRingBufferMaxSizeBytes = envknob.RegisterInt("TS_DEBUG_MAGICSOCK_RING_BUFFER_MAX_SIZE_BYTES")
// debugEnablePMTUD enables the peer MTU feature, which does path MTU
// discovery on UDP connections between peers. Currently (2023-09-05)
// this only turns on the don't fragment bit for the magicsock UDP
// sockets.
//
//lint:ignore U1000 used on Linux/Darwin only
debugEnablePMTUD = envknob.RegisterOptBool("TS_DEBUG_ENABLE_PMTUD")
// debugPMTUD prints extra debugging about peer MTU path discovery.
//
//lint:ignore U1000 used on Linux/Darwin only
debugPMTUD = envknob.RegisterBool("TS_DEBUG_PMTUD")
// Hey you! Adding a new debugknob? Make sure to stub it out in the
// debugknobs_stubs.go file too.
)
// inTest reports whether the running program is a test that set the
// IN_TS_TEST environment variable.
//
// Unlike the other debug tweakables above, this one needs to be
// checked every time at runtime, because tests set this after program
// startup.
func inTest() bool { return envknob.Bool("IN_TS_TEST") }
// pretendpoints returns TS_DEBUG_PRETENDPOINT as []AddrPort, if set.
// See https://github.com/tailscale/tailscale/issues/12578 and
// https://github.com/tailscale/tailscale/pull/12735.
//
// It can be between 0 and 3 comma-separated AddrPorts.
var pretendpoints = sync.OnceValue(func() (ret []netip.AddrPort) {
all := envknob.String("TS_DEBUG_PRETENDPOINT")
const max = 3
remain := all
for remain != "" && len(ret) < max {
var s string
s, remain, _ = strings.Cut(remain, ",")
ap, err := netip.ParseAddrPort(s)
if err != nil {
log.Printf("ignoring invalid AddrPort %q in TS_DEBUG_PRETENDPOINT %q: %v", s, all, err)
continue
}
ret = append(ret, ap)
}
return
})

View File

@@ -0,0 +1,33 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ios || js
package magicsock
import (
"net/netip"
"tailscale.com/types/opt"
)
// All knobs are disabled on iOS and Wasm.
//
// They're inlinable and the linker can deadcode that's guarded by them to make
// smaller binaries.
func debugBindSocket() bool { return false }
func debugDisco() bool { return false }
func debugOmitLocalAddresses() bool { return false }
func logDerpVerbose() bool { return false }
func debugReSTUNStopOnIdle() bool { return false }
func debugAlwaysDERP() bool { return false }
func debugUseDERPHTTP() bool { return false }
func debugEnableSilentDisco() bool { return false }
func debugSendCallMeUnknownPeer() bool { return false }
func debugPMTUD() bool { return false }
func debugUseDERPAddr() string { return "" }
func debugEnablePMTUD() opt.Bool { return "" }
func debugRingBufferMaxSizeBytes() int { return 0 }
func inTest() bool { return false }
func debugPeerMap() bool { return false }
func pretendpoints() []netip.AddrPort { return []netip.AddrPort{} }

1004
vendor/tailscale.com/wgengine/magicsock/derp.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,29 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Code generated by "stringer -type=discoPingPurpose -trimprefix=ping"; DO NOT EDIT.
package magicsock
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[pingDiscovery-0]
_ = x[pingHeartbeat-1]
_ = x[pingCLI-2]
_ = x[pingHeartbeatForUDPLifetime-3]
}
const _discoPingPurpose_name = "DiscoveryHeartbeatCLIHeartbeatForUDPLifetime"
var _discoPingPurpose_index = [...]uint8{0, 9, 18, 21, 44}
func (i discoPingPurpose) String() string {
if i < 0 || i >= discoPingPurpose(len(_discoPingPurpose_index)-1) {
return "discoPingPurpose(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _discoPingPurpose_name[_discoPingPurpose_index[i]:_discoPingPurpose_index[i+1]]
}

1855
vendor/tailscale.com/wgengine/magicsock/endpoint.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,22 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !js && !wasm && !plan9
package magicsock
import (
"errors"
"syscall"
)
// errHOSTUNREACH wraps unix.EHOSTUNREACH in an interface type to pass to
// errors.Is while avoiding an allocation per call.
var errHOSTUNREACH error = syscall.EHOSTUNREACH
// isBadEndpointErr checks if err is one which is known to report that an
// endpoint can no longer be sent to. It is not exhaustive, and for unknown
// errors always reports false.
func isBadEndpointErr(err error) bool {
return errors.Is(err, errHOSTUNREACH)
}

View File

@@ -0,0 +1,13 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build wasm || plan9
package magicsock
// isBadEndpointErr checks if err is one which is known to report that an
// endpoint can no longer be sent to. It is not exhaustive, but covers known
// cases.
func isBadEndpointErr(err error) bool {
return false
}

View File

@@ -0,0 +1,248 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"net/netip"
"slices"
"sync"
"time"
"tailscale.com/tailcfg"
"tailscale.com/tempfork/heap"
"tailscale.com/util/mak"
"tailscale.com/util/set"
)
const (
// endpointTrackerLifetime is how long we continue advertising an
// endpoint after we last see it. This is intentionally chosen to be
// slightly longer than a full netcheck period.
endpointTrackerLifetime = 5*time.Minute + 10*time.Second
// endpointTrackerMaxPerAddr is how many cached addresses we track for
// a given netip.Addr. This allows e.g. restricting the number of STUN
// endpoints we cache (which usually have the same netip.Addr but
// different ports).
//
// The value of 6 is chosen because we can advertise up to 3 endpoints
// based on the STUN IP:
// 1. The STUN endpoint itself (EndpointSTUN)
// 2. The STUN IP with the local Tailscale port (EndpointSTUN4LocalPort)
// 3. The STUN IP with a portmapped port (EndpointPortmapped)
//
// Storing 6 endpoints in the cache means we can store up to 2 previous
// sets of endpoints.
endpointTrackerMaxPerAddr = 6
)
// endpointTrackerEntry is an entry in an endpointHeap that stores the state of
// a given cached endpoint.
type endpointTrackerEntry struct {
// endpoint is the cached endpoint.
endpoint tailcfg.Endpoint
// until is the time until which this endpoint is being cached.
until time.Time
// index is the index within the containing endpointHeap.
index int
}
// endpointHeap is an ordered heap of endpointTrackerEntry structs, ordered in
// ascending order by the 'until' expiry time (i.e. oldest first).
type endpointHeap []*endpointTrackerEntry
var _ heap.Interface[*endpointTrackerEntry] = (*endpointHeap)(nil)
// Len implements heap.Interface.
func (eh endpointHeap) Len() int { return len(eh) }
// Less implements heap.Interface.
func (eh endpointHeap) Less(i, j int) bool {
// We want to store items so that the lowest item in the heap is the
// oldest, so that heap.Pop()-ing from the endpointHeap will remove the
// oldest entry.
return eh[i].until.Before(eh[j].until)
}
// Swap implements heap.Interface.
func (eh endpointHeap) Swap(i, j int) {
eh[i], eh[j] = eh[j], eh[i]
eh[i].index = i
eh[j].index = j
}
// Push implements heap.Interface.
func (eh *endpointHeap) Push(item *endpointTrackerEntry) {
n := len(*eh)
item.index = n
*eh = append(*eh, item)
}
// Pop implements heap.Interface.
func (eh *endpointHeap) Pop() *endpointTrackerEntry {
old := *eh
n := len(old)
item := old[n-1]
old[n-1] = nil // avoid memory leak
item.index = -1 // for safety
*eh = old[0 : n-1]
return item
}
// Min returns a pointer to the minimum element in the heap, without removing
// it. Since this is a min-heap ordered by the 'until' field, this returns the
// chronologically "earliest" element in the heap.
//
// Len() must be non-zero.
func (eh endpointHeap) Min() *endpointTrackerEntry {
return eh[0]
}
// endpointTracker caches endpoints that are advertised to peers. This allows
// peers to still reach this node if there's a temporary endpoint flap; rather
// than withdrawing an endpoint and then re-advertising it the next time we run
// a netcheck, we keep advertising the endpoint until it's not present for a
// defined timeout.
//
// See tailscale/tailscale#7877 for more information.
type endpointTracker struct {
mu sync.Mutex
endpoints map[netip.Addr]*endpointHeap
}
// update takes as input the current sent of discovered endpoints and the
// current time, and returns the set of endpoints plus any previous-cached and
// non-expired endpoints that should be advertised to peers.
func (et *endpointTracker) update(now time.Time, eps []tailcfg.Endpoint) (epsPlusCached []tailcfg.Endpoint) {
var inputEps set.Slice[netip.AddrPort]
for _, ep := range eps {
inputEps.Add(ep.Addr)
}
et.mu.Lock()
defer et.mu.Unlock()
// Extend endpoints that already exist in the cache. We do this before
// we remove expired endpoints, below, so we don't remove something
// that would otherwise have survived by extending.
until := now.Add(endpointTrackerLifetime)
for _, ep := range eps {
et.extendLocked(ep, until)
}
// Now that we've extended existing endpoints, remove everything that
// has expired.
et.removeExpiredLocked(now)
// Add entries from the input set of endpoints into the cache; we do
// this after removing expired ones so that we can store as many as
// possible, with space freed by the entries removed after expiry.
for _, ep := range eps {
et.addLocked(now, ep, until)
}
// Finally, add entries to the return array that aren't already there.
epsPlusCached = eps
for _, heap := range et.endpoints {
for _, ep := range *heap {
// If the endpoint was in the input list, or has expired, skip it.
if inputEps.Contains(ep.endpoint.Addr) {
continue
} else if now.After(ep.until) {
// Defense-in-depth; should never happen since
// we removed expired entries above, but ignore
// it anyway.
continue
}
// We haven't seen this endpoint; add to the return array
epsPlusCached = append(epsPlusCached, ep.endpoint)
}
}
return epsPlusCached
}
// extendLocked will update the expiry time of the provided endpoint in the
// cache, if it is present. If it is not present, nothing will be done.
//
// et.mu must be held.
func (et *endpointTracker) extendLocked(ep tailcfg.Endpoint, until time.Time) {
key := ep.Addr.Addr()
epHeap, found := et.endpoints[key]
if !found {
return
}
// Find the entry for this exact address; this loop is quick since we
// bound the number of items in the heap.
//
// TODO(andrew): this means we iterate over the entire heap once per
// endpoint; even if the heap is small, if we have a lot of input
// endpoints this can be expensive?
for i, entry := range *epHeap {
if entry.endpoint == ep {
entry.until = until
heap.Fix(epHeap, i)
return
}
}
}
// addLocked will store the provided endpoint(s) in the cache for a fixed
// period of time, ensuring that the size of the endpoint cache remains below
// the maximum.
//
// et.mu must be held.
func (et *endpointTracker) addLocked(now time.Time, ep tailcfg.Endpoint, until time.Time) {
key := ep.Addr.Addr()
// Create or get the heap for this endpoint's addr
epHeap := et.endpoints[key]
if epHeap == nil {
epHeap = new(endpointHeap)
mak.Set(&et.endpoints, key, epHeap)
}
// Find the entry for this exact address; this loop is quick
// since we bound the number of items in the heap.
found := slices.ContainsFunc(*epHeap, func(v *endpointTrackerEntry) bool {
return v.endpoint == ep
})
if !found {
// Add address to heap; either the endpoint is new, or the heap
// was newly-created and thus empty.
heap.Push(epHeap, &endpointTrackerEntry{endpoint: ep, until: until})
}
// Now that we've added everything, pop from our heap until we're below
// the limit. This is a min-heap, so popping removes the lowest (and
// thus oldest) endpoint.
for epHeap.Len() > endpointTrackerMaxPerAddr {
heap.Pop(epHeap)
}
}
// removeExpired will remove all expired entries from the cache.
//
// et.mu must be held.
func (et *endpointTracker) removeExpiredLocked(now time.Time) {
for k, epHeap := range et.endpoints {
// The minimum element is oldest/earliest endpoint; repeatedly
// pop from the heap while it's in the past.
for epHeap.Len() > 0 {
minElem := epHeap.Min()
if now.After(minElem.until) {
heap.Pop(epHeap)
} else {
break
}
}
if epHeap.Len() == 0 {
// Free up space in the map by removing the empty heap.
delete(et.endpoints, k)
}
}
}

3071
vendor/tailscale.com/wgengine/magicsock/magicsock.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,27 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !linux
package magicsock
import (
"errors"
"fmt"
"io"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
func (c *Conn) listenRawDisco(family string) (io.Closer, error) {
return nil, fmt.Errorf("raw disco listening not supported on this OS: %w", errors.ErrUnsupported)
}
func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
portableTrySetSocketBuffer(pconn, logf)
}
const (
controlMessageSize = 0
)

View File

@@ -0,0 +1,520 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"net"
"net/netip"
"strings"
"syscall"
"time"
"github.com/mdlayher/socket"
"golang.org/x/net/bpf"
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"golang.org/x/sys/cpu"
"golang.org/x/sys/unix"
"tailscale.com/disco"
"tailscale.com/envknob"
"tailscale.com/net/netns"
"tailscale.com/types/ipproto"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
const (
udpHeaderSize = 8
// discoMinHeaderSize is the minimum size of the disco header in bytes.
discoMinHeaderSize = len(disco.Magic) + 32 /* key length */ + disco.NonceLen
)
var (
// Opt-in for using raw sockets to receive disco traffic; added for
// #13140 and replaces the older "TS_DEBUG_DISABLE_RAW_DISCO".
envknobEnableRawDisco = envknob.RegisterBool("TS_ENABLE_RAW_DISCO")
)
// debugRawDiscoReads enables logging of raw disco reads.
var debugRawDiscoReads = envknob.RegisterBool("TS_DEBUG_RAW_DISCO")
// These are our BPF filters that we use for testing packets.
var (
magicsockFilterV4 = []bpf.Instruction{
// For raw sockets (with ETH_P_IP set), the BPF program
// receives the entire IPv4 packet, but not the Ethernet
// header.
// Double-check that this is a UDP packet; we shouldn't be
// seeing anything else given how we create our AF_PACKET
// socket, but an extra check here is cheap, and matches the
// check that we do in the IPv6 path.
bpf.LoadAbsolute{Off: 9, Size: 1},
bpf.JumpIf{Cond: bpf.JumpEqual, Val: uint32(ipproto.UDP), SkipTrue: 1, SkipFalse: 0},
bpf.RetConstant{Val: 0x0},
// Disco packets are so small they should never get
// fragmented, and we don't want to handle reassembly.
bpf.LoadAbsolute{Off: 6, Size: 2},
// More Fragments bit set means this is part of a fragmented packet.
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x2000, SkipTrue: 7, SkipFalse: 0},
// Non-zero fragment offset with MF=0 means this is the last
// fragment of packet.
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x1fff, SkipTrue: 6, SkipFalse: 0},
// Load IP header length into X register.
bpf.LoadMemShift{Off: 0},
// Verify that we have a packet that's big enough to (possibly)
// contain a disco packet.
//
// The length of an IPv4 disco packet is composed of:
// - 8 bytes for the UDP header
// - N bytes for the disco packet header
//
// bpf will implicitly return 0 ("skip") if attempting an
// out-of-bounds load, so we can check the length of the packet
// loading a byte from that offset here. We subtract 1 byte
// from the offset to ensure that we accept a packet that's
// exactly the minimum size.
//
// We use LoadIndirect; since we loaded the start of the packet's
// payload into the X register, above, we don't need to add
// ipv4.HeaderLen to the offset (and this properly handles IPv4
// extensions).
bpf.LoadIndirect{Off: uint32(udpHeaderSize + discoMinHeaderSize - 1), Size: 1},
// Get the first 4 bytes of the UDP packet, compare with our magic number
bpf.LoadIndirect{Off: udpHeaderSize, Size: 4},
bpf.JumpIf{Cond: bpf.JumpEqual, Val: discoMagic1, SkipTrue: 0, SkipFalse: 3},
// Compare the next 2 bytes
bpf.LoadIndirect{Off: udpHeaderSize + 4, Size: 2},
bpf.JumpIf{Cond: bpf.JumpEqual, Val: uint32(discoMagic2), SkipTrue: 0, SkipFalse: 1},
// Accept the whole packet
bpf.RetConstant{Val: 0xFFFFFFFF},
// Skip the packet
bpf.RetConstant{Val: 0x0},
}
// IPv6 is more complicated to filter, since we can have 0-to-N
// extension headers following the IPv6 header. Since BPF can't
// loop, we can't really parse these in a general way; instead, we
// simply handle the case where we have no extension headers; any
// packets with headers will be skipped. IPv6 extension headers
// are sufficiently uncommon that we're willing to accept false
// negatives here.
//
// The "proper" way to handle this would be to do minimal parsing in
// BPF and more in-depth parsing of all IPv6 packets in userspace, but
// on systems with a high volume of UDP that would be unacceptably slow
// and thus we'd rather be conservative here and possibly not receive
// disco packets rather than slow down the system.
magicsockFilterV6 = []bpf.Instruction{
// Do a bounds check to ensure we have enough space for a disco
// packet; see the comment in the IPv4 BPF program for more
// details.
bpf.LoadAbsolute{Off: uint32(ipv6.HeaderLen + udpHeaderSize + discoMinHeaderSize - 1), Size: 1},
// Verify that the 'next header' value of the IPv6 packet is
// UDP, which is what we're expecting; if it's anything else
// (including extension headers), we skip the packet.
bpf.LoadAbsolute{Off: 6, Size: 1},
bpf.JumpIf{Cond: bpf.JumpEqual, Val: uint32(ipproto.UDP), SkipTrue: 0, SkipFalse: 5},
// Compare with our magic number. Start by loading and
// comparing the first 4 bytes of the UDP payload.
bpf.LoadAbsolute{Off: ipv6.HeaderLen + udpHeaderSize, Size: 4},
bpf.JumpIf{Cond: bpf.JumpEqual, Val: discoMagic1, SkipTrue: 0, SkipFalse: 3},
// Compare the next 2 bytes
bpf.LoadAbsolute{Off: ipv6.HeaderLen + udpHeaderSize + 4, Size: 2},
bpf.JumpIf{Cond: bpf.JumpEqual, Val: discoMagic2, SkipTrue: 0, SkipFalse: 1},
// Accept the whole packet
bpf.RetConstant{Val: 0xFFFFFFFF},
// Skip the packet
bpf.RetConstant{Val: 0x0},
}
testDiscoPacket = []byte{
// Disco magic
0x54, 0x53, 0xf0, 0x9f, 0x92, 0xac,
// Sender key
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
// Nonce
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
}
)
// listenRawDisco starts listening for disco packets on the given
// address family, which must be "ip4" or "ip6", using a raw socket
// and BPF filter.
// https://github.com/tailscale/tailscale/issues/3824
func (c *Conn) listenRawDisco(family string) (io.Closer, error) {
if !envknobEnableRawDisco() {
// Return an 'errors.ErrUnsupported' to prevent the callee from
// logging; when we switch this to an opt-out (vs. an opt-in),
// drop the ErrUnsupported so that the callee logs that it was
// disabled.
return nil, fmt.Errorf("raw disco not enabled: %w", errors.ErrUnsupported)
}
// https://github.com/tailscale/tailscale/issues/5607
if !netns.UseSocketMark() {
return nil, errors.New("raw disco listening disabled, SO_MARK unavailable")
}
var (
udpnet string
addr string
proto int
testAddr netip.AddrPort
prog []bpf.Instruction
)
switch family {
case "ip4":
udpnet = "udp4"
addr = "0.0.0.0"
proto = ethernetProtoIPv4()
testAddr = netip.AddrPortFrom(netip.AddrFrom4([4]byte{127, 0, 0, 1}), 1)
prog = magicsockFilterV4
case "ip6":
udpnet = "udp6"
addr = "::"
proto = ethernetProtoIPv6()
testAddr = netip.AddrPortFrom(netip.IPv6Loopback(), 1)
prog = magicsockFilterV6
default:
return nil, fmt.Errorf("unsupported address family %q", family)
}
asm, err := bpf.Assemble(prog)
if err != nil {
return nil, fmt.Errorf("assembling filter: %w", err)
}
sock, err := socket.Socket(
unix.AF_PACKET,
unix.SOCK_DGRAM,
proto,
"afpacket",
nil, // no config
)
if err != nil {
return nil, fmt.Errorf("creating AF_PACKET socket: %w", err)
}
if err := sock.SetBPF(asm); err != nil {
sock.Close()
return nil, fmt.Errorf("installing BPF filter: %w", err)
}
// If all the above succeeds, we should be ready to receive. Just
// out of paranoia, check that we do receive a well-formed disco
// packet.
tc, err := net.ListenPacket(udpnet, net.JoinHostPort(addr, "0"))
if err != nil {
sock.Close()
return nil, fmt.Errorf("creating disco test socket: %w", err)
}
defer tc.Close()
if _, err := tc.(*net.UDPConn).WriteToUDPAddrPort(testDiscoPacket, testAddr); err != nil {
sock.Close()
return nil, fmt.Errorf("writing disco test packet: %w", err)
}
const selfTestTimeout = 100 * time.Millisecond
if err := sock.SetReadDeadline(time.Now().Add(selfTestTimeout)); err != nil {
sock.Close()
return nil, fmt.Errorf("setting socket timeout: %w", err)
}
var (
ctx = context.Background()
buf [1500]byte
)
for {
n, _, err := sock.Recvfrom(ctx, buf[:], 0)
if err != nil {
sock.Close()
return nil, fmt.Errorf("reading during raw disco self-test: %w", err)
}
_ /* src */, _ /* dst */, payload := parseUDPPacket(buf[:n], family == "ip6")
if payload == nil {
continue
}
if !bytes.Equal(payload, testDiscoPacket) {
c.discoLogf("listenRawDisco: self-test: received mismatched UDP packet of %d bytes", len(payload))
continue
}
c.logf("[v1] listenRawDisco: self-test passed for %s", family)
break
}
sock.SetReadDeadline(time.Time{})
go c.receiveDisco(sock, family == "ip6")
return sock, nil
}
// parseUDPPacket is a basic parser for UDP packets that returns the source and
// destination addresses, and the payload. The returned payload is a sub-slice
// of the input buffer.
//
// It expects to be called with a buffer that contains the entire UDP packet,
// including the IP header, and one that has been filtered with the BPF
// programs above.
//
// If an error occurs, it will return the zero values for all return values.
func parseUDPPacket(buf []byte, isIPv6 bool) (src, dst netip.AddrPort, payload []byte) {
// First, parse the IPv4 or IPv6 header to get to the UDP header. Since
// we assume this was filtered with BPF, we know that there will be no
// IPv6 extension headers.
var (
srcIP, dstIP netip.Addr
udp []byte
)
if isIPv6 {
// Basic length check to ensure that we don't panic
if len(buf) < ipv6.HeaderLen+udpHeaderSize {
return
}
// Extract the source and destination addresses from the IPv6
// header.
srcIP, _ = netip.AddrFromSlice(buf[8:24])
dstIP, _ = netip.AddrFromSlice(buf[24:40])
// We know that the UDP packet starts immediately after the IPv6
// packet.
udp = buf[ipv6.HeaderLen:]
} else {
// This is an IPv4 packet; read the length field from the header.
if len(buf) < ipv4.HeaderLen {
return
}
udpOffset := int((buf[0] & 0x0F) << 2)
if udpOffset+udpHeaderSize > len(buf) {
return
}
// Parse the source and destination IPs.
srcIP, _ = netip.AddrFromSlice(buf[12:16])
dstIP, _ = netip.AddrFromSlice(buf[16:20])
udp = buf[udpOffset:]
}
// Parse the ports
srcPort := binary.BigEndian.Uint16(udp[0:2])
dstPort := binary.BigEndian.Uint16(udp[2:4])
// The payload starts after the UDP header.
payload = udp[8:]
return netip.AddrPortFrom(srcIP, srcPort), netip.AddrPortFrom(dstIP, dstPort), payload
}
// ethernetProtoIPv4 returns the constant unix.ETH_P_IP, in network byte order.
// packet(7) sockets require that the 'protocol' argument be in network byte
// order; see:
//
// https://man7.org/linux/man-pages/man7/packet.7.html
//
// Instead of using htons at runtime, we can just hardcode the value here...
// but we also have a test that verifies that this is correct.
func ethernetProtoIPv4() int {
if cpu.IsBigEndian {
return 0x0800
} else {
return 0x0008
}
}
// ethernetProtoIPv6 returns the constant unix.ETH_P_IPV6, and is otherwise the
// same as ethernetProtoIPv4.
func ethernetProtoIPv6() int {
if cpu.IsBigEndian {
return 0x86dd
} else {
return 0xdd86
}
}
func (c *Conn) discoLogf(format string, args ...any) {
// Enable debug logging if we're debugging raw disco reads or if the
// magicsock component logs are on.
if debugRawDiscoReads() {
c.logf(format, args...)
} else {
c.dlogf(format, args...)
}
}
func (c *Conn) receiveDisco(pc *socket.Conn, isIPV6 bool) {
// Given that we're parsing raw packets, be extra careful and recover
// from any panics in this function.
//
// If we didn't have a recover() here and panic'd, we'd take down the
// entire process since this function is the top of a goroutine, and Go
// will kill the process if a goroutine panics and it unwinds past the
// top-level function.
defer func() {
if err := recover(); err != nil {
c.logf("[unexpected] recovered from panic in receiveDisco(isIPv6=%v): %v", isIPV6, err)
}
}()
ctx := context.Background()
// Set up our loggers
var family string
if isIPV6 {
family = "ip6"
} else {
family = "ip4"
}
var (
prefix string = "disco raw " + family + ": "
logf logger.Logf = logger.WithPrefix(c.logf, prefix)
dlogf logger.Logf = logger.WithPrefix(c.discoLogf, prefix)
)
var buf [1500]byte
for {
n, src, err := pc.Recvfrom(ctx, buf[:], 0)
if debugRawDiscoReads() {
logf("read from %s = (%v, %v)", printSockaddr(src), n, err)
}
if err != nil && (errors.Is(err, net.ErrClosed) || err.Error() == "use of closed file") {
// EOF; no need to print an error
return
} else if err != nil {
logf("reader failed: %v", err)
return
}
srcAddr, dstAddr, payload := parseUDPPacket(buf[:n], family == "ip6")
if payload == nil {
// callee logged
continue
}
dstPort := dstAddr.Port()
if dstPort == 0 {
logf("[unexpected] received packet for port 0")
}
var acceptPort uint16
if isIPV6 {
acceptPort = c.pconn6.Port()
} else {
acceptPort = c.pconn4.Port()
}
if acceptPort == 0 {
// This should only typically happen if the receiving address family
// was recently disabled.
dlogf("[v1] dropping packet for port %d as acceptPort=0", dstPort)
continue
}
// If the packet isn't destined for our local port, then we
// should drop it since it might be for another Tailscale
// process on the same machine, or NATed to a different machine
// if this is a router, etc.
//
// We get the local port to compare against inside the receive
// loop; we can't cache this beforehand because it can change
// if/when we rebind.
if dstPort != acceptPort {
dlogf("[v1] dropping packet for port %d that isn't our local port", dstPort)
continue
}
if isIPV6 {
metricRecvDiscoPacketIPv6.Add(1)
} else {
metricRecvDiscoPacketIPv4.Add(1)
}
c.handleDiscoMessage(payload, srcAddr, key.NodePublic{}, discoRXPathRawSocket)
}
}
// printSockaddr is a helper function to pretty-print various sockaddr types.
func printSockaddr(sa unix.Sockaddr) string {
switch sa := sa.(type) {
case *unix.SockaddrInet4:
addr := netip.AddrFrom4(sa.Addr)
return netip.AddrPortFrom(addr, uint16(sa.Port)).String()
case *unix.SockaddrInet6:
addr := netip.AddrFrom16(sa.Addr)
return netip.AddrPortFrom(addr, uint16(sa.Port)).String()
case *unix.SockaddrLinklayer:
hwaddr := sa.Addr[:sa.Halen]
var buf strings.Builder
fmt.Fprintf(&buf, "link(ty=0x%04x,if=%d):[", sa.Protocol, sa.Ifindex)
for i, b := range hwaddr {
if i > 0 {
buf.WriteByte(':')
}
fmt.Fprintf(&buf, "%02x", b)
}
buf.WriteByte(']')
return buf.String()
default:
return fmt.Sprintf("unknown(%T)", sa)
}
}
// trySetSocketBuffer attempts to set SO_SNDBUFFORCE and SO_RECVBUFFORCE which
// can overcome the limit of net.core.{r,w}mem_max, but require CAP_NET_ADMIN.
// It falls back to the portable implementation if that fails, which may be
// silently capped to net.core.{r,w}mem_max.
func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
if c, ok := pconn.(*net.UDPConn); ok {
var errRcv, errSnd error
rc, err := c.SyscallConn()
if err == nil {
rc.Control(func(fd uintptr) {
errRcv = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, socketBufferSize)
if errRcv != nil {
logf("magicsock: [warning] failed to force-set UDP read buffer size to %d: %v; using kernel default values (impacts throughput only)", socketBufferSize, errRcv)
}
errSnd = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_SNDBUFFORCE, socketBufferSize)
if errSnd != nil {
logf("magicsock: [warning] failed to force-set UDP write buffer size to %d: %v; using kernel default values (impacts throughput only)", socketBufferSize, errSnd)
}
})
}
if err != nil || errRcv != nil || errSnd != nil {
portableTrySetSocketBuffer(pconn, logf)
}
}
}
var controlMessageSize = -1 // bomb if used for allocation before init
func init() {
// controlMessageSize is set to hold a UDP_GRO or UDP_SEGMENT control
// message. These contain a single uint16 of data.
controlMessageSize = unix.CmsgSpace(2)
}

View File

@@ -0,0 +1,13 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !windows
package magicsock
import (
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) {}

View File

@@ -0,0 +1,58 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build windows
package magicsock
import (
"net"
"unsafe"
"golang.org/x/sys/windows"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) {
c, ok := pconn.(*net.UDPConn)
if !ok {
// not a UDP connection; nothing to do
return
}
sysConn, err := c.SyscallConn()
if err != nil {
logf("trySetUDPSocketOptions: getting SyscallConn failed: %v", err)
return
}
// Similar to https://github.com/golang/go/issues/5834 (which involved
// WSAECONNRESET), Windows can return a WSAENETRESET error, even on UDP
// reads. Disable this.
const SIO_UDP_NETRESET = windows.IOC_IN | windows.IOC_VENDOR | 15
var ioctlErr error
err = sysConn.Control(func(fd uintptr) {
ret := uint32(0)
flag := uint32(0)
size := uint32(unsafe.Sizeof(flag))
ioctlErr = windows.WSAIoctl(
windows.Handle(fd),
SIO_UDP_NETRESET, // iocc
(*byte)(unsafe.Pointer(&flag)), // inbuf
size, // cbif
nil, // outbuf
0, // cbob
&ret, // cbbr
nil, // overlapped
0, // completionRoutine
)
})
if ioctlErr != nil {
logf("trySetUDPSocketOptions: could not set SIO_UDP_NETRESET: %v", ioctlErr)
}
if err != nil {
logf("trySetUDPSocketOptions: SyscallConn.Control failed: %v", err)
}
}

209
vendor/tailscale.com/wgengine/magicsock/peermap.go generated vendored Normal file
View File

@@ -0,0 +1,209 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"net/netip"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/util/set"
)
// peerInfo is all the information magicsock tracks about a particular
// peer.
type peerInfo struct {
ep *endpoint // always non-nil.
// ipPorts is an inverted version of peerMap.byIPPort (below), so
// that when we're deleting this node, we can rapidly find out the
// keys that need deleting from peerMap.byIPPort without having to
// iterate over every IPPort known for any peer.
ipPorts set.Set[netip.AddrPort]
}
func newPeerInfo(ep *endpoint) *peerInfo {
return &peerInfo{
ep: ep,
ipPorts: set.Set[netip.AddrPort]{},
}
}
// peerMap is an index of peerInfos by node (WireGuard) key, disco
// key, and discovered ip:port endpoints.
//
// It doesn't do any locking; all access must be done with Conn.mu held.
type peerMap struct {
byNodeKey map[key.NodePublic]*peerInfo
byIPPort map[netip.AddrPort]*peerInfo
byNodeID map[tailcfg.NodeID]*peerInfo
// nodesOfDisco contains the set of nodes that are using a
// DiscoKey. Usually those sets will be just one node.
nodesOfDisco map[key.DiscoPublic]set.Set[key.NodePublic]
}
func newPeerMap() peerMap {
return peerMap{
byNodeKey: map[key.NodePublic]*peerInfo{},
byIPPort: map[netip.AddrPort]*peerInfo{},
byNodeID: map[tailcfg.NodeID]*peerInfo{},
nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{},
}
}
// nodeCount returns the number of nodes currently in m.
func (m *peerMap) nodeCount() int {
if len(m.byNodeKey) != len(m.byNodeID) {
devPanicf("internal error: peerMap.byNodeKey and byNodeID out of sync")
}
return len(m.byNodeKey)
}
// knownPeerDiscoKey reports whether there exists any peer with the disco key
// dk.
func (m *peerMap) knownPeerDiscoKey(dk key.DiscoPublic) bool {
_, ok := m.nodesOfDisco[dk]
return ok
}
// endpointForNodeKey returns the endpoint for nk, or nil if
// nk is not known to us.
func (m *peerMap) endpointForNodeKey(nk key.NodePublic) (ep *endpoint, ok bool) {
if nk.IsZero() {
return nil, false
}
if info, ok := m.byNodeKey[nk]; ok {
return info.ep, true
}
return nil, false
}
// endpointForNodeID returns the endpoint for nodeID, or nil if
// nodeID is not known to us.
func (m *peerMap) endpointForNodeID(nodeID tailcfg.NodeID) (ep *endpoint, ok bool) {
if info, ok := m.byNodeID[nodeID]; ok {
return info.ep, true
}
return nil, false
}
// endpointForIPPort returns the endpoint for the peer we
// believe to be at ipp, or nil if we don't know of any such peer.
func (m *peerMap) endpointForIPPort(ipp netip.AddrPort) (ep *endpoint, ok bool) {
if info, ok := m.byIPPort[ipp]; ok {
return info.ep, true
}
return nil, false
}
// forEachEndpoint invokes f on every endpoint in m.
func (m *peerMap) forEachEndpoint(f func(ep *endpoint)) {
for _, pi := range m.byNodeKey {
f(pi.ep)
}
}
// forEachEndpointWithDiscoKey invokes f on every endpoint in m that has the
// provided DiscoKey until f returns false or there are no endpoints left to
// iterate.
func (m *peerMap) forEachEndpointWithDiscoKey(dk key.DiscoPublic, f func(*endpoint) (keepGoing bool)) {
for nk := range m.nodesOfDisco[dk] {
pi, ok := m.byNodeKey[nk]
if !ok {
// Unexpected. Data structures would have to
// be out of sync. But we don't have a logger
// here to log [unexpected], so just skip.
// Maybe log later once peerMap is merged back
// into Conn.
continue
}
if !f(pi.ep) {
return
}
}
}
// upsertEndpoint stores endpoint in the peerInfo for
// ep.publicKey, and updates indexes. m must already have a
// tailcfg.Node for ep.publicKey.
func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
if ep.nodeID == 0 {
panic("internal error: upsertEndpoint called with zero NodeID")
}
pi, ok := m.byNodeKey[ep.publicKey]
if !ok {
pi = newPeerInfo(ep)
m.byNodeKey[ep.publicKey] = pi
}
m.byNodeID[ep.nodeID] = pi
epDisco := ep.disco.Load()
if epDisco == nil || oldDiscoKey != epDisco.key {
delete(m.nodesOfDisco[oldDiscoKey], ep.publicKey)
}
if ep.isWireguardOnly {
// If the peer is a WireGuard only peer, add all of its endpoints.
// TODO(raggi,catzkorn): this could mean that if a "isWireguardOnly"
// peer has, say, 192.168.0.2 and so does a tailscale peer, the
// wireguard one will win. That may not be the outcome that we want -
// perhaps we should prefer bestAddr.AddrPort if it is set?
// see tailscale/tailscale#7994
for ipp := range ep.endpointState {
m.setNodeKeyForIPPort(ipp, ep.publicKey)
}
return
}
discoSet := m.nodesOfDisco[epDisco.key]
if discoSet == nil {
discoSet = set.Set[key.NodePublic]{}
m.nodesOfDisco[epDisco.key] = discoSet
}
discoSet.Add(ep.publicKey)
}
// setNodeKeyForIPPort makes future peer lookups by ipp return the
// same endpoint as a lookup by nk.
//
// This should only be called with a fully verified mapping of ipp to
// nk, because calling this function defines the endpoint we hand to
// WireGuard for packets received from ipp.
func (m *peerMap) setNodeKeyForIPPort(ipp netip.AddrPort, nk key.NodePublic) {
if pi := m.byIPPort[ipp]; pi != nil {
delete(pi.ipPorts, ipp)
delete(m.byIPPort, ipp)
}
if pi, ok := m.byNodeKey[nk]; ok {
pi.ipPorts.Add(ipp)
m.byIPPort[ipp] = pi
}
}
// deleteEndpoint deletes the peerInfo associated with ep, and
// updates indexes.
func (m *peerMap) deleteEndpoint(ep *endpoint) {
if ep == nil {
return
}
ep.stopAndReset()
epDisco := ep.disco.Load()
pi := m.byNodeKey[ep.publicKey]
if epDisco != nil {
delete(m.nodesOfDisco[epDisco.key], ep.publicKey)
}
delete(m.byNodeKey, ep.publicKey)
if was, ok := m.byNodeID[ep.nodeID]; ok && was.ep == ep {
delete(m.byNodeID, ep.nodeID)
}
if pi == nil {
// Kneejerk paranoia from earlier issue 2801.
// Unexpected. But no logger plumbed here to log so.
return
}
for ip := range pi.ipPorts {
delete(m.byIPPort, ip)
}
}

130
vendor/tailscale.com/wgengine/magicsock/peermtu.go generated vendored Normal file
View File

@@ -0,0 +1,130 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build (darwin && !ios) || (linux && !android)
package magicsock
import (
"errors"
"golang.org/x/sys/unix"
"tailscale.com/disco"
"tailscale.com/net/tstun"
)
// Peer path MTU routines shared by platforms that implement it.
// DontFragSetting returns true if at least one of the underlying sockets of
// this connection is a UDP socket with the don't fragment bit set, otherwise it
// returns false. It also returns an error if either connection returned an error
// other than errUnsupportedConnType.
func (c *Conn) DontFragSetting() (bool, error) {
df4, err4 := c.getDontFragment("udp4")
df6, err6 := c.getDontFragment("udp6")
df := df4 || df6
err := err4
if err4 != nil && err4 != errUnsupportedConnType {
err = err6
}
if err == errUnsupportedConnType {
err = nil
}
return df, err
}
// ShouldPMTUD returns true if this client should try to enable peer MTU
// discovery, false otherwise.
func (c *Conn) ShouldPMTUD() bool {
if v, ok := debugEnablePMTUD().Get(); ok {
if debugPMTUD() {
c.logf("magicsock: peermtu: peer path MTU discovery set via envknob to %v", v)
}
return v
}
if c.controlKnobs != nil {
if v := c.controlKnobs.PeerMTUEnable.Load(); v {
if debugPMTUD() {
c.logf("magicsock: peermtu: peer path MTU discovery enabled by control")
}
return v
}
}
if debugPMTUD() {
c.logf("magicsock: peermtu: peer path MTU discovery set by default to false")
}
return false // Until we feel confident PMTUD is solid.
}
// PeerMTUEnabled reports whether peer path MTU discovery is enabled.
func (c *Conn) PeerMTUEnabled() bool {
return c.peerMTUEnabled.Load()
}
// UpdatePMTUD configures the underlying sockets of this Conn to enable or disable
// peer path MTU discovery according to the current configuration.
//
// Enabling or disabling peer path MTU discovery requires setting the don't
// fragment bit on its two underlying pconns. There are three distinct results
// for this operation on each pconn:
//
// 1. Success
// 2. Failure (not supported on this platform, or supported but failed)
// 3. Not a UDP socket (most likely one of IPv4 or IPv6 couldn't be used)
//
// To simplify the fast path for the most common case, we set the PMTUD status
// of the overall Conn according to the results of setting the sockopt on pconn
// as follows:
//
// 1. Both setsockopts succeed: PMTUD status update succeeds
// 2. One succeeds, one returns not a UDP socket: PMTUD status update succeeds
// 4. Neither setsockopt succeeds: PMTUD disabled
// 3. Either setsockopt fails: PMTUD disabled
//
// If the PMTUD settings changed, it resets the endpoint state so that it will
// re-probe path MTUs to this peer.
func (c *Conn) UpdatePMTUD() {
if debugPMTUD() {
df4, err4 := c.getDontFragment("udp4")
df6, err6 := c.getDontFragment("udp6")
c.logf("magicsock: peermtu: peer MTU status %v DF bit status: v4: %v (%v) v6: %v (%v)", c.peerMTUEnabled.Load(), df4, err4, df6, err6)
}
enable := c.ShouldPMTUD()
if c.peerMTUEnabled.Load() == enable {
c.logf("[v1] magicsock: peermtu: peer MTU status is %v", enable)
return
}
newStatus := enable
err4 := c.setDontFragment("udp4", enable)
err6 := c.setDontFragment("udp6", enable)
anySuccess := err4 == nil || err6 == nil
noFailures := (err4 == nil || err4 == errUnsupportedConnType) && (err6 == nil || err6 == errUnsupportedConnType)
if anySuccess && noFailures {
c.logf("magicsock: peermtu: peer MTU status updated to %v", newStatus)
} else {
c.logf("[unexpected] magicsock: peermtu: updating peer MTU status to %v failed (v4: %v, v6: %v), disabling", enable, err4, err6)
_ = c.setDontFragment("udp4", false)
_ = c.setDontFragment("udp6", false)
newStatus = false
}
if debugPMTUD() {
c.logf("magicsock: peermtu: peer MTU probes are %v", tstun.WireMTUsToProbe)
}
c.peerMTUEnabled.Store(newStatus)
c.resetEndpointStates()
}
var errEMSGSIZE error = unix.EMSGSIZE
func pmtuShouldLogDiscoTxErr(m disco.Message, err error) bool {
// Large disco.Ping packets used to probe path MTU may result in
// an EMSGSIZE error fairly regularly which can pollute logs.
p, ok := m.(*disco.Ping)
if !ok || p.Padding == 0 || !errors.Is(err, errEMSGSIZE) || debugPMTUD() {
return true
}
return false
}

View File

@@ -0,0 +1,51 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build darwin && !ios
package magicsock
import (
"syscall"
"golang.org/x/sys/unix"
)
func getDontFragOpt(network string) int {
if network == "udp4" {
return unix.IP_DONTFRAG
}
return unix.IPV6_DONTFRAG
}
func (c *Conn) setDontFragment(network string, enable bool) error {
optArg := 1
if enable == false {
optArg = 0
}
var err error
rcErr := c.connControl(network, func(fd uintptr) {
err = syscall.SetsockoptInt(int(fd), getIPProto(network), getDontFragOpt(network), optArg)
})
if rcErr != nil {
return rcErr
}
return err
}
func (c *Conn) getDontFragment(network string) (bool, error) {
var v int
var err error
rcErr := c.connControl(network, func(fd uintptr) {
v, err = syscall.GetsockoptInt(int(fd), getIPProto(network), getDontFragOpt(network))
})
if rcErr != nil {
return false, rcErr
}
if v == 1 {
return true, err
}
return false, err
}

View File

@@ -0,0 +1,49 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build linux && !android
package magicsock
import (
"syscall"
)
func getDontFragOpt(network string) int {
if network == "udp4" {
return syscall.IP_MTU_DISCOVER
}
return syscall.IPV6_MTU_DISCOVER
}
func (c *Conn) setDontFragment(network string, enable bool) error {
optArg := syscall.IP_PMTUDISC_DO
if enable == false {
optArg = syscall.IP_PMTUDISC_DONT
}
var err error
rcErr := c.connControl(network, func(fd uintptr) {
err = syscall.SetsockoptInt(int(fd), getIPProto(network), getDontFragOpt(network), optArg)
})
if rcErr != nil {
return rcErr
}
return err
}
func (c *Conn) getDontFragment(network string) (bool, error) {
var v int
var err error
rcErr := c.connControl(network, func(fd uintptr) {
v, err = syscall.GetsockoptInt(int(fd), getIPProto(network), getDontFragOpt(network))
})
if rcErr != nil {
return false, rcErr
}
if v == syscall.IP_PMTUDISC_DO {
return true, err
}
return false, err
}

View File

@@ -0,0 +1,27 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build (!linux && !darwin) || android || ios
package magicsock
import "tailscale.com/disco"
func (c *Conn) DontFragSetting() (bool, error) {
return false, nil
}
func (c *Conn) ShouldPMTUD() bool {
return false
}
func (c *Conn) PeerMTUEnabled() bool {
return false
}
func (c *Conn) UpdatePMTUD() {
}
func pmtuShouldLogDiscoTxErr(m disco.Message, err error) bool {
return true
}

View File

@@ -0,0 +1,42 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build (darwin && !ios) || (linux && !android)
package magicsock
import (
"syscall"
)
// getIPProto returns the value of the get/setsockopt proto argument necessary
// to set an IP sockopt that corresponds with the string network, which must be
// "udp4" or "udp6".
func getIPProto(network string) int {
if network == "udp4" {
return syscall.IPPROTO_IP
}
return syscall.IPPROTO_IPV6
}
// connControl allows the caller to run a system call on the socket underlying
// Conn specified by the string network, which must be "udp4" or "udp6". If the
// pconn type implements the syscall method, this function returns the value of
// of the system call fn called with the fd of the socket as its arg (or the
// error from rc.Control() if that fails). Otherwise it returns the error
// errUnsupportedConnType.
func (c *Conn) connControl(network string, fn func(fd uintptr)) error {
pconn := c.pconn4.pconn
if network == "udp6" {
pconn = c.pconn6.pconn
}
sc, ok := pconn.(syscall.Conn)
if !ok {
return errUnsupportedConnType
}
rc, err := sc.SyscallConn()
if err != nil {
return err
}
return rc.Control(fn)
}

View File

@@ -0,0 +1,179 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"errors"
"net"
"net/netip"
"sync"
"sync/atomic"
"syscall"
"golang.org/x/net/ipv6"
"tailscale.com/net/netaddr"
"tailscale.com/types/nettype"
)
// RebindingUDPConn is a UDP socket that can be re-bound.
// Unix has no notion of re-binding a socket, so we swap it out for a new one.
type RebindingUDPConn struct {
// pconnAtomic is a pointer to the value stored in pconn, but doesn't
// require acquiring mu. It's used for reads/writes and only upon failure
// do the reads/writes then check pconn (after acquiring mu) to see if
// there's been a rebind meanwhile.
// pconn isn't really needed, but makes some of the code simpler
// to keep it distinct.
// Neither is expected to be nil, sockets are bound on creation.
pconnAtomic atomic.Pointer[nettype.PacketConn]
mu sync.Mutex // held while changing pconn (and pconnAtomic)
pconn nettype.PacketConn
port uint16
}
// setConnLocked sets the provided nettype.PacketConn. It should be called only
// after acquiring RebindingUDPConn.mu. It upgrades the provided
// nettype.PacketConn to a batchingConn when appropriate. This upgrade is
// intentionally pushed closest to where read/write ops occur in order to avoid
// disrupting surrounding code that assumes nettype.PacketConn is a
// *net.UDPConn.
func (c *RebindingUDPConn) setConnLocked(p nettype.PacketConn, network string, batchSize int) {
upc := tryUpgradeToBatchingConn(p, network, batchSize)
c.pconn = upc
c.pconnAtomic.Store(&upc)
c.port = uint16(c.localAddrLocked().Port)
}
// currentConn returns c's current pconn, acquiring c.mu in the process.
func (c *RebindingUDPConn) currentConn() nettype.PacketConn {
c.mu.Lock()
defer c.mu.Unlock()
return c.pconn
}
func (c *RebindingUDPConn) readFromWithInitPconn(pconn nettype.PacketConn, b []byte) (int, netip.AddrPort, error) {
for {
n, addr, err := pconn.ReadFromUDPAddrPort(b)
if err != nil && pconn != c.currentConn() {
pconn = *c.pconnAtomic.Load()
continue
}
return n, addr, err
}
}
// ReadFromUDPAddrPort reads a packet from c into b.
// It returns the number of bytes copied and the source address.
func (c *RebindingUDPConn) ReadFromUDPAddrPort(b []byte) (int, netip.AddrPort, error) {
return c.readFromWithInitPconn(*c.pconnAtomic.Load(), b)
}
// WriteBatchTo writes buffs to addr.
func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error {
for {
pconn := *c.pconnAtomic.Load()
b, ok := pconn.(batchingConn)
if !ok {
for _, buf := range buffs {
_, err := c.writeToUDPAddrPortWithInitPconn(pconn, buf, addr)
if err != nil {
return err
}
}
return nil
}
err := b.WriteBatchTo(buffs, addr)
if err != nil {
if pconn != c.currentConn() {
continue
}
return err
}
return err
}
}
// ReadBatch reads messages from c into msgs. It returns the number of messages
// the caller should evaluate for nonzero len, as a zero len message may fall
// on either side of a nonzero.
func (c *RebindingUDPConn) ReadBatch(msgs []ipv6.Message, flags int) (int, error) {
for {
pconn := *c.pconnAtomic.Load()
b, ok := pconn.(batchingConn)
if !ok {
n, ap, err := c.readFromWithInitPconn(pconn, msgs[0].Buffers[0])
if err == nil {
msgs[0].N = n
msgs[0].Addr = net.UDPAddrFromAddrPort(netaddr.Unmap(ap))
return 1, nil
}
return 0, err
}
n, err := b.ReadBatch(msgs, flags)
if err != nil && pconn != c.currentConn() {
continue
}
return n, err
}
}
func (c *RebindingUDPConn) Port() uint16 {
c.mu.Lock()
defer c.mu.Unlock()
return c.port
}
func (c *RebindingUDPConn) LocalAddr() *net.UDPAddr {
c.mu.Lock()
defer c.mu.Unlock()
return c.localAddrLocked()
}
func (c *RebindingUDPConn) localAddrLocked() *net.UDPAddr {
return c.pconn.LocalAddr().(*net.UDPAddr)
}
// errNilPConn is returned by RebindingUDPConn.Close when there is no current pconn.
// It is for internal use only and should not be returned to users.
var errNilPConn = errors.New("nil pconn")
func (c *RebindingUDPConn) Close() error {
c.mu.Lock()
defer c.mu.Unlock()
return c.closeLocked()
}
func (c *RebindingUDPConn) closeLocked() error {
if c.pconn == nil {
return errNilPConn
}
c.port = 0
return c.pconn.Close()
}
func (c *RebindingUDPConn) writeToUDPAddrPortWithInitPconn(pconn nettype.PacketConn, b []byte, addr netip.AddrPort) (int, error) {
for {
n, err := pconn.WriteToUDPAddrPort(b, addr)
if err != nil && pconn != c.currentConn() {
pconn = *c.pconnAtomic.Load()
continue
}
return n, err
}
}
func (c *RebindingUDPConn) WriteToUDPAddrPort(b []byte, addr netip.AddrPort) (int, error) {
return c.writeToUDPAddrPortWithInitPconn(*c.pconnAtomic.Load(), b, addr)
}
func (c *RebindingUDPConn) SyscallConn() (syscall.RawConn, error) {
c.mu.Lock()
defer c.mu.Unlock()
sc, ok := c.pconn.(syscall.Conn)
if !ok {
return nil, errUnsupportedConnType
}
return sc.SyscallConn()
}