This commit is contained in:
2026-02-19 10:07:43 +00:00
parent 007438e372
commit 6e637ecf77
1763 changed files with 60820 additions and 279516 deletions

View File

@@ -1,25 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"net/netip"
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"tailscale.com/types/nettype"
)
var (
	// This acts as a compile-time check for our usage of ipv6.Message in
	// batchingConn for both IPv6 and IPv4 operations.
	_ ipv6.Message = ipv4.Message{}
)

// batchingConn is a nettype.PacketConn that provides batched i/o.
type batchingConn interface {
	nettype.PacketConn

	// ReadBatch reads up to len(msgs) messages into msgs, returning the
	// number of elements of msgs that were populated.
	ReadBatch(msgs []ipv6.Message, flags int) (n int, err error)

	// WriteBatchTo writes buffs, a batch of packets, to addr.
	WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error
}

View File

@@ -1,14 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !linux
package magicsock
import (
"tailscale.com/types/nettype"
)
// tryUpgradeToBatchingConn returns pconn unmodified. Batched i/o is only
// implemented on Linux; this is the no-op fallback for every other platform
// (see the !linux build constraint on this file).
func tryUpgradeToBatchingConn(pconn nettype.PacketConn, _ string, _ int) nettype.PacketConn {
	return pconn
}

View File

@@ -1,424 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"encoding/binary"
"errors"
"fmt"
"net"
"net/netip"
"runtime"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"unsafe"
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"golang.org/x/sys/unix"
"tailscale.com/hostinfo"
"tailscale.com/net/neterror"
"tailscale.com/types/nettype"
)
// xnetBatchReaderWriter defines the batching i/o methods of
// golang.org/x/net/ipv4.PacketConn (and ipv6.PacketConn).
// TODO(jwhited): This should eventually be replaced with the standard library
// implementation of https://github.com/golang/go/issues/45886
type xnetBatchReaderWriter interface {
	xnetBatchReader
	xnetBatchWriter
}

// xnetBatchReader is the batched-read half of an ipv4/ipv6.PacketConn.
type xnetBatchReader interface {
	ReadBatch([]ipv6.Message, int) (int, error)
}

// xnetBatchWriter is the batched-write half of an ipv4/ipv6.PacketConn.
type xnetBatchWriter interface {
	WriteBatch([]ipv6.Message, int) (int, error)
}
// linuxBatchingConn is a UDP socket that provides batched i/o. It implements
// batchingConn.
type linuxBatchingConn struct {
	pc  nettype.PacketConn    // underlying socket; used for single-packet i/o, deadlines, close
	xpc xnetBatchReaderWriter // batching view of pc (an ipv4 or ipv6 PacketConn)

	rxOffload bool        // supports UDP GRO or similar
	txOffload atomic.Bool // supports UDP GSO or similar; may be flipped off at runtime if the kernel rejects a GSO write

	setGSOSizeInControl   func(control *[]byte, gsoSize uint16) // typically setGSOSizeInControl(); swappable for testing
	getGSOSizeFromControl func(control []byte) (int, error)     // typically getGSOSizeFromControl(); swappable for testing

	sendBatchPool sync.Pool // pool of *sendBatch, amortizing per-send allocations
}
// ReadFromUDPAddrPort reads a single packet from the underlying socket. It is
// unavailable while rx UDP offload is enabled on the socket, and returns an
// error in that case.
func (c *linuxBatchingConn) ReadFromUDPAddrPort(p []byte) (n int, addr netip.AddrPort, err error) {
	if !c.rxOffload {
		return c.pc.ReadFromUDPAddrPort(p)
	}
	// UDP_GRO is opt-in on Linux via setsockopt(). Once enabled you may
	// receive a "monster datagram" from any read call. The ReadFrom() API
	// does not support passing the GSO size and is unsafe to use in such a
	// case. Other platforms may vary in behavior, but we go with the most
	// conservative approach to prevent this from becoming a footgun in the
	// future.
	return 0, netip.AddrPort{}, errors.New("rx UDP offload is enabled on this socket, single packet reads are unavailable")
}
// SetDeadline sets the read and write deadlines of the underlying socket.
func (c *linuxBatchingConn) SetDeadline(t time.Time) error {
	return c.pc.SetDeadline(t)
}

// SetReadDeadline sets the read deadline of the underlying socket.
func (c *linuxBatchingConn) SetReadDeadline(t time.Time) error {
	return c.pc.SetReadDeadline(t)
}

// SetWriteDeadline sets the write deadline of the underlying socket.
func (c *linuxBatchingConn) SetWriteDeadline(t time.Time) error {
	return c.pc.SetWriteDeadline(t)
}
const (
	// udpSegmentMaxDatagrams is the maximum number of datagrams we coalesce
	// into (or split out of) a single GSO/GRO "monster datagram".
	//
	// This was initially established for Linux, but may split out to
	// GOOS-specific values later. It originates as UDP_MAX_SEGMENTS in the
	// kernel's TX path, and UDP_GRO_CNT_MAX for RX.
	udpSegmentMaxDatagrams = 64
)

const (
	// Exceeding these values results in EMSGSIZE.
	maxIPv4PayloadLen = 1<<16 - 1 - 20 - 8 // 65535 minus IPv4 header (20) and UDP header (8)
	maxIPv6PayloadLen = 1<<16 - 1 - 8      // 65535 minus UDP header (8); IPv6 header is not counted in the payload length
)
// coalesceMessages iterates msgs, coalescing them where possible while
// maintaining datagram order. All msgs have their Addr field set to addr.
// It returns the number of elements of msgs that were populated and should
// be handed to writeBatch. The GSO segment size of a coalesced message is
// the length of its first datagram.
func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, buffs [][]byte, msgs []ipv6.Message) int {
	var (
		base     = -1 // index of msg we are currently coalescing into
		gsoSize  int  // segmentation size of msgs[base]
		dgramCnt int  // number of dgrams coalesced into msgs[base]
		endBatch bool // tracking flag to start a new batch on next iteration of buffs
	)
	maxPayloadLen := maxIPv4PayloadLen
	if addr.IP.To4() == nil {
		maxPayloadLen = maxIPv6PayloadLen
	}
	for i, buff := range buffs {
		if i > 0 {
			msgLen := len(buff)
			baseLenBefore := len(msgs[base].Buffers[0])
			freeBaseCap := cap(msgs[base].Buffers[0]) - baseLenBefore
			// Coalesce buff into msgs[base] only while doing so keeps the
			// total within the payload cap, the datagram is no larger than
			// the batch's segment size, it fits the existing backing array,
			// and the kernel's datagram-count limit is not exceeded.
			if msgLen+baseLenBefore <= maxPayloadLen &&
				msgLen <= gsoSize &&
				msgLen <= freeBaseCap &&
				dgramCnt < udpSegmentMaxDatagrams &&
				!endBatch {
				msgs[base].Buffers[0] = append(msgs[base].Buffers[0], make([]byte, msgLen)...)
				copy(msgs[base].Buffers[0][baseLenBefore:], buff)
				if i == len(buffs)-1 {
					// Final input buff: stamp the GSO size control message
					// now, as no later iteration will flush this base.
					c.setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
				}
				dgramCnt++
				if msgLen < gsoSize {
					// A smaller than gsoSize packet on the tail is legal, but
					// it must end the batch.
					endBatch = true
				}
				continue
			}
		}
		if dgramCnt > 1 {
			// The previous base holds multiple datagrams; stamp its GSO size
			// before starting a new base.
			c.setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
		}
		// Reset prior to incrementing base since we are preparing to start a
		// new potential batch.
		endBatch = false
		base++
		gsoSize = len(buff)
		msgs[base].OOB = msgs[base].OOB[:0]
		msgs[base].Buffers[0] = buff
		msgs[base].Addr = addr
		dgramCnt = 1
	}
	return base + 1
}
// sendBatch is reusable scratch state for a single WriteBatchTo call,
// recycled through linuxBatchingConn.sendBatchPool.
type sendBatch struct {
	msgs []ipv6.Message
	ua   *net.UDPAddr // scratch destination address, rewritten per send
}
// getSendBatch fetches a *sendBatch from the pool for use by WriteBatchTo;
// return it with putSendBatch when done.
func (c *linuxBatchingConn) getSendBatch() *sendBatch {
	return c.sendBatchPool.Get().(*sendBatch)
}
// putSendBatch returns batch to the pool, clearing per-send message state
// (lengths, addresses, flags) while keeping each message's Buffers and OOB
// backing storage for reuse.
func (c *linuxBatchingConn) putSendBatch(batch *sendBatch) {
	for i := range batch.msgs {
		m := &batch.msgs[i]
		*m = ipv6.Message{Buffers: m.Buffers, OOB: m.OOB}
	}
	c.sendBatchPool.Put(batch)
}
// WriteBatchTo writes buffs to addr as a batch, using UDP GSO to coalesce
// datagrams when tx offload is enabled, and one message per buff otherwise.
// If a GSO write fails with an error indicating GSO is unusable, tx offload
// is permanently disabled on this conn and the write is retried without it.
func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error {
	batch := c.getSendBatch()
	defer c.putSendBatch(batch)
	// Copy addr into the batch's scratch *net.UDPAddr, resizing its IP
	// slice to 16 or 4 bytes as appropriate.
	// NOTE(review): the copy happens before the reslice, so if batch.ua.IP
	// could shrink to 4 bytes and later be reused for a 16-byte address the
	// copy would truncate; this appears safe only because a given conn is
	// created per-network ("udp4" or "udp6") and so sticks to one family —
	// confirm against tryUpgradeToBatchingConn.
	if addr.Addr().Is6() {
		as16 := addr.Addr().As16()
		copy(batch.ua.IP, as16[:])
		batch.ua.IP = batch.ua.IP[:16]
	} else {
		as4 := addr.Addr().As4()
		copy(batch.ua.IP, as4[:])
		batch.ua.IP = batch.ua.IP[:4]
	}
	batch.ua.Port = int(addr.Port())
	var (
		n       int
		retried bool
	)
retry:
	if c.txOffload.Load() {
		// GSO path: coalesce buffs into as few messages as possible.
		n = c.coalesceMessages(batch.ua, buffs, batch.msgs)
	} else {
		// Non-offload path: one message per buff.
		for i := range buffs {
			batch.msgs[i].Buffers[0] = buffs[i]
			batch.msgs[i].Addr = batch.ua
			batch.msgs[i].OOB = batch.msgs[i].OOB[:0]
		}
		n = len(buffs)
	}
	err := c.writeBatch(batch.msgs[:n])
	if err != nil && c.txOffload.Load() && neterror.ShouldDisableUDPGSO(err) {
		// The kernel rejected the GSO write; disable tx offload for the
		// remainder of this conn's life and retry without it.
		c.txOffload.Store(false)
		retried = true
		goto retry
	}
	if retried {
		// Surface that GSO was disabled, wrapping any error from the retry.
		return neterror.ErrUDPGSODisabled{OnLaddr: c.pc.LocalAddr().String(), RetryErr: err}
	}
	return err
}
// SyscallConn implements syscall.Conn by delegating to the underlying
// socket. It returns errUnsupportedConnType if the underlying conn does not
// itself implement syscall.Conn.
func (c *linuxBatchingConn) SyscallConn() (syscall.RawConn, error) {
	if sc, ok := c.pc.(syscall.Conn); ok {
		return sc.SyscallConn()
	}
	return nil, errUnsupportedConnType
}
// writeBatch writes all of msgs to the wire, looping over short writes until
// every message has been handed to the kernel or an error occurs.
func (c *linuxBatchingConn) writeBatch(msgs []ipv6.Message) error {
	head := 0
	for {
		n, err := c.xpc.WriteBatch(msgs[head:], 0)
		if err != nil {
			// Returning the number of packets written would require
			// unraveling individual msg len and gso size during a coalesced
			// write. The top of the call stack disregards partial success,
			// so keep this simple for now.
			return err
		}
		head += n
		if head == len(msgs) {
			return nil
		}
	}
}
// splitCoalescedMessages splits coalesced messages from the tail of dst
// beginning at index 'firstMsgAt' into the head of the same slice. It reports
// the number of elements to evaluate in msgs for nonzero len (msgs[i].N). An
// error is returned if a socket control message cannot be parsed or a split
// operation would overflow msgs.
func (c *linuxBatchingConn) splitCoalescedMessages(msgs []ipv6.Message, firstMsgAt int) (n int, err error) {
	for i := firstMsgAt; i < len(msgs); i++ {
		msg := &msgs[i]
		if msg.N == 0 {
			// No more received messages in the tail; done.
			return n, err
		}
		var (
			gsoSize    int        // GRO segment size; 0 means msg was not coalesced
			start      int        // start offset of current segment within msg.Buffers[0]
			end        = msg.N    // end offset (exclusive) of current segment
			numToSplit = 1        // number of segments to carve out of msg
		)
		gsoSize, err = c.getGSOSizeFromControl(msg.OOB[:msg.NN])
		if err != nil {
			return n, err
		}
		if gsoSize > 0 {
			// msg.N bytes hold ceil(msg.N/gsoSize) segments of gsoSize bytes
			// each; the final segment may be shorter.
			numToSplit = (msg.N + gsoSize - 1) / gsoSize
			end = gsoSize
		}
		for j := 0; j < numToSplit; j++ {
			if n > i {
				// The write cursor would pass the read cursor, clobbering
				// data we have not yet split.
				return n, errors.New("splitting coalesced packet resulted in overflow")
			}
			copied := copy(msgs[n].Buffers[0], msg.Buffers[0][start:end])
			msgs[n].N = copied
			msgs[n].Addr = msg.Addr
			start = end
			end += gsoSize
			if end > msg.N {
				end = msg.N
			}
			n++
		}
		if i != n-1 {
			// It is legal for bytes to move within msg.Buffers[0] as a result
			// of splitting, so we only zero the source msg len when it is not
			// the destination of the last split operation above.
			msg.N = 0
		}
	}
	return n, nil
}
// ReadBatch reads up to len(msgs) messages from the socket. When rx offload
// is enabled (and msgs has room to split), it reads into the tail of msgs and
// splits any GRO-coalesced datagrams into the head via
// splitCoalescedMessages; the returned n is the number of elements of msgs to
// examine for nonzero N.
func (c *linuxBatchingConn) ReadBatch(msgs []ipv6.Message, flags int) (n int, err error) {
	if c.rxOffload && len(msgs) >= 2 {
		// Read into the tail of msgs, split into the head.
		readAt := len(msgs) - 2
		numRead, err := c.xpc.ReadBatch(msgs[readAt:], 0)
		if err != nil || numRead == 0 {
			return 0, err
		}
		return c.splitCoalescedMessages(msgs, readAt)
	}
	return c.xpc.ReadBatch(msgs, flags)
}
// LocalAddr returns the local address of the underlying socket.
func (c *linuxBatchingConn) LocalAddr() net.Addr {
	// NOTE(review): this type assertion panics if the underlying conn's
	// LocalAddr is not a *net.UDPAddr; tryUpgradeToBatchingConn only wraps
	// *net.UDPConn, for which this holds.
	return c.pc.LocalAddr().(*net.UDPAddr)
}

// WriteToUDPAddrPort writes a single packet to addr via the underlying socket.
func (c *linuxBatchingConn) WriteToUDPAddrPort(b []byte, addr netip.AddrPort) (int, error) {
	return c.pc.WriteToUDPAddrPort(b, addr)
}

// Close closes the underlying socket.
func (c *linuxBatchingConn) Close() error {
	return c.pc.Close()
}
// tryEnableUDPOffload attempts to enable the UDP_GRO socket option on pconn,
// and returns two booleans indicating TX and RX UDP offload support. TX
// support (UDP_SEGMENT) is probed without mutating socket state; RX support
// is determined by whether setting UDP_GRO succeeds. Both are false when
// pconn is not a *net.UDPConn or its raw conn is unavailable.
func tryEnableUDPOffload(pconn nettype.PacketConn) (hasTX bool, hasRX bool) {
	c, ok := pconn.(*net.UDPConn)
	if !ok {
		return false, false
	}
	rc, err := c.SyscallConn()
	if err != nil {
		return false, false
	}
	ctrlErr := rc.Control(func(fd uintptr) {
		// Probe GSO support by reading UDP_SEGMENT...
		_, errSyscall := syscall.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT)
		hasTX = errSyscall == nil
		// ...and opt in to GRO, recording whether the kernel accepted it.
		errSyscall = syscall.SetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO, 1)
		hasRX = errSyscall == nil
	})
	if ctrlErr != nil {
		return false, false
	}
	return hasTX, hasRX
}
// getGSOSizeFromControl returns the GSO size found in control. If no GSO size
// is found or the len(control) < unix.SizeofCmsghdr, this function returns 0.
// A non-nil error will be returned if len(control) > unix.SizeofCmsghdr but
// its contents cannot be parsed as a socket control message.
func getGSOSizeFromControl(control []byte) (int, error) {
	var (
		hdr  unix.Cmsghdr
		data []byte
		rem  = control
		err  error
	)
	for len(rem) > unix.SizeofCmsghdr {
		// Parse the remainder (rem), not the full control buffer: parsing
		// `control` on every iteration would re-yield the first message
		// forever, skipping later cmsgs and looping infinitely whenever the
		// first cmsg is not UDP_GRO and more than one cmsg is present.
		hdr, data, rem, err = unix.ParseOneSocketControlMessage(rem)
		if err != nil {
			return 0, fmt.Errorf("error parsing socket control message: %w", err)
		}
		if hdr.Level == unix.SOL_UDP && hdr.Type == unix.UDP_GRO && len(data) >= 2 {
			// The GRO segment size is a native-endian uint16 payload.
			return int(binary.NativeEndian.Uint16(data[:2])), nil
		}
	}
	return 0, nil
}
// setGSOSizeInControl sets a socket control message in control containing
// gsoSize. If len(control) < controlMessageSize control's len will be set to 0.
func setGSOSizeInControl(control *[]byte, gsoSize uint16) {
	*control = (*control)[:0]
	// NOTE(review): this first capacity check appears subsumed by the
	// controlMessageSize check below (assuming controlMessageSize >=
	// sizeof(Cmsghdr)); confirm and consider removing.
	if cap(*control) < int(unsafe.Sizeof(unix.Cmsghdr{})) {
		return
	}
	if cap(*control) < controlMessageSize {
		return
	}
	*control = (*control)[:cap(*control)]
	// Write the cmsg header in place: level SOL_UDP, type UDP_SEGMENT,
	// length covering a 2-byte (uint16) segment-size payload.
	hdr := (*unix.Cmsghdr)(unsafe.Pointer(&(*control)[0]))
	hdr.Level = unix.SOL_UDP
	hdr.Type = unix.UDP_SEGMENT
	hdr.SetLen(unix.CmsgLen(2))
	// Payload: native-endian gsoSize immediately after the header.
	binary.NativeEndian.PutUint16((*control)[unix.SizeofCmsghdr:], gsoSize)
	// Truncate control to exactly one control message.
	*control = (*control)[:unix.CmsgSpace(2)]
}
// tryUpgradeToBatchingConn probes the capabilities of the OS and pconn, and
// upgrades pconn to a *linuxBatchingConn if appropriate. It returns pconn
// unmodified on non-Linux, for networks other than udp4/udp6, on 2.x
// kernels, or when pconn is not a *net.UDPConn.
func tryUpgradeToBatchingConn(pconn nettype.PacketConn, network string, batchSize int) nettype.PacketConn {
	if runtime.GOOS != "linux" {
		// Exclude Android.
		return pconn
	}
	if network != "udp4" && network != "udp6" {
		return pconn
	}
	if strings.HasPrefix(hostinfo.GetOSVersion(), "2.") {
		// recvmmsg/sendmmsg were added in 2.6.33, but we support down to
		// 2.6.32 for old NAS devices. See https://github.com/tailscale/tailscale/issues/6807.
		// As a cheap heuristic: if the Linux kernel starts with "2", just
		// consider it too old for mmsg. Nobody who cares about performance runs
		// such ancient kernels. UDP offload was added much later, so no
		// upgrades are available.
		return pconn
	}
	uc, ok := pconn.(*net.UDPConn)
	if !ok {
		return pconn
	}
	b := &linuxBatchingConn{
		pc:                    pconn,
		getGSOSizeFromControl: getGSOSizeFromControl,
		setGSOSizeInControl:   setGSOSizeInControl,
		sendBatchPool: sync.Pool{
			New: func() any {
				// Each pooled sendBatch has one scratch UDPAddr shared
				// (aliased) by all of its messages' Addr fields, plus
				// batchSize messages, each with a single Buffers slot and an
				// OOB buffer sized for the GSO control message.
				ua := &net.UDPAddr{
					IP: make([]byte, 16),
				}
				msgs := make([]ipv6.Message, batchSize)
				for i := range msgs {
					msgs[i].Buffers = make([][]byte, 1)
					msgs[i].Addr = ua
					msgs[i].OOB = make([]byte, controlMessageSize)
				}
				return &sendBatch{
					ua:   ua,
					msgs: msgs,
				}
			},
		},
	}
	switch network {
	case "udp4":
		b.xpc = ipv4.NewPacketConn(uc)
	case "udp6":
		b.xpc = ipv6.NewPacketConn(uc)
	default:
		// Unreachable: network was validated above.
		panic("bogus network")
	}
	var txOffload bool
	txOffload, b.rxOffload = tryEnableUDPOffload(uc)
	b.txOffload.Store(txOffload)
	return b
}

View File

@@ -10,11 +10,13 @@ import (
"sync"
"syscall"
"time"
"tailscale.com/syncs"
)
// blockForeverConn is a net.PacketConn whose reads block until it is closed.
type blockForeverConn struct {
mu sync.Mutex
mu syncs.Mutex
cond *sync.Cond
closed bool
}

View File

@@ -1,182 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !(ios || android || js)
package magicsock
import (
"context"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/netip"
"slices"
"strings"
"time"
"tailscale.com/types/logger"
"tailscale.com/util/cloudenv"
)
// maxCloudInfoWait bounds both the metadata-service dial timeout and the
// overall time spent querying cloud metadata.
const maxCloudInfoWait = 2 * time.Second

// cloudInfo queries a cloud provider's metadata service for information
// about the instance tailscaled is running on (currently its public IPs).
type cloudInfo struct {
	client http.Client
	logf   logger.Logf

	// The following parameters are fixed for the lifetime of the cloudInfo
	// object, but are used for testing.
	cloud    cloudenv.Cloud
	endpoint string // base URL of the metadata service
}
// newCloudInfo returns a new cloudInfo, detecting the current cloud from the
// environment and targeting the common non-routable metadata IP. The HTTP
// client disables keep-alives and bounds each dial by maxCloudInfoWait.
func newCloudInfo(logf logger.Logf) *cloudInfo {
	tr := &http.Transport{
		DisableKeepAlives: true,
		// Use DialContext rather than the deprecated Dial field so that
		// dials honor per-request context cancellation in addition to the
		// dialer's own timeout.
		DialContext: (&net.Dialer{
			Timeout: maxCloudInfoWait,
		}).DialContext,
	}
	return &cloudInfo{
		client:   http.Client{Transport: tr},
		logf:     logf,
		cloud:    cloudenv.Get(),
		endpoint: "http://" + cloudenv.CommonNonRoutableMetadataIP,
	}
}
// GetPublicIPs returns any public IPs attached to the current cloud instance,
// if the tailscaled process is running in a known cloud and there are any such
// IPs present. On unrecognized clouds it returns (nil, nil).
func (ci *cloudInfo) GetPublicIPs(ctx context.Context) ([]netip.Addr, error) {
	if ci.cloud != cloudenv.AWS {
		// Only AWS is supported at present.
		return nil, nil
	}
	ret, err := ci.getAWS(ctx)
	ci.logf("[v1] cloudinfo.GetPublicIPs: AWS: %v, %v", ret, err)
	return ret, err
}
// getAWSMetadata makes a request to the AWS metadata service at the given
// path, authenticating with the provided IMDSv2 token. The returned metadata
// is split by newline and returned as a slice. A 404 from the service is not
// an error and yields a nil slice.
func (ci *cloudInfo) getAWSMetadata(ctx context.Context, token, path string) ([]string, error) {
	req, err := http.NewRequestWithContext(ctx, "GET", ci.endpoint+path, nil)
	if err != nil {
		return nil, fmt.Errorf("creating request to %q: %w", path, err)
	}
	req.Header.Set("X-aws-ec2-metadata-token", token)

	resp, err := ci.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("making request to metadata service %q: %w", path, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusNotFound {
		// Nothing found, but this isn't an error; just return
		return nil, nil
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading response body for %q: %w", path, err)
	}
	return strings.Split(strings.TrimSpace(string(body)), "\n"), nil
}
// getAWS returns all public IPv4 and IPv6 addresses present in the AWS instance metadata.
//
// It obtains an IMDSv2 session token, verifies the response came from the
// EC2 metadata service via the Server header, then walks every network
// interface's MAC to collect its public-ipv4s and ipv6s entries. Addresses
// are returned sorted; if any were found they are returned even when some
// per-interface queries failed, otherwise any accumulated errors are joined.
func (ci *cloudInfo) getAWS(ctx context.Context) ([]netip.Addr, error) {
	ctx, cancel := context.WithTimeout(ctx, maxCloudInfoWait)
	defer cancel()

	// Get a token so we can query the metadata service.
	req, err := http.NewRequestWithContext(ctx, "PUT", ci.endpoint+"/latest/api/token", nil)
	if err != nil {
		return nil, fmt.Errorf("creating token request: %w", err)
	}
	req.Header.Set("X-Aws-Ec2-Metadata-Token-Ttl-Seconds", "10")
	resp, err := ci.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("making token request to metadata service: %w", err)
	}
	// NOTE(review): the token response's status code is never checked, so a
	// non-200 body would be used as the token; confirm whether that is
	// intentional (the Server-header check below partially mitigates it).
	body, err := io.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		return nil, fmt.Errorf("reading token response body: %w", err)
	}
	token := string(body)
	// Sanity-check that we're talking to the EC2 metadata service.
	server := resp.Header.Get("Server")
	if server != "EC2ws" {
		return nil, fmt.Errorf("unexpected server header: %q", server)
	}

	// Iterate over all interfaces and get their public IP addresses, both IPv4 and IPv6.
	macAddrs, err := ci.getAWSMetadata(ctx, token, "/latest/meta-data/network/interfaces/macs/")
	if err != nil {
		return nil, fmt.Errorf("getting interface MAC addresses: %w", err)
	}
	var (
		addrs []netip.Addr // successfully parsed addresses
		errs  []error      // non-fatal per-interface/parse errors
	)
	// addAddr parses addr and appends it to addrs, recording a parse
	// failure as a non-fatal error.
	addAddr := func(addr string) {
		ip, err := netip.ParseAddr(addr)
		if err != nil {
			errs = append(errs, fmt.Errorf("parsing IP address %q: %w", addr, err))
			return
		}
		addrs = append(addrs, ip)
	}
	for _, mac := range macAddrs {
		ips, err := ci.getAWSMetadata(ctx, token, "/latest/meta-data/network/interfaces/macs/"+mac+"/public-ipv4s")
		if err != nil {
			errs = append(errs, fmt.Errorf("getting IPv4 addresses for %q: %w", mac, err))
			continue
		}
		for _, ip := range ips {
			addAddr(ip)
		}

		// Try querying for IPv6 addresses.
		ips, err = ci.getAWSMetadata(ctx, token, "/latest/meta-data/network/interfaces/macs/"+mac+"/ipv6s")
		if err != nil {
			errs = append(errs, fmt.Errorf("getting IPv6 addresses for %q: %w", mac, err))
			continue
		}
		for _, ip := range ips {
			addAddr(ip)
		}
	}

	// Sort the returned addresses for determinism.
	slices.SortFunc(addrs, func(a, b netip.Addr) int {
		return a.Compare(b)
	})

	// Preferentially return any addresses we found, even if there were errors.
	if len(addrs) > 0 {
		return addrs, nil
	}
	if len(errs) > 0 {
		return nil, fmt.Errorf("getting IP addresses: %w", errors.Join(errs...))
	}
	return nil, nil
}

View File

@@ -1,23 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ios || android || js
package magicsock
import (
"context"
"net/netip"
"tailscale.com/types/logger"
)
// cloudInfo is a no-op stub for platforms (ios, android, js) where cloud
// metadata lookup is not performed.
type cloudInfo struct{}

// newCloudInfo returns a new no-op cloudInfo.
func newCloudInfo(_ logger.Logf) *cloudInfo {
	return &cloudInfo{}
}

// GetPublicIPs is a no-op on this platform and always returns (nil, nil).
func (ci *cloudInfo) GetPublicIPs(_ context.Context) ([]netip.Addr, error) {
	return nil, nil
}

View File

@@ -13,6 +13,8 @@ import (
"strings"
"time"
"tailscale.com/feature"
"tailscale.com/feature/buildfeatures"
"tailscale.com/tailcfg"
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
@@ -24,6 +26,11 @@ import (
// /debug/magicsock) or via peerapi to a peer that's owned by the same
// user (so they can e.g. inspect their phones).
func (c *Conn) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
if !buildfeatures.HasDebug {
http.Error(w, feature.ErrUnavailable.Error(), http.StatusNotImplemented)
return
}
c.mu.Lock()
defer c.mu.Unlock()
@@ -72,18 +79,18 @@ func (c *Conn) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "<h2 id=ipport><a href=#ipport>#</a> ip:port to endpoint</h2><ul>")
{
type kv struct {
ipp netip.AddrPort
pi *peerInfo
addr epAddr
pi *peerInfo
}
ent := make([]kv, 0, len(c.peerMap.byIPPort))
for k, v := range c.peerMap.byIPPort {
ent := make([]kv, 0, len(c.peerMap.byEpAddr))
for k, v := range c.peerMap.byEpAddr {
ent = append(ent, kv{k, v})
}
sort.Slice(ent, func(i, j int) bool { return ipPortLess(ent[i].ipp, ent[j].ipp) })
sort.Slice(ent, func(i, j int) bool { return epAddrLess(ent[i].addr, ent[j].addr) })
for _, e := range ent {
ep := e.pi.ep
shortStr := ep.publicKey.ShortString()
fmt.Fprintf(w, "<li>%v: <a href='#%v'>%v</a></li>\n", e.ipp, strings.Trim(shortStr, "[]"), shortStr)
fmt.Fprintf(w, "<li>%v: <a href='#%v'>%v</a></li>\n", e.addr, strings.Trim(shortStr, "[]"), shortStr)
}
}
@@ -148,11 +155,11 @@ func printEndpointHTML(w io.Writer, ep *endpoint) {
for ipp := range ep.endpointState {
eps = append(eps, ipp)
}
sort.Slice(eps, func(i, j int) bool { return ipPortLess(eps[i], eps[j]) })
sort.Slice(eps, func(i, j int) bool { return addrPortLess(eps[i], eps[j]) })
io.WriteString(w, "<p>Endpoints:</p><ul>")
for _, ipp := range eps {
s := ep.endpointState[ipp]
if ipp == ep.bestAddr.AddrPort {
if ipp == ep.bestAddr.ap && !ep.bestAddr.vni.IsSet() {
fmt.Fprintf(w, "<li><b>%s</b>: (best)<ul>", ipp)
} else {
fmt.Fprintf(w, "<li>%s: ...<ul>", ipp)
@@ -196,9 +203,19 @@ func peerDebugName(p tailcfg.NodeView) string {
return p.Hostinfo().Hostname()
}
func ipPortLess(a, b netip.AddrPort) bool {
func addrPortLess(a, b netip.AddrPort) bool {
if v := a.Addr().Compare(b.Addr()); v != 0 {
return v < 0
}
return a.Port() < b.Port()
}
// epAddrLess reports whether a sorts before b: by address, then by port,
// then (for identical ip:port) by VNI.
func epAddrLess(a, b epAddr) bool {
	if v := a.ap.Addr().Compare(b.ap.Addr()); v != 0 {
		return v < 0
	}
	if a.ap.Port() == b.ap.Port() {
		return a.vni.Get() < b.vni.Get()
	}
	return a.ap.Port() < b.ap.Port()
}

View File

@@ -62,6 +62,9 @@ var (
//
//lint:ignore U1000 used on Linux/Darwin only
debugPMTUD = envknob.RegisterBool("TS_DEBUG_PMTUD")
// debugNeverDirectUDP disables the use of direct UDP connections, forcing
// all peer communication over DERP or peer relay.
debugNeverDirectUDP = envknob.RegisterBool("TS_DEBUG_NEVER_DIRECT_UDP")
// Hey you! Adding a new debugknob? Make sure to stub it out in the
// debugknobs_stubs.go file too.
)

View File

@@ -31,3 +31,4 @@ func debugRingBufferMaxSizeBytes() int { return 0 }
func inTest() bool { return false }
func debugPeerMap() bool { return false }
func pretendpoints() []netip.AddrPort { return []netip.AddrPort{} }
func debugNeverDirectUDP() bool { return false }

View File

@@ -11,9 +11,7 @@ import (
"net"
"net/netip"
"reflect"
"runtime"
"slices"
"sync"
"time"
"unsafe"
@@ -21,7 +19,6 @@ import (
"tailscale.com/derp"
"tailscale.com/derp/derphttp"
"tailscale.com/health"
"tailscale.com/logtail/backoff"
"tailscale.com/net/dnscache"
"tailscale.com/net/netcheck"
"tailscale.com/net/tsaddr"
@@ -30,9 +27,9 @@ import (
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/util/backoff"
"tailscale.com/util/mak"
"tailscale.com/util/rands"
"tailscale.com/util/sysresources"
"tailscale.com/util/testenv"
)
@@ -94,7 +91,7 @@ func (c *Conn) fallbackDERPRegionForPeer(peer key.NodePublic) (regionID int) {
type activeDerp struct {
c *derphttp.Client
cancel context.CancelFunc
writeCh chan<- derpWriteRequest
writeCh chan derpWriteRequest
// lastWrite is the time of the last request for its write
// channel (currently even if there was no write).
// It is always non-nil and initialized to a non-zero Time.
@@ -219,17 +216,28 @@ func (c *Conn) derpRegionCodeLocked(regionID int) string {
return ""
}
// setHomeDERPGaugeLocked updates the home DERP gauge metric.
//
// c.mu must be held.
func (c *Conn) setHomeDERPGaugeLocked(derpNum int) {
if c.homeDERPGauge != nil {
c.homeDERPGauge.Set(float64(derpNum))
}
}
// c.mu must NOT be held.
func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
c.mu.Lock()
defer c.mu.Unlock()
if !c.wantDerpLocked() {
c.myDerp = 0
c.setHomeDERPGaugeLocked(0)
c.health.SetMagicSockDERPHome(0, c.homeless)
return false
}
if c.homeless {
c.myDerp = 0
c.setHomeDERPGaugeLocked(0)
c.health.SetMagicSockDERPHome(0, c.homeless)
return false
}
@@ -241,6 +249,7 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
metricDERPHomeChange.Add(1)
}
c.myDerp = derpNum
c.setHomeDERPGaugeLocked(derpNum)
c.health.SetMagicSockDERPHome(derpNum, c.homeless)
if c.privateKey.IsZero() {
@@ -282,59 +291,20 @@ func (c *Conn) goDerpConnect(regionID int) {
go c.derpWriteChanForRegion(regionID, key.NodePublic{})
}
var (
bufferedDerpWrites int
bufferedDerpWritesOnce sync.Once
)
// bufferedDerpWritesBeforeDrop returns how many packets writes can be queued
// up the DERP client to write on the wire before we start dropping.
func bufferedDerpWritesBeforeDrop() int {
// For mobile devices, always return the previous minimum value of 32;
// we can do this outside the sync.Once to avoid that overhead.
if runtime.GOOS == "ios" || runtime.GOOS == "android" {
return 32
}
bufferedDerpWritesOnce.Do(func() {
// Some rough sizing: for the previous fixed value of 32, the
// total consumed memory can be:
// = numDerpRegions * messages/region * sizeof(message)
//
// For sake of this calculation, assume 100 DERP regions; at
// time of writing (2023-04-03), we have 24.
//
// A reasonable upper bound for the worst-case average size of
// a message is a *disco.CallMeMaybe message with 16 endpoints;
// since sizeof(netip.AddrPort) = 32, that's 512 bytes. Thus:
// = 100 * 32 * 512
// = 1638400 (1.6MiB)
//
// On a reasonably-small node with 4GiB of memory that's
// connected to each region and handling a lot of load, 1.6MiB
// is about 0.04% of the total system memory.
//
// For sake of this calculation, then, let's double that memory
// usage to 0.08% and scale based on total system memory.
//
// For a 16GiB Linux box, this should buffer just over 256
// messages.
systemMemory := sysresources.TotalMemory()
memoryUsable := float64(systemMemory) * 0.0008
const (
theoreticalDERPRegions = 100
messageMaximumSizeBytes = 512
)
bufferedDerpWrites = int(memoryUsable / (theoreticalDERPRegions * messageMaximumSizeBytes))
// Never drop below the previous minimum value.
if bufferedDerpWrites < 32 {
bufferedDerpWrites = 32
}
})
return bufferedDerpWrites
}
// derpWriteQueueDepth is the depth of the in-process write queue to a single
// DERP region. DERP connections are TCP, and so the actual write queue depth is
// substantially larger than this suggests - often scaling into megabytes
// depending on dynamic TCP parameters and platform TCP tuning. This queue is
// excess of the TCP buffer depth, which means it's almost pure buffer bloat,
// and does not want to be deep - if there are key situations where a node can't
// keep up, either the TCP link to DERP is too slow, or there is a
// synchronization issue in the write path, fixes should be focused on those
// paths, rather than extending this queue.
// TODO(raggi): make this even shorter, ideally this should be a fairly direct
// line into a socket TCP buffer. The challenge at present is that connect and
// reconnect are in the write path and we don't want to block other write
// operations on those.
const derpWriteQueueDepth = 32
// derpWriteChanForRegion returns a channel to which to send DERP packet write
// requests. It creates a new DERP connection to regionID if necessary.
@@ -344,7 +314,7 @@ func bufferedDerpWritesBeforeDrop() int {
//
// It returns nil if the network is down, the Conn is closed, or the regionID is
// not known.
func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan<- derpWriteRequest {
func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan derpWriteRequest {
if c.networkDown() {
return nil
}
@@ -429,7 +399,7 @@ func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan<-
dc.DNSCache = dnscache.Get()
ctx, cancel := context.WithCancel(c.connCtx)
ch := make(chan derpWriteRequest, bufferedDerpWritesBeforeDrop())
ch := make(chan derpWriteRequest, derpWriteQueueDepth)
ad.c = dc
ad.writeCh = ch
@@ -740,8 +710,11 @@ func (c *Conn) processDERPReadResult(dm derpReadResult, b []byte) (n int, ep *en
return 0, nil
}
ipp := netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))
if c.handleDiscoMessage(b[:n], ipp, dm.src, discoRXPathDERP) {
srcAddr := epAddr{ap: netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))}
pt, isGeneveEncap := packetLooksLike(b[:n])
if pt == packetLooksLikeDisco &&
!isGeneveEncap { // We should never receive Geneve-encapsulated disco over DERP.
c.handleDiscoMessage(b[:n], srcAddr, false, dm.src, discoRXPathDERP)
return 0, nil
}
@@ -755,9 +728,9 @@ func (c *Conn) processDERPReadResult(dm derpReadResult, b []byte) (n int, ep *en
return 0, nil
}
ep.noteRecvActivity(ipp, mono.Now())
if stats := c.stats.Load(); stats != nil {
stats.UpdateRxPhysical(ep.nodeAddr, ipp, 1, dm.n)
ep.noteRecvActivity(srcAddr, mono.Now())
if update := c.connCounter.Load(); update != nil {
update(0, netip.AddrPortFrom(ep.nodeAddr, 0), srcAddr.ap, 1, dm.n, true)
}
c.metrics.inboundPacketsDERPTotal.Add(1)
@@ -875,7 +848,6 @@ func (c *Conn) maybeCloseDERPsOnRebind(okayLocalIPs []netip.Prefix) {
c.closeOrReconnectDERPLocked(regionID, "rebind-default-route-change")
continue
}
regionID := regionID
dc := ad.c
go func() {
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)

View File

@@ -0,0 +1,58 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"sync/atomic"
"tailscale.com/types/key"
)
// discoKeyPair bundles a disco private key with its derived public key and
// the public key's cached short string, so all three can be swapped
// atomically as one unit by discoAtomic.
type discoKeyPair struct {
	private key.DiscoPrivate
	public  key.DiscoPublic
	short   string // public.ShortString()
}
// discoAtomic is an atomic container for a disco private key, public key, and
// the public key's ShortString. The private and public keys are always kept
// synchronized.
//
// The zero value is not ready for use. Use [Set] to provide a usable value.
type discoAtomic struct {
	pair atomic.Pointer[discoKeyPair] // only ever stores fully-initialized pairs; see Set
}
// Pair returns the private and public keys together atomically.
// Code that needs both the private and public keys synchronized should
// use Pair instead of calling Private and Public separately.
func (dk *discoAtomic) Pair() (key.DiscoPrivate, key.DiscoPublic) {
	p := dk.pair.Load()
	return p.private, p.public
}

// Private returns the private key.
func (dk *discoAtomic) Private() key.DiscoPrivate {
	return dk.pair.Load().private
}

// Public returns the public key.
func (dk *discoAtomic) Public() key.DiscoPublic {
	return dk.pair.Load().public
}

// Short returns the short string of the public key (see [DiscoPublic.ShortString]).
func (dk *discoAtomic) Short() string {
	return dk.pair.Load().short
}
// Set updates the private key (and the cached public key and short string).
// The new triple is built first and published with a single atomic store.
func (dk *discoAtomic) Set(private key.DiscoPrivate) {
	pub := private.Public()
	pair := &discoKeyPair{
		private: private,
		public:  pub,
		short:   pub.ShortString(),
	}
	dk.pair.Store(pair)
}

View File

@@ -22,8 +22,9 @@ const _discoPingPurpose_name = "DiscoveryHeartbeatCLIHeartbeatForUDPLifetime"
var _discoPingPurpose_index = [...]uint8{0, 9, 18, 21, 44}
func (i discoPingPurpose) String() string {
if i < 0 || i >= discoPingPurpose(len(_discoPingPurpose_index)-1) {
idx := int(i) - 0
if i < 0 || idx >= len(_discoPingPurpose_index)-1 {
return "discoPingPurpose(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _discoPingPurpose_name[_discoPingPurpose_index[i]:_discoPingPurpose_index[i+1]]
return _discoPingPurpose_name[_discoPingPurpose_index[idx]:_discoPingPurpose_index[idx+1]]
}

View File

@@ -17,7 +17,6 @@ import (
"reflect"
"runtime"
"slices"
"sync"
"sync/atomic"
"time"
@@ -25,14 +24,16 @@ import (
"golang.org/x/net/ipv6"
"tailscale.com/disco"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/packet"
"tailscale.com/net/stun"
"tailscale.com/net/tstun"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/util/mak"
"tailscale.com/util/ringbuffer"
"tailscale.com/util/ringlog"
"tailscale.com/util/slicesx"
)
@@ -59,7 +60,7 @@ type endpoint struct {
lastRecvWG mono.Time // last time there were incoming packets from this peer destined for wireguard-go (e.g. not disco)
lastRecvUDPAny mono.Time // last time there were incoming UDP packets from this peer of any kind
numStopAndResetAtomic int64
debugUpdates *ringbuffer.RingBuffer[EndpointChange]
debugUpdates *ringlog.RingLog[EndpointChange]
// These fields are initialized once and never modified.
c *Conn
@@ -72,19 +73,20 @@ type endpoint struct {
disco atomic.Pointer[endpointDisco] // if the peer supports disco, the key and short string
// mu protects all following fields.
mu sync.Mutex // Lock ordering: Conn.mu, then endpoint.mu
mu syncs.Mutex // Lock ordering: Conn.mu, then endpoint.mu
heartBeatTimer *time.Timer // nil when idle
lastSendExt mono.Time // last time there were outgoing packets sent to this peer from an external trigger (e.g. wireguard-go or disco pingCLI)
lastSendAny mono.Time // last time there were outgoing packets sent this peer from any trigger, internal or external to magicsock
lastFullPing mono.Time // last time we pinged all disco or wireguard only endpoints
derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)
heartBeatTimer *time.Timer // nil when idle
lastSendExt mono.Time // last time there were outgoing packets sent to this peer from an external trigger (e.g. wireguard-go or disco pingCLI)
lastSendAny mono.Time // last time there were outgoing packets sent this peer from any trigger, internal or external to magicsock
lastFullPing mono.Time // last time we pinged all disco or wireguard only endpoints
lastUDPRelayPathDiscovery mono.Time // last time we ran UDP relay path discovery
derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)
bestAddr addrQuality // best non-DERP path; zero if none; mutate via setBestAddrLocked()
bestAddrAt mono.Time // time best address re-confirmed
trustBestAddrUntil mono.Time // time when bestAddr expires
sentPing map[stun.TxID]sentPing
endpointState map[netip.AddrPort]*endpointState
endpointState map[netip.AddrPort]*endpointState // netip.AddrPort type for key (instead of [epAddr]) as [endpointState] is irrelevant for Geneve-encapsulated paths
isCallMeMaybeEP map[netip.AddrPort]bool
// The following fields are related to the new "silent disco"
@@ -95,10 +97,40 @@ type endpoint struct {
expired bool // whether the node has expired
isWireguardOnly bool // whether the endpoint is WireGuard only
relayCapable bool // whether the node is capable of speaking via a [tailscale.com/net/udprelay.Server]
}
// udpRelayEndpointReady determines whether the given relay [addrQuality] should
// be installed as de.bestAddr. It is only called by [relayManager] once it has
// determined maybeBest is functional via [disco.Pong] reception.
func (de *endpoint) udpRelayEndpointReady(maybeBest addrQuality) {
de.mu.Lock()
defer de.mu.Unlock()
now := mono.Now()
curBestAddrTrusted := now.Before(de.trustBestAddrUntil)
sameRelayServer := de.bestAddr.vni.IsSet() && maybeBest.relayServerDisco.Compare(de.bestAddr.relayServerDisco) == 0
if !curBestAddrTrusted ||
sameRelayServer ||
betterAddr(maybeBest, de.bestAddr) {
// We must set maybeBest as de.bestAddr if:
// 1. de.bestAddr is untrusted. betterAddr does not consider
// time-based trust.
// 2. maybeBest & de.bestAddr are on the same relay. If the maybeBest
// handshake happened to use a different source address/transport,
// the relay will drop packets from the 'old' de.bestAddr's.
// 3. maybeBest is a 'betterAddr'.
//
// TODO(jwhited): add observability around !curBestAddrTrusted and sameRelayServer
// TODO(jwhited): collapse path change logging with endpoint.handlePongConnLocked()
de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v", de.publicKey.ShortString(), de.discoShort(), maybeBest.epAddr, maybeBest.wireMTU)
de.setBestAddrLocked(maybeBest)
de.trustBestAddrUntil = now.Add(trustUDPAddrDuration)
}
}
func (de *endpoint) setBestAddrLocked(v addrQuality) {
if v.AddrPort != de.bestAddr.AddrPort {
if v.epAddr != de.bestAddr.epAddr {
de.probeUDPLifetime.resetCycleEndpointLocked()
}
de.bestAddr = v
@@ -134,11 +166,11 @@ type probeUDPLifetime struct {
// timeout cliff in the future.
timer *time.Timer
// bestAddr contains the endpoint.bestAddr.AddrPort at the time a cycle was
// bestAddr contains the endpoint.bestAddr.epAddr at the time a cycle was
// scheduled to start. A probing cycle is 1:1 with the current
// endpoint.bestAddr.AddrPort in the interest of simplicity. When
// endpoint.bestAddr.AddrPort changes, any active probing cycle will reset.
bestAddr netip.AddrPort
// endpoint.bestAddr.epAddr in the interest of simplicity. When
// endpoint.bestAddr.epAddr changes, any active probing cycle will reset.
bestAddr epAddr
// cycleStartedAt contains the time at which the first cliff
// (ProbeUDPLifetimeConfig.Cliffs[0]) was pinged for the current/last cycle.
cycleStartedAt time.Time
@@ -190,7 +222,7 @@ func (p *probeUDPLifetime) resetCycleEndpointLocked() {
}
p.cycleActive = false
p.currentCliff = 0
p.bestAddr = netip.AddrPort{}
p.bestAddr = epAddr{}
}
// ProbeUDPLifetimeConfig represents the configuration for probing UDP path
@@ -333,7 +365,7 @@ type endpointDisco struct {
}
type sentPing struct {
to netip.AddrPort
to epAddr
at mono.Time
timer *time.Timer // timeout timer
purpose discoPingPurpose
@@ -445,7 +477,8 @@ func (de *endpoint) deleteEndpointLocked(why string, ep netip.AddrPort) {
From: ep,
})
delete(de.endpointState, ep)
if de.bestAddr.AddrPort == ep {
asEpAddr := epAddr{ap: ep}
if de.bestAddr.epAddr == asEpAddr {
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
What: "deleteEndpointLocked-bestAddr-" + why,
@@ -467,11 +500,12 @@ func (de *endpoint) initFakeUDPAddr() {
}
// noteRecvActivity records receive activity on de, and invokes
// Conn.noteRecvActivity no more than once every 10s.
func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
// Conn.noteRecvActivity no more than once every 10s, returning true if it
// was called, otherwise false.
func (de *endpoint) noteRecvActivity(src epAddr, now mono.Time) bool {
if de.isWireguardOnly {
de.mu.Lock()
de.bestAddr.AddrPort = ipp
de.bestAddr.ap = src.ap
de.bestAddrAt = now
de.trustBestAddrUntil = now.Add(5 * time.Second)
de.mu.Unlock()
@@ -481,7 +515,7 @@ func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
// kick off discovery disco pings every trustUDPAddrDuration and mirror
// to DERP.
de.mu.Lock()
if de.heartbeatDisabled && de.bestAddr.AddrPort == ipp {
if de.heartbeatDisabled && de.bestAddr.epAddr == src {
de.trustBestAddrUntil = now.Add(trustUDPAddrDuration)
}
de.mu.Unlock()
@@ -492,10 +526,12 @@ func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
de.lastRecvWG.StoreAtomic(now)
if de.c.noteRecvActivity == nil {
return
return false
}
de.c.noteRecvActivity(de.publicKey)
return true
}
return false
}
func (de *endpoint) discoShort() string {
@@ -529,10 +565,10 @@ func (de *endpoint) DstToBytes() []byte { return packIPPort(de.fakeWGAddr) }
// de.mu must be held.
//
// TODO(val): Rewrite the addrFor*Locked() variations to share code.
func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr, derpAddr netip.AddrPort, sendWGPing bool) {
udpAddr = de.bestAddr.AddrPort
func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr epAddr, derpAddr netip.AddrPort, sendWGPing bool) {
udpAddr = de.bestAddr.epAddr
if udpAddr.IsValid() && !now.After(de.trustBestAddrUntil) {
if udpAddr.ap.IsValid() && !now.After(de.trustBestAddrUntil) {
return udpAddr, netip.AddrPort{}, false
}
@@ -551,12 +587,12 @@ func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr, derpAddr netip.Ad
// addrForWireGuardSendLocked returns the address that should be used for
// sending the next packet. If a packet has never or not recently been sent to
// the endpoint, then a randomly selected address for the endpoint is returned,
// as well as a bool indiciating that WireGuard discovery pings should be started.
// as well as a bool indicating that WireGuard discovery pings should be started.
// If the addresses have latency information available, then the address with the
// best latency is used.
//
// de.mu must be held.
func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.AddrPort, shouldPing bool) {
func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr epAddr, shouldPing bool) {
if len(de.endpointState) == 0 {
de.c.logf("magicsock: addrForSendWireguardLocked: [unexpected] no candidates available for endpoint")
return udpAddr, false
@@ -580,22 +616,22 @@ func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.Add
// TODO(catzkorn): Consider a small increase in latency to use
// IPv6 in comparison to IPv4, when possible.
lowestLatency = latency
udpAddr = ipp
udpAddr.ap = ipp
}
}
}
needPing := len(de.endpointState) > 1 && now.Sub(oldestPing) > wireguardPingInterval
if !udpAddr.IsValid() {
if !udpAddr.ap.IsValid() {
candidates := slicesx.MapKeys(de.endpointState)
// Randomly select an address to use until we retrieve latency information
// and give it a short trustBestAddrUntil time so we avoid flapping between
// addresses while waiting on latency information to be populated.
udpAddr = candidates[rand.IntN(len(candidates))]
udpAddr.ap = candidates[rand.IntN(len(candidates))]
}
de.bestAddr.AddrPort = udpAddr
de.bestAddr.epAddr = epAddr{ap: udpAddr.ap}
// Only extend trustBestAddrUntil by one second to avoid packet
// reordering and/or CPU usage from random selection during the first
// second. We should receive a response due to a WireGuard handshake in
@@ -613,18 +649,18 @@ func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.Add
// both of the returned UDP address and DERP address may be non-zero.
//
// de.mu must be held.
func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, derpAddr netip.AddrPort) {
func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr epAddr, derpAddr netip.AddrPort) {
if size == 0 {
udpAddr, derpAddr, _ = de.addrForSendLocked(now)
return
}
udpAddr = de.bestAddr.AddrPort
udpAddr = de.bestAddr.epAddr
pathMTU := de.bestAddr.wireMTU
requestedMTU := pingSizeToPktLen(size, udpAddr.Addr().Is6())
requestedMTU := pingSizeToPktLen(size, udpAddr)
mtuOk := requestedMTU <= pathMTU
if udpAddr.IsValid() && mtuOk {
if udpAddr.ap.IsValid() && mtuOk {
if !now.After(de.trustBestAddrUntil) {
return udpAddr, netip.AddrPort{}
}
@@ -637,7 +673,7 @@ func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, der
// for the packet. Return a zero-value udpAddr to signal that we should
// keep probing the path MTU to all addresses for this endpoint, and a
// valid DERP addr to signal that we should also send via DERP.
return netip.AddrPort{}, de.derpAddr
return epAddr{}, de.derpAddr
}
// maybeProbeUDPLifetimeLocked returns an afterInactivityFor duration and true
@@ -648,7 +684,7 @@ func (de *endpoint) maybeProbeUDPLifetimeLocked() (afterInactivityFor time.Durat
if p == nil {
return afterInactivityFor, false
}
if !de.bestAddr.IsValid() {
if !de.bestAddr.ap.IsValid() {
return afterInactivityFor, false
}
epDisco := de.disco.Load()
@@ -661,7 +697,7 @@ func (de *endpoint) maybeProbeUDPLifetimeLocked() (afterInactivityFor time.Durat
// shuffling probing probability where the local node ends up with a large
// key value lexicographically relative to the other nodes it tends to
// communicate with. If de's disco key changes, the cycle will reset.
if de.c.discoPublic.Compare(epDisco.key) >= 0 {
if de.c.discoAtomic.Public().Compare(epDisco.key) >= 0 {
// lower disco pub key node probes higher
return afterInactivityFor, false
}
@@ -700,7 +736,7 @@ func (de *endpoint) scheduleHeartbeatForLifetimeLocked(after time.Duration, via
}
de.c.dlogf("[v1] magicsock: disco: scheduling UDP lifetime probe for cliff=%v via=%v to %v (%v)",
p.currentCliffDurationEndpointLocked(), via, de.publicKey.ShortString(), de.discoShort())
p.bestAddr = de.bestAddr.AddrPort
p.bestAddr = de.bestAddr.epAddr
p.timer = time.AfterFunc(after, de.heartbeatForLifetime)
if via == heartbeatForLifetimeViaSelf {
metricUDPLifetimeCliffsRescheduled.Add(1)
@@ -728,7 +764,7 @@ func (de *endpoint) heartbeatForLifetime() {
return
}
p.timer = nil
if !p.bestAddr.IsValid() || de.bestAddr.AddrPort != p.bestAddr {
if !p.bestAddr.ap.IsValid() || de.bestAddr.epAddr != p.bestAddr {
// best path changed
p.resetCycleEndpointLocked()
return
@@ -760,7 +796,7 @@ func (de *endpoint) heartbeatForLifetime() {
}
de.c.dlogf("[v1] magicsock: disco: sending disco ping for UDP lifetime probe cliff=%v to %v (%v)",
p.currentCliffDurationEndpointLocked(), de.publicKey.ShortString(), de.discoShort())
de.startDiscoPingLocked(de.bestAddr.AddrPort, mono.Now(), pingHeartbeatForUDPLifetime, 0, nil)
de.startDiscoPingLocked(de.bestAddr.epAddr, mono.Now(), pingHeartbeatForUDPLifetime, 0, nil)
}
// heartbeat is called every heartbeatInterval to keep the best UDP path alive,
@@ -818,8 +854,8 @@ func (de *endpoint) heartbeat() {
}
udpAddr, _, _ := de.addrForSendLocked(now)
if udpAddr.IsValid() {
// We have a preferred path. Ping that every 2 seconds.
if udpAddr.ap.IsValid() {
// We have a preferred path. Ping that every 'heartbeatInterval'.
de.startDiscoPingLocked(udpAddr, now, pingHeartbeat, 0, nil)
}
@@ -827,6 +863,10 @@ func (de *endpoint) heartbeat() {
de.sendDiscoPingsLocked(now, true)
}
if de.wantUDPRelayPathDiscoveryLocked(now) {
de.discoverUDPRelayPathsLocked(now)
}
de.heartBeatTimer = time.AfterFunc(heartbeatInterval, de.heartbeat)
}
@@ -837,6 +877,53 @@ func (de *endpoint) setHeartbeatDisabled(v bool) {
de.heartbeatDisabled = v
}
// discoverUDPRelayPathsLocked starts UDP relay path discovery.
func (de *endpoint) discoverUDPRelayPathsLocked(now mono.Time) {
de.lastUDPRelayPathDiscovery = now
lastBest := de.bestAddr
lastBestIsTrusted := mono.Now().Before(de.trustBestAddrUntil)
de.c.relayManager.startUDPRelayPathDiscoveryFor(de, lastBest, lastBestIsTrusted)
}
// wantUDPRelayPathDiscoveryLocked reports whether we should kick off UDP relay
// path discovery.
func (de *endpoint) wantUDPRelayPathDiscoveryLocked(now mono.Time) bool {
if runtime.GOOS == "js" {
return false
}
if !de.c.hasPeerRelayServers.Load() {
// Changes in this value between its access and a call to
// [endpoint.discoverUDPRelayPathsLocked] are fine, we will eventually
// do the "right" thing during future path discovery. The worst case is
// we suppress path discovery for the current cycle, or we unnecessarily
// call into [relayManager] and do some wasted work.
return false
}
if !de.relayCapable {
return false
}
if de.bestAddr.isDirect() && now.Before(de.trustBestAddrUntil) {
return false
}
if !de.lastUDPRelayPathDiscovery.IsZero() && now.Sub(de.lastUDPRelayPathDiscovery) < discoverUDPRelayPathsInterval {
return false
}
// TODO(jwhited): consider applying 'goodEnoughLatency' suppression here,
// but not until we have a strategy for triggering CallMeMaybeVia regularly
// and/or enabling inbound packets to act as a UDP relay path discovery
// trigger, otherwise clients without relay servers may fall off a UDP
// relay path and never come back. They are dependent on the remote side
// regularly TX'ing CallMeMaybeVia, which currently only happens as part
// of full UDP relay path discovery.
if now.After(de.trustBestAddrUntil) {
return true
}
if !de.lastUDPRelayPathDiscovery.IsZero() && now.Sub(de.lastUDPRelayPathDiscovery) >= upgradeUDPRelayInterval {
return true
}
return false
}
// wantFullPingLocked reports whether we should ping to all our peers looking for
// a better path.
//
@@ -845,7 +932,7 @@ func (de *endpoint) wantFullPingLocked(now mono.Time) bool {
if runtime.GOOS == "js" {
return false
}
if !de.bestAddr.IsValid() || de.lastFullPing.IsZero() {
if !de.bestAddr.isDirect() || de.lastFullPing.IsZero() {
return true
}
if now.After(de.trustBestAddrUntil) {
@@ -854,7 +941,7 @@ func (de *endpoint) wantFullPingLocked(now mono.Time) bool {
if de.bestAddr.latency <= goodEnoughLatency {
return false
}
if now.Sub(de.lastFullPing) >= upgradeInterval {
if now.Sub(de.lastFullPing) >= upgradeUDPDirectInterval {
return true
}
return false
@@ -905,17 +992,38 @@ func (de *endpoint) discoPing(res *ipnstate.PingResult, size int, cb func(*ipnst
udpAddr, derpAddr := de.addrForPingSizeLocked(now, size)
if derpAddr.IsValid() {
de.startDiscoPingLocked(derpAddr, now, pingCLI, size, resCB)
de.startDiscoPingLocked(epAddr{ap: derpAddr}, now, pingCLI, size, resCB)
}
if udpAddr.IsValid() && now.Before(de.trustBestAddrUntil) {
// Already have an active session, so just ping the address we're using.
// Otherwise "tailscale ping" results to a node on the local network
// can look like they're bouncing between, say 10.0.0.0/9 and the peer's
// IPv6 address, both 1ms away, and it's random who replies first.
switch {
case udpAddr.ap.IsValid() && now.Before(de.trustBestAddrUntil):
// We have a "trusted" direct OR peer relay address, ping it.
de.startDiscoPingLocked(udpAddr, now, pingCLI, size, resCB)
} else {
if !udpAddr.vni.IsSet() {
// If the path is direct we do not want to fallthrough to pinging
// all candidate direct paths, otherwise "tailscale ping" results to
// a node on the local network can look like they're bouncing
// between, say 10.0.0.0/8 and the peer's IPv6 address, both 1ms
// away, and it's random who replies first. cb() is called with the
// first reply, vs background path discovery that is subject to
// betterAddr() comparison and hysteresis
break
}
// If the trusted path is via a peer relay we want to fallthrough in
// order to also try all candidate direct paths.
fallthrough
default:
// Ping all candidate direct paths and start peer relay path discovery,
// if appropriate. This work overlaps with what [de.heartbeat] will
// periodically fire when it calls [de.sendDiscoPingsLocked] and
// [de.discoveryUDPRelayPathsLocked], but a user-initiated [pingCLI] is
// a "do it now" operation that should not be subject to
// [heartbeatInterval] tick or [discoPingInterval] rate-limiting.
for ep := range de.endpointState {
de.startDiscoPingLocked(ep, now, pingCLI, size, resCB)
de.startDiscoPingLocked(epAddr{ap: ep}, now, pingCLI, size, resCB)
}
if de.wantUDPRelayPathDiscoveryLocked(now) {
de.discoverUDPRelayPathsLocked(now)
}
}
}
@@ -926,7 +1034,7 @@ var (
errPingTooBig = errors.New("ping size too big")
)
func (de *endpoint) send(buffs [][]byte) error {
func (de *endpoint) send(buffs [][]byte, offset int) error {
de.mu.Lock()
if de.expired {
de.mu.Unlock()
@@ -940,14 +1048,17 @@ func (de *endpoint) send(buffs [][]byte) error {
if startWGPing {
de.sendWireGuardOnlyPingsLocked(now)
}
} else if !udpAddr.IsValid() || now.After(de.trustBestAddrUntil) {
} else if !udpAddr.isDirect() || now.After(de.trustBestAddrUntil) {
de.sendDiscoPingsLocked(now, true)
if de.wantUDPRelayPathDiscoveryLocked(now) {
de.discoverUDPRelayPathsLocked(now)
}
}
de.noteTxActivityExtTriggerLocked(now)
de.lastSendAny = now
de.mu.Unlock()
if !udpAddr.IsValid() && !derpAddr.IsValid() {
if !udpAddr.ap.IsValid() && !derpAddr.IsValid() {
// Make a last ditch effort to see if we have a DERP route for them. If
// they contacted us over DERP and we don't know their UDP endpoints or
// their DERP home, we can at least assume they're reachable over the
@@ -959,8 +1070,8 @@ func (de *endpoint) send(buffs [][]byte) error {
}
}
var err error
if udpAddr.IsValid() {
_, err = de.c.sendUDPBatch(udpAddr, buffs)
if udpAddr.ap.IsValid() {
_, err = de.c.sendUDPBatch(udpAddr, buffs, offset)
// If the error is known to indicate that the endpoint is no longer
// usable, clear the endpoint statistics so that the next send will
@@ -971,37 +1082,49 @@ func (de *endpoint) send(buffs [][]byte) error {
var txBytes int
for _, b := range buffs {
txBytes += len(b)
txBytes += len(b[offset:])
}
switch {
case udpAddr.Addr().Is4():
de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes))
case udpAddr.Addr().Is6():
de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes))
case udpAddr.ap.Addr().Is4():
if udpAddr.vni.IsSet() {
de.c.metrics.outboundPacketsPeerRelayIPv4Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesPeerRelayIPv4Total.Add(int64(txBytes))
} else {
de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes))
}
case udpAddr.ap.Addr().Is6():
if udpAddr.vni.IsSet() {
de.c.metrics.outboundPacketsPeerRelayIPv6Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesPeerRelayIPv6Total.Add(int64(txBytes))
} else {
de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes))
}
}
// TODO(raggi): needs updating for accuracy, as in error conditions we may have partial sends.
if stats := de.c.stats.Load(); err == nil && stats != nil {
stats.UpdateTxPhysical(de.nodeAddr, udpAddr, len(buffs), txBytes)
if update := de.c.connCounter.Load(); err == nil && update != nil {
update(0, netip.AddrPortFrom(de.nodeAddr, 0), udpAddr.ap, len(buffs), txBytes, false)
}
}
if derpAddr.IsValid() {
allOk := true
var txBytes int
for _, buff := range buffs {
buff = buff[offset:]
const isDisco = false
ok, _ := de.c.sendAddr(derpAddr, de.publicKey, buff, isDisco)
const isGeneveEncap = false
ok, _ := de.c.sendAddr(derpAddr, de.publicKey, buff, isDisco, isGeneveEncap)
txBytes += len(buff)
if !ok {
allOk = false
}
}
if stats := de.c.stats.Load(); stats != nil {
stats.UpdateTxPhysical(de.nodeAddr, derpAddr, len(buffs), txBytes)
if update := de.c.connCounter.Load(); update != nil {
update(0, netip.AddrPortFrom(de.nodeAddr, 0), derpAddr, len(buffs), txBytes, false)
}
if allOk {
return nil
@@ -1053,7 +1176,12 @@ func (de *endpoint) discoPingTimeout(txid stun.TxID) {
if !ok {
return
}
if debugDisco() || !de.bestAddr.IsValid() || mono.Now().After(de.trustBestAddrUntil) {
bestUntrusted := mono.Now().After(de.trustBestAddrUntil)
if sp.to == de.bestAddr.epAddr && sp.to.vni.IsSet() && bestUntrusted {
// TODO(jwhited): consider applying this to direct UDP paths as well
de.clearBestAddrLocked()
}
if debugDisco() || !de.bestAddr.ap.IsValid() || bestUntrusted {
de.c.dlogf("[v1] magicsock: disco: timeout waiting for pong %x from %v (%v, %v)", txid[:6], sp.to, de.publicKey.ShortString(), de.discoShort())
}
de.removeSentDiscoPingLocked(txid, sp, discoPingTimedOut)
@@ -1107,7 +1235,7 @@ const discoPingSize = len(disco.Magic) + key.DiscoPublicRawLen + disco.NonceLen
//
// The caller should use de.discoKey as the discoKey argument.
// It is passed in so that sendDiscoPing doesn't need to lock de.mu.
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
func (de *endpoint) sendDiscoPing(ep epAddr, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
size = min(size, MaxDiscoPingSize)
padding := max(size-discoPingSize, 0)
@@ -1123,7 +1251,7 @@ func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, t
if size != 0 {
metricSentDiscoPeerMTUProbes.Add(1)
metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep.Addr().Is6())))
metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep)))
}
}
@@ -1154,16 +1282,20 @@ const (
// if non-nil, means that a caller external to the magicsock package internals
// is interested in the result (such as a CLI "tailscale ping" or a c2n ping
// request, etc)
func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose, size int, resCB *pingResultAndCallback) {
func (de *endpoint) startDiscoPingLocked(ep epAddr, now mono.Time, purpose discoPingPurpose, size int, resCB *pingResultAndCallback) {
if runtime.GOOS == "js" {
return
}
if debugNeverDirectUDP() && !ep.vni.IsSet() && ep.ap.Addr() != tailcfg.DerpMagicIPAddr {
return
}
epDisco := de.disco.Load()
if epDisco == nil {
return
}
if purpose != pingCLI {
st, ok := de.endpointState[ep]
if purpose != pingCLI &&
!ep.vni.IsSet() { // de.endpointState is only relevant for direct/non-vni epAddr's
st, ok := de.endpointState[ep.ap]
if !ok {
// Shouldn't happen. But don't ping an endpoint that's
// not active for us.
@@ -1180,11 +1312,11 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo
// Default to sending a single ping of the specified size
sizes := []int{size}
if de.c.PeerMTUEnabled() {
isDerp := ep.Addr() == tailcfg.DerpMagicIPAddr
isDerp := ep.ap.Addr() == tailcfg.DerpMagicIPAddr
if !isDerp && ((purpose == pingDiscovery) || (purpose == pingCLI && size == 0)) {
de.c.dlogf("[v1] magicsock: starting MTU probe")
sizes = mtuProbePingSizesV4
if ep.Addr().Is6() {
if ep.ap.Addr().Is6() {
sizes = mtuProbePingSizesV6
}
}
@@ -1239,7 +1371,7 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
de.c.dlogf("[v1] magicsock: disco: send, starting discovery for %v (%v)", de.publicKey.ShortString(), de.discoShort())
}
de.startDiscoPingLocked(ep, now, pingDiscovery, 0, nil)
de.startDiscoPingLocked(epAddr{ap: ep}, now, pingDiscovery, 0, nil)
}
derpAddr := de.derpAddr
if sentAny && sendCallMeMaybe && derpAddr.IsValid() {
@@ -1253,7 +1385,7 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
}
// sendWireGuardOnlyPingsLocked evaluates all available addresses for
// a WireGuard only endpoint and initates an ICMP ping for useable
// a WireGuard only endpoint and initiates an ICMP ping for useable
// addresses.
func (de *endpoint) sendWireGuardOnlyPingsLocked(now mono.Time) {
if runtime.GOOS == "js" {
@@ -1390,6 +1522,8 @@ func (de *endpoint) updateFromNode(n tailcfg.NodeView, heartbeatDisabled bool, p
}
de.setEndpointsLocked(n.Endpoints())
de.relayCapable = capVerIsRelayCapable(n.Cap())
}
func (de *endpoint) setEndpointsLocked(eps interface {
@@ -1472,7 +1606,7 @@ func (de *endpoint) addCandidateEndpoint(ep netip.AddrPort, forRxPingTxID stun.T
}
}
size2 := len(de.endpointState)
de.c.dlogf("[v1] magicsock: disco: addCandidateEndpoint pruned %v candidate set from %v to %v entries", size, size2)
de.c.dlogf("[v1] magicsock: disco: addCandidateEndpoint pruned %v (%s) candidate set from %v to %v entries", de.discoShort(), de.publicKey.ShortString(), size, size2)
}
return false
}
@@ -1487,17 +1621,19 @@ func (de *endpoint) clearBestAddrLocked() {
de.trustBestAddrUntil = 0
}
// noteBadEndpoint marks ipp as a bad endpoint that would need to be
// noteBadEndpoint marks udpAddr as a bad endpoint that would need to be
// re-evaluated before future use, this should be called for example if a send
// to ipp fails due to a host unreachable error or similar.
func (de *endpoint) noteBadEndpoint(ipp netip.AddrPort) {
// to udpAddr fails due to a host unreachable error or similar.
func (de *endpoint) noteBadEndpoint(udpAddr epAddr) {
de.mu.Lock()
defer de.mu.Unlock()
de.clearBestAddrLocked()
if st, ok := de.endpointState[ipp]; ok {
st.clear()
if !udpAddr.vni.IsSet() {
if st, ok := de.endpointState[udpAddr.ap]; ok {
st.clear()
}
}
}
@@ -1517,17 +1653,20 @@ func (de *endpoint) noteConnectivityChange() {
// pingSizeToPktLen calculates the minimum path MTU that would permit
// a disco ping message of length size to reach its target at
// addr. size is the length of the entire disco message including
// udpAddr. size is the length of the entire disco message including
// disco headers. If size is zero, assume it is the safe wire MTU.
func pingSizeToPktLen(size int, is6 bool) tstun.WireMTU {
func pingSizeToPktLen(size int, udpAddr epAddr) tstun.WireMTU {
if size == 0 {
return tstun.SafeWireMTU()
}
headerLen := ipv4.HeaderLen
if is6 {
if udpAddr.ap.Addr().Is6() {
headerLen = ipv6.HeaderLen
}
headerLen += 8 // UDP header length
if udpAddr.vni.IsSet() {
headerLen += packet.GeneveFixedHeaderLength
}
return tstun.WireMTU(size + headerLen)
}
@@ -1554,11 +1693,11 @@ func pktLenToPingSize(mtu tstun.WireMTU, is6 bool) int {
// It should be called with the Conn.mu held.
//
// It reports whether m.TxID corresponds to a ping that this endpoint sent.
func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip.AddrPort) (knownTxID bool) {
func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAddr) (knownTxID bool) {
de.mu.Lock()
defer de.mu.Unlock()
isDerp := src.Addr() == tailcfg.DerpMagicIPAddr
isDerp := src.ap.Addr() == tailcfg.DerpMagicIPAddr
sp, ok := de.sentPing[m.TxID]
if !ok {
@@ -1568,7 +1707,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
knownTxID = true // for naked returns below
de.removeSentDiscoPingLocked(m.TxID, sp, discoPongReceived)
pktLen := int(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))
pktLen := int(pingSizeToPktLen(sp.size, src))
if sp.size != 0 {
m := getPeerMTUsProbedMetric(tstun.WireMTU(pktLen))
m.Add(1)
@@ -1580,25 +1719,27 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
now := mono.Now()
latency := now.Sub(sp.at)
if !isDerp {
st, ok := de.endpointState[sp.to]
if !isDerp && !src.vni.IsSet() {
// Note: we check vni.isSet() as relay [epAddr]'s are not stored in
// endpointState, they are either de.bestAddr or not.
st, ok := de.endpointState[sp.to.ap]
if !ok {
// This is no longer an endpoint we care about.
return
}
de.c.peerMap.setNodeKeyForIPPort(src, de.publicKey)
de.c.peerMap.setNodeKeyForEpAddr(src, de.publicKey)
st.addPongReplyLocked(pongReply{
latency: latency,
pongAt: now,
from: src,
from: src.ap,
pongSrc: m.Src,
})
}
if sp.purpose != pingHeartbeat && sp.purpose != pingHeartbeatForUDPLifetime {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pktLen, m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoAtomic.Short(), de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pktLen, m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
if sp.to != src {
fmt.Fprintf(bw, " ping.to=%v", sp.to)
}
@@ -1616,21 +1757,30 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
// Promote this pong response to our current best address if it's lower latency.
// TODO(bradfitz): decide how latency vs. preference order affects decision
if !isDerp {
thisPong := addrQuality{sp.to, latency, tstun.WireMTU(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))}
thisPong := addrQuality{
epAddr: sp.to,
latency: latency,
wireMTU: pingSizeToPktLen(sp.size, sp.to),
}
// TODO(jwhited): consider checking de.trustBestAddrUntil as well. If
// de.bestAddr is untrusted we may want to clear it, otherwise we could
// get stuck with a forever untrusted bestAddr that blackholes, since
// we don't clear direct UDP paths on disco ping timeout (see
// discoPingTimeout).
if betterAddr(thisPong, de.bestAddr) {
de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v tx=%x", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU, m.TxID[:6])
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
What: "handlePingLocked-bestAddr-update",
What: "handlePongConnLocked-bestAddr-update",
From: de.bestAddr,
To: thisPong,
})
de.setBestAddrLocked(thisPong)
}
if de.bestAddr.AddrPort == thisPong.AddrPort {
if de.bestAddr.epAddr == thisPong.epAddr {
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
What: "handlePingLocked-bestAddr-latency",
What: "handlePongConnLocked-bestAddr-latency",
From: de.bestAddr,
To: thisPong,
})
@@ -1642,20 +1792,43 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
return
}
// addrQuality is an IPPort with an associated latency and path mtu.
// epAddr is a [netip.AddrPort] with an optional Geneve header (RFC8926)
// [packet.VirtualNetworkID].
type epAddr struct {
ap netip.AddrPort // if ap == tailcfg.DerpMagicIPAddr then vni is never set
vni packet.VirtualNetworkID // vni.IsSet() indicates if this [epAddr] involves a Geneve header
}
// isDirect returns true if e.ap is valid and not tailcfg.DerpMagicIPAddr,
// and a VNI is not set.
func (e epAddr) isDirect() bool {
return e.ap.IsValid() && e.ap.Addr() != tailcfg.DerpMagicIPAddr && !e.vni.IsSet()
}
func (e epAddr) String() string {
if !e.vni.IsSet() {
return e.ap.String()
}
return fmt.Sprintf("%v:vni:%d", e.ap.String(), e.vni.Get())
}
// addrQuality is an [epAddr], an optional [key.DiscoPublic] if a relay server
// is associated, a round-trip latency measurement, and path mtu.
type addrQuality struct {
netip.AddrPort
latency time.Duration
wireMTU tstun.WireMTU
epAddr
relayServerDisco key.DiscoPublic // only relevant if epAddr.vni.isSet(), otherwise zero value
latency time.Duration
wireMTU tstun.WireMTU
}
func (a addrQuality) String() string {
return fmt.Sprintf("%v@%v+%v", a.AddrPort, a.latency, a.wireMTU)
// TODO(jwhited): consider including relayServerDisco
return fmt.Sprintf("%v@%v+%v", a.epAddr, a.latency, a.wireMTU)
}
// betterAddr reports whether a is a better addr to use than b.
func betterAddr(a, b addrQuality) bool {
if a.AddrPort == b.AddrPort {
if a.epAddr == b.epAddr {
if a.wireMTU > b.wireMTU {
// TODO(val): Think harder about the case of lower
// latency and smaller or unknown MTU, and higher
@@ -1666,10 +1839,19 @@ func betterAddr(a, b addrQuality) bool {
}
return false
}
if !b.IsValid() {
if !b.ap.IsValid() {
return true
}
if !a.IsValid() {
if !a.ap.IsValid() {
return false
}
// Geneve-encapsulated paths (UDP relay servers) are lower preference in
// relation to non.
if !a.vni.IsSet() && b.vni.IsSet() {
return true
}
if a.vni.IsSet() && !b.vni.IsSet() {
return false
}
@@ -1693,27 +1875,27 @@ func betterAddr(a, b addrQuality) bool {
// addresses, and prefer link-local unicast addresses over other types
// of private IP addresses since it's definitionally more likely that
// they'll be on the same network segment than a general private IP.
if a.Addr().IsLoopback() {
if a.ap.Addr().IsLoopback() {
aPoints += 50
} else if a.Addr().IsLinkLocalUnicast() {
} else if a.ap.Addr().IsLinkLocalUnicast() {
aPoints += 30
} else if a.Addr().IsPrivate() {
} else if a.ap.Addr().IsPrivate() {
aPoints += 20
}
if b.Addr().IsLoopback() {
if b.ap.Addr().IsLoopback() {
bPoints += 50
} else if b.Addr().IsLinkLocalUnicast() {
} else if b.ap.Addr().IsLinkLocalUnicast() {
bPoints += 30
} else if b.Addr().IsPrivate() {
} else if b.ap.Addr().IsPrivate() {
bPoints += 20
}
// Prefer IPv6 for being a bit more robust, as long as
// the latencies are roughly equivalent.
if a.Addr().Is6() {
if a.ap.Addr().Is6() {
aPoints += 10
}
if b.Addr().Is6() {
if b.ap.Addr().Is6() {
bPoints += 10
}
@@ -1797,7 +1979,25 @@ func (de *endpoint) handleCallMeMaybe(m *disco.CallMeMaybe) {
for _, st := range de.endpointState {
st.lastPing = 0
}
de.sendDiscoPingsLocked(mono.Now(), false)
monoNow := mono.Now()
de.sendDiscoPingsLocked(monoNow, false)
// This hook is required to trigger peer relay path discovery around
// disco "tailscale ping" initiated by de. We may be configured with peer
// relay servers that differ from de.
//
// The only other peer relay path discovery hook is in [endpoint.heartbeat],
// which is kicked off around outbound WireGuard packet flow, or if you are
// the "tailscale ping" initiator. Disco "tailscale ping" does not propagate
// into wireguard-go.
//
// We choose not to hook this around disco ping reception since peer relay
// path discovery can also trigger disco ping transmission, which *could*
// lead to an infinite loop of peer relay path discovery between two peers,
// absent intended triggers.
if de.wantUDPRelayPathDiscoveryLocked(monoNow) {
de.discoverUDPRelayPathsLocked(monoNow)
}
}
func (de *endpoint) populatePeerStatus(ps *ipnstate.PeerStatus) {
@@ -1814,8 +2014,12 @@ func (de *endpoint) populatePeerStatus(ps *ipnstate.PeerStatus) {
ps.LastWrite = de.lastSendExt.WallTime()
ps.Active = now.Sub(de.lastSendExt) < sessionActiveTimeout
if udpAddr, derpAddr, _ := de.addrForSendLocked(now); udpAddr.IsValid() && !derpAddr.IsValid() {
ps.CurAddr = udpAddr.String()
if udpAddr, derpAddr, _ := de.addrForSendLocked(now); udpAddr.ap.IsValid() && !derpAddr.IsValid() {
if udpAddr.vni.IsSet() {
ps.PeerRelay = udpAddr.String()
} else {
ps.CurAddr = udpAddr.String()
}
}
}
@@ -1863,14 +2067,22 @@ func (de *endpoint) resetLocked() {
}
}
de.probeUDPLifetime.resetCycleEndpointLocked()
de.c.relayManager.stopWork(de)
}
func (de *endpoint) numStopAndReset() int64 {
return atomic.LoadInt64(&de.numStopAndResetAtomic)
}
// setDERPHome sets the provided regionID as home for de. Calls to setDERPHome
// must never run concurrent to [Conn.updateRelayServersSet], otherwise
// [candidatePeerRelay] DERP home changes may be missed from the perspective of
// [relayManager].
func (de *endpoint) setDERPHome(regionID uint16) {
de.mu.Lock()
defer de.mu.Unlock()
de.derpAddr = netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))
if de.c.hasPeerRelayServers.Load() {
de.c.relayManager.handleDERPHomeChange(de.publicKey, regionID)
}
}

View File

@@ -6,9 +6,9 @@ package magicsock
import (
"net/netip"
"slices"
"sync"
"time"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/tempfork/heap"
"tailscale.com/util/mak"
@@ -107,7 +107,7 @@ func (eh endpointHeap) Min() *endpointTrackerEntry {
//
// See tailscale/tailscale#7877 for more information.
type endpointTracker struct {
mu sync.Mutex
mu syncs.Mutex
endpoints map[netip.Addr]*endpointHeap
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !linux
//go:build !linux || ts_omit_listenrawdisco
package magicsock
@@ -9,19 +9,8 @@ import (
"errors"
"fmt"
"io"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
func (c *Conn) listenRawDisco(family string) (io.Closer, error) {
return nil, fmt.Errorf("raw disco listening not supported on this OS: %w", errors.ErrUnsupported)
}
func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
portableTrySetSocketBuffer(pconn, logf)
}
const (
controlMessageSize = 0
)

View File

@@ -1,6 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build linux && !ts_omit_listenrawdisco
package magicsock
import (
@@ -13,7 +15,6 @@ import (
"net"
"net/netip"
"strings"
"syscall"
"time"
"github.com/mdlayher/socket"
@@ -28,7 +29,6 @@ import (
"tailscale.com/types/ipproto"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
const (
@@ -66,10 +66,10 @@ var (
// fragmented, and we don't want to handle reassembly.
bpf.LoadAbsolute{Off: 6, Size: 2},
// More Fragments bit set means this is part of a fragmented packet.
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x2000, SkipTrue: 7, SkipFalse: 0},
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x2000, SkipTrue: 8, SkipFalse: 0},
// Non-zero fragment offset with MF=0 means this is the last
// fragment of packet.
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x1fff, SkipTrue: 6, SkipFalse: 0},
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x1fff, SkipTrue: 7, SkipFalse: 0},
// Load IP header length into X register.
bpf.LoadMemShift{Off: 0},
@@ -453,7 +453,13 @@ func (c *Conn) receiveDisco(pc *socket.Conn, isIPV6 bool) {
metricRecvDiscoPacketIPv4.Add(1)
}
c.handleDiscoMessage(payload, srcAddr, key.NodePublic{}, discoRXPathRawSocket)
pt, isGeneveEncap := packetLooksLike(payload)
if pt == packetLooksLikeDisco && !isGeneveEncap {
// The BPF program matching on disco does not currently support
// Geneve encapsulation. isGeneveEncap should not return true if
// payload is disco.
c.handleDiscoMessage(payload, epAddr{ap: srcAddr}, false, key.NodePublic{}, discoRXPathRawSocket)
}
}
}
@@ -483,38 +489,3 @@ func printSockaddr(sa unix.Sockaddr) string {
return fmt.Sprintf("unknown(%T)", sa)
}
}
// trySetSocketBuffer attempts to set SO_SNDBUFFORCE and SO_RECVBUFFORCE which
// can overcome the limit of net.core.{r,w}mem_max, but require CAP_NET_ADMIN.
// It falls back to the portable implementation if that fails, which may be
// silently capped to net.core.{r,w}mem_max.
func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
if c, ok := pconn.(*net.UDPConn); ok {
var errRcv, errSnd error
rc, err := c.SyscallConn()
if err == nil {
rc.Control(func(fd uintptr) {
errRcv = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, socketBufferSize)
if errRcv != nil {
logf("magicsock: [warning] failed to force-set UDP read buffer size to %d: %v; using kernel default values (impacts throughput only)", socketBufferSize, errRcv)
}
errSnd = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_SNDBUFFORCE, socketBufferSize)
if errSnd != nil {
logf("magicsock: [warning] failed to force-set UDP write buffer size to %d: %v; using kernel default values (impacts throughput only)", socketBufferSize, errSnd)
}
})
}
if err != nil || errRcv != nil || errSnd != nil {
portableTrySetSocketBuffer(pconn, logf)
}
}
}
var controlMessageSize = -1 // bomb if used for allocation before init
func init() {
// controlMessageSize is set to hold a UDP_GRO or UDP_SEGMENT control
// message. These contain a single uint16 of data.
controlMessageSize = unix.CmsgSpace(2)
}

View File

@@ -1,13 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !windows
package magicsock
import (
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) {}

View File

@@ -1,58 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build windows
package magicsock
import (
"net"
"unsafe"
"golang.org/x/sys/windows"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
)
func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) {
c, ok := pconn.(*net.UDPConn)
if !ok {
// not a UDP connection; nothing to do
return
}
sysConn, err := c.SyscallConn()
if err != nil {
logf("trySetUDPSocketOptions: getting SyscallConn failed: %v", err)
return
}
// Similar to https://github.com/golang/go/issues/5834 (which involved
// WSAECONNRESET), Windows can return a WSAENETRESET error, even on UDP
// reads. Disable this.
const SIO_UDP_NETRESET = windows.IOC_IN | windows.IOC_VENDOR | 15
var ioctlErr error
err = sysConn.Control(func(fd uintptr) {
ret := uint32(0)
flag := uint32(0)
size := uint32(unsafe.Sizeof(flag))
ioctlErr = windows.WSAIoctl(
windows.Handle(fd),
SIO_UDP_NETRESET, // iocc
(*byte)(unsafe.Pointer(&flag)), // inbuf
size, // cbif
nil, // outbuf
0, // cbob
&ret, // cbbr
nil, // overlapped
0, // completionRoutine
)
})
if ioctlErr != nil {
logf("trySetUDPSocketOptions: could not set SIO_UDP_NETRESET: %v", ioctlErr)
}
if err != nil {
logf("trySetUDPSocketOptions: SyscallConn.Control failed: %v", err)
}
}

View File

@@ -4,8 +4,6 @@
package magicsock
import (
"net/netip"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/util/set"
@@ -15,17 +13,17 @@ import (
// peer.
type peerInfo struct {
ep *endpoint // always non-nil.
// ipPorts is an inverted version of peerMap.byIPPort (below), so
// epAddrs is an inverted version of peerMap.byEpAddr (below), so
// that when we're deleting this node, we can rapidly find out the
// keys that need deleting from peerMap.byIPPort without having to
// iterate over every IPPort known for any peer.
ipPorts set.Set[netip.AddrPort]
// keys that need deleting from peerMap.byEpAddr without having to
// iterate over every epAddr known for any peer.
epAddrs set.Set[epAddr]
}
func newPeerInfo(ep *endpoint) *peerInfo {
return &peerInfo{
ep: ep,
ipPorts: set.Set[netip.AddrPort]{},
epAddrs: set.Set[epAddr]{},
}
}
@@ -35,9 +33,21 @@ func newPeerInfo(ep *endpoint) *peerInfo {
// It doesn't do any locking; all access must be done with Conn.mu held.
type peerMap struct {
byNodeKey map[key.NodePublic]*peerInfo
byIPPort map[netip.AddrPort]*peerInfo
byEpAddr map[epAddr]*peerInfo
byNodeID map[tailcfg.NodeID]*peerInfo
// relayEpAddrByNodeKey ensures we only hold a single relay
// [epAddr] (vni.isSet()) for a given node key in byEpAddr, vs letting them
// grow unbounded. Relay [epAddr]'s are dynamically created by
// [relayManager] during path discovery, and are only useful to track in
// peerMap so long as they are the endpoint.bestAddr. [relayManager] handles
// all creation and initial probing responsibilities otherwise, and it does
// not depend on [peerMap].
//
// Note: This doesn't address unbounded growth of non-relay epAddr's in
// byEpAddr. That issue is being tracked in http://go/corp/29422.
relayEpAddrByNodeKey map[key.NodePublic]epAddr
// nodesOfDisco contains the set of nodes that are using a
// DiscoKey. Usually those sets will be just one node.
nodesOfDisco map[key.DiscoPublic]set.Set[key.NodePublic]
@@ -45,10 +55,11 @@ type peerMap struct {
func newPeerMap() peerMap {
return peerMap{
byNodeKey: map[key.NodePublic]*peerInfo{},
byIPPort: map[netip.AddrPort]*peerInfo{},
byNodeID: map[tailcfg.NodeID]*peerInfo{},
nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{},
byNodeKey: map[key.NodePublic]*peerInfo{},
byEpAddr: map[epAddr]*peerInfo{},
byNodeID: map[tailcfg.NodeID]*peerInfo{},
relayEpAddrByNodeKey: map[key.NodePublic]epAddr{},
nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{},
}
}
@@ -88,10 +99,10 @@ func (m *peerMap) endpointForNodeID(nodeID tailcfg.NodeID) (ep *endpoint, ok boo
return nil, false
}
// endpointForIPPort returns the endpoint for the peer we
// believe to be at ipp, or nil if we don't know of any such peer.
func (m *peerMap) endpointForIPPort(ipp netip.AddrPort) (ep *endpoint, ok bool) {
if info, ok := m.byIPPort[ipp]; ok {
// endpointForEpAddr returns the endpoint for the peer we
// believe to be at addr, or nil if we don't know of any such peer.
func (m *peerMap) endpointForEpAddr(addr epAddr) (ep *endpoint, ok bool) {
if info, ok := m.byEpAddr[addr]; ok {
return info.ep, true
}
return nil, false
@@ -148,10 +159,10 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
// TODO(raggi,catzkorn): this could mean that if a "isWireguardOnly"
// peer has, say, 192.168.0.2 and so does a tailscale peer, the
// wireguard one will win. That may not be the outcome that we want -
// perhaps we should prefer bestAddr.AddrPort if it is set?
// perhaps we should prefer bestAddr.epAddr.ap if it is set?
// see tailscale/tailscale#7994
for ipp := range ep.endpointState {
m.setNodeKeyForIPPort(ipp, ep.publicKey)
m.setNodeKeyForEpAddr(epAddr{ap: ipp}, ep.publicKey)
}
return
}
@@ -163,20 +174,31 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
discoSet.Add(ep.publicKey)
}
// setNodeKeyForIPPort makes future peer lookups by ipp return the
// setNodeKeyForEpAddr makes future peer lookups by addr return the
// same endpoint as a lookup by nk.
//
// This should only be called with a fully verified mapping of ipp to
// This should only be called with a fully verified mapping of addr to
// nk, because calling this function defines the endpoint we hand to
// WireGuard for packets received from ipp.
func (m *peerMap) setNodeKeyForIPPort(ipp netip.AddrPort, nk key.NodePublic) {
if pi := m.byIPPort[ipp]; pi != nil {
delete(pi.ipPorts, ipp)
delete(m.byIPPort, ipp)
// WireGuard for packets received from addr.
func (m *peerMap) setNodeKeyForEpAddr(addr epAddr, nk key.NodePublic) {
if pi := m.byEpAddr[addr]; pi != nil {
delete(pi.epAddrs, addr)
delete(m.byEpAddr, addr)
if addr.vni.IsSet() {
delete(m.relayEpAddrByNodeKey, pi.ep.publicKey)
}
}
if pi, ok := m.byNodeKey[nk]; ok {
pi.ipPorts.Add(ipp)
m.byIPPort[ipp] = pi
if addr.vni.IsSet() {
relay, ok := m.relayEpAddrByNodeKey[nk]
if ok {
delete(pi.epAddrs, relay)
delete(m.byEpAddr, relay)
}
m.relayEpAddrByNodeKey[nk] = addr
}
pi.epAddrs.Add(addr)
m.byEpAddr[addr] = pi
}
}
@@ -203,7 +225,8 @@ func (m *peerMap) deleteEndpoint(ep *endpoint) {
// Unexpected. But no logger plumbed here to log so.
return
}
for ip := range pi.ipPorts {
delete(m.byIPPort, ip)
for ip := range pi.epAddrs {
delete(m.byEpAddr, ip)
}
delete(m.relayEpAddrByNodeKey, ep.publicKey)
}

View File

@@ -5,14 +5,17 @@ package magicsock
import (
"errors"
"fmt"
"net"
"net/netip"
"sync"
"sync/atomic"
"syscall"
"golang.org/x/net/ipv6"
"tailscale.com/net/batching"
"tailscale.com/net/netaddr"
"tailscale.com/net/packet"
"tailscale.com/syncs"
"tailscale.com/types/nettype"
)
@@ -28,7 +31,7 @@ type RebindingUDPConn struct {
// Neither is expected to be nil, sockets are bound on creation.
pconnAtomic atomic.Pointer[nettype.PacketConn]
mu sync.Mutex // held while changing pconn (and pconnAtomic)
mu syncs.Mutex // held while changing pconn (and pconnAtomic)
pconn nettype.PacketConn
port uint16
}
@@ -40,7 +43,7 @@ type RebindingUDPConn struct {
// disrupting surrounding code that assumes nettype.PacketConn is a
// *net.UDPConn.
func (c *RebindingUDPConn) setConnLocked(p nettype.PacketConn, network string, batchSize int) {
upc := tryUpgradeToBatchingConn(p, network, batchSize)
upc := batching.TryUpgradeToConn(p, network, batchSize)
c.pconn = upc
c.pconnAtomic.Store(&upc)
c.port = uint16(c.localAddrLocked().Port)
@@ -70,21 +73,39 @@ func (c *RebindingUDPConn) ReadFromUDPAddrPort(b []byte) (int, netip.AddrPort, e
return c.readFromWithInitPconn(*c.pconnAtomic.Load(), b)
}
// WriteBatchTo writes buffs to addr.
func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) error {
// WriteWireGuardBatchTo writes buffs to addr. It serves primarily as an alias
// for [batching.Conn.WriteBatchTo], with fallback to single packet operations
// if c.pconn is not a [batching.Conn].
//
// WriteWireGuardBatchTo assumes buffs are WireGuard packets, which is notable
// for Geneve encapsulation: Geneve protocol is set to [packet.GeneveProtocolWireGuard],
// and the control bit is left unset.
func (c *RebindingUDPConn) WriteWireGuardBatchTo(buffs [][]byte, addr epAddr, offset int) error {
if offset != packet.GeneveFixedHeaderLength {
return fmt.Errorf("RebindingUDPConn.WriteWireGuardBatchTo: [unexpected] offset (%d) != Geneve header length (%d)", offset, packet.GeneveFixedHeaderLength)
}
gh := packet.GeneveHeader{
Protocol: packet.GeneveProtocolWireGuard,
VNI: addr.vni,
}
for {
pconn := *c.pconnAtomic.Load()
b, ok := pconn.(batchingConn)
b, ok := pconn.(batching.Conn)
if !ok {
for _, buf := range buffs {
_, err := c.writeToUDPAddrPortWithInitPconn(pconn, buf, addr)
if gh.VNI.IsSet() {
gh.Encode(buf)
} else {
buf = buf[offset:]
}
_, err := c.writeToUDPAddrPortWithInitPconn(pconn, buf, addr.ap)
if err != nil {
return err
}
}
return nil
}
err := b.WriteBatchTo(buffs, addr)
err := b.WriteBatchTo(buffs, addr.ap, gh, offset)
if err != nil {
if pconn != c.currentConn() {
continue
@@ -95,13 +116,12 @@ func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort) err
}
}
// ReadBatch reads messages from c into msgs. It returns the number of messages
// the caller should evaluate for nonzero len, as a zero len message may fall
// on either side of a nonzero.
// ReadBatch is an alias for [batching.Conn.ReadBatch] with fallback to single
// packet operations if c.pconn is not a [batching.Conn].
func (c *RebindingUDPConn) ReadBatch(msgs []ipv6.Message, flags int) (int, error) {
for {
pconn := *c.pconnAtomic.Load()
b, ok := pconn.(batchingConn)
b, ok := pconn.(batching.Conn)
if !ok {
n, ap, err := c.readFromWithInitPconn(pconn, msgs[0].Buffers[0])
if err == nil {

1071
vendor/tailscale.com/wgengine/magicsock/relaymanager.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,274 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Package netlog provides a logger that monitors a TUN device and
// periodically records any traffic into a log stream.
package netlog
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/netip"
"sync"
"time"
"tailscale.com/health"
"tailscale.com/logpolicy"
"tailscale.com/logtail"
"tailscale.com/net/connstats"
"tailscale.com/net/netmon"
"tailscale.com/net/sockstats"
"tailscale.com/net/tsaddr"
"tailscale.com/tailcfg"
"tailscale.com/types/logid"
"tailscale.com/types/netlogtype"
"tailscale.com/util/multierr"
"tailscale.com/wgengine/router"
)
// pollPeriod specifies how often to poll for network traffic.
const pollPeriod = 5 * time.Second
// Device is an abstraction over a tunnel device or a magic socket.
// Both *tstun.Wrapper and *magicsock.Conn implement this interface.
type Device interface {
SetStatistics(*connstats.Statistics)
}
type noopDevice struct{}
func (noopDevice) SetStatistics(*connstats.Statistics) {}
// Logger logs statistics about every connection.
// At present, it only logs connections within a tailscale network.
// Exit node traffic is not logged for privacy reasons.
// The zero value is ready for use.
type Logger struct {
mu sync.Mutex // protects all fields below
logger *logtail.Logger
stats *connstats.Statistics
tun Device
sock Device
addrs map[netip.Addr]bool
prefixes map[netip.Prefix]bool
}
// Running reports whether the logger is running.
func (nl *Logger) Running() bool {
nl.mu.Lock()
defer nl.mu.Unlock()
return nl.logger != nil
}
var testClient *http.Client
// Startup starts an asynchronous network logger that monitors
// statistics for the provided tun and/or sock device.
//
// The tun Device captures packets within the tailscale network,
// where at least one address is a tailscale IP address.
// The source is always from the perspective of the current node.
// If one of the other endpoint is not a tailscale IP address,
// then it suggests the use of a subnet router or exit node.
// For example, when using a subnet router, the source address is
// the tailscale IP address of the current node, and
// the destination address is an IP address within the subnet range.
// In contrast, when acting as a subnet router, the source address is
// an IP address within the subnet range, and the destination is a
// tailscale IP address that initiated the subnet proxy connection.
// In this case, the node acting as a subnet router is acting on behalf
// of some remote endpoint within the subnet range.
// The tun is used to populate the VirtualTraffic, SubnetTraffic,
// and ExitTraffic fields in Message.
//
// The sock Device captures packets at the magicsock layer.
// The source is always a tailscale IP address and the destination
// is a non-tailscale IP address to contact for that particular tailscale node.
// The IP protocol and source port are always zero.
// The sock is used to populated the PhysicalTraffic field in Message.
// The netMon parameter is optional; if non-nil it's used to do faster interface lookups.
func (nl *Logger) Startup(nodeID tailcfg.StableNodeID, nodeLogID, domainLogID logid.PrivateID, tun, sock Device, netMon *netmon.Monitor, health *health.Tracker, logExitFlowEnabledEnabled bool) error {
nl.mu.Lock()
defer nl.mu.Unlock()
if nl.logger != nil {
return fmt.Errorf("network logger already running for %v", nl.logger.PrivateID().Public())
}
// Startup a log stream to Tailscale's logging service.
logf := log.Printf
httpc := &http.Client{Transport: logpolicy.NewLogtailTransport(logtail.DefaultHost, netMon, health, logf)}
if testClient != nil {
httpc = testClient
}
nl.logger = logtail.NewLogger(logtail.Config{
Collection: "tailtraffic.log.tailscale.io",
PrivateID: nodeLogID,
CopyPrivateID: domainLogID,
Stderr: io.Discard,
CompressLogs: true,
HTTPC: httpc,
// TODO(joetsai): Set Buffer? Use an in-memory buffer for now.
// Include process sequence numbers to identify missing samples.
IncludeProcID: true,
IncludeProcSequence: true,
}, logf)
nl.logger.SetSockstatsLabel(sockstats.LabelNetlogLogger)
// Startup a data structure to track per-connection statistics.
// There is a maximum size for individual log messages that logtail
// can upload to the Tailscale log service, so stay below this limit.
const maxLogSize = 256 << 10
const maxConns = (maxLogSize - netlogtype.MaxMessageJSONSize) / netlogtype.MaxConnectionCountsJSONSize
nl.stats = connstats.NewStatistics(pollPeriod, maxConns, func(start, end time.Time, virtual, physical map[netlogtype.Connection]netlogtype.Counts) {
nl.mu.Lock()
addrs := nl.addrs
prefixes := nl.prefixes
nl.mu.Unlock()
recordStatistics(nl.logger, nodeID, start, end, virtual, physical, addrs, prefixes, logExitFlowEnabledEnabled)
})
// Register the connection tracker into the TUN device.
if tun == nil {
tun = noopDevice{}
}
nl.tun = tun
nl.tun.SetStatistics(nl.stats)
// Register the connection tracker into magicsock.
if sock == nil {
sock = noopDevice{}
}
nl.sock = sock
nl.sock.SetStatistics(nl.stats)
return nil
}
func recordStatistics(logger *logtail.Logger, nodeID tailcfg.StableNodeID, start, end time.Time, connstats, sockStats map[netlogtype.Connection]netlogtype.Counts, addrs map[netip.Addr]bool, prefixes map[netip.Prefix]bool, logExitFlowEnabled bool) {
m := netlogtype.Message{NodeID: nodeID, Start: start.UTC(), End: end.UTC()}
classifyAddr := func(a netip.Addr) (isTailscale, withinRoute bool) {
// NOTE: There could be mis-classifications where an address is treated
// as a Tailscale IP address because the subnet range overlaps with
// the subnet range that Tailscale IP addresses are allocated from.
// This should never happen for IPv6, but could happen for IPv4.
withinRoute = addrs[a]
for p := range prefixes {
if p.Contains(a) && p.Bits() > 0 {
withinRoute = true
break
}
}
return withinRoute && tsaddr.IsTailscaleIP(a), withinRoute && !tsaddr.IsTailscaleIP(a)
}
exitTraffic := make(map[netlogtype.Connection]netlogtype.Counts)
for conn, cnts := range connstats {
srcIsTailscaleIP, srcWithinSubnet := classifyAddr(conn.Src.Addr())
dstIsTailscaleIP, dstWithinSubnet := classifyAddr(conn.Dst.Addr())
switch {
case srcIsTailscaleIP && dstIsTailscaleIP:
m.VirtualTraffic = append(m.VirtualTraffic, netlogtype.ConnectionCounts{Connection: conn, Counts: cnts})
case srcWithinSubnet || dstWithinSubnet:
m.SubnetTraffic = append(m.SubnetTraffic, netlogtype.ConnectionCounts{Connection: conn, Counts: cnts})
default:
const anonymize = true
if anonymize && !logExitFlowEnabled {
// Only preserve the address if it is a Tailscale IP address.
srcOrig, dstOrig := conn.Src, conn.Dst
conn = netlogtype.Connection{} // scrub everything by default
if srcIsTailscaleIP {
conn.Src = netip.AddrPortFrom(srcOrig.Addr(), 0)
}
if dstIsTailscaleIP {
conn.Dst = netip.AddrPortFrom(dstOrig.Addr(), 0)
}
}
exitTraffic[conn] = exitTraffic[conn].Add(cnts)
}
}
for conn, cnts := range exitTraffic {
m.ExitTraffic = append(m.ExitTraffic, netlogtype.ConnectionCounts{Connection: conn, Counts: cnts})
}
for conn, cnts := range sockStats {
m.PhysicalTraffic = append(m.PhysicalTraffic, netlogtype.ConnectionCounts{Connection: conn, Counts: cnts})
}
if len(m.VirtualTraffic)+len(m.SubnetTraffic)+len(m.ExitTraffic)+len(m.PhysicalTraffic) > 0 {
if b, err := json.Marshal(m); err != nil {
logger.Logf("json.Marshal error: %v", err)
} else {
logger.Logf("%s", b)
}
}
}
func makeRouteMaps(cfg *router.Config) (addrs map[netip.Addr]bool, prefixes map[netip.Prefix]bool) {
addrs = make(map[netip.Addr]bool)
for _, p := range cfg.LocalAddrs {
if p.IsSingleIP() {
addrs[p.Addr()] = true
}
}
prefixes = make(map[netip.Prefix]bool)
insertPrefixes := func(rs []netip.Prefix) {
for _, p := range rs {
if p.IsSingleIP() {
addrs[p.Addr()] = true
} else {
prefixes[p] = true
}
}
}
insertPrefixes(cfg.Routes)
insertPrefixes(cfg.SubnetRoutes)
return addrs, prefixes
}
// ReconfigRoutes configures the network logger with updated routes.
// The cfg is used to classify the types of connections captured by
// the tun Device passed to Startup.
func (nl *Logger) ReconfigRoutes(cfg *router.Config) {
nl.mu.Lock()
defer nl.mu.Unlock()
// TODO(joetsai): There is a race where deleted routes are not known at
// the time of extraction. We need to keep old routes around for a bit.
nl.addrs, nl.prefixes = makeRouteMaps(cfg)
}
// Shutdown shuts down the network logger.
// This attempts to flush out all pending log messages.
// Even if an error is returned, the logger is still shut down.
func (nl *Logger) Shutdown(ctx context.Context) error {
nl.mu.Lock()
defer nl.mu.Unlock()
if nl.logger == nil {
return nil
}
// Shutdown in reverse order of Startup.
// Do not hold lock while shutting down since this may flush one last time.
nl.mu.Unlock()
nl.sock.SetStatistics(nil)
nl.tun.SetStatistics(nil)
err1 := nl.stats.Shutdown(ctx)
err2 := nl.logger.Shutdown(ctx)
nl.mu.Lock()
// Purge state.
nl.logger = nil
nl.stats = nil
nl.tun = nil
nl.sock = nil
nl.addrs = nil
nl.prefixes = nil
return multierr.New(err1, err2)
}

494
vendor/tailscale.com/wgengine/netlog/netlog.go generated vendored Normal file
View File

@@ -0,0 +1,494 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !ts_omit_netlog && !ts_omit_logtail
// Package netlog provides a logger that monitors a TUN device and
// periodically records any traffic into a log stream.
package netlog
import (
"cmp"
"context"
"fmt"
"io"
"log"
"net/http"
"net/netip"
"time"
"tailscale.com/health"
"tailscale.com/logpolicy"
"tailscale.com/logtail"
"tailscale.com/net/netmon"
"tailscale.com/net/sockstats"
"tailscale.com/net/tsaddr"
"tailscale.com/syncs"
"tailscale.com/types/ipproto"
"tailscale.com/types/logger"
"tailscale.com/types/logid"
"tailscale.com/types/netlogfunc"
"tailscale.com/types/netlogtype"
"tailscale.com/types/netmap"
"tailscale.com/util/eventbus"
"tailscale.com/util/set"
"tailscale.com/wgengine/router"
jsonv2 "github.com/go-json-experiment/json"
"github.com/go-json-experiment/json/jsontext"
)
// pollPeriod specifies how often to poll for network traffic.
const pollPeriod = 5 * time.Second
// Device is an abstraction over a tunnel device or a magic socket.
// Both *tstun.Wrapper and *magicsock.Conn implement this interface.
type Device interface {
SetConnectionCounter(netlogfunc.ConnectionCounter)
}
type noopDevice struct{}
func (noopDevice) SetConnectionCounter(netlogfunc.ConnectionCounter) {}
// Logger logs statistics about every connection.
// At present, it only logs connections within a tailscale network.
// By default, exit node traffic is not logged for privacy reasons
// unless the Tailnet administrator opts-into explicit logging.
// The zero value is ready for use.
type Logger struct {
mu syncs.Mutex // protects all fields below
logf logger.Logf
// shutdownLocked shuts down the logger.
// The mutex must be held when calling.
shutdownLocked func(context.Context) error
record record // the current record of network connection flows
recordLen int // upper bound on JSON length of record
recordsChan chan record // set to nil when shutdown
flushTimer *time.Timer // fires when record should flush to recordsChan
// Information about Tailscale nodes.
// These are read-only once updated by ReconfigNetworkMap.
selfNode nodeUser
allNodes map[netip.Addr]nodeUser // includes selfNode; nodeUser values are always valid
// Information about routes.
// These are read-only once updated by ReconfigRoutes.
routeAddrs set.Set[netip.Addr]
routePrefixes []netip.Prefix
}
// Running reports whether the logger is running.
func (nl *Logger) Running() bool {
nl.mu.Lock()
defer nl.mu.Unlock()
return nl.shutdownLocked != nil
}
var testClient *http.Client
// Startup starts an asynchronous network logger that monitors
// statistics for the provided tun and/or sock device.
//
// The tun [Device] captures packets within the tailscale network,
// where at least one address is usually a tailscale IP address.
// The source is usually from the perspective of the current node.
// If one of the other endpoint is not a tailscale IP address,
// then it suggests the use of a subnet router or exit node.
// For example, when using a subnet router, the source address is
// the tailscale IP address of the current node, and
// the destination address is an IP address within the subnet range.
// In contrast, when acting as a subnet router, the source address is
// an IP address within the subnet range, and the destination is a
// tailscale IP address that initiated the subnet proxy connection.
// In this case, the node acting as a subnet router is acting on behalf
// of some remote endpoint within the subnet range.
// The tun is used to populate the VirtualTraffic, SubnetTraffic,
// and ExitTraffic fields in [netlogtype.Message].
//
// The sock [Device] captures packets at the magicsock layer.
// The source is always a tailscale IP address and the destination
// is a non-tailscale IP address to contact for that particular tailscale node.
// The IP protocol and source port are always zero.
// The sock is used to populated the PhysicalTraffic field in [netlogtype.Message].
//
// The netMon parameter is optional; if non-nil it's used to do faster interface lookups.
//
// Startup returns an error if the logger is already running.
// Call Shutdown to stop the logger and flush pending records.
func (nl *Logger) Startup(logf logger.Logf, nm *netmap.NetworkMap, nodeLogID, domainLogID logid.PrivateID, tun, sock Device, netMon *netmon.Monitor, health *health.Tracker, bus *eventbus.Bus, logExitFlowEnabledEnabled bool) error {
	nl.mu.Lock()
	defer nl.mu.Unlock()
	if nl.shutdownLocked != nil {
		return fmt.Errorf("network logger already running")
	}
	nl.selfNode, nl.allNodes = makeNodeMaps(nm)

	// Startup a log stream to Tailscale's logging service.
	if logf == nil {
		logf = log.Printf
	}
	httpc := &http.Client{Transport: logpolicy.NewLogtailTransport(logtail.DefaultHost, netMon, health, logf)}
	if testClient != nil {
		// Tests substitute their own client to avoid real network traffic.
		httpc = testClient
	}
	logger := logtail.NewLogger(logtail.Config{
		Collection:    "tailtraffic.log.tailscale.io",
		PrivateID:     nodeLogID,
		CopyPrivateID: domainLogID,
		Bus:           bus,
		Stderr:        io.Discard,
		CompressLogs:  true,
		HTTPC:         httpc,

		// TODO(joetsai): Set Buffer? Use an in-memory buffer for now.

		// Include process sequence numbers to identify missing samples.
		IncludeProcID:       true,
		IncludeProcSequence: true,
	}, logf)
	logger.SetSockstatsLabel(sockstats.LabelNetlogLogger)

	// Register the connection tracker into the TUN device.
	// A noopDevice stands in for a nil device so the shutdown
	// closure below can unconditionally unregister.
	tun = cmp.Or[Device](tun, noopDevice{})
	tun.SetConnectionCounter(nl.updateVirtConn)
	// Register the connection tracker into magicsock.
	sock = cmp.Or[Device](sock, noopDevice{})
	sock.SetConnectionCounter(nl.updatePhysConn)

	// Startup a goroutine to record log messages.
	// This is done asynchronously so that the cost of serializing
	// the network flow log message never stalls processing of packets.
	nl.record = record{}
	nl.recordLen = 0
	nl.recordsChan = make(chan record, 100)
	recorderDone := make(chan struct{})
	go func(recordsChan chan record) {
		defer close(recorderDone)
		for rec := range recordsChan {
			// NOTE(review): the parameter name logExitFlowEnabledEnabled
			// has a doubled "Enabled" suffix; presumably a typo for
			// logExitFlowEnabled — confirm with callers.
			msg := rec.toMessage(false, !logExitFlowEnabledEnabled)
			if b, err := jsonv2.Marshal(msg, jsontext.AllowInvalidUTF8(true)); err != nil {
				// NOTE(review): nl.logf is read here without nl.mu held;
				// assumed immutable after Startup — TODO confirm.
				if nl.logf != nil {
					nl.logf("netlog: json.Marshal error: %v", err)
				}
			} else {
				logger.Logf("%s", b)
			}
		}
	}(nl.recordsChan)

	// Register the mechanism for shutting down.
	// The caller of shutdownLocked must hold nl.mu.
	nl.shutdownLocked = func(ctx context.Context) error {
		// Stop receiving new flow updates.
		tun.SetConnectionCounter(nil)
		sock.SetConnectionCounter(nil)

		// Flush and process all pending records.
		nl.flushRecordLocked()
		close(nl.recordsChan)
		nl.recordsChan = nil
		<-recorderDone
		recorderDone = nil

		// Try to upload all pending records.
		err := logger.Shutdown(ctx)

		// Purge state.
		nl.shutdownLocked = nil
		nl.selfNode = nodeUser{}
		nl.allNodes = nil
		nl.routeAddrs = nil
		nl.routePrefixes = nil
		return err
	}
	return nil
}
var (
tailscaleServiceIPv4 = tsaddr.TailscaleServiceIP()
tailscaleServiceIPv6 = tsaddr.TailscaleServiceIPv6()
)
// updateVirtConn records packet/byte counts for a single tunneled
// (virtual) connection observed on the TUN device.
func (nl *Logger) updateVirtConn(proto ipproto.Proto, src, dst netip.AddrPort, packets, bytes int, recv bool) {
	// Network logging is defined as traffic between two Tailscale nodes.
	// Traffic with the internal Tailscale service is not with another node
	// and should not be logged. It also happens to be a high volume
	// amount of discrete traffic flows (e.g., DNS lookups).
	if a := dst.Addr(); a == tailscaleServiceIPv4 || a == tailscaleServiceIPv6 {
		return
	}
	nl.mu.Lock()
	defer nl.mu.Unlock()

	// Look up the connection and bump its counters,
	// classifying it on first sight.
	nl.initRecordLocked()
	conn := netlogtype.Connection{Proto: proto, Src: src, Dst: dst}
	counts, ok := nl.record.virtConns[conn]
	if !ok {
		counts.connType = nl.addNewVirtConnLocked(conn)
	}
	pkts, octets := uint64(packets), uint64(bytes)
	if recv {
		counts.RxPackets += pkts
		counts.RxBytes += octets
	} else {
		counts.TxPackets += pkts
		counts.TxBytes += octets
	}
	nl.record.virtConns[conn] = counts
}
// addNewVirtConnLocked handles the first insertion of a virtual connection,
// accounting for its JSON size and reporting how its traffic is classified.
// The [Logger.mu] must be held.
func (nl *Logger) addNewVirtConnLocked(c netlogtype.Connection) connType {
	// Check whether this is the first insertion of the src and dst node.
	// If so, compute the additional JSON bytes that would be added
	// to the record for the node information.
	var srcNodeLen, dstNodeLen int
	srcNode, srcSeen := nl.record.seenNodes[c.Src.Addr()]
	if !srcSeen {
		srcNode = nl.allNodes[c.Src.Addr()]
		if srcNode.Valid() {
			srcNodeLen = srcNode.jsonLen()
		}
	}
	dstNode, dstSeen := nl.record.seenNodes[c.Dst.Addr()]
	if !dstSeen {
		dstNode = nl.allNodes[c.Dst.Addr()]
		if dstNode.Valid() {
			dstNodeLen = dstNode.jsonLen()
		}
	}

	// Check whether the additional [netlogtype.ConnectionCounts]
	// and [netlogtype.Node] information would exceed [maxLogSize].
	// If so, flush the current record and start a fresh one.
	//
	// NOTE(review): srcSeen/dstSeen were computed against the pre-flush
	// record; if a flush occurs here, a node that was already seen is not
	// re-inserted into the fresh record — confirm this is intended.
	if nl.recordLen+netlogtype.MaxConnectionCountsJSONSize+srcNodeLen+dstNodeLen > maxLogSize {
		nl.flushRecordLocked()
		nl.initRecordLocked()
	}

	// Insert newly seen src and/or dst nodes.
	if !srcSeen && srcNode.Valid() {
		nl.record.seenNodes[c.Src.Addr()] = srcNode
	}
	if !dstSeen && dstNode.Valid() {
		nl.record.seenNodes[c.Dst.Addr()] = dstNode
	}
	nl.recordLen += netlogtype.MaxConnectionCountsJSONSize + srcNodeLen + dstNodeLen

	// Classify the traffic type.
	// The source is the self node only if it exactly matches
	// one of the self node's single-IP addresses.
	var srcIsSelfNode bool
	if nl.selfNode.Valid() {
		srcIsSelfNode = nl.selfNode.Addresses().ContainsFunc(func(p netip.Prefix) bool {
			return c.Src.Addr() == p.Addr() && p.IsSingleIP()
		})
	}
	switch {
	case srcIsSelfNode && dstNode.Valid():
		return virtualTraffic // node-to-node traffic within the tailnet
	case srcIsSelfNode:
		// TODO: Should we swap src for the node serving as the proxy?
		// It is relatively useless always using the self IP address.
		if nl.withinRoutesLocked(c.Dst.Addr()) {
			return subnetTraffic // a client using another subnet router
		} else {
			return exitTraffic // a client using an exit node
		}
	case dstNode.Valid():
		if nl.withinRoutesLocked(c.Src.Addr()) {
			return subnetTraffic // serving as a subnet router
		} else {
			return exitTraffic // serving as an exit node
		}
	default:
		return unknownTraffic // neither endpoint is a known Tailscale node
	}
}
// updatePhysConn records packet/byte counts for a single connection
// observed at the magicsock (physical transport) layer.
func (nl *Logger) updatePhysConn(proto ipproto.Proto, src, dst netip.AddrPort, packets, bytes int, recv bool) {
	nl.mu.Lock()
	defer nl.mu.Unlock()

	// Look up the connection, accounting for its size on first sight,
	// and bump its counters.
	nl.initRecordLocked()
	conn := netlogtype.Connection{Proto: proto, Src: src, Dst: dst}
	counts, ok := nl.record.physConns[conn]
	if !ok {
		nl.addNewPhysConnLocked(conn)
	}
	pkts, octets := uint64(packets), uint64(bytes)
	if recv {
		counts.RxPackets += pkts
		counts.RxBytes += octets
	} else {
		counts.TxPackets += pkts
		counts.TxBytes += octets
	}
	nl.record.physConns[conn] = counts
}
// addNewPhysConnLocked handles the first insertion of a physical connection,
// accounting for its JSON size in the current record.
// Only the src node is considered, since the destination of physical
// traffic is a non-tailscale address (see Startup's doc comment).
// The [Logger.mu] must be held.
func (nl *Logger) addNewPhysConnLocked(c netlogtype.Connection) {
	// Check whether this is the first insertion of the src node.
	var srcNodeLen int
	srcNode, srcSeen := nl.record.seenNodes[c.Src.Addr()]
	if !srcSeen {
		srcNode = nl.allNodes[c.Src.Addr()]
		if srcNode.Valid() {
			srcNodeLen = srcNode.jsonLen()
		}
	}

	// Check whether the additional [netlogtype.ConnectionCounts]
	// and [netlogtype.Node] information would exceed [maxLogSize].
	// If so, flush the current record and start a fresh one.
	if nl.recordLen+netlogtype.MaxConnectionCountsJSONSize+srcNodeLen > maxLogSize {
		nl.flushRecordLocked()
		nl.initRecordLocked()
	}

	// Insert the newly seen src node.
	if !srcSeen && srcNode.Valid() {
		nl.record.seenNodes[c.Src.Addr()] = srcNode
	}
	nl.recordLen += netlogtype.MaxConnectionCountsJSONSize + srcNodeLen
}
// initRecordLocked initializes the current record if uninitialized.
// The [Logger.mu] must be held.
func (nl *Logger) initRecordLocked() {
	// A non-zero recordLen implies the record is already initialized.
	if nl.recordLen != 0 {
		return
	}
	nl.record = record{
		selfNode:  nl.selfNode,
		start:     time.Now().UTC(),
		seenNodes: make(map[netip.Addr]nodeUser),
		virtConns: make(map[netlogtype.Connection]countsType),
		physConns: make(map[netlogtype.Connection]netlogtype.Counts),
	}
	// Account for the fixed JSON overhead of an otherwise empty message.
	nl.recordLen = netlogtype.MinMessageJSONSize + nl.selfNode.jsonLen()

	// Start a timer to auto-flush the record.
	// Avoid tickers since continually waking up a goroutine
	// is expensive on battery powered devices.
	nl.flushTimer = time.AfterFunc(pollPeriod, func() {
		nl.mu.Lock()
		defer nl.mu.Unlock()
		// Only flush if a record is active and has been collecting
		// long enough (it may have been flushed and restarted since
		// this timer was armed).
		if !nl.record.start.IsZero() && time.Since(nl.record.start) > pollPeriod/2 {
			nl.flushRecordLocked()
		}
	})
}
// flushRecordLocked flushes the current record if initialized,
// handing it to the recorder goroutine for serialization and upload.
// The [Logger.mu] must be held.
func (nl *Logger) flushRecordLocked() {
	// A zero recordLen implies there is nothing to flush.
	if nl.recordLen == 0 {
		return
	}
	nl.record.end = time.Now().UTC()
	if nl.recordsChan != nil {
		// Non-blocking send: drop the record rather than stall
		// packet processing if the recorder goroutine is backlogged.
		select {
		case nl.recordsChan <- nl.record:
		default:
			if nl.logf != nil {
				nl.logf("netlog: dropped record due to processing backlog")
			}
		}
	}
	if nl.flushTimer != nil {
		nl.flushTimer.Stop()
		nl.flushTimer = nil
	}
	// Reset so the next update starts a fresh record.
	nl.record = record{}
	nl.recordLen = 0
}
// makeNodeMaps derives lookup maps from the netmap: the self node and
// a map from every single-IP node address (self and peers) to its node.
// A nil netmap yields zero values.
func makeNodeMaps(nm *netmap.NetworkMap) (selfNode nodeUser, allNodes map[netip.Addr]nodeUser) {
	if nm == nil {
		return selfNode, allNodes
	}
	allNodes = make(map[netip.Addr]nodeUser)
	if self := nm.SelfNode; self.Valid() {
		selfNode = nodeUser{self, nm.UserProfiles[self.User()]}
		for _, pfx := range self.Addresses().All() {
			if pfx.IsSingleIP() {
				allNodes[pfx.Addr()] = selfNode
			}
		}
	}
	for _, peer := range nm.Peers {
		if !peer.Valid() {
			continue
		}
		nu := nodeUser{peer, nm.UserProfiles[peer.User()]}
		for _, pfx := range peer.Addresses().All() {
			if pfx.IsSingleIP() {
				allNodes[pfx.Addr()] = nu
			}
		}
	}
	return selfNode, allNodes
}
// ReconfigNetworkMap configures the network logger with an updated netmap.
func (nl *Logger) ReconfigNetworkMap(nm *netmap.NetworkMap) {
	// Build the maps before taking the lock to keep the critical section short.
	selfNode, allNodes := makeNodeMaps(nm)
	nl.mu.Lock()
	defer nl.mu.Unlock()
	nl.selfNode = selfNode
	nl.allNodes = allNodes
}
// makeRouteMaps splits the router configuration's routes into a set of
// single-IP addresses (for O(1) membership tests) and a list of wider
// prefixes (linearly scanned by withinRoutesLocked).
// A nil cfg reports no routes.
func makeRouteMaps(cfg *router.Config) (addrs set.Set[netip.Addr], prefixes []netip.Prefix) {
	addrs = make(set.Set[netip.Addr])
	if cfg == nil {
		// Tolerate a nil config, mirroring makeNodeMaps' handling of a
		// nil netmap, rather than panicking on the field accesses below.
		return addrs, nil
	}
	insertPrefixes := func(rs []netip.Prefix) {
		for _, p := range rs {
			if p.IsSingleIP() {
				addrs.Add(p.Addr())
			} else {
				prefixes = append(prefixes, p)
			}
		}
	}
	insertPrefixes(cfg.LocalAddrs)
	insertPrefixes(cfg.Routes)
	insertPrefixes(cfg.SubnetRoutes)
	return addrs, prefixes
}
// ReconfigRoutes configures the network logger with updated routes.
// The cfg is used to classify the types of connections captured by
// the tun Device passed to Startup.
func (nl *Logger) ReconfigRoutes(cfg *router.Config) {
	// Build the maps before taking the lock to keep the critical section short.
	addrs, prefixes := makeRouteMaps(cfg)
	nl.mu.Lock()
	defer nl.mu.Unlock()
	nl.routeAddrs = addrs
	nl.routePrefixes = prefixes
}
// withinRoutesLocked reports whether a is within the configured routes,
// which should only contain Tailscale addresses and subnet routes.
// The [Logger.mu] must be held.
func (nl *Logger) withinRoutesLocked(a netip.Addr) bool {
	// Fast path: exact match against a single-IP route.
	if nl.routeAddrs.Contains(a) {
		return true
	}
	// Slow path: scan the non-trivial (non-default) prefixes.
	for _, pfx := range nl.routePrefixes {
		if pfx.Bits() > 0 && pfx.Contains(a) {
			return true
		}
	}
	return false
}
// Shutdown shuts down the network logger.
// This attempts to flush out all pending log messages.
// Even if an error is returned, the logger is still shut down.
func (nl *Logger) Shutdown(ctx context.Context) error {
	nl.mu.Lock()
	defer nl.mu.Unlock()
	shutdown := nl.shutdownLocked
	if shutdown == nil {
		return nil // not running; nothing to do
	}
	return shutdown(ctx)
}

14
vendor/tailscale.com/wgengine/netlog/netlog_omit.go generated vendored Normal file
View File

@@ -0,0 +1,14 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ts_omit_netlog || ts_omit_logtail
package netlog
type Logger struct{}
func (*Logger) Startup(...any) error { return nil }
func (*Logger) Running() bool { return false }
func (*Logger) Shutdown(any) error { return nil }
func (*Logger) ReconfigNetworkMap(any) {}
func (*Logger) ReconfigRoutes(any) {}

218
vendor/tailscale.com/wgengine/netlog/record.go generated vendored Normal file
View File

@@ -0,0 +1,218 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !ts_omit_netlog && !ts_omit_logtail
package netlog
import (
"cmp"
"net/netip"
"slices"
"strings"
"time"
"unicode/utf8"
"tailscale.com/tailcfg"
"tailscale.com/types/bools"
"tailscale.com/types/netlogtype"
"tailscale.com/util/set"
)
// maxLogSize is the maximum number of bytes for a log message.
const maxLogSize = 256 << 10

// record is the in-memory representation of a [netlogtype.Message].
// It uses maps to efficiently look-up addresses and connections.
// In contrast, [netlogtype.Message] is designed to be JSON serializable,
// where complex keys types are not well supported in JSON objects.
type record struct {
	// selfNode is the node that observed the traffic in this record.
	selfNode nodeUser
	// start and end bound the time window covered by this record.
	start time.Time
	end   time.Time
	// seenNodes caches node information for addresses already
	// accounted for in this record's JSON size estimate.
	seenNodes map[netip.Addr]nodeUser
	// virtConns counts tunneled (TUN-layer) traffic, classified by connType;
	// physConns counts traffic at the magicsock (physical transport) layer.
	virtConns map[netlogtype.Connection]countsType
	physConns map[netlogtype.Connection]netlogtype.Counts
}

// nodeUser is a node with additional user profile information.
type nodeUser struct {
	tailcfg.NodeView
	user tailcfg.UserProfileView // UserProfileView for NodeView.User
}

// countsType is a counts with classification information about the connection.
type countsType struct {
	netlogtype.Counts
	connType connType // how the connection's traffic is classified
}

// connType classifies the kind of traffic a virtual connection carries.
type connType uint8

const (
	unknownTraffic connType = iota // neither endpoint is a known Tailscale node
	virtualTraffic                 // node-to-node traffic within the tailnet
	subnetTraffic                  // traffic through a subnet router
	exitTraffic                    // traffic through an exit node
)
// toMessage converts a [record] into a [netlogtype.Message].
// If excludeNodeInfo is set, the SrcNode and DstNodes fields are omitted.
// If anonymizeExitTraffic is set, exit traffic is scrubbed of the IP
// protocol, port numbers, and any non-Tailscale addresses.
func (r record) toMessage(excludeNodeInfo, anonymizeExitTraffic bool) netlogtype.Message {
	if !r.selfNode.Valid() {
		return netlogtype.Message{}
	}
	m := netlogtype.Message{
		NodeID: r.selfNode.StableID(),
		Start:  r.start.UTC(),
		End:    r.end.UTC(),
	}

	// Convert node fields.
	if !excludeNodeInfo {
		m.SrcNode = r.selfNode.toNode()
		// Deduplicate by node ID since multiple addresses may map
		// to the same node.
		seenIDs := set.Of(r.selfNode.ID())
		for _, node := range r.seenNodes {
			if _, ok := seenIDs[node.ID()]; !ok && node.Valid() {
				m.DstNodes = append(m.DstNodes, node.toNode())
				seenIDs.Add(node.ID())
			}
		}
		slices.SortFunc(m.DstNodes, func(x, y netlogtype.Node) int {
			return cmp.Compare(x.NodeID, y.NodeID)
		})
	}

	// Convert traffic fields.
	anonymizedExitTraffic := make(map[netlogtype.Connection]netlogtype.Counts)
	for conn, cnts := range r.virtConns {
		switch cnts.connType {
		case virtualTraffic:
			m.VirtualTraffic = append(m.VirtualTraffic, netlogtype.ConnectionCounts{Connection: conn, Counts: cnts.Counts})
		case subnetTraffic:
			m.SubnetTraffic = append(m.SubnetTraffic, netlogtype.ConnectionCounts{Connection: conn, Counts: cnts.Counts})
		default:
			// exitTraffic and unknownTraffic are both reported as
			// exit traffic, optionally anonymized.
			if anonymizeExitTraffic {
				conn = netlogtype.Connection{ // scrub the IP protocol type
					Src: netip.AddrPortFrom(conn.Src.Addr(), 0), // scrub the port number
					Dst: netip.AddrPortFrom(conn.Dst.Addr(), 0), // scrub the port number
				}
				if !r.seenNodes[conn.Src.Addr()].Valid() {
					conn.Src = netip.AddrPort{} // not a Tailscale node, so scrub the address
				}
				if !r.seenNodes[conn.Dst.Addr()].Valid() {
					conn.Dst = netip.AddrPort{} // not a Tailscale node, so scrub the address
				}
				// Merge counts: scrubbing may collapse distinct
				// connections into the same key.
				anonymizedExitTraffic[conn] = anonymizedExitTraffic[conn].Add(cnts.Counts)
				continue
			}
			m.ExitTraffic = append(m.ExitTraffic, netlogtype.ConnectionCounts{Connection: conn, Counts: cnts.Counts})
		}
	}
	for conn, cnts := range anonymizedExitTraffic {
		m.ExitTraffic = append(m.ExitTraffic, netlogtype.ConnectionCounts{Connection: conn, Counts: cnts})
	}
	for conn, cnts := range r.physConns {
		m.PhysicalTraffic = append(m.PhysicalTraffic, netlogtype.ConnectionCounts{Connection: conn, Counts: cnts})
	}

	// Sort the connections for deterministic results.
	slices.SortFunc(m.VirtualTraffic, compareConnCnts)
	slices.SortFunc(m.SubnetTraffic, compareConnCnts)
	slices.SortFunc(m.ExitTraffic, compareConnCnts)
	slices.SortFunc(m.PhysicalTraffic, compareConnCnts)
	return m
}
// compareConnCnts orders connections by source, then destination,
// then IP protocol, for deterministic log output.
func compareConnCnts(x, y netlogtype.ConnectionCounts) int {
	if c := x.Src.Compare(y.Src); c != 0 {
		return c
	}
	if c := x.Dst.Compare(y.Dst); c != 0 {
		return c
	}
	return cmp.Compare(x.Proto, y.Proto)
}
// jsonLen computes an upper-bound on the size of the JSON representation
// of the node as serialized by [nodeUser.toNode]. The bound is used to
// budget records against maxLogSize before actually serializing.
func (nu nodeUser) jsonLen() (n int) {
	if !nu.Valid() {
		return len(`{"nodeId":""}`)
	}
	n += len(`{}`)
	n += len(`"nodeId":`) + jsonQuotedLen(string(nu.StableID())) + len(`,`)
	if len(nu.Name()) > 0 {
		n += len(`"name":`) + jsonQuotedLen(nu.Name()) + len(`,`)
	}
	if nu.Addresses().Len() > 0 {
		n += len(`"addresses":[]`)
		// Assume the maximally long textual form for each address.
		for _, addr := range nu.Addresses().All() {
			n += bools.IfElse(addr.Addr().Is4(), len(`"255.255.255.255"`), len(`"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"`)) + len(",")
		}
	}
	if nu.Hostinfo().Valid() && len(nu.Hostinfo().OS()) > 0 {
		n += len(`"os":`) + jsonQuotedLen(nu.Hostinfo().OS()) + len(`,`)
	}
	if nu.Tags().Len() > 0 {
		n += len(`"tags":[]`)
		for _, tag := range nu.Tags().All() {
			n += jsonQuotedLen(tag) + len(",")
		}
	} else if nu.user.Valid() && nu.user.ID() == nu.User() && len(nu.user.LoginName()) > 0 {
		// Untagged nodes report the owning user's login name instead.
		n += len(`"user":`) + jsonQuotedLen(nu.user.LoginName()) + len(",")
	}
	return n
}
// toNode converts the [nodeUser] into a [netlogtype.Node].
func (nu nodeUser) toNode() netlogtype.Node {
	if !nu.Valid() {
		return netlogtype.Node{}
	}
	n := netlogtype.Node{
		NodeID: nu.StableID(),
		Name:   strings.TrimSuffix(nu.Name(), "."), // drop the trailing FQDN dot
	}
	// Report at most one IPv4 and one IPv6 single-IP address,
	// with IPv4 first; a later address of the same family wins.
	var ipv4, ipv6 netip.Addr
	for _, addr := range nu.Addresses().All() {
		switch {
		case addr.IsSingleIP() && addr.Addr().Is4():
			ipv4 = addr.Addr()
		case addr.IsSingleIP() && addr.Addr().Is6():
			ipv6 = addr.Addr()
		}
	}
	n.Addresses = []netip.Addr{ipv4, ipv6}
	n.Addresses = slices.DeleteFunc(n.Addresses, func(a netip.Addr) bool { return !a.IsValid() })
	if nu.Hostinfo().Valid() {
		n.OS = nu.Hostinfo().OS()
	}
	if nu.Tags().Len() > 0 {
		// Sort and deduplicate tags for deterministic output.
		n.Tags = nu.Tags().AsSlice()
		slices.Sort(n.Tags)
		n.Tags = slices.Compact(n.Tags)
	} else if nu.user.Valid() && nu.user.ID() == nu.User() {
		// Untagged nodes report the owning user's login name instead.
		n.User = nu.user.LoginName()
	}
	return n
}
// jsonQuotedLen computes the length of the JSON serialization of s
// according to [jsontext.AppendQuote].
func jsonQuotedLen(s string) int {
	total := len(`"`) + len(s) + len(`"`)
	for i, r := range s {
		switch r {
		case '\b', '\t', '\n', '\f', '\r', '"', '\\':
			// Escaped as a two-byte sequence (e.g. `\n`),
			// one byte longer than the raw character.
			total += len(`\n`) - 1
		case utf8.RuneError:
			// A lone invalid byte is replaced by U+FFFD (three bytes).
			// A literal, well-formed U+FFFD decodes with size 3 and is
			// left alone.
			if _, size := utf8.DecodeRuneInString(s[i:]); size == 1 {
				total += len("\uFFFD") - 1
			}
		default:
			if r < ' ' {
				// Remaining control characters use a \uXXXX escape.
				total += len(`\u0000`) - 1
			}
		}
	}
	return total
}

View File

@@ -1,6 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !ts_omit_netstack
// Package gro implements GRO for the receive (write) path into gVisor.
package gro

View File

@@ -1,7 +1,7 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !ios
//go:build !ios && !ts_omit_gro
package gro

View File

@@ -1,22 +1,27 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ios
//go:build ios || ts_omit_gro
package gro
import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
"runtime"
"tailscale.com/net/packet"
)
type GRO struct{}
func NewGRO() *GRO {
panic("unsupported on iOS")
if runtime.GOOS == "ios" {
panic("unsupported on iOS")
}
panic("GRO disabled in build")
}
func (g *GRO) SetDispatcher(_ stack.NetworkDispatcher) {}
func (g *GRO) SetDispatcher(any) {}
func (g *GRO) Enqueue(_ *packet.Parsed) {}

View File

@@ -0,0 +1,10 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ts_omit_netstack
package gro
func RXChecksumOffload(any) any {
panic("unreachable")
}

View File

@@ -10,6 +10,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"tailscale.com/feature/buildfeatures"
"tailscale.com/net/packet"
"tailscale.com/types/ipproto"
"tailscale.com/wgengine/netstack/gro"
@@ -125,24 +126,24 @@ func newLinkEndpoint(size int, mtu uint32, linkAddr tcpip.LinkAddress, supported
return le
}
// gro attempts to enqueue p on g if l supports a GRO kind matching the
// gro attempts to enqueue p on g if ep supports a GRO kind matching the
// transport protocol carried in p. gro may allocate g if it is nil. gro can
// either return the existing g, a newly allocated one, or nil. Callers are
// responsible for calling Flush() on the returned value if it is non-nil once
// they have finished iterating through all GRO candidates for a given vector.
// If gro allocates a *gro.GRO it will have l's stack.NetworkDispatcher set via
// If gro allocates a *gro.GRO it will have ep's stack.NetworkDispatcher set via
// SetDispatcher().
func (l *linkEndpoint) gro(p *packet.Parsed, g *gro.GRO) *gro.GRO {
if l.supportedGRO == groNotSupported || p.IPProto != ipproto.TCP {
func (ep *linkEndpoint) gro(p *packet.Parsed, g *gro.GRO) *gro.GRO {
if !buildfeatures.HasGRO || ep.supportedGRO == groNotSupported || p.IPProto != ipproto.TCP {
// IPv6 may have extension headers preceding a TCP header, but we trade
// for a fast path and assume p cannot be coalesced in such a case.
l.injectInbound(p)
ep.injectInbound(p)
return g
}
if g == nil {
l.mu.RLock()
d := l.dispatcher
l.mu.RUnlock()
ep.mu.RLock()
d := ep.dispatcher
ep.mu.RUnlock()
g = gro.NewGRO()
g.SetDispatcher(d)
}
@@ -153,40 +154,40 @@ func (l *linkEndpoint) gro(p *packet.Parsed, g *gro.GRO) *gro.GRO {
// Close closes l. Further packet injections will return an error, and all
// pending packets are discarded. Close may be called concurrently with
// WritePackets.
func (l *linkEndpoint) Close() {
l.mu.Lock()
l.dispatcher = nil
l.mu.Unlock()
l.q.Close()
l.Drain()
func (ep *linkEndpoint) Close() {
ep.mu.Lock()
ep.dispatcher = nil
ep.mu.Unlock()
ep.q.Close()
ep.Drain()
}
// Read does non-blocking read one packet from the outbound packet queue.
func (l *linkEndpoint) Read() *stack.PacketBuffer {
return l.q.Read()
func (ep *linkEndpoint) Read() *stack.PacketBuffer {
return ep.q.Read()
}
// ReadContext does blocking read for one packet from the outbound packet queue.
// It can be cancelled by ctx, and in this case, it returns nil.
func (l *linkEndpoint) ReadContext(ctx context.Context) *stack.PacketBuffer {
return l.q.ReadContext(ctx)
func (ep *linkEndpoint) ReadContext(ctx context.Context) *stack.PacketBuffer {
return ep.q.ReadContext(ctx)
}
// Drain removes all outbound packets from the channel and counts them.
func (l *linkEndpoint) Drain() int {
return l.q.Drain()
func (ep *linkEndpoint) Drain() int {
return ep.q.Drain()
}
// NumQueued returns the number of packets queued for outbound.
func (l *linkEndpoint) NumQueued() int {
return l.q.Num()
func (ep *linkEndpoint) NumQueued() int {
return ep.q.Num()
}
func (l *linkEndpoint) injectInbound(p *packet.Parsed) {
l.mu.RLock()
d := l.dispatcher
l.mu.RUnlock()
if d == nil {
func (ep *linkEndpoint) injectInbound(p *packet.Parsed) {
ep.mu.RLock()
d := ep.dispatcher
ep.mu.RUnlock()
if d == nil || !buildfeatures.HasNetstack {
return
}
pkt := gro.RXChecksumOffload(p)
@@ -199,35 +200,35 @@ func (l *linkEndpoint) injectInbound(p *packet.Parsed) {
// Attach saves the stack network-layer dispatcher for use later when packets
// are injected.
func (l *linkEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
l.mu.Lock()
defer l.mu.Unlock()
l.dispatcher = dispatcher
func (ep *linkEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
ep.mu.Lock()
defer ep.mu.Unlock()
ep.dispatcher = dispatcher
}
// IsAttached implements stack.LinkEndpoint.IsAttached.
func (l *linkEndpoint) IsAttached() bool {
l.mu.RLock()
defer l.mu.RUnlock()
return l.dispatcher != nil
func (ep *linkEndpoint) IsAttached() bool {
ep.mu.RLock()
defer ep.mu.RUnlock()
return ep.dispatcher != nil
}
// MTU implements stack.LinkEndpoint.MTU.
func (l *linkEndpoint) MTU() uint32 {
l.mu.RLock()
defer l.mu.RUnlock()
return l.mtu
func (ep *linkEndpoint) MTU() uint32 {
ep.mu.RLock()
defer ep.mu.RUnlock()
return ep.mtu
}
// SetMTU implements stack.LinkEndpoint.SetMTU.
func (l *linkEndpoint) SetMTU(mtu uint32) {
l.mu.Lock()
defer l.mu.Unlock()
l.mtu = mtu
func (ep *linkEndpoint) SetMTU(mtu uint32) {
ep.mu.Lock()
defer ep.mu.Unlock()
ep.mtu = mtu
}
// Capabilities implements stack.LinkEndpoint.Capabilities.
func (l *linkEndpoint) Capabilities() stack.LinkEndpointCapabilities {
func (ep *linkEndpoint) Capabilities() stack.LinkEndpointCapabilities {
// We are required to offload RX checksum validation for the purposes of
// GRO.
return stack.CapabilityRXChecksumOffload
@@ -241,8 +242,8 @@ func (*linkEndpoint) GSOMaxSize() uint32 {
}
// SupportedGSO implements stack.GSOEndpoint.
func (l *linkEndpoint) SupportedGSO() stack.SupportedGSO {
return l.SupportedGSOKind
func (ep *linkEndpoint) SupportedGSO() stack.SupportedGSO {
return ep.SupportedGSOKind
}
// MaxHeaderLength returns the maximum size of the link layer header. Given it
@@ -252,22 +253,22 @@ func (*linkEndpoint) MaxHeaderLength() uint16 {
}
// LinkAddress returns the link address of this endpoint.
func (l *linkEndpoint) LinkAddress() tcpip.LinkAddress {
l.mu.RLock()
defer l.mu.RUnlock()
return l.linkAddr
func (ep *linkEndpoint) LinkAddress() tcpip.LinkAddress {
ep.mu.RLock()
defer ep.mu.RUnlock()
return ep.linkAddr
}
// SetLinkAddress implements stack.LinkEndpoint.SetLinkAddress.
func (l *linkEndpoint) SetLinkAddress(addr tcpip.LinkAddress) {
l.mu.Lock()
defer l.mu.Unlock()
l.linkAddr = addr
func (ep *linkEndpoint) SetLinkAddress(addr tcpip.LinkAddress) {
ep.mu.Lock()
defer ep.mu.Unlock()
ep.linkAddr = addr
}
// WritePackets stores outbound packets into the channel.
// Multiple concurrent calls are permitted.
func (l *linkEndpoint) WritePackets(pkts stack.PacketBufferList) (int, tcpip.Error) {
func (ep *linkEndpoint) WritePackets(pkts stack.PacketBufferList) (int, tcpip.Error) {
n := 0
// TODO(jwhited): evaluate writing a stack.PacketBufferList instead of a
// single packet. We can split 2 x 64K GSO across
@@ -277,7 +278,7 @@ func (l *linkEndpoint) WritePackets(pkts stack.PacketBufferList) (int, tcpip.Err
// control MTU (and by effect TCP MSS in gVisor) we *shouldn't* expect to
// ever overflow 128 slots (see wireguard-go/tun.ErrTooManySegments usage).
for _, pkt := range pkts.AsSlice() {
if err := l.q.Write(pkt); err != nil {
if err := ep.q.Write(pkt); err != nil {
if _, ok := err.(*tcpip.ErrNoBufferSpace); !ok && n == 0 {
return 0, err
}

View File

@@ -33,11 +33,13 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
"gvisor.dev/gvisor/pkg/waiter"
"tailscale.com/envknob"
"tailscale.com/feature/buildfeatures"
"tailscale.com/ipn/ipnlocal"
"tailscale.com/metrics"
"tailscale.com/net/dns"
"tailscale.com/net/ipset"
"tailscale.com/net/netaddr"
"tailscale.com/net/netx"
"tailscale.com/net/packet"
"tailscale.com/net/tsaddr"
"tailscale.com/net/tsdial"
@@ -208,7 +210,7 @@ type Impl struct {
// TCP connection to another host (e.g. in subnet router mode).
//
// This is currently only used in tests.
forwardDialFunc func(context.Context, string, string) (net.Conn, error)
forwardDialFunc netx.DialFunc
// forwardInFlightPerClientDropped is a metric that tracks how many
// in-flight TCP forward requests were dropped due to the per-client
@@ -326,10 +328,15 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi
if tcpipErr != nil {
return nil, fmt.Errorf("could not disable TCP RACK: %v", tcpipErr)
}
cubicOpt := tcpip.CongestionControlOption("cubic")
tcpipErr = ipstack.SetTransportProtocolOption(tcp.ProtocolNumber, &cubicOpt)
// gVisor defaults to reno at the time of writing. We explicitly set reno
// congestion control in order to prevent unexpected changes. Netstack
// has an int overflow in sender congestion window arithmetic that is more
// prone to trigger with cubic congestion control.
// See https://github.com/google/gvisor/issues/11632
renoOpt := tcpip.CongestionControlOption("reno")
tcpipErr = ipstack.SetTransportProtocolOption(tcp.ProtocolNumber, &renoOpt)
if tcpipErr != nil {
return nil, fmt.Errorf("could not set cubic congestion control: %v", tcpipErr)
return nil, fmt.Errorf("could not set reno congestion control: %v", tcpipErr)
}
err := setTCPBufSizes(ipstack)
if err != nil {
@@ -337,7 +344,7 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi
}
supportedGSOKind := stack.GSONotSupported
supportedGROKind := groNotSupported
if runtime.GOOS == "linux" {
if runtime.GOOS == "linux" && buildfeatures.HasGRO {
// TODO(jwhited): add Windows support https://github.com/tailscale/corp/issues/21874
supportedGROKind = tcpGROSupported
supportedGSOKind = stack.HostGSOSupported
@@ -571,9 +578,16 @@ func (ns *Impl) decrementInFlightTCPForward(tei stack.TransportEndpointID, remot
}
}
// LocalBackend is a fake name for *ipnlocal.LocalBackend to avoid an import cycle.
type LocalBackend = any
// Start sets up all the handlers so netstack can start working. Implements
// wgengine.FakeImpl.
func (ns *Impl) Start(lb *ipnlocal.LocalBackend) error {
func (ns *Impl) Start(b LocalBackend) error {
if b == nil {
panic("nil LocalBackend interface")
}
lb := b.(*ipnlocal.LocalBackend)
if lb == nil {
panic("nil LocalBackend")
}
@@ -637,13 +651,15 @@ func (ns *Impl) UpdateNetstackIPs(nm *netmap.NetworkMap) {
var selfNode tailcfg.NodeView
var serviceAddrSet set.Set[netip.Addr]
if nm != nil {
vipServiceIPMap := nm.GetVIPServiceIPMap()
serviceAddrSet = make(set.Set[netip.Addr], len(vipServiceIPMap)*2)
for _, addrs := range vipServiceIPMap {
serviceAddrSet.AddSlice(addrs)
}
ns.atomicIsLocalIPFunc.Store(ipset.NewContainsIPFunc(nm.GetAddresses()))
ns.atomicIsVIPServiceIPFunc.Store(serviceAddrSet.Contains)
if buildfeatures.HasServe {
vipServiceIPMap := nm.GetVIPServiceIPMap()
serviceAddrSet = make(set.Set[netip.Addr], len(vipServiceIPMap)*2)
for _, addrs := range vipServiceIPMap {
serviceAddrSet.AddSlice(addrs)
}
ns.atomicIsVIPServiceIPFunc.Store(serviceAddrSet.Contains)
}
selfNode = nm.SelfNode
} else {
ns.atomicIsLocalIPFunc.Store(ipset.FalseContainsIPFunc())
@@ -1026,6 +1042,9 @@ func (ns *Impl) isLocalIP(ip netip.Addr) bool {
// isVIPServiceIP reports whether ip is an IP address that's
// assigned to a VIP service.
func (ns *Impl) isVIPServiceIP(ip netip.Addr) bool {
if !buildfeatures.HasServe {
return false
}
return ns.atomicIsVIPServiceIPFunc.Load()(ip)
}
@@ -1068,7 +1087,7 @@ func (ns *Impl) shouldProcessInbound(p *packet.Parsed, t *tstun.Wrapper) bool {
return true
}
}
if isService {
if buildfeatures.HasServe && isService {
if p.IsEchoRequest() {
return true
}
@@ -1429,6 +1448,13 @@ func (ns *Impl) acceptTCP(r *tcp.ForwarderRequest) {
}
}
// tcpCloser is an interface to abstract around various TCPConn types that
// allow closing of the read and write streams independently of each other.
type tcpCloser interface {
CloseRead() error
CloseWrite() error
}
func (ns *Impl) forwardTCP(getClient func(...tcpip.SettableSocketOption) *gonet.TCPConn, clientRemoteIP netip.Addr, wq *waiter.Queue, dialAddr netip.AddrPort) (handled bool) {
dialAddrStr := dialAddr.String()
if debugNetstack() {
@@ -1457,7 +1483,7 @@ func (ns *Impl) forwardTCP(getClient func(...tcpip.SettableSocketOption) *gonet.
}()
// Attempt to dial the outbound connection before we accept the inbound one.
var dialFunc func(context.Context, string, string) (net.Conn, error)
var dialFunc netx.DialFunc
if ns.forwardDialFunc != nil {
dialFunc = ns.forwardDialFunc
} else {
@@ -1495,18 +1521,48 @@ func (ns *Impl) forwardTCP(getClient func(...tcpip.SettableSocketOption) *gonet.
}
defer client.Close()
// As of 2025-07-03, backend is always either a net.TCPConn
// from stdDialer.DialContext (which has the requisite functions),
// or nil from hangDialer in tests (in which case we would have
// errored out by now), so this conversion should always succeed.
backendTCPCloser, backendIsTCPCloser := backend.(tcpCloser)
connClosed := make(chan error, 2)
go func() {
_, err := io.Copy(backend, client)
if err != nil {
err = fmt.Errorf("client -> backend: %w", err)
}
connClosed <- err
err = nil
if backendIsTCPCloser {
err = backendTCPCloser.CloseWrite()
}
err = errors.Join(err, client.CloseRead())
if err != nil {
ns.logf("client -> backend close connection: %v", err)
}
}()
go func() {
_, err := io.Copy(client, backend)
if err != nil {
err = fmt.Errorf("backend -> client: %w", err)
}
connClosed <- err
err = nil
if backendIsTCPCloser {
err = backendTCPCloser.CloseRead()
}
err = errors.Join(err, client.CloseWrite())
if err != nil {
ns.logf("backend -> client close connection: %v", err)
}
}()
err = <-connClosed
if err != nil {
ns.logf("proxy connection closed with error: %v", err)
// Wait for both ends of the connection to close.
for range 2 {
err = <-connClosed
if err != nil {
ns.logf("proxy connection closed with error: %v", err)
}
}
ns.logf("[v2] netstack: forwarder connection to %s closed", dialAddrStr)
return
@@ -1849,7 +1905,6 @@ func (ns *Impl) ExpVar() expvar.Var {
{"option_unknown_received", ipStats.OptionUnknownReceived},
}
for _, metric := range ipMetrics {
metric := metric
m.Set("counter_ip_"+metric.name, expvar.Func(func() any {
return readStatCounter(metric.field)
}))
@@ -1876,7 +1931,6 @@ func (ns *Impl) ExpVar() expvar.Var {
{"errors", fwdStats.Errors},
}
for _, metric := range fwdMetrics {
metric := metric
m.Set("counter_ip_forward_"+metric.name, expvar.Func(func() any {
return readStatCounter(metric.field)
}))
@@ -1920,7 +1974,6 @@ func (ns *Impl) ExpVar() expvar.Var {
{"forward_max_in_flight_drop", tcpStats.ForwardMaxInFlightDrop},
}
for _, metric := range tcpMetrics {
metric := metric
m.Set("counter_tcp_"+metric.name, expvar.Func(func() any {
return readStatCounter(metric.field)
}))
@@ -1947,7 +2000,6 @@ func (ns *Impl) ExpVar() expvar.Var {
{"checksum_errors", udpStats.ChecksumErrors},
}
for _, metric := range udpMetrics {
metric := metric
m.Set("counter_udp_"+metric.name, expvar.Func(func() any {
return readStatCounter(metric.field)
}))

View File

@@ -13,6 +13,7 @@ import (
"runtime"
"time"
"tailscale.com/feature/buildfeatures"
"tailscale.com/version/distro"
)
@@ -20,7 +21,7 @@ import (
// CAP_NET_RAW from tailscaled's binary.
var setAmbientCapsRaw func(*exec.Cmd)
var isSynology = runtime.GOOS == "linux" && distro.Get() == distro.Synology
var isSynology = runtime.GOOS == "linux" && buildfeatures.HasSynology && distro.Get() == distro.Synology
// sendOutboundUserPing sends a non-privileged ICMP (or ICMPv6) ping to dstIP with the given timeout.
func (ns *Impl) sendOutboundUserPing(dstIP netip.Addr, timeout time.Duration) error {
@@ -61,7 +62,7 @@ func (ns *Impl) sendOutboundUserPing(dstIP netip.Addr, timeout time.Duration) er
ping = "/bin/ping"
}
cmd := exec.Command(ping, "-c", "1", "-W", "3", dstIP.String())
if isSynology && os.Getuid() != 0 {
if buildfeatures.HasSynology && isSynology && os.Getuid() != 0 {
// On DSM7 we run as non-root and need to pass
// CAP_NET_RAW if our binary has it.
setAmbientCapsRaw(cmd)

View File

@@ -1,6 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !ts_omit_debug
package wgengine
import (
@@ -20,6 +22,8 @@ import (
"tailscale.com/wgengine/filter"
)
type flowtrackTuple = flowtrack.Tuple
const tcpTimeoutBeforeDebug = 5 * time.Second
type pendingOpenFlow struct {
@@ -56,6 +60,10 @@ func (e *userspaceEngine) noteFlowProblemFromPeer(f flowtrack.Tuple, problem pac
of.problem = problem
}
func tsRejectFlow(rh packet.TailscaleRejectedHeader) flowtrack.Tuple {
return flowtrack.MakeTuple(rh.Proto, rh.Src, rh.Dst)
}
func (e *userspaceEngine) trackOpenPreFilterIn(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
res = filter.Accept // always
@@ -66,8 +74,8 @@ func (e *userspaceEngine) trackOpenPreFilterIn(pp *packet.Parsed, t *tstun.Wrapp
return
}
if rh.MaybeBroken {
e.noteFlowProblemFromPeer(rh.Flow(), rh.Reason)
} else if f := rh.Flow(); e.removeFlow(f) {
e.noteFlowProblemFromPeer(tsRejectFlow(rh), rh.Reason)
} else if f := tsRejectFlow(rh); e.removeFlow(f) {
e.logf("open-conn-track: flow %v %v > %v rejected due to %v", rh.Proto, rh.Src, rh.Dst, rh.Reason)
}
return

24
vendor/tailscale.com/wgengine/pendopen_omit.go generated vendored Normal file
View File

@@ -0,0 +1,24 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ts_omit_debug
package wgengine
import (
"tailscale.com/net/packet"
"tailscale.com/net/tstun"
"tailscale.com/wgengine/filter"
)
type flowtrackTuple = struct{}
type pendingOpenFlow struct{}
func (*userspaceEngine) trackOpenPreFilterIn(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
panic("unreachable")
}
func (*userspaceEngine) trackOpenPostFilterOut(pp *packet.Parsed, t *tstun.Wrapper) (res filter.Response) {
panic("unreachable")
}

View File

@@ -56,13 +56,6 @@ func (r *CallbackRouter) Set(rcfg *Config) error {
return r.SetBoth(r.rcfg, r.dcfg)
}
// UpdateMagicsockPort implements the Router interface. This implementation
// does nothing and returns nil because this router does not currently need
// to know what the magicsock UDP port is.
func (r *CallbackRouter) UpdateMagicsockPort(_ uint16, _ string) error {
return nil
}
// SetDNS implements dns.OSConfigurator.
func (r *CallbackRouter) SetDNS(dcfg dns.OSConfig) error {
r.mu.Lock()

View File

@@ -1,834 +0,0 @@
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2019 WireGuard LLC. All Rights Reserved.
*/
package router
import (
"errors"
"fmt"
"log"
"net/netip"
"slices"
"sort"
"time"
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/net/tsaddr"
"tailscale.com/net/tstun"
"tailscale.com/util/multierr"
"tailscale.com/wgengine/winnet"
ole "github.com/go-ole/go-ole"
"github.com/tailscale/wireguard-go/tun"
"go4.org/netipx"
"golang.org/x/sys/windows"
"golang.zx2c4.com/wireguard/windows/tunnel/winipcfg"
)
// monitorDefaultRoutes subscribes to route change events and updates
// the Tailscale tunnel interface's MTU to match that of the
// underlying default route.
//
// This is an attempt at making the MTU mostly correct, but in
// practice this entire piece of code ends up just using the 1280
// value passed in at device construction time. This code might make
// the MTU go lower due to very low-MTU IPv4 interfaces.
//
// TODO: this code is insufficient to control the MTU correctly. The
// correct way to do it is per-peer PMTU discovery, and synthesizing
// ICMP fragmentation-needed messages within tailscaled. This code may
// address a few rare corner cases, but is unlikely to significantly
// help with MTU issues compared to a static 1280B implementation.
func monitorDefaultRoutes(tun *tun.NativeTun) (*winipcfg.RouteChangeCallback, error) {
ourLuid := winipcfg.LUID(tun.LUID())
lastMtu := uint32(0)
doIt := func() error {
mtu, err := getDefaultRouteMTU()
if err != nil {
return fmt.Errorf("error getting default route MTU: %w", err)
}
if mtu > 0 && (lastMtu == 0 || lastMtu != mtu) {
iface, err := ourLuid.IPInterface(windows.AF_INET)
if err != nil {
if !errors.Is(err, windows.ERROR_NOT_FOUND) {
return fmt.Errorf("getting v4 interface: %w", err)
}
} else {
iface.NLMTU = mtu - 80
// If the TUN device was created with a smaller MTU,
// though, such as 1280, we don't want to go bigger
// than configured. (See the comment on minimalMTU in
// the wgengine package.)
if min, err := tun.MTU(); err == nil && min < int(iface.NLMTU) {
iface.NLMTU = uint32(min)
}
if iface.NLMTU < 576 {
iface.NLMTU = 576
}
err = iface.Set()
if err != nil {
return fmt.Errorf("error setting v4 MTU: %w", err)
}
tun.ForceMTU(int(iface.NLMTU))
}
iface, err = ourLuid.IPInterface(windows.AF_INET6)
if err != nil {
if !errors.Is(err, windows.ERROR_NOT_FOUND) {
return fmt.Errorf("error getting v6 interface: %w", err)
}
} else {
iface.NLMTU = mtu - 80
if iface.NLMTU < 1280 {
iface.NLMTU = 1280
}
err = iface.Set()
if err != nil {
return fmt.Errorf("error setting v6 MTU: %w", err)
}
}
lastMtu = mtu
}
return nil
}
err := doIt()
if err != nil {
return nil, err
}
cb, err := winipcfg.RegisterRouteChangeCallback(func(notificationType winipcfg.MibNotificationType, route *winipcfg.MibIPforwardRow2) {
//fmt.Printf("MonitorDefaultRoutes: changed: %v\n", route.DestinationPrefix)
if route.DestinationPrefix.PrefixLength == 0 {
_ = doIt()
}
})
if err != nil {
return nil, err
}
return cb, nil
}
func getDefaultRouteMTU() (uint32, error) {
mtus, err := netmon.NonTailscaleMTUs()
if err != nil {
return 0, err
}
routes, err := winipcfg.GetIPForwardTable2(windows.AF_INET)
if err != nil {
return 0, err
}
best := ^uint32(0)
mtu := uint32(0)
for _, route := range routes {
if route.DestinationPrefix.PrefixLength != 0 {
continue
}
routeMTU := mtus[route.InterfaceLUID]
if routeMTU == 0 {
continue
}
if route.Metric < best {
best = route.Metric
mtu = routeMTU
}
}
routes, err = winipcfg.GetIPForwardTable2(windows.AF_INET6)
if err != nil {
return 0, err
}
best = ^uint32(0)
for _, route := range routes {
if route.DestinationPrefix.PrefixLength != 0 {
continue
}
routeMTU := mtus[route.InterfaceLUID]
if routeMTU == 0 {
continue
}
if route.Metric < best {
best = route.Metric
if routeMTU < mtu {
mtu = routeMTU
}
}
}
return mtu, nil
}
// setPrivateNetwork marks the provided network adapter's category to private.
// It returns (false, nil) if the adapter was not found.
func setPrivateNetwork(ifcLUID winipcfg.LUID) (bool, error) {
// NLM_NETWORK_CATEGORY values.
const (
categoryPublic = 0
categoryPrivate = 1
categoryDomain = 2
)
ifcGUID, err := ifcLUID.GUID()
if err != nil {
return false, fmt.Errorf("ifcLUID.GUID: %v", err)
}
// aaron: DO NOT call Initialize() or Uninitialize() on c!
// We've already handled that process-wide.
var c ole.Connection
m, err := winnet.NewNetworkListManager(&c)
if err != nil {
return false, fmt.Errorf("winnet.NewNetworkListManager: %v", err)
}
defer m.Release()
cl, err := m.GetNetworkConnections()
if err != nil {
return false, fmt.Errorf("m.GetNetworkConnections: %v", err)
}
defer cl.Release()
for _, nco := range cl {
aid, err := nco.GetAdapterId()
if err != nil {
return false, fmt.Errorf("nco.GetAdapterId: %v", err)
}
if aid != ifcGUID.String() {
continue
}
n, err := nco.GetNetwork()
if err != nil {
return false, fmt.Errorf("GetNetwork: %v", err)
}
defer n.Release()
cat, err := n.GetCategory()
if err != nil {
return false, fmt.Errorf("GetCategory: %v", err)
}
if cat != categoryPrivate && cat != categoryDomain {
if err := n.SetCategory(categoryPrivate); err != nil {
return false, fmt.Errorf("SetCategory: %v", err)
}
}
return true, nil
}
return false, nil
}
// interfaceFromLUID returns IPAdapterAddresses with specified LUID.
func interfaceFromLUID(luid winipcfg.LUID, flags winipcfg.GAAFlags) (*winipcfg.IPAdapterAddresses, error) {
addresses, err := winipcfg.GetAdaptersAddresses(windows.AF_UNSPEC, flags)
if err != nil {
return nil, err
}
for _, addr := range addresses {
if addr.LUID == luid {
return addr, nil
}
}
return nil, fmt.Errorf("interfaceFromLUID: interface with LUID %v not found", luid)
}
var networkCategoryWarnable = health.Register(&health.Warnable{
Code: "set-network-category-failed",
Severity: health.SeverityMedium,
Title: "Windows network configuration failed",
Text: func(args health.Args) string {
return fmt.Sprintf("Failed to set the network category to private on the Tailscale adapter. This may prevent Tailscale from working correctly. Error: %s", args[health.ArgError])
},
MapDebugFlag: "warn-network-category-unhealthy",
})
func configureInterface(cfg *Config, tun *tun.NativeTun, ht *health.Tracker) (retErr error) {
var mtu = tstun.DefaultTUNMTU()
luid := winipcfg.LUID(tun.LUID())
iface, err := interfaceFromLUID(luid,
// Issue 474: on early boot, when the network is still
// coming up, if the Tailscale service comes up first,
// the Tailscale adapter it finds might not have the
// IPv4 service available yet? Try this flag:
winipcfg.GAAFlagIncludeAllInterfaces,
)
if err != nil {
return fmt.Errorf("getting interface: %w", err)
}
// Send non-nil return errors to retErrc, to interrupt our background
// setPrivateNetwork goroutine.
retErrc := make(chan error, 1)
defer func() {
if retErr != nil {
retErrc <- retErr
}
}()
go func() {
// It takes a weirdly long time for Windows to notice the
// new interface has come up. Poll periodically until it
// does.
const tries = 20
for i := range tries {
found, err := setPrivateNetwork(luid)
if err != nil {
ht.SetUnhealthy(networkCategoryWarnable, health.Args{health.ArgError: err.Error()})
log.Printf("setPrivateNetwork(try=%d): %v", i, err)
} else {
ht.SetHealthy(networkCategoryWarnable)
if found {
if i > 0 {
log.Printf("setPrivateNetwork(try=%d): success", i)
}
return
}
log.Printf("setPrivateNetwork(try=%d): not found", i)
}
select {
case <-time.After(time.Second):
case <-retErrc:
return
}
}
log.Printf("setPrivateNetwork: adapter LUID %v not found after %d tries, giving up", luid, tries)
}()
// Figure out which of IPv4 and IPv6 are available. Both protocols
// can be disabled on a per-interface basis by the user, as well
// as globally via a registry policy. We skip programming anything
// related to the disabled protocols, since by definition they're
// unusable.
ipif4, err := iface.LUID.IPInterface(windows.AF_INET)
if err != nil {
if !errors.Is(err, windows.ERROR_NOT_FOUND) {
return fmt.Errorf("getting AF_INET interface: %w", err)
}
log.Printf("AF_INET interface not found on Tailscale adapter, skipping IPv4 programming")
ipif4 = nil
}
ipif6, err := iface.LUID.IPInterface(windows.AF_INET6)
if err != nil {
if !errors.Is(err, windows.ERROR_NOT_FOUND) {
return fmt.Errorf("getting AF_INET6 interface: %w", err)
}
log.Printf("AF_INET6 interface not found on Tailscale adapter, skipping IPv6 programming")
ipif6 = nil
}
// Windows requires routes to have a nexthop. Routes created using
// the interface's local IP address or an unspecified IP address
// ("0.0.0.0" or "::") as the nexthop are considered on-link routes.
//
// Notably, Windows treats on-link subnet routes differently, reserving the last
// IP in the range as the broadcast IP and therefore prohibiting TCP connections
// to it, resulting in WSA error 10049: "The requested address is not valid in its context."
// This does not happen with single-host routes, such as routes to Tailscale IP addresses,
// but becomes a problem with advertised subnets when all IPs in the range should be reachable.
// See https://github.com/tailscale/support-escalations/issues/57 for details.
//
// For routes such as ours where the nexthop is meaningless, we can use an
// arbitrary nexthop address, such as TailscaleServiceIP, to prevent the
// routes from being marked as on-link. We can still create on-link routes
// for single-host Tailscale routes, but we shouldn't attempt to create a
// route for the interface's own IP.
var localAddr4, localAddr6 netip.Addr
var gatewayAddr4, gatewayAddr6 netip.Addr
addresses := make([]netip.Prefix, 0, len(cfg.LocalAddrs))
for _, addr := range cfg.LocalAddrs {
if (addr.Addr().Is4() && ipif4 == nil) || (addr.Addr().Is6() && ipif6 == nil) {
// Can't program addresses for disabled protocol.
continue
}
addresses = append(addresses, addr)
if addr.Addr().Is4() && !gatewayAddr4.IsValid() {
localAddr4 = addr.Addr()
gatewayAddr4 = tsaddr.TailscaleServiceIP()
} else if addr.Addr().Is6() && !gatewayAddr6.IsValid() {
localAddr6 = addr.Addr()
gatewayAddr6 = tsaddr.TailscaleServiceIPv6()
}
}
var routes []*routeData
foundDefault4 := false
foundDefault6 := false
for _, route := range cfg.Routes {
if (route.Addr().Is4() && ipif4 == nil) || (route.Addr().Is6() && ipif6 == nil) {
// Can't program routes for disabled protocol.
continue
}
if route.Addr().Is6() && !gatewayAddr6.IsValid() {
// Windows won't let us set IPv6 routes without having an
// IPv6 local address set. However, when we've configured
// a default route, we want to forcibly grab IPv6 traffic
// even if the v6 overlay network isn't configured. To do
// that, we add a dummy local IPv6 address to serve as a
// route source.
ip := tsaddr.Tailscale4To6Placeholder()
addresses = append(addresses, netip.PrefixFrom(ip, ip.BitLen()))
gatewayAddr6 = ip
} else if route.Addr().Is4() && !gatewayAddr4.IsValid() {
// TODO: do same dummy behavior as v6?
return errors.New("due to a Windows limitation, one cannot have interface routes without an interface address")
}
var gateway, localAddr netip.Addr
if route.Addr().Is4() {
localAddr = localAddr4
gateway = gatewayAddr4
} else if route.Addr().Is6() {
localAddr = localAddr6
gateway = gatewayAddr6
}
switch destAddr := route.Addr().Unmap(); {
case destAddr == localAddr:
// no need to add a route for the interface's
// own IP. The kernel does that for us.
// If we try to replace it, we'll fail to
// add the route unless NextHop is set, but
// then the interface's IP won't be pingable.
continue
case route.IsSingleIP() && (destAddr == gateway || tsaddr.IsTailscaleIP(destAddr)):
// add an on-link route if the destination
// is the nexthop itself or a single Tailscale IP.
gateway = localAddr
}
r := &routeData{
RouteData: winipcfg.RouteData{
Destination: route,
NextHop: gateway,
Metric: 0,
},
}
if route.Addr().Is4() {
if route.Bits() == 0 {
foundDefault4 = true
}
} else if route.Addr().Is6() {
if route.Bits() == 0 {
foundDefault6 = true
}
}
routes = append(routes, r)
}
err = syncAddresses(iface, addresses)
if err != nil {
return fmt.Errorf("syncAddresses: %w", err)
}
slices.SortFunc(routes, (*routeData).Compare)
deduplicatedRoutes := []*routeData{}
for i := range len(routes) {
// There's only one way to get to a given IP+Mask, so delete
// all matches after the first.
if i > 0 && routes[i].Destination == routes[i-1].Destination {
continue
}
deduplicatedRoutes = append(deduplicatedRoutes, routes[i])
}
// Re-read interface after syncAddresses.
iface, err = interfaceFromLUID(luid,
// Issue 474: on early boot, when the network is still
// coming up, if the Tailscale service comes up first,
// the Tailscale adapter it finds might not have the
// IPv4 service available yet? Try this flag:
winipcfg.GAAFlagIncludeAllInterfaces,
)
if err != nil {
return fmt.Errorf("getting interface: %w", err)
}
var errAcc error
err = syncRoutes(iface, deduplicatedRoutes, cfg.LocalAddrs)
if err != nil && errAcc == nil {
log.Printf("setroutes: %v", err)
errAcc = err
}
if ipif4 != nil {
ipif4, err = iface.LUID.IPInterface(windows.AF_INET)
if err != nil {
return fmt.Errorf("getting AF_INET interface: %w", err)
}
if foundDefault4 {
ipif4.UseAutomaticMetric = false
ipif4.Metric = 0
}
if mtu > 0 {
ipif4.NLMTU = uint32(mtu)
tun.ForceMTU(int(ipif4.NLMTU))
}
err = ipif4.Set()
if err != nil && errAcc == nil {
errAcc = err
}
}
if ipif6 != nil {
ipif6, err = iface.LUID.IPInterface(windows.AF_INET6)
if err != nil {
return fmt.Errorf("getting AF_INET6 interface: %w", err)
} else {
if foundDefault6 {
ipif6.UseAutomaticMetric = false
ipif6.Metric = 0
}
if mtu > 0 {
ipif6.NLMTU = uint32(mtu)
}
ipif6.DadTransmits = 0
ipif6.RouterDiscoveryBehavior = winipcfg.RouterDiscoveryDisabled
err = ipif6.Set()
if err != nil && errAcc == nil {
errAcc = err
}
}
}
return errAcc
}
func netCompare(a, b netip.Prefix) int {
aip, bip := a.Addr().Unmap(), b.Addr().Unmap()
v := aip.Compare(bip)
if v != 0 {
return v
}
if a.Bits() == b.Bits() {
return 0
}
// narrower first
if a.Bits() > b.Bits() {
return -1
}
return 1
}
func sortNets(s []netip.Prefix) {
sort.Slice(s, func(i, j int) bool {
return netCompare(s[i], s[j]) == -1
})
}
// deltaNets returns the changes to turn a into b.
func deltaNets(a, b []netip.Prefix) (add, del []netip.Prefix) {
add = make([]netip.Prefix, 0, len(b))
del = make([]netip.Prefix, 0, len(a))
sortNets(a)
sortNets(b)
i := 0
j := 0
for i < len(a) && j < len(b) {
switch netCompare(a[i], b[j]) {
case -1:
// a < b, delete
del = append(del, a[i])
i++
case 0:
// a == b, no diff
i++
j++
case 1:
// a > b, add missing entry
add = append(add, b[j])
j++
default:
panic("unexpected compare result")
}
}
del = append(del, a[i:]...)
add = append(add, b[j:]...)
return
}
func isIPv6LinkLocal(a netip.Prefix) bool {
return a.Addr().Is6() && a.Addr().IsLinkLocalUnicast()
}
// ipAdapterUnicastAddressToPrefix converts windows.IpAdapterUnicastAddress to netip.Prefix
func ipAdapterUnicastAddressToPrefix(u *windows.IpAdapterUnicastAddress) netip.Prefix {
ip, _ := netip.AddrFromSlice(u.Address.IP())
return netip.PrefixFrom(ip.Unmap(), int(u.OnLinkPrefixLength))
}
// unicastIPNets returns all unicast net.IPNet for ifc interface.
func unicastIPNets(ifc *winipcfg.IPAdapterAddresses) []netip.Prefix {
var nets []netip.Prefix
for addr := ifc.FirstUnicastAddress; addr != nil; addr = addr.Next {
nets = append(nets, ipAdapterUnicastAddressToPrefix(addr))
}
return nets
}
// syncAddresses incrementally sets the interface's unicast IP addresses,
// doing the minimum number of AddAddresses & DeleteAddress calls.
// This avoids the full FlushAddresses.
//
// Any IPv6 link-local addresses are not deleted out of caution as some
// configurations may repeatedly re-add them. Link-local addresses are adjusted
// to set SkipAsSource. SkipAsSource prevents the addresses from being added to
// DNS locally or remotely and from being picked as a source address for
// outgoing packets with unspecified sources. See #4647 and
// https://web.archive.org/web/20200912120956/https://devblogs.microsoft.com/scripting/use-powershell-to-change-ip-behavior-with-skipassource/
func syncAddresses(ifc *winipcfg.IPAdapterAddresses, want []netip.Prefix) error {
var erracc error
got := unicastIPNets(ifc)
add, del := deltaNets(got, want)
ll := make([]netip.Prefix, 0)
for _, a := range del {
// do not delete link-local addresses, and collect them for later
// applying SkipAsSource.
if isIPv6LinkLocal(a) {
ll = append(ll, a)
continue
}
err := ifc.LUID.DeleteIPAddress(a)
if err != nil {
erracc = fmt.Errorf("deleting IP %q: %w", a, err)
}
}
for _, a := range add {
err := ifc.LUID.AddIPAddress(a)
if err != nil {
erracc = fmt.Errorf("adding IP %q: %w", a, err)
}
}
for _, a := range ll {
mib, err := ifc.LUID.IPAddress(a.Addr())
if err != nil {
erracc = fmt.Errorf("setting skip-as-source on IP %q: unable to retrieve MIB: %w", a, err)
continue
}
if !mib.SkipAsSource {
mib.SkipAsSource = true
if err := mib.Set(); err != nil {
erracc = fmt.Errorf("setting skip-as-source on IP %q: unable to set MIB: %w", a, err)
}
}
}
return erracc
}
// routeData wraps winipcfg.RouteData with an additional field that permits
// caching of the associated MibIPForwardRow2; by keeping it around, we can
// avoid unnecessary (and slow) lookups of information that we already have.
type routeData struct {
winipcfg.RouteData
Row *winipcfg.MibIPforwardRow2
}
func (rd *routeData) Less(other *routeData) bool {
return rd.Compare(other) < 0
}
func (rd *routeData) Compare(other *routeData) int {
v := rd.Destination.Addr().Compare(other.Destination.Addr())
if v != 0 {
return v
}
// Narrower masks first
b1, b2 := rd.Destination.Bits(), other.Destination.Bits()
if b1 != b2 {
if b1 > b2 {
return -1
}
return 1
}
// No nexthop before non-empty nexthop
v = rd.NextHop.Compare(other.NextHop)
if v != 0 {
return v
}
// Lower metrics first
if rd.Metric < other.Metric {
return -1
} else if rd.Metric > other.Metric {
return 1
}
return 0
}
func deltaRouteData(a, b []*routeData) (add, del []*routeData) {
add = make([]*routeData, 0, len(b))
del = make([]*routeData, 0, len(a))
slices.SortFunc(a, (*routeData).Compare)
slices.SortFunc(b, (*routeData).Compare)
i := 0
j := 0
for i < len(a) && j < len(b) {
switch a[i].Compare(b[j]) {
case -1:
// a < b, delete
del = append(del, a[i])
i++
case 0:
// a == b, no diff
i++
j++
case 1:
// a > b, add missing entry
add = append(add, b[j])
j++
default:
panic("unexpected compare result")
}
}
del = append(del, a[i:]...)
add = append(add, b[j:]...)
return
}
// getInterfaceRoutes returns all the interface's routes.
// Corresponds to GetIpForwardTable2 function, but filtered by interface.
func getInterfaceRoutes(ifc *winipcfg.IPAdapterAddresses, family winipcfg.AddressFamily) (matches []*winipcfg.MibIPforwardRow2, err error) {
routes, err := winipcfg.GetIPForwardTable2(family)
if err != nil {
return nil, err
}
for i := range routes {
if routes[i].InterfaceLUID == ifc.LUID {
matches = append(matches, &routes[i])
}
}
return
}
func getAllInterfaceRoutes(ifc *winipcfg.IPAdapterAddresses) ([]*routeData, error) {
routes4, err := getInterfaceRoutes(ifc, windows.AF_INET)
if err != nil {
return nil, err
}
routes6, err := getInterfaceRoutes(ifc, windows.AF_INET6)
if err != nil {
// TODO: what if v6 unavailable?
return nil, err
}
rd := make([]*routeData, 0, len(routes4)+len(routes6))
for _, r := range routes4 {
rd = append(rd, &routeData{
RouteData: winipcfg.RouteData{
Destination: r.DestinationPrefix.Prefix(),
NextHop: r.NextHop.Addr(),
Metric: r.Metric,
},
Row: r,
})
}
for _, r := range routes6 {
rd = append(rd, &routeData{
RouteData: winipcfg.RouteData{
Destination: r.DestinationPrefix.Prefix(),
NextHop: r.NextHop.Addr(),
Metric: r.Metric,
},
Row: r,
})
}
return rd, nil
}
// filterRoutes removes routes that have been added by Windows and should not
// be managed by us.
func filterRoutes(routes []*routeData, dontDelete []netip.Prefix) []*routeData {
ddm := make(map[netip.Prefix]bool)
for _, dd := range dontDelete {
// See issue 1448: we don't want to touch the routes added
// by Windows for our interface addresses.
ddm[dd] = true
}
for _, r := range routes {
// We don't want to touch broadcast routes that Windows adds.
nr := r.Destination
if !nr.IsValid() {
continue
}
if nr.IsSingleIP() {
continue
}
lastIP := netipx.RangeOfPrefix(nr).To()
ddm[netip.PrefixFrom(lastIP, lastIP.BitLen())] = true
}
filtered := make([]*routeData, 0, len(routes))
for _, r := range routes {
rr := r.Destination
if rr.IsValid() && ddm[rr] {
continue
}
filtered = append(filtered, r)
}
return filtered
}
// syncRoutes incrementally sets multiples routes on an interface.
// This avoids a full ifc.FlushRoutes call.
// dontDelete is a list of interface address routes that the
// synchronization logic should never delete.
func syncRoutes(ifc *winipcfg.IPAdapterAddresses, want []*routeData, dontDelete []netip.Prefix) error {
existingRoutes, err := getAllInterfaceRoutes(ifc)
if err != nil {
return err
}
got := filterRoutes(existingRoutes, dontDelete)
add, del := deltaRouteData(got, want)
var errs []error
for _, a := range del {
var err error
if a.Row == nil {
// DeleteRoute requires a routing table lookup, so only do that if
// a does not already have the row.
err = ifc.LUID.DeleteRoute(a.Destination, a.NextHop)
} else {
// Otherwise, delete the row directly.
err = a.Row.Delete()
}
if err != nil {
dstStr := a.Destination.String()
if dstStr == "169.254.255.255/32" {
// Issue 785. Ignore these routes
// failing to delete. Harmless.
// TODO(maisem): do we still need this?
continue
}
errs = append(errs, fmt.Errorf("deleting route %v: %w", dstStr, err))
}
}
for _, a := range add {
err := ifc.LUID.AddRoute(a.Destination, a.NextHop, a.Metric)
if err != nil {
errs = append(errs, fmt.Errorf("adding route %v: %w", &a.Destination, err))
}
}
return multierr.New(errs...)
}

View File

@@ -6,14 +6,21 @@
package router
import (
"errors"
"fmt"
"net/netip"
"reflect"
"runtime"
"slices"
"github.com/tailscale/wireguard-go/tun"
"tailscale.com/feature"
"tailscale.com/feature/buildfeatures"
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
"tailscale.com/types/preftype"
"tailscale.com/util/eventbus"
)
// Router is responsible for managing the system network stack.
@@ -28,33 +35,70 @@ type Router interface {
// implementation should handle gracefully.
Set(*Config) error
// UpdateMagicsockPort tells the OS network stack what port magicsock
// is currently listening on, so it can be threaded through firewalls
// and such. This is distinct from Set() since magicsock may rebind
// ports independently from the Config changing.
//
// network should be either "udp4" or "udp6".
UpdateMagicsockPort(port uint16, network string) error
// Close closes the router.
Close() error
}
// NewOpts are the options passed to the NewUserspaceRouter hook.
type NewOpts struct {
Logf logger.Logf // required
Tun tun.Device // required
NetMon *netmon.Monitor // optional
Health *health.Tracker // required (but TODO: support optional later)
Bus *eventbus.Bus // required
}
// PortUpdate is an eventbus value, reporting the port and address family
// magicsock is currently listening on, so it can be threaded through firewalls
// and such.
type PortUpdate struct {
UDPPort uint16
EndpointNetwork string // either "udp4" or "udp6".
}
// HookNewUserspaceRouter is the registration point for router implementations
// to register a constructor for userspace routers. It's meant for implementations
// in wgengine/router/osrouter.
//
// If no implementation is registered, [New] will return an error.
var HookNewUserspaceRouter feature.Hook[func(NewOpts) (Router, error)]
// New returns a new Router for the current platform, using the
// provided tun device.
//
// If netMon is nil, it's not used. It's currently (2021-07-20) only
// used on Linux in some situations.
func New(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
func New(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor,
health *health.Tracker, bus *eventbus.Bus,
) (Router, error) {
logf = logger.WithPrefix(logf, "router: ")
return newUserspaceRouter(logf, tundev, netMon, health)
if f, ok := HookNewUserspaceRouter.GetOk(); ok {
return f(NewOpts{
Logf: logf,
Tun: tundev,
NetMon: netMon,
Health: health,
Bus: bus,
})
}
if !buildfeatures.HasOSRouter {
return nil, errors.New("router: tailscaled was built without OSRouter support")
}
return nil, fmt.Errorf("unsupported OS %q", runtime.GOOS)
}
// HookCleanUp is the optional registration point for router implementations
// to register a cleanup function for [CleanUp] to use. It's meant for
// implementations in wgengine/router/osrouter.
var HookCleanUp feature.Hook[func(_ logger.Logf, _ *netmon.Monitor, ifName string)]
// CleanUp restores the system network configuration to its original state
// in case the Tailscale daemon terminated without closing the router.
// No other state needs to be instantiated before this runs.
func CleanUp(logf logger.Logf, netMon *netmon.Monitor, interfaceName string) {
cleanUp(logf, interfaceName)
if f, ok := HookCleanUp.GetOk(); ok {
f(logf, netMon, interfaceName)
}
}
// Config is the subset of Tailscale configuration that is relevant to
@@ -91,7 +135,7 @@ type Config struct {
SNATSubnetRoutes bool // SNAT traffic to local subnets
StatefulFiltering bool // Apply stateful filtering to inbound connections
NetfilterMode preftype.NetfilterMode // how much to manage netfilter rules
NetfilterKind string // what kind of netfilter to use (nftables, iptables)
NetfilterKind string // what kind of netfilter to use ("nftables", "iptables", or "" to auto-detect)
}
func (a *Config) Equal(b *Config) bool {
@@ -104,7 +148,14 @@ func (a *Config) Equal(b *Config) bool {
return reflect.DeepEqual(a, b)
}
// shutdownConfig is a routing configuration that removes all router
// state from the OS. It's the config used when callers pass in a nil
// Config.
var shutdownConfig = Config{}
func (c *Config) Clone() *Config {
if c == nil {
return nil
}
c2 := *c
c2.LocalAddrs = slices.Clone(c.LocalAddrs)
c2.Routes = slices.Clone(c.Routes)
c2.LocalRoutes = slices.Clone(c.LocalRoutes)
c2.SubnetRoutes = slices.Clone(c.SubnetRoutes)
return &c2
}

View File

@@ -1,19 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package router
import (
"github.com/tailscale/wireguard-go/tun"
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
)
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
return newUserspaceBSDRouter(logf, tundev, netMon, health)
}
func cleanUp(logger.Logf, string) {
// Nothing to do.
}

View File

@@ -1,24 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !windows && !linux && !darwin && !openbsd && !freebsd
package router
import (
"fmt"
"runtime"
"github.com/tailscale/wireguard-go/tun"
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
)
func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
return nil, fmt.Errorf("unsupported OS %q", runtime.GOOS)
}
func cleanUp(logf logger.Logf, interfaceName string) {
// Nothing to do here.
}

View File

@@ -27,11 +27,6 @@ func (r fakeRouter) Set(cfg *Config) error {
return nil
}
func (r fakeRouter) UpdateMagicsockPort(_ uint16, _ string) error {
r.logf("[v1] warning: fakeRouter.UpdateMagicsockPort: not implemented.")
return nil
}
func (r fakeRouter) Close() error {
r.logf("[v1] warning: fakeRouter.Close: not implemented.")
return nil

View File

@@ -1,31 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package router
import (
"github.com/tailscale/wireguard-go/tun"
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
)
// For now this router only supports the userspace WireGuard implementations.
//
// Work is currently underway for an in-kernel FreeBSD implementation of wireguard
// https://svnweb.freebsd.org/base?view=revision&revision=357986
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
return newUserspaceBSDRouter(logf, tundev, netMon, health)
}
func cleanUp(logf logger.Logf, interfaceName string) {
// If the interface was left behind, ifconfig down will not remove it.
// In fact, this will leave a system in a tainted state where starting tailscaled
// will result in "interface tailscale0 already exists"
// until the defunct interface is ifconfig-destroyed.
ifup := []string{"ifconfig", interfaceName, "destroy"}
if out, err := cmd(ifup...).CombinedOutput(); err != nil {
logf("ifconfig destroy: %v\n%s", err, out)
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,249 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package router
import (
"errors"
"fmt"
"log"
"net/netip"
"os/exec"
"github.com/tailscale/wireguard-go/tun"
"go4.org/netipx"
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
"tailscale.com/util/set"
)
// For now this router only supports the WireGuard userspace implementation.
// There is an experimental kernel version in the works for OpenBSD:
// https://git.zx2c4.com/wireguard-openbsd.
type openbsdRouter struct {
logf logger.Logf
netMon *netmon.Monitor
tunname string
local4 netip.Prefix
local6 netip.Prefix
routes set.Set[netip.Prefix]
}
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
tunname, err := tundev.Name()
if err != nil {
return nil, err
}
return &openbsdRouter{
logf: logf,
netMon: netMon,
tunname: tunname,
}, nil
}
func cmd(args ...string) *exec.Cmd {
if len(args) == 0 {
log.Fatalf("exec.Cmd(%#v) invalid; need argv[0]", args)
}
return exec.Command(args[0], args[1:]...)
}
func (r *openbsdRouter) Up() error {
ifup := []string{"ifconfig", r.tunname, "up"}
if out, err := cmd(ifup...).CombinedOutput(); err != nil {
r.logf("running ifconfig failed: %v\n%s", err, out)
return err
}
return nil
}
func inet(p netip.Prefix) string {
if p.Addr().Is6() {
return "inet6"
}
return "inet"
}
func (r *openbsdRouter) Set(cfg *Config) error {
if cfg == nil {
cfg = &shutdownConfig
}
// TODO: support configuring multiple local addrs on interface.
if len(cfg.LocalAddrs) == 0 {
return nil
}
numIPv4 := 0
numIPv6 := 0
localAddr4 := netip.Prefix{}
localAddr6 := netip.Prefix{}
for _, addr := range cfg.LocalAddrs {
if addr.Addr().Is4() {
numIPv4++
localAddr4 = addr
}
if addr.Addr().Is6() {
numIPv6++
localAddr6 = addr
}
}
if numIPv4 > 1 || numIPv6 > 1 {
return errors.New("openbsd doesn't support setting multiple local addrs yet")
}
var errq error
if localAddr4 != r.local4 {
if r.local4.IsValid() {
addrdel := []string{"ifconfig", r.tunname,
"inet", r.local4.String(), "-alias"}
out, err := cmd(addrdel...).CombinedOutput()
if err != nil {
r.logf("addr del failed: %v: %v\n%s", addrdel, err, out)
if errq == nil {
errq = err
}
}
routedel := []string{"route", "-q", "-n",
"del", "-inet", r.local4.String(),
"-iface", r.local4.Addr().String()}
if out, err := cmd(routedel...).CombinedOutput(); err != nil {
r.logf("route del failed: %v: %v\n%s", routedel, err, out)
if errq == nil {
errq = err
}
}
}
if localAddr4.IsValid() {
addradd := []string{"ifconfig", r.tunname,
"inet", localAddr4.String(), "alias"}
out, err := cmd(addradd...).CombinedOutput()
if err != nil {
r.logf("addr add failed: %v: %v\n%s", addradd, err, out)
if errq == nil {
errq = err
}
}
routeadd := []string{"route", "-q", "-n",
"add", "-inet", localAddr4.String(),
"-iface", localAddr4.Addr().String()}
if out, err := cmd(routeadd...).CombinedOutput(); err != nil {
r.logf("route add failed: %v: %v\n%s", routeadd, err, out)
if errq == nil {
errq = err
}
}
}
}
if localAddr6.IsValid() {
// in https://github.com/tailscale/tailscale/issues/1307 we made
// FreeBSD use a /48 for IPv6 addresses, which is nice because we
// don't need to additionally add routing entries. Do that here too.
localAddr6 = netip.PrefixFrom(localAddr6.Addr(), 48)
}
if localAddr6 != r.local6 {
if r.local6.IsValid() {
addrdel := []string{"ifconfig", r.tunname,
"inet6", r.local6.String(), "delete"}
out, err := cmd(addrdel...).CombinedOutput()
if err != nil {
r.logf("addr del failed: %v: %v\n%s", addrdel, err, out)
if errq == nil {
errq = err
}
}
}
if localAddr6.IsValid() {
addradd := []string{"ifconfig", r.tunname,
"inet6", localAddr6.String()}
out, err := cmd(addradd...).CombinedOutput()
if err != nil {
r.logf("addr add failed: %v: %v\n%s", addradd, err, out)
if errq == nil {
errq = err
}
}
}
}
newRoutes := set.Set[netip.Prefix]{}
for _, route := range cfg.Routes {
newRoutes.Add(route)
}
for route := range r.routes {
if _, keep := newRoutes[route]; !keep {
net := netipx.PrefixIPNet(route)
nip := net.IP.Mask(net.Mask)
nstr := fmt.Sprintf("%v/%d", nip, route.Bits())
dst := localAddr4.Addr().String()
if route.Addr().Is6() {
dst = localAddr6.Addr().String()
}
routedel := []string{"route", "-q", "-n",
"del", "-" + inet(route), nstr,
"-iface", dst}
out, err := cmd(routedel...).CombinedOutput()
if err != nil {
r.logf("route del failed: %v: %v\n%s", routedel, err, out)
if errq == nil {
errq = err
}
}
}
}
for route := range newRoutes {
if _, exists := r.routes[route]; !exists {
net := netipx.PrefixIPNet(route)
nip := net.IP.Mask(net.Mask)
nstr := fmt.Sprintf("%v/%d", nip, route.Bits())
dst := localAddr4.Addr().String()
if route.Addr().Is6() {
dst = localAddr6.Addr().String()
}
routeadd := []string{"route", "-q", "-n",
"add", "-" + inet(route), nstr,
"-iface", dst}
out, err := cmd(routeadd...).CombinedOutput()
if err != nil {
r.logf("addr add failed: %v: %v\n%s", routeadd, err, out)
if errq == nil {
errq = err
}
}
}
}
r.local4 = localAddr4
r.local6 = localAddr6
r.routes = newRoutes
return errq
}
// UpdateMagicsockPort implements the Router interface. This implementation
// does nothing and returns nil because this router does not currently need
// to know what the magicsock UDP port is.
func (r *openbsdRouter) UpdateMagicsockPort(_ uint16, _ string) error {
return nil
}
func (r *openbsdRouter) Close() error {
cleanUp(r.logf, r.tunname)
return nil
}
func cleanUp(logf logger.Logf, interfaceName string) {
out, err := cmd("ifconfig", interfaceName, "down").CombinedOutput()
if err != nil {
logf("ifconfig down: %v\n%s", err, out)
}
}

View File

@@ -1,211 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build darwin || freebsd
package router
import (
"fmt"
"log"
"net/netip"
"os/exec"
"runtime"
"github.com/tailscale/wireguard-go/tun"
"go4.org/netipx"
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/net/tsaddr"
"tailscale.com/types/logger"
"tailscale.com/version"
)
type userspaceBSDRouter struct {
logf logger.Logf
netMon *netmon.Monitor
health *health.Tracker
tunname string
local []netip.Prefix
routes map[netip.Prefix]bool
}
func newUserspaceBSDRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
tunname, err := tundev.Name()
if err != nil {
return nil, err
}
return &userspaceBSDRouter{
logf: logf,
netMon: netMon,
health: health,
tunname: tunname,
}, nil
}
func (r *userspaceBSDRouter) addrsToRemove(newLocalAddrs []netip.Prefix) (remove []netip.Prefix) {
for _, cur := range r.local {
found := false
for _, v := range newLocalAddrs {
found = (v == cur)
if found {
break
}
}
if !found {
remove = append(remove, cur)
}
}
return
}
func (r *userspaceBSDRouter) addrsToAdd(newLocalAddrs []netip.Prefix) (add []netip.Prefix) {
for _, cur := range newLocalAddrs {
found := false
for _, v := range r.local {
found = (v == cur)
if found {
break
}
}
if !found {
add = append(add, cur)
}
}
return
}
func cmd(args ...string) *exec.Cmd {
if len(args) == 0 {
log.Fatalf("exec.Cmd(%#v) invalid; need argv[0]", args)
}
return exec.Command(args[0], args[1:]...)
}
func (r *userspaceBSDRouter) Up() error {
ifup := []string{"ifconfig", r.tunname, "up"}
if out, err := cmd(ifup...).CombinedOutput(); err != nil {
r.logf("running ifconfig failed: %v\n%s", err, out)
return err
}
return nil
}
func inet(p netip.Prefix) string {
if p.Addr().Is6() {
return "inet6"
}
return "inet"
}
func (r *userspaceBSDRouter) Set(cfg *Config) (reterr error) {
if cfg == nil {
cfg = &shutdownConfig
}
setErr := func(err error) {
if reterr == nil {
reterr = err
}
}
addrsToRemove := r.addrsToRemove(cfg.LocalAddrs)
// If we're removing all addresses, we need to remove and re-add all
// routes.
resetRoutes := len(r.local) > 0 && len(addrsToRemove) == len(r.local)
// Update the addresses.
for _, addr := range addrsToRemove {
arg := []string{"ifconfig", r.tunname, inet(addr), addr.String(), "-alias"}
out, err := cmd(arg...).CombinedOutput()
if err != nil {
r.logf("addr del failed: %v => %v\n%s", arg, err, out)
setErr(err)
}
}
for _, addr := range r.addrsToAdd(cfg.LocalAddrs) {
var arg []string
if runtime.GOOS == "freebsd" && addr.Addr().Is6() && addr.Bits() == 128 {
// FreeBSD rejects tun addresses of the form fc00::1/128 -> fc00::1,
// https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=218508
// Instead add our whole /48, which works because we use a /48 route.
// Full history: https://github.com/tailscale/tailscale/issues/1307
tmp := netip.PrefixFrom(addr.Addr(), 48)
arg = []string{"ifconfig", r.tunname, inet(tmp), tmp.String()}
} else {
arg = []string{"ifconfig", r.tunname, inet(addr), addr.String(), addr.Addr().String()}
}
out, err := cmd(arg...).CombinedOutput()
if err != nil {
r.logf("addr add failed: %v => %v\n%s", arg, err, out)
setErr(err)
}
}
newRoutes := make(map[netip.Prefix]bool)
for _, route := range cfg.Routes {
if runtime.GOOS != "darwin" && route == tsaddr.TailscaleULARange() {
// Because we added the interface address as a /48 above,
// the kernel already created the Tailscale ULA route
// implicitly. We mustn't try to add/delete it ourselves.
continue
}
newRoutes[route] = true
}
// Delete any preexisting routes.
for route := range r.routes {
if resetRoutes || !newRoutes[route] {
net := netipx.PrefixIPNet(route)
nip := net.IP.Mask(net.Mask)
nstr := fmt.Sprintf("%v/%d", nip, route.Bits())
del := "del"
if version.OS() == "macOS" {
del = "delete"
}
routedel := []string{"route", "-q", "-n",
del, "-" + inet(route), nstr,
"-iface", r.tunname}
out, err := cmd(routedel...).CombinedOutput()
if err != nil {
r.logf("route del failed: %v: %v\n%s", routedel, err, out)
setErr(err)
}
}
}
// Add the routes.
for route := range newRoutes {
if resetRoutes || !r.routes[route] {
net := netipx.PrefixIPNet(route)
nip := net.IP.Mask(net.Mask)
nstr := fmt.Sprintf("%v/%d", nip, route.Bits())
routeadd := []string{"route", "-q", "-n",
"add", "-" + inet(route), nstr,
"-iface", r.tunname}
out, err := cmd(routeadd...).CombinedOutput()
if err != nil {
r.logf("addr add failed: %v: %v\n%s", routeadd, err, out)
setErr(err)
}
}
}
// Store the interface and routes so we know what to change on an update.
if reterr == nil {
r.local = append([]netip.Prefix{}, cfg.LocalAddrs...)
}
r.routes = newRoutes
return reterr
}
// UpdateMagicsockPort implements the Router interface. This implementation
// does nothing and returns nil because this router does not currently need
// to know what the magicsock UDP port is.
func (r *userspaceBSDRouter) UpdateMagicsockPort(_ uint16, _ string) error {
return nil
}
func (r *userspaceBSDRouter) Close() error {
return nil
}

View File

@@ -1,400 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package router
import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"
"net/netip"
"os"
"os/exec"
"path/filepath"
"slices"
"strings"
"sync"
"syscall"
"time"
"github.com/tailscale/wireguard-go/tun"
"golang.org/x/sys/windows"
"golang.zx2c4.com/wireguard/windows/tunnel/winipcfg"
"tailscale.com/health"
"tailscale.com/logtail/backoff"
"tailscale.com/net/dns"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
)
type winRouter struct {
logf func(fmt string, args ...any)
netMon *netmon.Monitor // may be nil
health *health.Tracker
nativeTun *tun.NativeTun
routeChangeCallback *winipcfg.RouteChangeCallback
firewall *firewallTweaker
}
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
nativeTun := tundev.(*tun.NativeTun)
luid := winipcfg.LUID(nativeTun.LUID())
guid, err := luid.GUID()
if err != nil {
return nil, err
}
return &winRouter{
logf: logf,
netMon: netMon,
health: health,
nativeTun: nativeTun,
firewall: &firewallTweaker{
logf: logger.WithPrefix(logf, "firewall: "),
tunGUID: *guid,
},
}, nil
}
func (r *winRouter) Up() error {
r.firewall.clear()
var err error
t0 := time.Now()
r.routeChangeCallback, err = monitorDefaultRoutes(r.nativeTun)
d := time.Since(t0).Round(time.Millisecond)
if err != nil {
return fmt.Errorf("monitorDefaultRoutes, after %v: %v", d, err)
}
r.logf("monitorDefaultRoutes done after %v", d)
return nil
}
func (r *winRouter) Set(cfg *Config) error {
if cfg == nil {
cfg = &shutdownConfig
}
var localAddrs []string
for _, la := range cfg.LocalAddrs {
localAddrs = append(localAddrs, la.String())
}
r.firewall.set(localAddrs, cfg.Routes, cfg.LocalRoutes)
err := configureInterface(cfg, r.nativeTun, r.health)
if err != nil {
r.logf("ConfigureInterface: %v", err)
return err
}
// Flush DNS on router config change to clear cached DNS entries (solves #1430)
if err := dns.Flush(); err != nil {
r.logf("flushdns error: %v", err)
}
return nil
}
func hasDefaultRoute(routes []netip.Prefix) bool {
for _, route := range routes {
if route.Bits() == 0 {
return true
}
}
return false
}
// UpdateMagicsockPort implements the Router interface. This implementation
// does nothing and returns nil because this router does not currently need
// to know what the magicsock UDP port is.
func (r *winRouter) UpdateMagicsockPort(_ uint16, _ string) error {
return nil
}
func (r *winRouter) Close() error {
r.firewall.clear()
if r.routeChangeCallback != nil {
r.routeChangeCallback.Unregister()
}
return nil
}
func cleanUp(logf logger.Logf, interfaceName string) {
// Nothing to do here.
}
// firewallTweaker changes the Windows firewall. Normally this wouldn't be so complicated,
// but it can be REALLY SLOW to change the Windows firewall for reasons not understood.
// Like 4 minutes slow. But usually it's tens of milliseconds.
// See https://github.com/tailscale/tailscale/issues/785.
// So this tracks the desired state and runs the actual adjusting code asynchronously.
type firewallTweaker struct {
logf logger.Logf
tunGUID windows.GUID
mu sync.Mutex
didProcRule bool
running bool // doAsyncSet goroutine is running
known bool // firewall is in known state (in lastVal)
wantLocal []string // next value we want, or "" to delete the firewall rule
lastLocal []string // last set value, if known
localRoutes []netip.Prefix
lastLocalRoutes []netip.Prefix
wantKillswitch bool
lastKillswitch bool
// Only touched by doAsyncSet, so mu doesn't need to be held.
// fwProc is a subprocess that runs the wireguard-windows firewall
// killswitch code. It is only non-nil when the default route
// killswitch is active, and may go back and forth between nil and
// non-nil any number of times during the process's lifetime.
fwProc *exec.Cmd
// stop makes fwProc exit when closed.
fwProcWriter io.WriteCloser
fwProcEncoder *json.Encoder
// The path to the 'netsh.exe' binary, populated during the first call
// to runFirewall.
//
// not protected by mu; netshPath is only mutated inside netshPathOnce
netshPathOnce sync.Once
netshPath string
}
func (ft *firewallTweaker) clear() { ft.set(nil, nil, nil) }
// set takes CIDRs to allow, and the routes that point into the Tailscale tun interface.
// Empty slices remove firewall rules.
//
// set takes ownership of cidrs, but not routes.
func (ft *firewallTweaker) set(cidrs []string, routes, localRoutes []netip.Prefix) {
ft.mu.Lock()
defer ft.mu.Unlock()
if len(cidrs) == 0 {
ft.logf("marking for removal")
} else {
ft.logf("marking allowed %v", cidrs)
}
ft.wantLocal = cidrs
ft.localRoutes = localRoutes
ft.wantKillswitch = hasDefaultRoute(routes)
if ft.running {
// The doAsyncSet goroutine will check ft.wantLocal/wantKillswitch
// before returning.
return
}
ft.logf("starting netsh goroutine")
ft.running = true
go ft.doAsyncSet()
}
// getNetshPath returns the path that should be used to execute netsh.
//
// We've seen a report from a customer that we're triggering the "cannot run
// executable found relative to current directory" protection that was added to
// prevent running possibly attacker-controlled binaries. To mitigate this,
// first try looking up the path to netsh.exe in the System32 directory
// explicitly, and then fall back to the prior behaviour of passing "netsh" to
// os/exec.Command.
func (ft *firewallTweaker) getNetshPath() string {
ft.netshPathOnce.Do(func() {
// The default value is the old approach: just run "netsh" and
// let os/exec resolve that into a full path.
ft.netshPath = "netsh"
path, err := windows.KnownFolderPath(windows.FOLDERID_System, 0)
if err != nil {
ft.logf("getNetshPath: error getting FOLDERID_System: %v", err)
return
}
expath := filepath.Join(path, "netsh.exe")
if _, err := os.Stat(expath); err == nil {
ft.netshPath = expath
return
} else if !os.IsNotExist(err) {
ft.logf("getNetshPath: error checking for existence of %q: %v", expath, err)
}
// Keep default
})
return ft.netshPath
}
func (ft *firewallTweaker) runFirewall(args ...string) (time.Duration, error) {
t0 := time.Now()
args = append([]string{"advfirewall", "firewall"}, args...)
cmd := exec.Command(ft.getNetshPath(), args...)
cmd.SysProcAttr = &syscall.SysProcAttr{
CreationFlags: windows.DETACHED_PROCESS,
}
b, err := cmd.CombinedOutput()
if err != nil {
err = fmt.Errorf("%w: %v", err, string(b))
}
return time.Since(t0).Round(time.Millisecond), err
}
func (ft *firewallTweaker) doAsyncSet() {
bo := backoff.NewBackoff("win-firewall", ft.logf, time.Minute)
ctx := context.Background()
ft.mu.Lock()
for { // invariant: ft.mu must be locked when beginning this block
val := ft.wantLocal
if ft.known && slices.Equal(ft.lastLocal, val) && ft.wantKillswitch == ft.lastKillswitch && slices.Equal(ft.localRoutes, ft.lastLocalRoutes) {
ft.running = false
ft.logf("ending netsh goroutine")
ft.mu.Unlock()
return
}
wantKillswitch := ft.wantKillswitch
needClear := !ft.known || len(ft.lastLocal) > 0 || len(val) == 0
needProcRule := !ft.didProcRule
localRoutes := ft.localRoutes
ft.mu.Unlock()
err := ft.doSet(val, wantKillswitch, needClear, needProcRule, localRoutes)
if err != nil {
ft.logf("set failed: %v", err)
}
bo.BackOff(ctx, err)
ft.mu.Lock()
ft.lastLocal = val
ft.lastLocalRoutes = localRoutes
ft.lastKillswitch = wantKillswitch
ft.known = (err == nil)
}
}
// doSet creates and deletes firewall rules to make the system state
// match the values of local, killswitch, clear and procRule.
//
// local is the list of local Tailscale addresses (formatted as CIDR
// prefixes) to allow through the Windows firewall.
// killswitch, if true, enables the wireguard-windows based internet
// killswitch to prevent use of non-Tailscale default routes.
// clear, if true, removes all tailscale address firewall rules before
// adding local.
// procRule, if true, installs a firewall rule that permits the Tailscale
// process to dial out as it pleases.
//
// Must only be invoked from doAsyncSet.
func (ft *firewallTweaker) doSet(local []string, killswitch bool, clear bool, procRule bool, allowedRoutes []netip.Prefix) error {
if clear {
ft.logf("clearing Tailscale-In firewall rules...")
// We ignore the error here, because netsh returns an error for
// deleting something that doesn't match.
// TODO(bradfitz): care? That'd involve querying it before/after to see
// whether it was necessary/worked. But the output format is localized,
// so can't rely on parsing English. Maybe need to use OLE, not netsh.exe?
d, _ := ft.runFirewall("delete", "rule", "name=Tailscale-In", "dir=in")
ft.logf("cleared Tailscale-In firewall rules in %v", d)
}
if procRule {
ft.logf("deleting any prior Tailscale-Process rule...")
d, err := ft.runFirewall("delete", "rule", "name=Tailscale-Process", "dir=in") // best effort
if err == nil {
ft.logf("removed old Tailscale-Process rule in %v", d)
}
var exe string
exe, err = os.Executable()
if err != nil {
ft.logf("failed to find Executable for Tailscale-Process rule: %v", err)
} else {
ft.logf("adding Tailscale-Process rule to allow UDP for %q ...", exe)
d, err = ft.runFirewall("add", "rule", "name=Tailscale-Process",
"dir=in",
"action=allow",
"edge=yes",
"program="+exe,
"protocol=udp",
"profile=any",
"enable=yes",
)
if err != nil {
ft.logf("error adding Tailscale-Process rule: %v", err)
} else {
ft.mu.Lock()
ft.didProcRule = true
ft.mu.Unlock()
ft.logf("added Tailscale-Process rule in %v", d)
}
}
}
for _, cidr := range local {
ft.logf("adding Tailscale-In rule to allow %v ...", cidr)
var d time.Duration
d, err := ft.runFirewall("add", "rule", "name=Tailscale-In", "dir=in", "action=allow", "localip="+cidr, "profile=private,domain", "enable=yes")
if err != nil {
ft.logf("error adding Tailscale-In rule to allow %v: %v", cidr, err)
return err
}
ft.logf("added Tailscale-In rule to allow %v in %v", cidr, d)
}
if !killswitch {
if ft.fwProc != nil {
ft.fwProcWriter.Close()
ft.fwProcWriter = nil
ft.fwProc.Wait()
ft.fwProc = nil
ft.fwProcEncoder = nil
}
return nil
}
if ft.fwProc == nil {
exe, err := os.Executable()
if err != nil {
return err
}
proc := exec.Command(exe, "/firewall", ft.tunGUID.String())
proc.SysProcAttr = &syscall.SysProcAttr{
CreationFlags: windows.DETACHED_PROCESS,
}
in, err := proc.StdinPipe()
if err != nil {
return err
}
out, err := proc.StdoutPipe()
if err != nil {
in.Close()
return err
}
go func(out io.ReadCloser) {
b := bufio.NewReaderSize(out, 1<<10)
for {
line, err := b.ReadString('\n')
if err != nil {
return
}
line = strings.TrimSpace(line)
if line != "" {
ft.logf("fw-child: %s", line)
}
}
}(out)
proc.Stderr = proc.Stdout
if err := proc.Start(); err != nil {
return err
}
ft.fwProcWriter = in
ft.fwProc = proc
ft.fwProcEncoder = json.NewEncoder(in)
}
// Note(maisem): when local lan access toggled, we need to inform the
// firewall to let the local routes through. The set of routes is passed
// in via stdin encoded in json.
return ft.fwProcEncoder.Encode(allowedRoutes)
}

View File

@@ -1,120 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build linux
package router
import (
"errors"
"fmt"
"os"
"os/exec"
"strconv"
"strings"
"syscall"
"golang.org/x/sys/unix"
)
// commandRunner abstracts helpers to run OS commands. It exists
// purely to swap out osCommandRunner (below) with a fake runner in
// tests.
type commandRunner interface {
run(...string) error
output(...string) ([]byte, error)
}
type osCommandRunner struct {
// ambientCapNetAdmin determines whether commands are executed with
// CAP_NET_ADMIN.
// CAP_NET_ADMIN is required when running as non-root and executing cmds
// like `ip rule`. Even if our process has the capability, we need to
// explicitly grant it to the new process.
// We specifically need this for Synology DSM7 where tailscaled no longer
// runs as root.
ambientCapNetAdmin bool
}
// errCode extracts and returns the process exit code from err, or
// zero if err is nil.
func errCode(err error) int {
if err == nil {
return 0
}
var e *exec.ExitError
if ok := errors.As(err, &e); ok {
return e.ExitCode()
}
s := err.Error()
if strings.HasPrefix(s, "exitcode:") {
code, err := strconv.Atoi(s[9:])
if err == nil {
return code
}
}
return -42
}
func (o osCommandRunner) run(args ...string) error {
_, err := o.output(args...)
return err
}
func (o osCommandRunner) output(args ...string) ([]byte, error) {
if len(args) == 0 {
return nil, errors.New("cmd: no argv[0]")
}
cmd := exec.Command(args[0], args[1:]...)
cmd.Env = append(os.Environ(), "LC_ALL=C")
if o.ambientCapNetAdmin {
cmd.SysProcAttr = &syscall.SysProcAttr{
AmbientCaps: []uintptr{unix.CAP_NET_ADMIN},
}
}
out, err := cmd.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("running %q failed: %w\n%s", strings.Join(args, " "), err, out)
}
return out, nil
}
type runGroup struct {
OkCode []int // error codes that are acceptable, other than 0, if any
Runner commandRunner // the runner that actually runs our commands
ErrAcc error // first error encountered, if any
}
func newRunGroup(okCode []int, runner commandRunner) *runGroup {
return &runGroup{
OkCode: okCode,
Runner: runner,
}
}
func (rg *runGroup) okCode(err error) bool {
got := errCode(err)
for _, want := range rg.OkCode {
if got == want {
return true
}
}
return false
}
func (rg *runGroup) Output(args ...string) []byte {
b, err := rg.Runner.output(args...)
if rg.ErrAcc == nil && err != nil && !rg.okCode(err) {
rg.ErrAcc = err
}
return b
}
func (rg *runGroup) Run(args ...string) {
err := rg.Runner.run(args...)
if rg.ErrAcc == nil && err != nil && !rg.okCode(err) {
rg.ErrAcc = err
}
}

View File

@@ -10,8 +10,10 @@ import (
"errors"
"fmt"
"io"
"maps"
"math"
"net/netip"
"reflect"
"runtime"
"slices"
"strings"
@@ -23,17 +25,18 @@ import (
"tailscale.com/control/controlknobs"
"tailscale.com/drive"
"tailscale.com/envknob"
"tailscale.com/feature"
"tailscale.com/feature/buildfeatures"
"tailscale.com/health"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/dns"
"tailscale.com/net/flowtrack"
"tailscale.com/net/dns/resolver"
"tailscale.com/net/ipset"
"tailscale.com/net/netmon"
"tailscale.com/net/packet"
"tailscale.com/net/sockstats"
"tailscale.com/net/tsaddr"
"tailscale.com/net/tsdial"
"tailscale.com/net/tshttpproxy"
"tailscale.com/net/tstun"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
@@ -44,8 +47,11 @@ import (
"tailscale.com/types/logger"
"tailscale.com/types/netmap"
"tailscale.com/types/views"
"tailscale.com/util/backoff"
"tailscale.com/util/checkchange"
"tailscale.com/util/clientmetric"
"tailscale.com/util/deephash"
"tailscale.com/util/eventbus"
"tailscale.com/util/execqueue"
"tailscale.com/util/mak"
"tailscale.com/util/set"
"tailscale.com/util/testenv"
@@ -89,23 +95,29 @@ const statusPollInterval = 1 * time.Minute
const networkLoggerUploadTimeout = 5 * time.Second
type userspaceEngine struct {
logf logger.Logf
wgLogger *wglog.Logger //a wireguard-go logging wrapper
reqCh chan struct{}
waitCh chan struct{} // chan is closed when first Close call completes; contrast with closing bool
timeNow func() mono.Time
tundev *tstun.Wrapper
wgdev *device.Device
router router.Router
confListenPort uint16 // original conf.ListenPort
dns *dns.Manager
magicConn *magicsock.Conn
netMon *netmon.Monitor
health *health.Tracker
netMonOwned bool // whether we created netMon (and thus need to close it)
netMonUnregister func() // unsubscribes from changes; used regardless of netMonOwned
birdClient BIRDClient // or nil
controlKnobs *controlknobs.Knobs // or nil
// eventBus will eventually become required, but for now may be nil.
eventBus *eventbus.Bus
eventClient *eventbus.Client
linkChangeQueue execqueue.ExecQueue
logf logger.Logf
wgLogger *wglog.Logger // a wireguard-go logging wrapper
reqCh chan struct{}
waitCh chan struct{} // chan is closed when first Close call completes; contrast with closing bool
timeNow func() mono.Time
tundev *tstun.Wrapper
wgdev *device.Device
router router.Router
dialer *tsdial.Dialer
confListenPort uint16 // original conf.ListenPort
dns *dns.Manager
magicConn *magicsock.Conn
netMon *netmon.Monitor
health *health.Tracker
netMonOwned bool // whether we created netMon (and thus need to close it)
birdClient BIRDClient // or nil
controlKnobs *controlknobs.Knobs // or nil
testMaybeReconfigHook func() // for tests; if non-nil, fires if maybeReconfigWireguardLocked called
@@ -121,11 +133,11 @@ type userspaceEngine struct {
wgLock sync.Mutex // serializes all wgdev operations; see lock order comment below
lastCfgFull wgcfg.Config
lastNMinPeers int
lastRouterSig deephash.Sum // of router.Config
lastEngineSigFull deephash.Sum // of full wireguard config
lastEngineSigTrim deephash.Sum // of trimmed wireguard config
lastDNSConfig *dns.Config
lastIsSubnetRouter bool // was the node a primary subnet router in the last run.
lastRouter *router.Config
lastEngineFull *wgcfg.Config // of full wireguard config, not trimmed
lastEngineInputs *maybeReconfigInputs
lastDNSConfig dns.ConfigView // or invalid if none
lastIsSubnetRouter bool // was the node a primary subnet router in the last run.
recvActivityAt map[key.NodePublic]mono.Time
trimmedNodes map[key.NodePublic]bool // set of node keys of peers currently excluded from wireguard config
sentActivityAt map[netip.Addr]*mono.Time // value is accessed atomically
@@ -137,9 +149,9 @@ type userspaceEngine struct {
netMap *netmap.NetworkMap // or nil
closing bool // Close was called (even if we're still closing)
statusCallback StatusCallback
peerSequence []key.NodePublic
peerSequence views.Slice[key.NodePublic]
endpoints []tailcfg.Endpoint
pendOpen map[flowtrack.Tuple]*pendingOpenFlow // see pendopen.go
pendOpen map[flowtrackTuple]*pendingOpenFlow // see pendopen.go
// pongCallback is the map of response handlers waiting for disco or TSMP
// pong callbacks. The map key is a random slice of bytes.
@@ -227,6 +239,13 @@ type Config struct {
// DriveForLocal, if populated, will cause the engine to expose a Taildrive
// listener at 100.100.100.100:8080.
DriveForLocal drive.FileSystemForLocal
// EventBus, if non-nil, is used for event publication and subscription by
// the Engine and its subsystems.
//
// TODO(creachadair): As of 2025-03-19 this is optional, but is intended to
// become required non-nil.
EventBus *eventbus.Bus
}
// NewFakeUserspaceEngine returns a new userspace engine for testing.
@@ -255,6 +274,8 @@ func NewFakeUserspaceEngine(logf logger.Logf, opts ...any) (Engine, error) {
conf.HealthTracker = v
case *usermetric.Registry:
conf.Metrics = v
case *eventbus.Bus:
conf.EventBus = v
default:
return nil, fmt.Errorf("unknown option type %T", v)
}
@@ -295,13 +316,16 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
}
if conf.Dialer == nil {
conf.Dialer = &tsdial.Dialer{Logf: logf}
if conf.EventBus != nil {
conf.Dialer.SetBus(conf.EventBus)
}
}
var tsTUNDev *tstun.Wrapper
if conf.IsTAP {
tsTUNDev = tstun.WrapTAP(logf, conf.Tun, conf.Metrics)
tsTUNDev = tstun.WrapTAP(logf, conf.Tun, conf.Metrics, conf.EventBus)
} else {
tsTUNDev = tstun.Wrap(logf, conf.Tun, conf.Metrics)
tsTUNDev = tstun.Wrap(logf, conf.Tun, conf.Metrics, conf.EventBus)
}
closePool.add(tsTUNDev)
@@ -323,12 +347,14 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
}
e := &userspaceEngine{
eventBus: conf.EventBus,
timeNow: mono.Now,
logf: logf,
reqCh: make(chan struct{}, 1),
waitCh: make(chan struct{}),
tundev: tsTUNDev,
router: rtr,
dialer: conf.Dialer,
confListenPort: conf.ListenPort,
birdClient: conf.BIRDClient,
controlKnobs: conf.ControlKnobs,
@@ -348,7 +374,7 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
if conf.NetMon != nil {
e.netMon = conf.NetMon
} else {
mon, err := netmon.New(logf)
mon, err := netmon.New(conf.EventBus, logf)
if err != nil {
return nil, err
}
@@ -360,20 +386,14 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
tunName, _ := conf.Tun.Name()
conf.Dialer.SetTUNName(tunName)
conf.Dialer.SetNetMon(e.netMon)
e.dns = dns.NewManager(logf, conf.DNS, e.health, conf.Dialer, fwdDNSLinkSelector{e, tunName}, conf.ControlKnobs, runtime.GOOS)
conf.Dialer.SetBus(e.eventBus)
e.dns = dns.NewManager(logf, conf.DNS, e.health, conf.Dialer, fwdDNSLinkSelector{e, tunName}, conf.ControlKnobs, runtime.GOOS, e.eventBus)
// TODO: there's probably a better place for this
sockstats.SetNetMon(e.netMon)
logf("link state: %+v", e.netMon.InterfaceState())
unregisterMonWatch := e.netMon.RegisterChangeCallback(func(delta *netmon.ChangeDelta) {
tshttpproxy.InvalidateCache()
e.linkChange(delta)
})
closePool.addFunc(unregisterMonWatch)
e.netMonUnregister = unregisterMonWatch
endpointsFn := func(endpoints []tailcfg.Endpoint) {
e.mu.Lock()
e.endpoints = append(e.endpoints[:0], endpoints...)
@@ -381,26 +401,21 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
e.RequestStatus()
}
onPortUpdate := func(port uint16, network string) {
e.logf("onPortUpdate(port=%v, network=%s)", port, network)
if err := e.router.UpdateMagicsockPort(port, network); err != nil {
e.logf("UpdateMagicsockPort(port=%v, network=%s) failed: %v", port, network, err)
}
}
magicsockOpts := magicsock.Options{
Logf: logf,
Port: conf.ListenPort,
EndpointsFunc: endpointsFn,
DERPActiveFunc: e.RequestStatus,
IdleFunc: e.tundev.IdleDuration,
NoteRecvActivity: e.noteRecvActivity,
NetMon: e.netMon,
HealthTracker: e.health,
Metrics: conf.Metrics,
ControlKnobs: conf.ControlKnobs,
OnPortUpdate: onPortUpdate,
PeerByKeyFunc: e.PeerByKey,
EventBus: e.eventBus,
Logf: logf,
Port: conf.ListenPort,
EndpointsFunc: endpointsFn,
DERPActiveFunc: e.RequestStatus,
IdleFunc: e.tundev.IdleDuration,
NetMon: e.netMon,
HealthTracker: e.health,
Metrics: conf.Metrics,
ControlKnobs: conf.ControlKnobs,
PeerByKeyFunc: e.PeerByKey,
}
if buildfeatures.HasLazyWG {
magicsockOpts.NoteRecvActivity = e.noteRecvActivity
}
var err error
@@ -418,7 +433,7 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
}
e.tundev.PreFilterPacketOutboundToWireGuardEngineIntercept = e.handleLocalPackets
if envknob.BoolDefaultTrue("TS_DEBUG_CONNECT_FAILURES") {
if buildfeatures.HasDebug && envknob.BoolDefaultTrue("TS_DEBUG_CONNECT_FAILURES") {
if e.tundev.PreFilterPacketInboundFromWireGuard != nil {
return nil, errors.New("unexpected PreFilterIn already set")
}
@@ -436,6 +451,7 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
cb := e.pongCallback[pong.Data]
e.logf("wgengine: got TSMP pong %02x, peerAPIPort=%v; cb=%v", pong.Data, pong.PeerAPIPort, cb != nil)
if cb != nil {
delete(e.pongCallback, pong.Data)
go cb(pong)
}
}
@@ -449,6 +465,7 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
// We didn't swallow it, so let it flow to the host.
return false
}
delete(e.icmpEchoResponseCallback, idSeq)
e.logf("wgengine: got diagnostic ICMP response %02x", idSeq)
go cb()
return true
@@ -527,6 +544,31 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
}
}
ec := e.eventBus.Client("userspaceEngine")
eventbus.SubscribeFunc(ec, func(cd netmon.ChangeDelta) {
if f, ok := feature.HookProxyInvalidateCache.GetOk(); ok {
f()
}
e.linkChangeQueue.Add(func() { e.linkChange(&cd) })
})
eventbus.SubscribeFunc(ec, func(update tstun.DiscoKeyAdvertisement) {
e.logf("wgengine: got TSMP disco key advertisement from %v via eventbus", update.Src)
if e.magicConn == nil {
e.logf("wgengine: no magicConn")
return
}
pkt := packet.TSMPDiscoKeyAdvertisement{
Key: update.Key,
}
peer, ok := e.PeerForIP(update.Src)
if !ok {
e.logf("wgengine: no peer found for %v", update.Src)
return
}
e.magicConn.HandleDiscoKeyAdvertisement(peer.Node, pkt)
})
e.eventClient = ec
e.logf("Engine created.")
return e, nil
}
@@ -569,6 +611,17 @@ func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper)
return filter.Drop
}
}
if runtime.GOOS == "plan9" {
isLocalAddr, ok := e.isLocalAddr.LoadOk()
if ok {
if isLocalAddr(p.Dst.Addr()) {
// On Plan9's "tun" equivalent, everything goes back in and out
// the tun, even when the kernel's replying to itself.
t.InjectInboundCopy(p.Buffer())
return filter.Drop
}
}
}
return filter.Accept
}
@@ -672,6 +725,29 @@ func (e *userspaceEngine) isActiveSinceLocked(nk key.NodePublic, ip netip.Addr,
return timePtr.LoadAtomic().After(t)
}
// maybeReconfigInputs holds the inputs to the maybeReconfigWireguardLocked
// function. If these things don't change between calls, there's nothing to do.
type maybeReconfigInputs struct {
	// WGConfig is the (possibly trimmed) WireGuard config to apply.
	WGConfig *wgcfg.Config
	// TrimmedNodes is the set of node keys currently trimmed out of the
	// WireGuard config.
	TrimmedNodes map[key.NodePublic]bool
	// TrackNodes are the trimmable peers whose send/receive activity
	// should be tracked so they can be lazily (re)configured.
	TrackNodes views.Slice[key.NodePublic]
	// TrackIPs are the Tailscale IPs corresponding to TrackNodes.
	TrackIPs views.Slice[netip.Addr]
}

// Equal reports whether i and o are deeply equal, field by field.
func (i *maybeReconfigInputs) Equal(o *maybeReconfigInputs) bool {
	return reflect.DeepEqual(i, o)
}

// Clone returns a copy of i that does not share mutable state with it.
// The WGConfig and TrimmedNodes fields are deep-copied; the view fields
// are copied as-is (assumed immutable views — TODO confirm).
func (i *maybeReconfigInputs) Clone() *maybeReconfigInputs {
	if i == nil {
		return nil
	}
	v := *i
	v.WGConfig = i.WGConfig.Clone()
	v.TrimmedNodes = maps.Clone(i.TrimmedNodes)
	return &v
}
// discoChanged are the set of peers whose disco keys have changed, implying they've restarted.
// If a peer is in this set and was previously in the live wireguard config,
// it needs to be first removed and then re-added to flush out its wireguard session key.
@@ -697,15 +773,22 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.Node
// the past 5 minutes. That's more than WireGuard's key
// rotation time anyway so it's no harm if we remove it
// later if it's been inactive.
activeCutoff := e.timeNow().Add(-lazyPeerIdleThreshold)
var activeCutoff mono.Time
if buildfeatures.HasLazyWG {
activeCutoff = e.timeNow().Add(-lazyPeerIdleThreshold)
}
// Not all peers can be trimmed from the network map (see
// isTrimmablePeer). For those that are trimmable, keep track of
// their NodeKey and Tailscale IPs. These are the ones we'll need
// to install tracking hooks for to watch their send/receive
// activity.
trackNodes := make([]key.NodePublic, 0, len(full.Peers))
trackIPs := make([]netip.Addr, 0, len(full.Peers))
var trackNodes []key.NodePublic
var trackIPs []netip.Addr
if buildfeatures.HasLazyWG {
trackNodes = make([]key.NodePublic, 0, len(full.Peers))
trackIPs = make([]netip.Addr, 0, len(full.Peers))
}
// Don't re-alloc the map; the Go compiler optimizes map clears as of
// Go 1.11, so we can re-use the existing + allocated map.
@@ -719,7 +802,7 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.Node
for i := range full.Peers {
p := &full.Peers[i]
nk := p.PublicKey
if !e.isTrimmablePeer(p, len(full.Peers)) {
if !buildfeatures.HasLazyWG || !e.isTrimmablePeer(p, len(full.Peers)) {
min.Peers = append(min.Peers, *p)
if discoChanged[nk] {
needRemoveStep = true
@@ -743,16 +826,18 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.Node
}
e.lastNMinPeers = len(min.Peers)
if changed := deephash.Update(&e.lastEngineSigTrim, &struct {
WGConfig *wgcfg.Config
TrimmedNodes map[key.NodePublic]bool
TrackNodes []key.NodePublic
TrackIPs []netip.Addr
}{&min, e.trimmedNodes, trackNodes, trackIPs}); !changed {
if changed := checkchange.Update(&e.lastEngineInputs, &maybeReconfigInputs{
WGConfig: &min,
TrimmedNodes: e.trimmedNodes,
TrackNodes: views.SliceOf(trackNodes),
TrackIPs: views.SliceOf(trackIPs),
}); !changed {
return nil
}
e.updateActivityMapsLocked(trackNodes, trackIPs)
if buildfeatures.HasLazyWG {
e.updateActivityMapsLocked(trackNodes, trackIPs)
}
if needRemoveStep {
minner := min
@@ -788,6 +873,9 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.Node
//
// e.wgLock must be held.
func (e *userspaceEngine) updateActivityMapsLocked(trackNodes []key.NodePublic, trackIPs []netip.Addr) {
if !buildfeatures.HasLazyWG {
return
}
// Generate the new map of which nodekeys we want to track
// receive times for.
mr := map[key.NodePublic]mono.Time{} // TODO: only recreate this if set of keys changed
@@ -859,6 +947,32 @@ func hasOverlap(aips, rips views.Slice[netip.Prefix]) bool {
return false
}
// ResetAndStop reconfigures the engine with empty configurations (the
// equivalent of calling Reconfig with pointers to zero values) and then
// blocks until the engine has fully wound down, with no live peers or
// DERP connections remaining.
//
// Unlike Reconfig, it never returns ErrNoChanges; an already-clean
// engine counts as success.
//
// The final status is returned to the caller only; it is not delivered
// to the registered status callback, so the caller must handle it
// appropriately.
func (e *userspaceEngine) ResetAndStop() (*Status, error) {
	err := e.Reconfig(&wgcfg.Config{}, &router.Config{}, &dns.Config{})
	if err != nil && !errors.Is(err, ErrNoChanges) {
		return nil, err
	}
	// Poll the engine status, backing off between attempts, until
	// everything has quiesced.
	retry := backoff.NewBackoff("UserspaceEngineResetAndStop", e.logf, 1*time.Second)
	for {
		status, err := e.getStatus()
		if err != nil {
			return nil, err
		}
		if len(status.Peers) == 0 && status.DERPs == 0 {
			return status, nil
		}
		retry.BackOff(context.Background(), fmt.Errorf("waiting for engine to stop: peers=%d derps=%d", len(status.Peers), status.DERPs))
	}
}
func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, dnsCfg *dns.Config) error {
if routerCfg == nil {
panic("routerCfg must not be nil")
@@ -872,15 +986,17 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
e.wgLock.Lock()
defer e.wgLock.Unlock()
e.tundev.SetWGConfig(cfg)
e.lastDNSConfig = dnsCfg
peerSet := make(set.Set[key.NodePublic], len(cfg.Peers))
e.mu.Lock()
e.peerSequence = e.peerSequence[:0]
seq := make([]key.NodePublic, 0, len(cfg.Peers))
for _, p := range cfg.Peers {
e.peerSequence = append(e.peerSequence, p.PublicKey)
seq = append(seq, p.PublicKey)
peerSet.Add(p.PublicKey)
}
e.peerSequence = views.SliceOf(seq)
nm := e.netMap
e.mu.Unlock()
@@ -892,22 +1008,24 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
peerMTUEnable := e.magicConn.ShouldPMTUD()
isSubnetRouter := false
if e.birdClient != nil && nm != nil && nm.SelfNode.Valid() {
if buildfeatures.HasBird && e.birdClient != nil && nm != nil && nm.SelfNode.Valid() {
isSubnetRouter = hasOverlap(nm.SelfNode.PrimaryRoutes(), nm.SelfNode.Hostinfo().RoutableIPs())
e.logf("[v1] Reconfig: hasOverlap(%v, %v) = %v; isSubnetRouter=%v lastIsSubnetRouter=%v",
nm.SelfNode.PrimaryRoutes(), nm.SelfNode.Hostinfo().RoutableIPs(),
isSubnetRouter, isSubnetRouter, e.lastIsSubnetRouter)
}
isSubnetRouterChanged := isSubnetRouter != e.lastIsSubnetRouter
isSubnetRouterChanged := buildfeatures.HasAdvertiseRoutes && isSubnetRouter != e.lastIsSubnetRouter
engineChanged := checkchange.Update(&e.lastEngineFull, cfg)
routerChanged := checkchange.Update(&e.lastRouter, routerCfg)
dnsChanged := buildfeatures.HasDNS && !e.lastDNSConfig.Equal(dnsCfg.View())
if dnsChanged {
e.lastDNSConfig = dnsCfg.View()
}
engineChanged := deephash.Update(&e.lastEngineSigFull, cfg)
routerChanged := deephash.Update(&e.lastRouterSig, &struct {
RouterConfig *router.Config
DNSConfig *dns.Config
}{routerCfg, dnsCfg})
listenPortChanged := listenPort != e.magicConn.LocalPort()
peerMTUChanged := peerMTUEnable != e.magicConn.PeerMTUEnabled()
if !engineChanged && !routerChanged && !listenPortChanged && !isSubnetRouterChanged && !peerMTUChanged {
if !engineChanged && !routerChanged && !dnsChanged && !listenPortChanged && !isSubnetRouterChanged && !peerMTUChanged {
return ErrNoChanges
}
newLogIDs := cfg.NetworkLogging
@@ -916,7 +1034,7 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
netLogIDsWasValid := !oldLogIDs.NodeID.IsZero() && !oldLogIDs.DomainID.IsZero()
netLogIDsChanged := netLogIDsNowValid && netLogIDsWasValid && newLogIDs != oldLogIDs
netLogRunning := netLogIDsNowValid && !routerCfg.Equal(&router.Config{})
if envknob.NoLogsNoSupport() {
if !buildfeatures.HasNetLog || envknob.NoLogsNoSupport() {
netLogRunning = false
}
@@ -925,7 +1043,9 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
// instead have ipnlocal populate a map of DNS IP => linkName and
// put that in the *dns.Config instead, and plumb it down to the
// dns.Manager. Maybe also with isLocalAddr above.
e.isDNSIPOverTailscale.Store(ipset.NewContainsIPFunc(views.SliceOf(dnsIPsOverTailscale(dnsCfg, routerCfg))))
if buildfeatures.HasDNS {
e.isDNSIPOverTailscale.Store(ipset.NewContainsIPFunc(views.SliceOf(dnsIPsOverTailscale(dnsCfg, routerCfg))))
}
// See if any peers have changed disco keys, which means they've restarted.
// If so, we need to update the wireguard-go/device.Device in two phases:
@@ -971,7 +1091,7 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
// Shutdown the network logger because the IDs changed.
// Let it be started back up by subsequent logic.
if netLogIDsChanged && e.networkLogger.Running() {
if buildfeatures.HasNetLog && netLogIDsChanged && e.networkLogger.Running() {
e.logf("wgengine: Reconfig: shutting down network logger")
ctx, cancel := context.WithTimeout(context.Background(), networkLoggerUploadTimeout)
defer cancel()
@@ -982,12 +1102,12 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
// Startup the network logger.
// Do this before configuring the router so that we capture initial packets.
if netLogRunning && !e.networkLogger.Running() {
if buildfeatures.HasNetLog && netLogRunning && !e.networkLogger.Running() {
nid := cfg.NetworkLogging.NodeID
tid := cfg.NetworkLogging.DomainID
logExitFlowEnabled := cfg.NetworkLogging.LogExitFlowEnabled
e.logf("wgengine: Reconfig: starting up network logger (node:%s tailnet:%s)", nid.Public(), tid.Public())
if err := e.networkLogger.Startup(cfg.NodeID, nid, tid, e.tundev, e.magicConn, e.netMon, e.health, logExitFlowEnabled); err != nil {
if err := e.networkLogger.Startup(e.logf, nm, nid, tid, e.tundev, e.magicConn, e.netMon, e.health, e.eventBus, logExitFlowEnabled); err != nil {
e.logf("wgengine: Reconfig: error starting up network logger: %v", err)
}
e.networkLogger.ReconfigRoutes(routerCfg)
@@ -1001,11 +1121,30 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
if err != nil {
return err
}
}
// We've historically re-set DNS even after just a router change. While
// refactoring in tailscale/tailscale#17448 and and
// tailscale/tailscale#17499, I'm erring on the side of keeping that
// historical quirk for now (2025-10-08), lest it's load bearing in
// unexpected ways
//
// TODO(bradfitz): try to do the "configuring DNS" part below only if
// dnsChanged, not routerChanged. The "resolver.ShouldUseRoutes" part
// probably needs to keep happening for both.
if buildfeatures.HasDNS && (routerChanged || dnsChanged) {
if resolver.ShouldUseRoutes(e.controlKnobs) {
e.logf("wgengine: Reconfig: user dialer")
e.dialer.SetRoutes(routerCfg.Routes, routerCfg.LocalRoutes)
} else {
e.dialer.SetRoutes(nil, nil)
}
// Keep DNS configuration after router configuration, as some
// DNS managers refuse to apply settings if the device has no
// assigned address.
e.logf("wgengine: Reconfig: configuring DNS")
err = e.dns.Set(*dnsCfg)
err := e.dns.Set(*dnsCfg)
e.health.SetDNSHealth(err)
if err != nil {
return err
@@ -1027,7 +1166,7 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
}
}
if isSubnetRouterChanged && e.birdClient != nil {
if buildfeatures.HasBird && isSubnetRouterChanged && e.birdClient != nil {
e.logf("wgengine: Reconfig: configuring BIRD")
var err error
if isSubnetRouter {
@@ -1112,7 +1251,7 @@ func (e *userspaceEngine) getStatus() (*Status, error) {
e.mu.Lock()
closing := e.closing
peerKeys := slices.Clone(e.peerSequence)
peerKeys := e.peerSequence
localAddrs := slices.Clone(e.endpoints)
e.mu.Unlock()
@@ -1120,8 +1259,8 @@ func (e *userspaceEngine) getStatus() (*Status, error) {
return nil, ErrEngineClosing
}
peers := make([]ipnstate.PeerStatusLite, 0, len(peerKeys))
for _, key := range peerKeys {
peers := make([]ipnstate.PeerStatusLite, 0, peerKeys.Len())
for _, key := range peerKeys.All() {
if status, ok := e.getPeerStatusLite(key); ok {
peers = append(peers, status)
}
@@ -1170,6 +1309,10 @@ func (e *userspaceEngine) RequestStatus() {
}
func (e *userspaceEngine) Close() {
e.eventClient.Close()
// TODO(cmol): Should we wait for it too?
// Same question raised in appconnector.go.
e.linkChangeQueue.Shutdown()
e.mu.Lock()
if e.closing {
e.mu.Unlock()
@@ -1181,7 +1324,6 @@ func (e *userspaceEngine) Close() {
r := bufio.NewReader(strings.NewReader(""))
e.wgdev.IpcSetOperation(r)
e.magicConn.Close()
e.netMonUnregister()
if e.netMonOwned {
e.netMon.Close()
}
@@ -1207,20 +1349,18 @@ func (e *userspaceEngine) Done() <-chan struct{} {
}
func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) {
changed := delta.Major // TODO(bradfitz): ask more specific questions?
cur := delta.New
up := cur.AnyInterfaceUp()
up := delta.AnyInterfaceUp()
if !up {
e.logf("LinkChange: all links down; pausing: %v", cur)
} else if changed {
e.logf("LinkChange: major, rebinding. New state: %v", cur)
e.logf("LinkChange: all links down; pausing: %v", delta.StateDesc())
} else if delta.RebindLikelyRequired {
e.logf("LinkChange: major, rebinding: %v", delta.StateDesc())
} else {
e.logf("[v1] LinkChange: minor")
}
e.health.SetAnyInterfaceUp(up)
e.magicConn.SetNetworkUp(up)
if !up || changed {
if !up || delta.RebindLikelyRequired {
if err := e.dns.FlushCaches(); err != nil {
e.logf("wgengine: dns flush failed after major link change: %v", err)
}
@@ -1230,16 +1370,27 @@ func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) {
// suspend/resume or whenever NetworkManager is started, it
// nukes all systemd-resolved configs. So reapply our DNS
// config on major link change.
// TODO: explain why this is ncessary not just on Linux but also android
// and Apple platforms.
if changed {
//
// On Darwin (netext), we reapply the DNS config when the interface flaps
// because the change in interface can potentially change the nameservers
// for the forwarder. On Darwin netext clients, magicDNS is ~always the default
// resolver so having no nameserver to forward queries to (or one on a network we
// are not currently on) breaks DNS resolution system-wide. There are notable
// timing issues here with Darwin's network stack. It is not guaranteed that
// the forward resolver will be available immediately after the interface
// comes up. We leave it to the network extension to also poke magicDNS directly
// via [dns.Manager.RecompileDNSConfig] when it detects any change in the
// nameservers.
//
// TODO: On Android, Darwin-tailscaled, and openbsd, why do we need this?
if delta.RebindLikelyRequired && up {
switch runtime.GOOS {
case "linux", "android", "ios", "darwin", "openbsd":
e.wgLock.Lock()
dnsCfg := e.lastDNSConfig
e.wgLock.Unlock()
if dnsCfg != nil {
if err := e.dns.Set(*dnsCfg); err != nil {
if dnsCfg.Valid() {
if err := e.dns.Set(*dnsCfg.AsStruct()); err != nil {
e.logf("wgengine: error setting DNS config after major link change: %v", err)
} else if err := e.reconfigureVPNIfNecessary(); err != nil {
e.logf("wgengine: error reconfiguring VPN after major link change: %v", err)
@@ -1250,22 +1401,32 @@ func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) {
}
}
e.magicConn.SetNetworkUp(up)
why := "link-change-minor"
if changed {
if delta.RebindLikelyRequired {
why = "link-change-major"
metricNumMajorChanges.Add(1)
e.magicConn.Rebind()
} else {
metricNumMinorChanges.Add(1)
}
e.magicConn.ReSTUN(why)
// If we're up and it's a minor change, just send a STUN ping
if up {
if delta.RebindLikelyRequired {
e.magicConn.Rebind()
}
e.magicConn.ReSTUN(why)
}
}
func (e *userspaceEngine) SetNetworkMap(nm *netmap.NetworkMap) {
e.magicConn.SetNetworkMap(nm)
e.mu.Lock()
e.netMap = nm
e.mu.Unlock()
if e.networkLogger.Running() {
e.networkLogger.ReconfigNetworkMap(nm)
}
}
func (e *userspaceEngine) UpdateStatus(sb *ipnstate.StatusBuilder) {
@@ -1311,6 +1472,7 @@ func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, size in
e.magicConn.Ping(peer, res, size, cb)
case "TSMP":
e.sendTSMPPing(ip, peer, res, cb)
e.sendTSMPDiscoAdvertisement(ip)
case "ICMP":
e.sendICMPEchoRequest(ip, peer, res, cb)
}
@@ -1431,6 +1593,29 @@ func (e *userspaceEngine) sendTSMPPing(ip netip.Addr, peer tailcfg.NodeView, res
e.tundev.InjectOutbound(tsmpPing)
}
// sendTSMPDiscoAdvertisement injects an outbound TSMP packet that
// advertises this node's current disco public key toward ip, using a
// self IP of the matching address family as the source. Outcomes are
// counted via the TSMP disco-key-advertisement client metrics.
func (e *userspaceEngine) sendTSMPDiscoAdvertisement(ip netip.Addr) {
	srcIP, err := e.mySelfIPMatchingFamily(ip)
	if err != nil {
		e.logf("getting matching node: %s", err)
		return
	}
	ad := packet.TSMPDiscoKeyAdvertisement{
		Src: srcIP,
		Dst: ip,
		Key: e.magicConn.DiscoPublicKey(),
	}
	payload, err := ad.Marshal()
	if err != nil {
		e.logf("error generating TSMP Advertisement: %s", err)
		metricTSMPDiscoKeyAdvertisementError.Add(1)
		return
	}
	if err := e.tundev.InjectOutbound(payload); err != nil {
		e.logf("error sending TSMP Advertisement: %s", err)
		metricTSMPDiscoKeyAdvertisementError.Add(1)
		return
	}
	metricTSMPDiscoKeyAdvertisementSent.Add(1)
}
func (e *userspaceEngine) setTSMPPongCallback(data [8]byte, cb func(packet.TSMPPongReply)) {
e.mu.Lock()
defer e.mu.Unlock()
@@ -1580,6 +1765,12 @@ type fwdDNSLinkSelector struct {
}
func (ls fwdDNSLinkSelector) PickLink(ip netip.Addr) (linkName string) {
// sandboxed macOS does not automatically bind to the loopback interface so
// we must be explicit about it.
if runtime.GOOS == "darwin" && ip.IsLoopback() {
return "lo0"
}
if ls.ue.isDNSIPOverTailscale.Load()(ip) {
return ls.tunName
}
@@ -1591,9 +1782,15 @@ var (
metricNumMajorChanges = clientmetric.NewCounter("wgengine_major_changes")
metricNumMinorChanges = clientmetric.NewCounter("wgengine_minor_changes")
metricTSMPDiscoKeyAdvertisementSent = clientmetric.NewCounter("magicsock_tsmp_disco_key_advertisement_sent")
metricTSMPDiscoKeyAdvertisementError = clientmetric.NewCounter("magicsock_tsmp_disco_key_advertisement_error")
)
// InstallCaptureHook registers cb as the packet-capture callback on both
// the TUN device wrapper and magicsock. It is a no-op in builds compiled
// without the capture feature.
func (e *userspaceEngine) InstallCaptureHook(cb packet.CaptureCallback) {
	if !buildfeatures.HasCapture {
		// Capture support was compiled out; nothing to install.
		return
	}
	e.tundev.InstallCaptureHook(cb)
	e.magicConn.InstallCaptureHook(cb)
}

View File

@@ -1,7 +1,7 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !js
//go:build !js && !ts_omit_debug
package wgengine
@@ -15,6 +15,7 @@ import (
"time"
"tailscale.com/envknob"
"tailscale.com/feature/buildfeatures"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/dns"
"tailscale.com/net/packet"
@@ -123,6 +124,12 @@ func (e *watchdogEngine) watchdog(name string, fn func()) {
func (e *watchdogEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, dnsCfg *dns.Config) error {
return e.watchdogErr("Reconfig", func() error { return e.wrap.Reconfig(cfg, routerCfg, dnsCfg) })
}
// ResetAndStop invokes the wrapped engine's ResetAndStop under the
// watchdog timer and returns its result.
func (e *watchdogEngine) ResetAndStop() (*Status, error) {
	var (
		st  *Status
		err error
	)
	e.watchdog("ResetAndStop", func() { st, err = e.wrap.ResetAndStop() })
	return st, err
}
func (e *watchdogEngine) GetFilter() *filter.Filter {
return e.wrap.GetFilter()
}
@@ -163,6 +170,9 @@ func (e *watchdogEngine) Done() <-chan struct{} {
}
// InstallCaptureHook forwards cb to the wrapped engine. It is a no-op in
// builds compiled without the capture feature.
func (e *watchdogEngine) InstallCaptureHook(cb packet.CaptureCallback) {
	if !buildfeatures.HasCapture {
		// Capture support was compiled out; nothing to forward.
		return
	}
	e.wrap.InstallCaptureHook(cb)
}

View File

@@ -1,17 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build js
package wgengine
import "tailscale.com/net/dns/resolver"
// watchdogEngine is the js build's stand-in for the watchdog wrapper:
// it embeds and wraps an Engine without adding any watchdog timers.
type watchdogEngine struct {
	Engine
	wrap Engine
}

// GetResolver reports that no DNS resolver is available in the js build.
func (e *watchdogEngine) GetResolver() (r *resolver.Resolver, ok bool) {
	return nil, false
}

8
vendor/tailscale.com/wgengine/watchdog_omit.go generated vendored Normal file
View File

@@ -0,0 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build js || ts_omit_debug
package wgengine
// NewWatchdog returns e unchanged: the watchdog wrapper is omitted in
// this build (js or ts_omit_debug, per the file's build constraint).
func NewWatchdog(e Engine) Engine { return e }

View File

@@ -6,8 +6,8 @@ package wgcfg
import (
"net/netip"
"slices"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/types/logid"
)
@@ -17,8 +17,6 @@ import (
// Config is a WireGuard configuration.
// It only supports the set of things Tailscale uses.
type Config struct {
Name string
NodeID tailcfg.StableNodeID
PrivateKey key.NodePrivate
Addresses []netip.Prefix
MTU uint16
@@ -35,6 +33,18 @@ type Config struct {
}
}
// Equal reports whether c and o describe the same WireGuard
// configuration: same private key, MTU, network-logging settings,
// addresses, DNS servers, and peers. Two nil configs are equal; a nil
// and a non-nil config are not.
func (c *Config) Equal(o *Config) bool {
	if c == nil || o == nil {
		return c == o
	}
	switch {
	case !c.PrivateKey.Equal(o.PrivateKey):
		return false
	case c.MTU != o.MTU:
		return false
	case c.NetworkLogging != o.NetworkLogging:
		return false
	case !slices.Equal(c.Addresses, o.Addresses):
		return false
	case !slices.Equal(c.DNS, o.DNS):
		return false
	}
	return slices.EqualFunc(c.Peers, o.Peers, Peer.Equal)
}
type Peer struct {
PublicKey key.NodePublic
DiscoKey key.DiscoPublic // present only so we can handle restarts within wgengine, not passed to WireGuard
@@ -50,6 +60,24 @@ type Peer struct {
WGEndpoint key.NodePublic
}
func addrPtrEq(a, b *netip.Addr) bool {
if a == nil || b == nil {
return a == b
}
return *a == *b
}
// Equal reports whether p and o are equivalent peer configurations,
// comparing keys, allowed IPs, jail state, keepalive, masquerade
// addresses, and the WireGuard endpoint key.
func (p Peer) Equal(o Peer) bool {
	if p.PublicKey != o.PublicKey || p.DiscoKey != o.DiscoKey {
		return false
	}
	if !slices.Equal(p.AllowedIPs, o.AllowedIPs) {
		return false
	}
	if p.IsJailed != o.IsJailed || p.PersistentKeepalive != o.PersistentKeepalive {
		return false
	}
	if !addrPtrEq(p.V4MasqAddr, o.V4MasqAddr) || !addrPtrEq(p.V6MasqAddr, o.V6MasqAddr) {
		return false
	}
	return p.WGEndpoint == o.WGEndpoint
}
// PeerWithKey returns the Peer with key k and reports whether it was found.
func (config Config) PeerWithKey(k key.NodePublic) (Peer, bool) {
for _, p := range config.Peers {

View File

@@ -4,6 +4,7 @@
package wgcfg
import (
"errors"
"io"
"sort"
@@ -11,7 +12,6 @@ import (
"github.com/tailscale/wireguard-go/device"
"github.com/tailscale/wireguard-go/tun"
"tailscale.com/types/logger"
"tailscale.com/util/multierr"
)
// NewDevice returns a wireguard-go Device configured for Tailscale use.
@@ -31,7 +31,7 @@ func DeviceConfig(d *device.Device) (*Config, error) {
cfg, fromErr := FromUAPI(r)
r.Close()
getErr := <-errc
err := multierr.New(getErr, fromErr)
err := errors.Join(getErr, fromErr)
if err != nil {
return nil, err
}
@@ -64,5 +64,5 @@ func ReconfigDevice(d *device.Device, cfg *Config, logf logger.Logf) (err error)
toErr := cfg.ToUAPI(logf, w, prev)
w.Close()
setErr := <-errc
return multierr.New(setErr, toErr)
return errors.Join(setErr, toErr)
}

View File

@@ -5,12 +5,15 @@
package nmcfg
import (
"bytes"
"bufio"
"cmp"
"fmt"
"net/netip"
"strings"
"tailscale.com/net/tsaddr"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/types/logid"
"tailscale.com/types/netmap"
@@ -18,16 +21,7 @@ import (
)
func nodeDebugName(n tailcfg.NodeView) string {
name := n.Name()
if name == "" {
name = n.Hostinfo().Hostname()
}
if i := strings.Index(name, "."); i != -1 {
name = name[:i]
}
if name == "" && n.Addresses().Len() != 0 {
return n.Addresses().At(0).String()
}
name, _, _ := strings.Cut(cmp.Or(n.Name(), n.Hostinfo().Hostname()), ".")
return name
}
@@ -40,6 +34,9 @@ func cidrIsSubnet(node tailcfg.NodeView, cidr netip.Prefix) bool {
if !cidr.IsSingleIP() {
return true
}
if tsaddr.IsTailscaleIP(cidr.Addr()) {
return false
}
for _, selfCIDR := range node.Addresses().All() {
if cidr == selfCIDR {
return false
@@ -49,17 +46,15 @@ func cidrIsSubnet(node tailcfg.NodeView, cidr netip.Prefix) bool {
}
// WGCfg returns the NetworkMaps's WireGuard configuration.
func WGCfg(nm *netmap.NetworkMap, logf logger.Logf, flags netmap.WGConfigFlags, exitNode tailcfg.StableNodeID) (*wgcfg.Config, error) {
func WGCfg(pk key.NodePrivate, nm *netmap.NetworkMap, logf logger.Logf, flags netmap.WGConfigFlags, exitNode tailcfg.StableNodeID) (*wgcfg.Config, error) {
cfg := &wgcfg.Config{
Name: "tailscale",
PrivateKey: nm.PrivateKey,
PrivateKey: pk,
Addresses: nm.GetAddresses().AsSlice(),
Peers: make([]wgcfg.Peer, 0, len(nm.Peers)),
}
// Setup log IDs for data plane audit logging.
if nm.SelfNode.Valid() {
cfg.NodeID = nm.SelfNode.StableID()
canNetworkLog := nm.SelfNode.HasCap(tailcfg.CapabilityDataPlaneAuditLogs)
logExitFlowEnabled := nm.SelfNode.HasCap(tailcfg.NodeAttrLogExitFlows)
if canNetworkLog && nm.SelfNode.DataPlaneAuditLogID() != "" && nm.DomainAuditLogID != "" {
@@ -79,10 +74,7 @@ func WGCfg(nm *netmap.NetworkMap, logf logger.Logf, flags netmap.WGConfigFlags,
}
}
// Logging buffers
skippedUnselected := new(bytes.Buffer)
skippedSubnets := new(bytes.Buffer)
skippedExpired := new(bytes.Buffer)
var skippedExitNode, skippedSubnetRouter, skippedExpired []tailcfg.NodeView
for _, peer := range nm.Peers {
if peer.DiscoKey().IsZero() && peer.HomeDERP() == 0 && !peer.IsWireGuardOnly() {
@@ -95,16 +87,7 @@ func WGCfg(nm *netmap.NetworkMap, logf logger.Logf, flags netmap.WGConfigFlags,
// anyway, since control intentionally breaks node keys for
// expired peers so that we can't discover endpoints via DERP.
if peer.Expired() {
if skippedExpired.Len() >= 1<<10 {
if !bytes.HasSuffix(skippedExpired.Bytes(), []byte("...")) {
skippedExpired.WriteString("...")
}
} else {
if skippedExpired.Len() > 0 {
skippedExpired.WriteString(", ")
}
fmt.Fprintf(skippedExpired, "%s/%v", peer.StableID(), peer.Key().ShortString())
}
skippedExpired = append(skippedExpired, peer)
continue
}
@@ -114,28 +97,22 @@ func WGCfg(nm *netmap.NetworkMap, logf logger.Logf, flags netmap.WGConfigFlags,
})
cpeer := &cfg.Peers[len(cfg.Peers)-1]
didExitNodeWarn := false
didExitNodeLog := false
cpeer.V4MasqAddr = peer.SelfNodeV4MasqAddrForThisPeer().Clone()
cpeer.V6MasqAddr = peer.SelfNodeV6MasqAddrForThisPeer().Clone()
cpeer.IsJailed = peer.IsJailed()
for _, allowedIP := range peer.AllowedIPs().All() {
if allowedIP.Bits() == 0 && peer.StableID() != exitNode {
if didExitNodeWarn {
if didExitNodeLog {
// Don't log about both the IPv4 /0 and IPv6 /0.
continue
}
didExitNodeWarn = true
if skippedUnselected.Len() > 0 {
skippedUnselected.WriteString(", ")
}
fmt.Fprintf(skippedUnselected, "%q (%v)", nodeDebugName(peer), peer.Key().ShortString())
didExitNodeLog = true
skippedExitNode = append(skippedExitNode, peer)
continue
} else if cidrIsSubnet(peer, allowedIP) {
if (flags & netmap.AllowSubnetRoutes) == 0 {
if skippedSubnets.Len() > 0 {
skippedSubnets.WriteString(", ")
}
fmt.Fprintf(skippedSubnets, "%v from %q (%v)", allowedIP, nodeDebugName(peer), peer.Key().ShortString())
skippedSubnetRouter = append(skippedSubnetRouter, peer)
continue
}
}
@@ -143,14 +120,27 @@ func WGCfg(nm *netmap.NetworkMap, logf logger.Logf, flags netmap.WGConfigFlags,
}
}
if skippedUnselected.Len() > 0 {
logf("[v1] wgcfg: skipped unselected default routes from: %s", skippedUnselected.Bytes())
}
if skippedSubnets.Len() > 0 {
logf("[v1] wgcfg: did not accept subnet routes: %s", skippedSubnets)
}
if skippedExpired.Len() > 0 {
logf("[v1] wgcfg: skipped expired peer: %s", skippedExpired)
logList := func(title string, nodes []tailcfg.NodeView) {
if len(nodes) == 0 {
return
}
logf("[v1] wgcfg: %s from %d nodes: %s", title, len(nodes), logger.ArgWriter(func(bw *bufio.Writer) {
const max = 5
for i, n := range nodes {
if i == max {
fmt.Fprintf(bw, "... +%d", len(nodes)-max)
return
}
if i > 0 {
bw.WriteString(", ")
}
fmt.Fprintf(bw, "%s (%s)", nodeDebugName(n), n.StableID())
}
}))
}
logList("skipped unselected exit nodes", skippedExitNode)
logList("did not accept subnet routes", skippedSubnetRouter)
logList("skipped expired peers", skippedExpired)
return cfg, nil
}

View File

@@ -8,7 +8,6 @@ package wgcfg
import (
"net/netip"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/types/logid"
"tailscale.com/types/ptr"
@@ -35,8 +34,6 @@ func (src *Config) Clone() *Config {
// A compilation failure here means this code must be regenerated, with the command at the top of this file.
var _ConfigCloneNeedsRegeneration = Config(struct {
Name string
NodeID tailcfg.StableNodeID
PrivateKey key.NodePrivate
Addresses []netip.Prefix
MTU uint16

View File

@@ -69,6 +69,13 @@ type Engine interface {
// The returned error is ErrNoChanges if no changes were made.
Reconfig(*wgcfg.Config, *router.Config, *dns.Config) error
// ResetAndStop resets the engine to a clean state (like calling Reconfig
// with all pointers to zero values) and waits for it to be fully stopped,
// with no live peers or DERPs.
//
// Unlike Reconfig, it does not return ErrNoChanges.
ResetAndStop() (*Status, error)
// PeerForIP returns the node to which the provided IP routes,
// if any. If none is found, (nil, false) is returned.
PeerForIP(netip.Addr) (_ PeerForIP, ok bool)

View File

@@ -1,192 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build windows
// Package winnet contains Windows-specific networking code.
package winnet
import (
"fmt"
"syscall"
"unsafe"
"github.com/go-ole/go-ole"
"github.com/go-ole/go-ole/oleutil"
)
// CLSID_NetworkListManager is the COM class ID of the Windows
// Network List Manager (NLM) object.
const CLSID_NetworkListManager = "{DCB00C01-570F-4A9B-8D69-199FDBA5723B}"

// IID_INetwork is the COM interface ID for INetwork.
var IID_INetwork = ole.NewGUID("{8A40A45D-055C-4B62-ABD7-6D613E2CEAEC}")

// IID_INetworkConnection is the COM interface ID for INetworkConnection.
var IID_INetworkConnection = ole.NewGUID("{DCB00005-570F-4A9B-8D69-199FDBA5723B}")

// NetworkListManager wraps the IDispatch interface of the COM
// Network List Manager object.
type NetworkListManager struct {
d *ole.Dispatch
}

// INetworkConnection wraps the COM INetworkConnection interface.
type INetworkConnection struct {
ole.IDispatch
}

// ConnectionList is a list of network connections, as returned by
// NetworkListManager.GetNetworkConnections.
type ConnectionList []*INetworkConnection

// INetworkConnectionVtbl is the virtual method table layout of
// INetworkConnection. Slot order must match the COM interface
// definition exactly; slots are invoked by address via syscall.
type INetworkConnectionVtbl struct {
ole.IDispatchVtbl
GetNetwork uintptr
Get_IsConnectedToInternet uintptr
Get_IsConnected uintptr
GetConnectivity uintptr
GetConnectionId uintptr
GetAdapterId uintptr
GetDomainType uintptr
}

// INetwork wraps the COM INetwork interface.
type INetwork struct {
ole.IDispatch
}

// INetworkVtbl is the virtual method table layout of INetwork.
// Slot order must match the COM interface definition exactly;
// slots are invoked by address via syscall.
type INetworkVtbl struct {
ole.IDispatchVtbl
GetName uintptr
SetName uintptr
GetDescription uintptr
SetDescription uintptr
GetNetworkId uintptr
GetDomainType uintptr
GetNetworkConnections uintptr
GetTimeCreatedAndConnected uintptr
Get_IsConnectedToInternet uintptr
Get_IsConnected uintptr
GetConnectivity uintptr
GetCategory uintptr
SetCategory uintptr
}
// NewNetworkListManager instantiates the COM Network List Manager object
// on the provided connection and returns a wrapper around its IDispatch
// interface. The connection itself is released before returning.
func NewNetworkListManager(c *ole.Connection) (*NetworkListManager, error) {
	if err := c.Create(CLSID_NetworkListManager); err != nil {
		return nil, err
	}
	defer c.Release()

	d, err := c.Dispatch()
	if err != nil {
		return nil, err
	}
	return &NetworkListManager{d: d}, nil
}
// Release releases the underlying COM dispatch interface held by m.
// m must not be used after Release returns.
func (m *NetworkListManager) Release() {
m.d.Release()
}
// Release releases every connection in the list.
func (cl ConnectionList) Release() {
	for i := range cl {
		cl[i].Release()
	}
}
// asIID queries u for the interface identified by iid, releasing u
// regardless of whether the query succeeds.
func asIID(u ole.UnknownLike, iid *ole.GUID) (*ole.IDispatch, error) {
	if u == nil {
		return nil, fmt.Errorf("asIID: nil UnknownLike")
	}
	defer u.Release()
	return u.QueryInterface(iid)
}
// GetNetworkConnections returns the network connections known to the
// Network List Manager. The caller is responsible for calling Release
// on the returned list.
func (m *NetworkListManager) GetNetworkConnections() (ConnectionList, error) {
	raw, err := m.d.Call("GetNetworkConnections")
	if err != nil {
		return nil, err
	}
	disp := raw.ToIDispatch()
	if disp == nil {
		return nil, fmt.Errorf("GetNetworkConnections: not IDispatch")
	}

	conns := ConnectionList{}
	if err := oleutil.ForEach(disp, func(v *ole.VARIANT) error {
		d, err := asIID(v.ToIUnknown(), IID_INetworkConnection)
		if err != nil {
			return err
		}
		// The IDispatch we got back is laid out as an INetworkConnection.
		conns = append(conns, (*INetworkConnection)(unsafe.Pointer(d)))
		return nil
	}); err != nil {
		// Don't leak the connections collected before the failure.
		conns.Release()
		return nil, err
	}
	return conns, nil
}
// GetName returns the name of the network by invoking the COM
// GetName method through the dispatch interface.
func (n *INetwork) GetName() (string, error) {
	v, err := n.CallMethod("GetName")
	if err != nil {
		return "", err
	}
	// Return an explicit nil rather than the stale err variable,
	// which is always nil on this path.
	return v.ToString(), nil
}
// GetCategory returns the network's category by calling the raw
// INetwork::GetCategory vtable slot directly.
func (n *INetwork) GetCategory() (int32, error) {
	var cat int32
	hr, _, _ := syscall.SyscallN(
		n.VTable().GetCategory,
		uintptr(unsafe.Pointer(n)),
		uintptr(unsafe.Pointer(&cat)),
	)
	// A negative HRESULT indicates failure.
	if int32(hr) < 0 {
		return 0, ole.NewError(hr)
	}
	return cat, nil
}
// SetCategory sets the network's category by calling the raw
// INetwork::SetCategory vtable slot directly.
func (n *INetwork) SetCategory(category int32) error {
	hr, _, _ := syscall.SyscallN(
		n.VTable().SetCategory,
		uintptr(unsafe.Pointer(n)),
		uintptr(category),
	)
	// A negative HRESULT indicates failure.
	if int32(hr) < 0 {
		return ole.NewError(hr)
	}
	return nil
}
// VTable returns the COM virtual method table of n, reinterpreted with
// the INetworkVtbl layout so individual slots can be called via syscall.
func (n *INetwork) VTable() *INetworkVtbl {
return (*INetworkVtbl)(unsafe.Pointer(n.RawVTable))
}
// VTable returns the COM virtual method table of v, reinterpreted with
// the INetworkConnectionVtbl layout so individual slots can be called
// via syscall.
func (v *INetworkConnection) VTable() *INetworkConnectionVtbl {
return (*INetworkConnectionVtbl)(unsafe.Pointer(v.RawVTable))
}
// GetNetwork returns the network associated with this connection by
// calling the raw INetworkConnection::GetNetwork vtable slot directly.
func (v *INetworkConnection) GetNetwork() (*INetwork, error) {
	var network *INetwork
	hr, _, _ := syscall.SyscallN(
		v.VTable().GetNetwork,
		uintptr(unsafe.Pointer(v)),
		uintptr(unsafe.Pointer(&network)),
	)
	// A negative HRESULT indicates failure.
	if int32(hr) < 0 {
		return nil, ole.NewError(hr)
	}
	return network, nil
}

View File

@@ -1,26 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package winnet
import (
"fmt"
"syscall"
"unsafe"
"github.com/go-ole/go-ole"
)
// GetAdapterId returns the GUID of the network adapter backing this
// connection, formatted as a string, by calling the raw
// INetworkConnection::GetAdapterId vtable slot directly.
func (v *INetworkConnection) GetAdapterId() (string, error) {
	buf := ole.GUID{}
	// Use SyscallN (as the sibling vtable calls do) instead of the
	// deprecated fixed-arity syscall.Syscall with a padded trailing 0.
	hr, _, _ := syscall.SyscallN(
		v.VTable().GetAdapterId,
		uintptr(unsafe.Pointer(v)),
		uintptr(unsafe.Pointer(&buf)),
	)
	// Preserve the original strict check: anything other than S_OK (0)
	// is treated as failure here.
	if hr != 0 {
		return "", fmt.Errorf("GetAdapterId failed: %08x", hr)
	}
	return buf.String(), nil
}