This commit is contained in:
2026-02-19 10:07:43 +00:00
parent 007438e372
commit 6e637ecf77
1763 changed files with 60820 additions and 279516 deletions

View File

@@ -20,37 +20,37 @@
package controlhttp
import (
"cmp"
"context"
"crypto/tls"
"encoding/base64"
"errors"
"fmt"
"io"
"math"
"net"
"net/http"
"net/http/httptrace"
"net/netip"
"net/url"
"runtime"
"sort"
"sync/atomic"
"time"
"tailscale.com/control/controlbase"
"tailscale.com/control/controlhttp/controlhttpcommon"
"tailscale.com/envknob"
"tailscale.com/feature"
"tailscale.com/feature/buildfeatures"
"tailscale.com/health"
"tailscale.com/net/dnscache"
"tailscale.com/net/dnsfallback"
"tailscale.com/net/netutil"
"tailscale.com/net/netx"
"tailscale.com/net/sockstats"
"tailscale.com/net/tlsdial"
"tailscale.com/net/tshttpproxy"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/tstime"
"tailscale.com/util/multierr"
)
var stdDialer net.Dialer
@@ -81,7 +81,7 @@ func (a *Dialer) getProxyFunc() func(*http.Request) (*url.URL, error) {
if a.proxyFunc != nil {
return a.proxyFunc
}
return tshttpproxy.ProxyFromEnvironment
return feature.HookProxyFromEnvironment.GetOrNil()
}
// httpsFallbackDelay is how long we'll wait for a.HTTPPort to work before
@@ -103,157 +103,71 @@ func (a *Dialer) dial(ctx context.Context) (*ClientConn, error) {
// host we know about.
useDialPlan := envknob.BoolDefaultTrue("TS_USE_CONTROL_DIAL_PLAN")
if !useDialPlan || a.DialPlan == nil || len(a.DialPlan.Candidates) == 0 {
return a.dialHost(ctx, netip.Addr{})
return a.dialHost(ctx)
}
candidates := a.DialPlan.Candidates
// Otherwise, we try dialing per the plan. Store the highest priority
// in the list, so that if we get a connection to one of those
// candidates we can return quickly.
var highestPriority int = math.MinInt
for _, c := range candidates {
if c.Priority > highestPriority {
highestPriority = c.Priority
}
}
// This context allows us to cancel in-flight connections if we get a
// highest-priority connection before we're all done.
// Create a context to be canceled as we return, so once we get a good connection,
// we can drop all the other ones.
ctx, cancel := context.WithCancel(ctx)
defer cancel()
// Now, for each candidate, kick off a dial in parallel.
type dialResult struct {
conn *ClientConn
err error
addr netip.Addr
priority int
}
resultsCh := make(chan dialResult, len(candidates))
var pending atomic.Int32
pending.Store(int32(len(candidates)))
for _, c := range candidates {
go func(ctx context.Context, c tailcfg.ControlIPCandidate) {
var (
conn *ClientConn
err error
)
// Always send results back to our channel.
defer func() {
resultsCh <- dialResult{conn, err, c.IP, c.Priority}
if pending.Add(-1) == 0 {
close(resultsCh)
}
}()
// If non-zero, wait the configured start timeout
// before we do anything.
if c.DialStartDelaySec > 0 {
a.logf("[v2] controlhttp: waiting %.2f seconds before dialing %q @ %v", c.DialStartDelaySec, a.Hostname, c.IP)
tmr, tmrChannel := a.clock().NewTimer(time.Duration(c.DialStartDelaySec * float64(time.Second)))
defer tmr.Stop()
select {
case <-ctx.Done():
err = ctx.Err()
return
case <-tmrChannel:
}
}
// Now, create a sub-context with the given timeout and
// try dialing the provided host.
ctx, cancel := context.WithTimeout(ctx, time.Duration(c.DialTimeoutSec*float64(time.Second)))
defer cancel()
// This will dial, and the defer above sends it back to our parent.
a.logf("[v2] controlhttp: trying to dial %q @ %v", a.Hostname, c.IP)
conn, err = a.dialHost(ctx, c.IP)
}(ctx, c)
}
var results []dialResult
for res := range resultsCh {
// If we get a response that has the highest priority, we don't
// need to wait for any of the other connections to finish; we
// can just return this connection.
//
// TODO(andrew): we could make this better by keeping track of
// the highest remaining priority dynamically, instead of just
// checking for the highest total
if res.priority == highestPriority && res.conn != nil {
a.logf("[v1] controlhttp: high-priority success dialing %q @ %v from dial plan", a.Hostname, res.addr)
// Drain the channel and any existing connections in
// the background.
go func() {
for _, res := range results {
if res.conn != nil {
res.conn.Close()
}
}
for res := range resultsCh {
if res.conn != nil {
res.conn.Close()
}
}
if a.drainFinished != nil {
close(a.drainFinished)
}
}()
return res.conn, nil
}
// This isn't a highest-priority result, so just store it until
// we're done.
results = append(results, res)
}
// After we finish this function, close any remaining open connections.
defer func() {
for _, result := range results {
// Note: below, we nil out the returned connection (if
// any) in the slice so we don't close it.
if result.conn != nil {
result.conn.Close()
}
}
// We don't drain asynchronously after this point, so notify our
// channel when we return.
if a.drainFinished != nil {
close(a.drainFinished)
}
}()
// Sort by priority, then take the first non-error response.
sort.Slice(results, func(i, j int) bool {
// NOTE: intentionally inverted so that the highest priority
// item comes first
return results[i].priority > results[j].priority
})
var (
conn *ClientConn
errs []error
)
for i, result := range results {
if result.err != nil {
errs = append(errs, result.err)
continue
err error
}
resultsCh := make(chan dialResult) // unbuffered, never closed
dialCand := func(cand tailcfg.ControlIPCandidate) (*ClientConn, error) {
if cand.ACEHost != "" {
a.logf("[v2] controlhttp: waited %.2f seconds, dialing %q via ACE %s (%s)", cand.DialStartDelaySec, a.Hostname, cand.ACEHost, cmp.Or(cand.IP.String(), "dns"))
} else {
a.logf("[v2] controlhttp: waited %.2f seconds, dialing %q @ %s", cand.DialStartDelaySec, a.Hostname, cand.IP.String())
}
a.logf("[v1] controlhttp: succeeded dialing %q @ %v from dial plan", a.Hostname, result.addr)
conn = result.conn
results[i].conn = nil // so we don't close it in the defer
return conn, nil
ctx, cancel := context.WithTimeout(ctx, time.Duration(cand.DialTimeoutSec*float64(time.Second)))
defer cancel()
return a.dialHostOpt(ctx, cand.IP, cand.ACEHost)
}
merr := multierr.New(errs...)
// If we get here, then we didn't get anywhere with our dial plan; fall back to just using DNS.
a.logf("controlhttp: failed dialing using DialPlan, falling back to DNS; errs=%s", merr.Error())
return a.dialHost(ctx, netip.Addr{})
for _, cand := range candidates {
timer := time.AfterFunc(time.Duration(cand.DialStartDelaySec*float64(time.Second)), func() {
go func() {
conn, err := dialCand(cand)
select {
case resultsCh <- dialResult{conn, err}:
if err == nil {
a.logf("[v1] controlhttp: succeeded dialing %q @ %v from dial plan", a.Hostname, cmp.Or(cand.ACEHost, cand.IP.String()))
}
case <-ctx.Done():
if conn != nil {
conn.Close()
}
}
}()
})
defer timer.Stop()
}
var errs []error
for {
select {
case res := <-resultsCh:
if res.err == nil {
return res.conn, nil
}
errs = append(errs, res.err)
if len(errs) == len(candidates) {
// If we get here, then we didn't get anywhere with our dial plan; fall back to just using DNS.
a.logf("controlhttp: failed dialing using DialPlan, falling back to DNS; errs=%s", errors.Join(errs...))
return a.dialHost(ctx)
}
case <-ctx.Done():
a.logf("controlhttp: context aborted dialing")
return nil, ctx.Err()
}
}
}
// The TS_FORCE_NOISE_443 envknob forces the controlclient noise dialer to
@@ -270,6 +184,15 @@ var forceNoise443 = envknob.RegisterBool("TS_FORCE_NOISE_443")
// use HTTPS connections as its underlay connection (double crypto). This can
// be necessary when networks or middle boxes are messing with port 80.
func (d *Dialer) forceNoise443() bool {
if runtime.GOOS == "plan9" {
// For running demos of Plan 9 in a browser with network relays,
// we want to minimize the number of connections we're making.
// The main reason to use port 80 is to avoid double crypto
// costs server-side but the costs are tiny and number of Plan 9
// users doesn't make it worth it. Just disable this and always use
// HTTPS for Plan 9. That also reduces some log spam.
return true
}
if forceNoise443() {
return true
}
@@ -301,10 +224,19 @@ var debugNoiseDial = envknob.RegisterBool("TS_DEBUG_NOISE_DIAL")
// dialHost connects to the configured Dialer.Hostname and upgrades the
// connection into a controlbase.Conn.
func (a *Dialer) dialHost(ctx context.Context) (*ClientConn, error) {
return a.dialHostOpt(ctx,
netip.Addr{}, // no pre-resolved IP
"", // don't use ACE
)
}
// dialHostOpt connects to the configured Dialer.Hostname and upgrades the
// connection into a controlbase.Conn.
//
// If optAddr is valid, then no DNS is used and the connection will be made to the
// provided address.
func (a *Dialer) dialHost(ctx context.Context, optAddr netip.Addr) (*ClientConn, error) {
func (a *Dialer) dialHostOpt(ctx context.Context, optAddr netip.Addr, optACEHost string) (*ClientConn, error) {
// Create one shared context used by both port 80 and port 443 dials.
// If port 80 is still in flight when 443 returns, this deferred cancel
// will stop the port 80 dial.
@@ -326,7 +258,7 @@ func (a *Dialer) dialHost(ctx context.Context, optAddr netip.Addr) (*ClientConn,
Host: net.JoinHostPort(a.Hostname, strDef(a.HTTPSPort, "443")),
Path: serverUpgradePath,
}
if a.HTTPSPort == NoPort {
if a.HTTPSPort == NoPort || optACEHost != "" {
u443 = nil
}
@@ -338,11 +270,11 @@ func (a *Dialer) dialHost(ctx context.Context, optAddr netip.Addr) (*ClientConn,
ch := make(chan tryURLRes) // must be unbuffered
try := func(u *url.URL) {
if debugNoiseDial() {
a.logf("trying noise dial (%v, %v) ...", u, optAddr)
a.logf("trying noise dial (%v, %v) ...", u, cmp.Or(optACEHost, optAddr.String()))
}
cbConn, err := a.dialURL(ctx, u, optAddr)
cbConn, err := a.dialURL(ctx, u, optAddr, optACEHost)
if debugNoiseDial() {
a.logf("noise dial (%v, %v) = (%v, %v)", u, optAddr, cbConn, err)
a.logf("noise dial (%v, %v) = (%v, %v)", u, cmp.Or(optACEHost, optAddr.String()), cbConn, err)
}
select {
case ch <- tryURLRes{u, cbConn, err}:
@@ -373,6 +305,9 @@ func (a *Dialer) dialHost(ctx context.Context, optAddr netip.Addr) (*ClientConn,
}
var err80, err443 error
if forceTLS {
err80 = errors.New("TLS forced: no port 80 dialed")
}
for {
select {
case <-ctx.Done():
@@ -408,12 +343,12 @@ func (a *Dialer) dialHost(ctx context.Context, optAddr netip.Addr) (*ClientConn,
//
// If optAddr is valid, then no DNS is used and the connection will be made to the
// provided address.
func (a *Dialer) dialURL(ctx context.Context, u *url.URL, optAddr netip.Addr) (*ClientConn, error) {
func (a *Dialer) dialURL(ctx context.Context, u *url.URL, optAddr netip.Addr, optACEHost string) (*ClientConn, error) {
init, cont, err := controlbase.ClientDeferred(a.MachineKey, a.ControlKey, a.ProtocolVersion)
if err != nil {
return nil, err
}
netConn, err := a.tryURLUpgrade(ctx, u, optAddr, init)
netConn, err := a.tryURLUpgrade(ctx, u, optAddr, optACEHost, init)
if err != nil {
return nil, err
}
@@ -459,13 +394,15 @@ var macOSScreenTime = health.Register(&health.Warnable{
ImpactsConnectivity: true,
})
var HookMakeACEDialer feature.Hook[func(dialer netx.DialFunc, aceHost string, optIP netip.Addr) netx.DialFunc]
// tryURLUpgrade connects to u, and tries to upgrade it to a net.Conn.
//
// If optAddr is valid, then no DNS is used and the connection will be made to
// the provided address.
//
// Only the provided ctx is used, not a.ctx.
func (a *Dialer) tryURLUpgrade(ctx context.Context, u *url.URL, optAddr netip.Addr, init []byte) (_ net.Conn, retErr error) {
func (a *Dialer) tryURLUpgrade(ctx context.Context, u *url.URL, optAddr netip.Addr, optACEHost string, init []byte) (_ net.Conn, retErr error) {
var dns *dnscache.Resolver
// If we were provided an address to dial, then create a resolver that just
@@ -480,13 +417,24 @@ func (a *Dialer) tryURLUpgrade(ctx context.Context, u *url.URL, optAddr netip.Ad
dns = a.resolver()
}
var dialer dnscache.DialContextFunc
var dialer netx.DialFunc
if a.Dialer != nil {
dialer = a.Dialer
} else {
dialer = stdDialer.DialContext
}
if optACEHost != "" {
if !buildfeatures.HasACE {
return nil, feature.ErrUnavailable
}
f, ok := HookMakeACEDialer.GetOk()
if !ok {
return nil, feature.ErrUnavailable
}
dialer = f(dialer, optACEHost, optAddr)
}
// On macOS, see if Screen Time is blocking things.
if runtime.GOOS == "darwin" {
var proxydIntercepted atomic.Bool // intercepted by macOS webfilterproxyd
@@ -513,13 +461,25 @@ func (a *Dialer) tryURLUpgrade(ctx context.Context, u *url.URL, optAddr netip.Ad
tr := http.DefaultTransport.(*http.Transport).Clone()
defer tr.CloseIdleConnections()
tr.Proxy = a.getProxyFunc()
tshttpproxy.SetTransportGetProxyConnectHeader(tr)
tr.DialContext = dnscache.Dialer(dialer, dns)
if optACEHost != "" {
// If using ACE, we don't want to use any HTTP proxy.
// ACE is already a tunnel+proxy.
// TODO(tailscale/corp#32483): use system proxy too?
tr.Proxy = nil
tr.DialContext = dialer
} else {
if buildfeatures.HasUseProxy {
tr.Proxy = a.getProxyFunc()
if set, ok := feature.HookProxySetTransportGetProxyConnectHeader.GetOk(); ok {
set(tr)
}
}
tr.DialContext = dnscache.Dialer(dialer, dns)
}
// Disable HTTP2, since h2 can't do protocol switching.
tr.TLSClientConfig.NextProtos = []string{}
tr.TLSNextProto = map[string]func(string, *tls.Conn) http.RoundTripper{}
tr.TLSClientConfig = tlsdial.Config(a.Hostname, a.HealthTracker, tr.TLSClientConfig)
tr.TLSClientConfig = tlsdial.Config(a.HealthTracker, tr.TLSClientConfig)
if !tr.TLSClientConfig.InsecureSkipVerify {
panic("unexpected") // should be set by tlsdial.Config
}

View File

@@ -12,6 +12,7 @@ import (
"tailscale.com/health"
"tailscale.com/net/dnscache"
"tailscale.com/net/netmon"
"tailscale.com/net/netx"
"tailscale.com/tailcfg"
"tailscale.com/tstime"
"tailscale.com/types/key"
@@ -66,7 +67,7 @@ type Dialer struct {
// Dialer is the dialer used to make outbound connections.
//
// If not specified, this defaults to net.Dialer.DialContext.
Dialer dnscache.DialContextFunc
Dialer netx.DialFunc
// DNSCache is the caching Resolver used by this Dialer.
//
@@ -77,8 +78,8 @@ type Dialer struct {
// dropped.
Logf logger.Logf
// NetMon is the [netmon.Monitor] to use for this Dialer. It must be
// non-nil.
// NetMon is the [netmon.Monitor] to use for this Dialer.
// It is optional.
NetMon *netmon.Monitor
// HealthTracker, if non-nil, is the health tracker to use.
@@ -97,7 +98,6 @@ type Dialer struct {
logPort80Failure atomic.Bool
// For tests only
drainFinished chan struct{}
omitCertErrorLogging bool
testFallbackDelay time.Duration