This commit is contained in:
2026-02-19 10:07:43 +00:00
parent 007438e372
commit 6e637ecf77
1763 changed files with 60820 additions and 279516 deletions

View File

@@ -8,14 +8,18 @@ import (
"html"
"net/http"
"strconv"
"sync"
"sync/atomic"
"time"
"tailscale.com/feature/buildfeatures"
"tailscale.com/health"
"tailscale.com/syncs"
)
func init() {
if !buildfeatures.HasDNS {
return
}
health.RegisterDebugHandler("dnsfwd", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
n, _ := strconv.Atoi(r.FormValue("n"))
if n <= 0 {
@@ -35,7 +39,7 @@ func init() {
var fwdLogAtomic atomic.Pointer[fwdLog]
// fwdLog holds a slice of fwdLogEntry values together with the index of the
// next entry in that slice.
//
// NOTE(review): mu presumably guards pos and ent — confirm against the
// methods that use this type.
type fwdLog struct {
mu sync.Mutex
mu syncs.Mutex
pos int // ent[pos] is next entry
ent []fwdLogEntry
}

View File

@@ -17,6 +17,7 @@ import (
"net/http"
"net/netip"
"net/url"
"runtime"
"sort"
"strings"
"sync"
@@ -26,13 +27,17 @@ import (
dns "golang.org/x/net/dns/dnsmessage"
"tailscale.com/control/controlknobs"
"tailscale.com/envknob"
"tailscale.com/feature"
"tailscale.com/feature/buildfeatures"
"tailscale.com/health"
"tailscale.com/net/dns/publicdns"
"tailscale.com/net/dnscache"
"tailscale.com/net/neterror"
"tailscale.com/net/netmon"
"tailscale.com/net/netx"
"tailscale.com/net/sockstats"
"tailscale.com/net/tsdial"
"tailscale.com/syncs"
"tailscale.com/types/dnstype"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
@@ -215,18 +220,19 @@ type resolverAndDelay struct {
// forwarder forwards DNS packets to a number of upstream nameservers.
type forwarder struct {
logf logger.Logf
netMon *netmon.Monitor // always non-nil
linkSel ForwardLinkSelector // TODO(bradfitz): remove this when tsdial.Dialer absorbs it
dialer *tsdial.Dialer
health *health.Tracker // always non-nil
logf logger.Logf
netMon *netmon.Monitor // always non-nil
linkSel ForwardLinkSelector // TODO(bradfitz): remove this when tsdial.Dialer absorbs it
dialer *tsdial.Dialer
health *health.Tracker // always non-nil
verboseFwd bool // if true, log all DNS forwarding
controlKnobs *controlknobs.Knobs // or nil
ctx context.Context // good until Close
ctxCancel context.CancelFunc // closes ctx
mu sync.Mutex // guards following
mu syncs.Mutex // guards following
dohClient map[string]*http.Client // urlBase -> client
@@ -243,26 +249,23 @@ type forwarder struct {
// /etc/resolv.conf is missing/corrupt, and the peerapi ExitDNS stub
// resolver lookup.
cloudHostFallback []resolverAndDelay
// missingUpstreamRecovery, if non-nil, is called when a SERVFAIL is
// returned due to missing upstream resolvers.
//
// This should attempt to properly (re)set the upstream resolvers.
missingUpstreamRecovery func()
}
func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, health *health.Tracker, knobs *controlknobs.Knobs) *forwarder {
if !buildfeatures.HasDNS {
return nil
}
if netMon == nil {
panic("nil netMon")
}
f := &forwarder{
logf: logger.WithPrefix(logf, "forward: "),
netMon: netMon,
linkSel: linkSel,
dialer: dialer,
health: health,
controlKnobs: knobs,
missingUpstreamRecovery: func() {},
logf: logger.WithPrefix(logf, "forward: "),
netMon: netMon,
linkSel: linkSel,
dialer: dialer,
health: health,
controlKnobs: knobs,
verboseFwd: verboseDNSForward(),
}
f.ctx, f.ctxCancel = context.WithCancel(context.Background())
return f
@@ -520,15 +523,18 @@ var (
//
// send expects the reply to have the same txid as txidOut.
func (f *forwarder) send(ctx context.Context, fq *forwardQuery, rr resolverAndDelay) (ret []byte, err error) {
if verboseDNSForward() {
if f.verboseFwd {
id := forwarderCount.Add(1)
domain, typ, _ := nameFromQuery(fq.packet)
f.logf("forwarder.send(%q, %d, %v, %d) [%d] ...", rr.name.Addr, fq.txid, typ, len(domain), id)
f.logf("forwarder.send(%q, %d, %v, %d) from %v [%d] ...", rr.name.Addr, fq.txid, typ, len(domain), fq.src, id)
defer func() {
f.logf("forwarder.send(%q, %d, %v, %d) [%d] = %v, %v", rr.name.Addr, fq.txid, typ, len(domain), id, len(ret), err)
f.logf("forwarder.send(%q, %d, %v, %d) from %v [%d] = %v, %v", rr.name.Addr, fq.txid, typ, len(domain), fq.src, id, len(ret), err)
}()
}
if strings.HasPrefix(rr.name.Addr, "http://") {
if !buildfeatures.HasPeerAPIClient {
return nil, feature.ErrUnavailable
}
return f.sendDoH(ctx, rr.name.Addr, f.dialer.PeerAPIHTTPClient(), fq.packet)
}
if strings.HasPrefix(rr.name.Addr, "https://") {
@@ -739,18 +745,38 @@ func (f *forwarder) sendUDP(ctx context.Context, fq *forwardQuery, rr resolverAn
return out, nil
}
func (f *forwarder) getDialerType() dnscache.DialContextFunc {
if f.controlKnobs != nil && f.controlKnobs.UserDialUseRoutes.Load() {
// It is safe to use UserDial as it dials external servers without going through Tailscale
// and closes connections on interface change in the same way as SystemDial does,
// thus preventing DNS resolution issues when switching between WiFi and cellular,
// but can also dial an internal DNS server on the Tailnet or via a subnet router.
//
// TODO(nickkhyl): Update tsdial.Dialer to reuse the bart.Table we create in net/tstun.Wrapper
// to avoid having two bart tables in memory, especially on iOS. Once that's done,
// we can get rid of the nodeAttr/control knob and always use UserDial for DNS.
//
// See https://github.com/tailscale/tailscale/issues/12027.
var optDNSForwardUseRoutes = envknob.RegisterOptBool("TS_DEBUG_DNS_FORWARD_USE_ROUTES")
// ShouldUseRoutes reports whether the DNS resolver should take routes into
// account when it dials upstream nameservers over TCP.
//
// A true result means routes should be considered ([tsdial.Dialer.UserDial]);
// a false result means dialing defers to the system routes
// ([tsdial.Dialer.SystemDial]).
//
// The result is always false when buildfeatures.HasDNS is false. On Android
// and iOS it is the value of knobs.UserDialUseRoutes (false if knobs is nil).
// On every other platform it is true unless the
// TS_DEBUG_DNS_FORWARD_USE_ROUTES env knob reports false.
//
// TODO(nickkhyl): Update [tsdial.Dialer] to reuse the bart.Table we create in net/tstun.Wrapper
// to avoid having two bart tables in memory, especially on iOS. Once that's done,
// we can get rid of the nodeAttr/control knob and always use UserDial for DNS.
//
// See tailscale/tailscale#12027.
func ShouldUseRoutes(knobs *controlknobs.Knobs) bool {
	if !buildfeatures.HasDNS {
		return false
	}
	if goos := runtime.GOOS; goos == "android" || goos == "ios" {
		// Mobile platforms have lower memory limits (e.g., 50MB on iOS), so
		// the behavior stays gated by the "user-dial-routes" nodeAttr there.
		return knobs != nil && knobs.UserDialUseRoutes.Load()
	}
	// Everywhere else route-aware dialing is the default; only the
	// "TS_DEBUG_DNS_FORWARD_USE_ROUTES" env var can override it.
	return !optDNSForwardUseRoutes().EqualBool(false)
}
func (f *forwarder) getDialerType() netx.DialFunc {
if ShouldUseRoutes(f.controlKnobs) {
return f.dialer.UserDial
}
return f.dialer.SystemDial
@@ -878,6 +904,7 @@ type forwardQuery struct {
txid txid
packet []byte
family string // "tcp" or "udp"
src netip.AddrPort
// closeOnCtxDone lets send register values to Close if the
// caller's ctx expires. This avoids send from allocating its
@@ -943,13 +970,6 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
f.health.SetUnhealthy(dnsForwarderFailing, health.Args{health.ArgDNSServers: ""})
f.logf("no upstream resolvers set, returning SERVFAIL")
// Attempt to recompile the DNS configuration
// If we are being asked to forward queries and we have no
// nameservers, the network is in a bad state.
if f.missingUpstreamRecovery != nil {
f.missingUpstreamRecovery()
}
res, err := servfailResponse(query)
if err != nil {
return err
@@ -969,11 +989,12 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
txid: getTxID(query.bs),
packet: query.bs,
family: query.family,
src: query.addr,
closeOnCtxDone: new(closePool),
}
defer fq.closeOnCtxDone.Close()
if verboseDNSForward() {
if f.verboseFwd {
domainSha256 := sha256.Sum256([]byte(domain))
domainSig := base64.RawStdEncoding.EncodeToString(domainSha256[:3])
f.logf("request(%d, %v, %d, %s) %d...", fq.txid, typ, len(domain), domainSig, len(fq.packet))
@@ -1018,7 +1039,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
metricDNSFwdErrorContext.Add(1)
return fmt.Errorf("waiting to send response: %w", ctx.Err())
case responseChan <- packet{v, query.family, query.addr}:
if verboseDNSForward() {
if f.verboseFwd {
f.logf("response(%d, %v, %d) = %d, nil", fq.txid, typ, len(domain), len(v))
}
metricDNSFwdSuccess.Add(1)
@@ -1048,7 +1069,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
}
f.health.SetUnhealthy(dnsForwarderFailing, health.Args{health.ArgDNSServers: strings.Join(resolverAddrs, ",")})
case responseChan <- res:
if verboseDNSForward() {
if f.verboseFwd {
f.logf("forwarder response(%d, %v, %d) = %d, %v", fq.txid, typ, len(domain), len(res.bs), firstErr)
}
return nil

View File

@@ -25,6 +25,8 @@ import (
dns "golang.org/x/net/dns/dnsmessage"
"tailscale.com/control/controlknobs"
"tailscale.com/envknob"
"tailscale.com/feature"
"tailscale.com/feature/buildfeatures"
"tailscale.com/health"
"tailscale.com/net/dns/resolvconffile"
"tailscale.com/net/netaddr"
@@ -212,7 +214,7 @@ type Resolver struct {
closed chan struct{}
// mu guards the following fields from being updated while used.
mu sync.Mutex
mu syncs.Mutex
localDomains []dnsname.FQDN
hostToIP map[dnsname.FQDN][]netip.Addr
ipToHost map[netip.Addr]dnsname.FQDN
@@ -251,18 +253,12 @@ func New(logf logger.Logf, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, h
return r
}
// SetMissingUpstreamRecovery sets a callback to be called upon encountering
// a SERVFAIL due to missing upstream resolvers.
//
// The callback is stored on the resolver's forwarder, replacing whatever
// value the field held before.
//
// This call should only happen before the resolver is used. It is not safe
// for concurrent use.
func (r *Resolver) SetMissingUpstreamRecovery(f func()) {
// Plain field write with no locking, hence the "before use" requirement above.
r.forwarder.missingUpstreamRecovery = f
}
// TestOnlySetHook installs hook as the resolver's saveConfigForTests
// callback. SetConfig calls a non-nil hook with the Config it receives.
func (r *Resolver) TestOnlySetHook(hook func(Config)) {
	r.saveConfigForTests = hook
}
func (r *Resolver) SetConfig(cfg Config) error {
if !buildfeatures.HasDNS {
return nil
}
if r.saveConfigForTests != nil {
r.saveConfigForTests(cfg)
}
@@ -288,6 +284,9 @@ func (r *Resolver) SetConfig(cfg Config) error {
// Close shuts down the resolver and ensures poll goroutines have exited.
// The Resolver cannot be used again after Close is called.
func (r *Resolver) Close() {
if !buildfeatures.HasDNS {
return
}
select {
case <-r.closed:
return
@@ -305,6 +304,9 @@ func (r *Resolver) Close() {
const dnsQueryTimeout = 10 * time.Second
func (r *Resolver) Query(ctx context.Context, bs []byte, family string, from netip.AddrPort) ([]byte, error) {
if !buildfeatures.HasDNS {
return nil, feature.ErrUnavailable
}
metricDNSQueryLocal.Add(1)
select {
case <-r.closed:
@@ -332,6 +334,9 @@ func (r *Resolver) Query(ctx context.Context, bs []byte, family string, from net
// GetUpstreamResolvers returns the resolvers that would be used to resolve
// the given FQDN.
//
// The lookup is delegated to the resolver's forwarder. When
// buildfeatures.HasDNS is false it returns nil without consulting the
// forwarder.
func (r *Resolver) GetUpstreamResolvers(name dnsname.FQDN) []*dnstype.Resolver {
if !buildfeatures.HasDNS {
return nil
}
return r.forwarder.GetUpstreamResolvers(name)
}
@@ -360,6 +365,9 @@ func parseExitNodeQuery(q []byte) *response {
// and a nil error.
// TODO: figure out if we even need an error result.
func (r *Resolver) HandlePeerDNSQuery(ctx context.Context, q []byte, from netip.AddrPort, allowName func(name string) bool) (res []byte, err error) {
if !buildfeatures.HasDNS {
return nil, feature.ErrUnavailable
}
metricDNSExitProxyQuery.Add(1)
ch := make(chan packet, 1)
@@ -436,6 +444,9 @@ var debugExitNodeDNSNetPkg = envknob.RegisterBool("TS_DEBUG_EXIT_NODE_DNS_NET_PK
// response contains the pre-serialized response, which notably
// includes the original question and its header.
func handleExitNodeDNSQueryWithNetPkg(ctx context.Context, logf logger.Logf, resolver *net.Resolver, resp *response) (res []byte, err error) {
if !buildfeatures.HasDNS {
return nil, feature.ErrUnavailable
}
logf = logger.WithPrefix(logf, "exitNodeDNSQueryWithNetPkg: ")
if resp.Question.Class != dns.ClassINET {
return nil, errors.New("unsupported class")
@@ -1256,6 +1267,9 @@ func (r *Resolver) respondReverse(query []byte, name dnsname.FQDN, resp *respons
// respond returns a DNS response to query if it can be resolved locally.
// Otherwise, it returns errNotOurName.
func (r *Resolver) respond(query []byte) ([]byte, error) {
if !buildfeatures.HasDNS {
return nil, feature.ErrUnavailable
}
parser := dnsParserPool.Get().(*dnsParser)
defer dnsParserPool.Put(parser)