This commit is contained in:
2026-02-19 10:07:43 +00:00
parent 007438e372
commit 6e637ecf77
1763 changed files with 60820 additions and 279516 deletions

345
vendor/tailscale.com/health/health.go generated vendored
View File

@@ -8,7 +8,6 @@ package health
import (
"context"
"errors"
"expvar"
"fmt"
"maps"
"net/http"
@@ -20,20 +19,19 @@ import (
"time"
"tailscale.com/envknob"
"tailscale.com/metrics"
"tailscale.com/feature/buildfeatures"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/tstime"
"tailscale.com/types/opt"
"tailscale.com/util/cibuild"
"tailscale.com/util/eventbus"
"tailscale.com/util/mak"
"tailscale.com/util/multierr"
"tailscale.com/util/set"
"tailscale.com/util/usermetric"
"tailscale.com/version"
)
var (
mu sync.Mutex
mu syncs.Mutex
debugHandler map[string]http.Handler
)
@@ -64,6 +62,21 @@ var receiveNames = []string{
// Tracker tracks the health of various Tailscale subsystems,
// comparing each subsystem's state with the others to make sure
// they're consistent based on the user's intended state.
//
// If a client [Warnable] becomes unhealthy or its unhealthy state is updated,
// an event will be emitted with WarnableChanged set to true and the Warnable
// and its UnhealthyState:
//
// Change{WarnableChanged: true, Warnable: w, UnhealthyState: us}
//
// If a Warnable becomes healthy, an event will be emitted with
// WarnableChanged set to true, the Warnable set, and UnhealthyState set to nil:
//
// Change{WarnableChanged: true, Warnable: w, UnhealthyState: nil}
//
// If the health messages from the control-plane change, an event will be
// emitted with ControlHealthChanged set to true. Recipients can fetch the set of
// control-plane health messages by calling [Tracker.CurrentState].
type Tracker struct {
// MagicSockReceiveFuncs tracks the state of the three
// magicsock receive functions: IPv4, IPv6, and DERP.
@@ -76,6 +89,9 @@ type Tracker struct {
testClock tstime.Clock // nil means use time.Now / tstime.StdClock{}
eventClient *eventbus.Client
changePub *eventbus.Publisher[Change]
// mu guards everything that follows.
mu sync.Mutex
@@ -87,35 +103,69 @@ type Tracker struct {
// sysErr maps subsystems to their current error (or nil if the subsystem is healthy)
// Deprecated: using Warnables should be preferred
sysErr map[Subsystem]error
watchers set.HandleSet[func(*Warnable, *UnhealthyState)] // opt func to run if error state changes
timer tstime.TimerController
sysErr map[Subsystem]error
timer tstime.TimerController
latestVersion *tailcfg.ClientVersion // or nil
checkForUpdates bool
applyUpdates opt.Bool
inMapPoll bool
inMapPollSince time.Time
lastMapPollEndedAt time.Time
lastStreamedMapResponse time.Time
lastNoiseDial time.Time
derpHomeRegion int
derpHomeless bool
derpRegionConnected map[int]bool
derpRegionHealthProblem map[int]string
derpRegionLastFrame map[int]time.Time
derpMap *tailcfg.DERPMap // last DERP map from control, could be nil if never received one
lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest
ipnState string
ipnWantRunning bool
ipnWantRunningLastTrue time.Time // when ipnWantRunning last changed false -> true
anyInterfaceUp opt.Bool // empty means unknown (assume true)
controlHealth []string
lastLoginErr error
localLogConfigErr error
tlsConnectionErrors map[string]error // map[ServerName]error
metricHealthMessage *metrics.MultiLabelMap[metricHealthMessageLabel]
inMapPoll bool
inMapPollSince time.Time
lastMapPollEndedAt time.Time
lastStreamedMapResponse time.Time
lastNoiseDial time.Time
derpHomeRegion int
derpHomeless bool
derpRegionConnected map[int]bool
derpRegionHealthProblem map[int]string
derpRegionLastFrame map[int]time.Time
derpMap *tailcfg.DERPMap // last DERP map from control, could be nil if never received one
lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest
ipnState string
ipnWantRunning bool
ipnWantRunningLastTrue time.Time // when ipnWantRunning last changed false -> true
anyInterfaceUp opt.Bool // empty means unknown (assume true)
lastNotifiedControlMessages map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage // latest control messages processed, kept for change detection
controlMessages map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage // latest control messages received
lastLoginErr error
localLogConfigErr error
tlsConnectionErrors map[string]error // map[ServerName]error
metricHealthMessage any // nil or *metrics.MultiLabelMap[metricHealthMessageLabel]
}
// NewTracker constructs a new [Tracker] attached to the given eventbus.
//
// When the health feature is compiled out (buildfeatures.HasHealth is false)
// it returns an empty Tracker without inspecting bus. Otherwise it panics if
// bus is nil.
//
// The returned Tracker owns an eventbus client named "health.Tracker", a
// publisher for [Change] events on that client, and a one-minute timer that
// fires timerSelfCheck. awaitEventClientDone is registered as the client's
// monitor.
func NewTracker(bus *eventbus.Bus) *Tracker {
	if !buildfeatures.HasHealth {
		return &Tracker{}
	}
	if bus == nil {
		panic("no eventbus set")
	}

	client := bus.Client("health.Tracker")
	tracker := &Tracker{
		eventClient: client,
		changePub:   eventbus.Publish[Change](client),
	}
	tracker.timer = tracker.clock().AfterFunc(time.Minute, tracker.timerSelfCheck)
	client.Monitor(tracker.awaitEventClientDone)
	return tracker
}
// awaitEventClientDone blocks until ec reports done and then, while holding
// t.mu, stops every timer in t.pendingVisibleTimers, stops t.timer, and
// empties t.pendingVisibleTimers.
func (t *Tracker) awaitEventClientDone(ec *eventbus.Client) {
	<-ec.Done()

	t.mu.Lock()
	defer t.mu.Unlock()

	for _, pending := range t.pendingVisibleTimers {
		pending.Stop()
	}
	t.timer.Stop()
	clear(t.pendingVisibleTimers)
}
func (t *Tracker) now() time.Time {
@@ -173,6 +223,9 @@ const legacyErrorArgKey = "LegacyError"
// temporarily (2024-06-14) while we migrate the old health infrastructure based
// on Subsystems to the new Warnables architecture.
func (s Subsystem) Warnable() *Warnable {
if !buildfeatures.HasHealth {
return &noopWarnable
}
w, ok := subsystemsWarnables[s]
if !ok {
panic(fmt.Sprintf("health: no Warnable for Subsystem %q", s))
@@ -182,10 +235,15 @@ func (s Subsystem) Warnable() *Warnable {
var registeredWarnables = map[WarnableCode]*Warnable{}
var noopWarnable Warnable
// Register registers a new Warnable with the health package and returns it.
// Register panics if the Warnable was already registered, because Warnables
// should be unique across the program.
func Register(w *Warnable) *Warnable {
if !buildfeatures.HasHealth {
return &noopWarnable
}
if registeredWarnables[w.Code] != nil {
panic(fmt.Sprintf("health: a Warnable with code %q was already registered", w.Code))
}
@@ -197,6 +255,9 @@ func Register(w *Warnable) *Warnable {
// unregister removes a Warnable from the health package. It should only be used
// for testing purposes.
func unregister(w *Warnable) {
if !buildfeatures.HasHealth {
return
}
if registeredWarnables[w.Code] == nil {
panic(fmt.Sprintf("health: attempting to unregister Warnable %q that was not registered", w.Code))
}
@@ -207,13 +268,15 @@ func unregister(w *Warnable) {
// the program.
type WarnableCode string
// A Warnable is something that we might want to warn the user about, or not. A Warnable is either
// in an healthy or unhealth state. A Warnable is unhealthy if the Tracker knows about a WarningState
// affecting the Warnable.
// In most cases, Warnables are components of the backend (for instance, "DNS" or "Magicsock").
// Warnables are similar to the Subsystem type previously used in this package, but they provide
// a unique identifying code for each Warnable, along with more metadata that makes it easier for
// a GUI to display the Warnable in a user-friendly way.
// A Warnable is something that we might want to warn the user about, or not. A
// Warnable is either in a healthy or unhealthy state. A Warnable is unhealthy if
// the Tracker knows about a WarningState affecting the Warnable.
//
// In most cases, Warnables are components of the backend (for instance, "DNS"
// or "Magicsock"). Warnables are similar to the Subsystem type previously used
// in this package, but they provide a unique identifying code for each
// Warnable, along with more metadata that makes it easier for a GUI to display
// the Warnable in a user-friendly way.
type Warnable struct {
// Code is a string that uniquely identifies this Warnable across the entire Tailscale backend,
// and can be mapped to a user-displayable localized string.
@@ -267,6 +330,9 @@ func StaticMessage(s string) func(Args) string {
// some lost Tracker plumbing, we want to capture stack trace
// samples when it occurs.
func (t *Tracker) nil() bool {
if !buildfeatures.HasHealth {
return true
}
if t != nil {
return false
}
@@ -335,31 +401,16 @@ func (w *Warnable) IsVisible(ws *warningState, clockNow func() time.Time) bool {
return clockNow().Sub(ws.BrokenSince) >= w.TimeToVisible
}
// SetMetricsRegistry sets up the metrics for the Tracker. It takes
// a usermetric.Registry and registers the metrics there.
func (t *Tracker) SetMetricsRegistry(reg *usermetric.Registry) {
if reg == nil || t.metricHealthMessage != nil {
return
// IsUnhealthy reports whether the current state is unhealthy because the given
// warnable is set.
func (t *Tracker) IsUnhealthy(w *Warnable) bool {
if !buildfeatures.HasHealth || t.nil() {
return false
}
t.metricHealthMessage = usermetric.NewMultiLabelMapWithRegistry[metricHealthMessageLabel](
reg,
"tailscaled_health_messages",
"gauge",
"Number of health messages broken down by type.",
)
t.metricHealthMessage.Set(metricHealthMessageLabel{
Type: MetricLabelWarning,
}, expvar.Func(func() any {
if t.nil() {
return 0
}
t.mu.Lock()
defer t.mu.Unlock()
t.updateBuiltinWarnablesLocked()
return int64(len(t.stringsLocked()))
}))
t.mu.Lock()
defer t.mu.Unlock()
_, exists := t.warnableVal[w]
return exists
}
// SetUnhealthy sets a warningState for the given Warnable with the provided Args, and should be
@@ -367,7 +418,7 @@ func (t *Tracker) SetMetricsRegistry(reg *usermetric.Registry) {
// SetUnhealthy takes ownership of args. The args can be nil if no additional information is
// needed for the unhealthy state.
func (t *Tracker) SetUnhealthy(w *Warnable, args Args) {
if t.nil() {
if !buildfeatures.HasHealth || t.nil() {
return
}
t.mu.Lock()
@@ -376,7 +427,7 @@ func (t *Tracker) SetUnhealthy(w *Warnable, args Args) {
}
func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
if w == nil {
if !buildfeatures.HasHealth || w == nil {
return
}
@@ -397,25 +448,26 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
prevWs := t.warnableVal[w]
mak.Set(&t.warnableVal, w, ws)
if !ws.Equal(prevWs) {
for _, cb := range t.watchers {
// If the Warnable has been unhealthy for more than its TimeToVisible, the callback should be
// executed immediately. Otherwise, the callback should be enqueued to run once the Warnable
// becomes visible.
if w.IsVisible(ws, t.now) {
go cb(w, w.unhealthyState(ws))
continue
}
// The time remaining until the Warnable will be visible to the user is the TimeToVisible
// minus the time that has already passed since the Warnable became unhealthy.
change := Change{
WarnableChanged: true,
Warnable: w,
UnhealthyState: w.unhealthyState(ws),
}
// Publish the change to the event bus. If the change is already visible
// now, publish it immediately; otherwise queue a timer to publish it at
// a future time when it becomes visible.
if w.IsVisible(ws, t.now) {
t.changePub.Publish(change)
} else {
visibleIn := w.TimeToVisible - t.now().Sub(brokenSince)
var tc tstime.TimerController = t.clock().AfterFunc(visibleIn, func() {
tc := t.clock().AfterFunc(visibleIn, func() {
t.mu.Lock()
defer t.mu.Unlock()
// Check if the Warnable is still unhealthy, as it could have become healthy between the time
// the timer was set for and the time it was executed.
if t.warnableVal[w] != nil {
go cb(w, w.unhealthyState(ws))
t.changePub.Publish(change)
delete(t.pendingVisibleTimers, w)
}
})
@@ -426,7 +478,7 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
// SetHealthy removes any warningState for the given Warnable.
func (t *Tracker) SetHealthy(w *Warnable) {
if t.nil() {
if !buildfeatures.HasHealth || t.nil() {
return
}
t.mu.Lock()
@@ -435,7 +487,7 @@ func (t *Tracker) SetHealthy(w *Warnable) {
}
func (t *Tracker) setHealthyLocked(w *Warnable) {
if t.warnableVal[w] == nil {
if !buildfeatures.HasHealth || t.warnableVal[w] == nil {
// Nothing to remove
return
}
@@ -448,9 +500,20 @@ func (t *Tracker) setHealthyLocked(w *Warnable) {
delete(t.pendingVisibleTimers, w)
}
for _, cb := range t.watchers {
go cb(w, nil)
change := Change{
WarnableChanged: true,
Warnable: w,
}
t.changePub.Publish(change)
}
// notifyWatchersControlChangedLocked publishes a Change with
// ControlHealthChanged set to true, signalling that the control-plane health
// messages have changed (and should be fetched via CurrentState).
//
// NOTE(review): the "Locked" suffix suggests the caller must hold t.mu —
// confirm against call sites.
func (t *Tracker) notifyWatchersControlChangedLocked() {
	t.changePub.Publish(Change{ControlHealthChanged: true})
}
// AppendWarnableDebugFlags appends to base any health items that are currently in failed
@@ -476,35 +539,23 @@ func (t *Tracker) AppendWarnableDebugFlags(base []string) []string {
return ret
}
// RegisterWatcher adds a function that will be called whenever the health state of any Warnable changes.
// If a Warnable becomes unhealthy or its unhealthy state is updated, the callback will be called with its
// current Representation.
// If a Warnable becomes healthy, the callback will be called with ws set to nil.
// The provided callback function will be executed in its own goroutine. The returned function can be used
// to unregister the callback.
func (t *Tracker) RegisterWatcher(cb func(w *Warnable, r *UnhealthyState)) (unregister func()) {
if t.nil() {
return func() {}
}
t.initOnce.Do(t.doOnceInit)
t.mu.Lock()
defer t.mu.Unlock()
if t.watchers == nil {
t.watchers = set.HandleSet[func(*Warnable, *UnhealthyState)]{}
}
handle := t.watchers.Add(cb)
if t.timer == nil {
t.timer = t.clock().AfterFunc(time.Minute, t.timerSelfCheck)
}
return func() {
t.mu.Lock()
defer t.mu.Unlock()
delete(t.watchers, handle)
if len(t.watchers) == 0 && t.timer != nil {
t.timer.Stop()
t.timer = nil
}
}
// Change is used to communicate a change to health. This could either be due to
// a Warnable changing from healthy to unhealthy (or vice-versa), or because the
// health messages received from the control-plane have changed.
//
// Exactly one *Changed field will be true.
type Change struct {
// ControlHealthChanged indicates it was health messages from the
// control-plane server that changed.
ControlHealthChanged bool
// WarnableChanged indicates it was a client Warnable which changed state.
WarnableChanged bool
// Warnable is the Warnable whose health changed; its new state is given by
// UnhealthyState.
Warnable *Warnable
// UnhealthyState is set if the changed Warnable is now unhealthy, or nil
// if Warnable is now healthy.
UnhealthyState *UnhealthyState
}
// SetRouterHealth sets the state of the wgengine/router.Router.
@@ -637,13 +688,15 @@ func (t *Tracker) updateLegacyErrorWarnableLocked(key Subsystem, err error) {
}
}
func (t *Tracker) SetControlHealth(problems []string) {
func (t *Tracker) SetControlHealth(problems map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage) {
if t.nil() {
return
}
t.mu.Lock()
defer t.mu.Unlock()
t.controlHealth = problems
t.controlMessages = problems
t.selfCheckLocked()
}
@@ -927,8 +980,8 @@ func (t *Tracker) selfCheckLocked() {
// OverallError returns a summary of the health state.
//
// If there are multiple problems, the error will be of type
// multierr.Error.
// If there are multiple problems, the error will be joined using
// [errors.Join].
func (t *Tracker) OverallError() error {
if t.nil() {
return nil
@@ -939,13 +992,13 @@ func (t *Tracker) OverallError() error {
return t.multiErrLocked()
}
// Strings() returns a string array containing the Text of all Warnings
// currently known to the Tracker. These strings can be presented to the
// user, although ideally you would use the Code property on each Warning
// to show a localized version of them instead.
// This function is here for legacy compatibility purposes and is deprecated.
// Strings() returns a string array containing the Text of all Warnings and
// ControlHealth messages currently known to the Tracker. These strings can be
// presented to the user, although ideally you would use the Code property on
// each Warning to show a localized version of them instead. This function is
// here for legacy compatibility purposes and is deprecated.
func (t *Tracker) Strings() []string {
if t.nil() {
if !buildfeatures.HasHealth || t.nil() {
return nil
}
t.mu.Lock()
@@ -954,6 +1007,9 @@ func (t *Tracker) Strings() []string {
}
func (t *Tracker) stringsLocked() []string {
if !buildfeatures.HasHealth {
return nil
}
result := []string{}
for w, ws := range t.warnableVal {
if !w.IsVisible(ws, t.now) {
@@ -969,6 +1025,24 @@ func (t *Tracker) stringsLocked() []string {
result = append(result, w.Text(ws.Args))
}
}
warnLen := len(result)
for _, c := range t.controlMessages {
var msg string
if c.Title != "" && c.Text != "" {
msg = c.Title + ": " + c.Text
} else if c.Title != "" {
msg = c.Title + "."
} else if c.Text != "" {
msg = c.Text
}
if c.PrimaryAction != nil {
msg = msg + " " + c.PrimaryAction.Label + ": " + c.PrimaryAction.URL
}
result = append(result, msg)
}
sort.Strings(result[warnLen:])
return result
}
@@ -988,7 +1062,7 @@ func (t *Tracker) errorsLocked() []error {
// This function is here for legacy compatibility purposes and is deprecated.
func (t *Tracker) multiErrLocked() error {
errs := t.errorsLocked()
return multierr.New(errs...)
return errors.Join(errs...)
}
var fakeErrForTesting = envknob.RegisterString("TS_DEBUG_FAKE_HEALTH_ERROR")
@@ -996,6 +1070,9 @@ var fakeErrForTesting = envknob.RegisterString("TS_DEBUG_FAKE_HEALTH_ERROR")
// updateBuiltinWarnablesLocked performs a number of checks on the state of the backend,
// and adds/removes Warnings from the Tracker as needed.
func (t *Tracker) updateBuiltinWarnablesLocked() {
if !buildfeatures.HasHealth {
return
}
t.updateWarmingUpWarnableLocked()
if w, show := t.showUpdateWarnable(); show {
@@ -1149,14 +1226,10 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
t.setHealthyLocked(derpRegionErrorWarnable)
}
if len(t.controlHealth) > 0 {
for _, s := range t.controlHealth {
t.setUnhealthyLocked(controlHealthWarnable, Args{
ArgError: s,
})
}
} else {
t.setHealthyLocked(controlHealthWarnable)
// Check if control health messages have changed
if !maps.EqualFunc(t.lastNotifiedControlMessages, t.controlMessages, tailcfg.DisplayMessage.Equal) {
t.lastNotifiedControlMessages = t.controlMessages
t.notifyWatchersControlChangedLocked()
}
if err := envknob.ApplyDiskConfigError(); err != nil {
@@ -1238,11 +1311,17 @@ func (s *ReceiveFuncStats) Name() string {
}
// Enter marks the start of a call: it increments numCalls and sets inCall to
// true. It does nothing when the health feature is compiled out.
func (s *ReceiveFuncStats) Enter() {
	if buildfeatures.HasHealth {
		s.numCalls.Add(1)
		s.inCall.Store(true)
	}
}
// Exit marks the end of a call by setting inCall to false. It does nothing
// when the health feature is compiled out.
func (s *ReceiveFuncStats) Exit() {
	if buildfeatures.HasHealth {
		s.inCall.Store(false)
	}
}
@@ -1251,7 +1330,7 @@ func (s *ReceiveFuncStats) Exit() {
//
// If t is nil, it returns nil.
func (t *Tracker) ReceiveFuncStats(which ReceiveFunc) *ReceiveFuncStats {
if t == nil {
if !buildfeatures.HasHealth || t == nil {
return nil
}
t.initOnce.Do(t.doOnceInit)
@@ -1259,6 +1338,9 @@ func (t *Tracker) ReceiveFuncStats(which ReceiveFunc) *ReceiveFuncStats {
}
func (t *Tracker) doOnceInit() {
if !buildfeatures.HasHealth {
return
}
for i := range t.MagicSockReceiveFuncs {
f := &t.MagicSockReceiveFuncs[i]
f.name = (ReceiveFunc(i)).String()
@@ -1307,10 +1389,3 @@ func (t *Tracker) LastNoiseDialWasRecent() bool {
t.lastNoiseDial = now
return dur < 2*time.Minute
}
const MetricLabelWarning = "warning"
type metricHealthMessageLabel struct {
// TODO: break down by warnable.severity as well?
Type string
}

View File

@@ -8,8 +8,10 @@
package healthmsg
const (
WarnAcceptRoutesOff = "Some peers are advertising routes but --accept-routes is false"
TailscaleSSHOnBut = "Tailscale SSH enabled, but " // + ... something from caller
LockedOut = "this node is locked out; it will not have connectivity until it is signed. For more info, see https://tailscale.com/s/locked-out"
WarnExitNodeUsage = "The following issues on your machine will likely make usage of exit nodes impossible"
WarnAcceptRoutesOff = "Some peers are advertising routes but --accept-routes is false"
TailscaleSSHOnBut = "Tailscale SSH enabled, but " // + ... something from caller
LockedOut = "this node is locked out; it will not have connectivity until it is signed. For more info, see https://tailscale.com/s/locked-out"
WarnExitNodeUsage = "The following issues on your machine will likely make usage of exit nodes impossible"
DisableRPFilter = "Please set rp_filter=2 instead of rp_filter=1; see https://github.com/tailscale/tailscale/issues/3310"
InMemoryTailnetLockState = "Tailnet Lock state is only being stored in-memory. Set --statedir to store state on disk, which is more secure. See https://tailscale.com/kb/1226/tailnet-lock#tailnet-lock-state"
)

95
vendor/tailscale.com/health/state.go generated vendored
View File

@@ -4,11 +4,20 @@
package health
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"time"
"tailscale.com/feature/buildfeatures"
"tailscale.com/tailcfg"
)
// State contains the health status of the backend, and is
// provided to the client UI via LocalAPI through ipn.Notify.
//
// It is also exposed via c2n for debugging purposes, so try
// not to change its structure too gratuitously.
type State struct {
// Each key-value pair in Warnings represents a Warnable that is currently
// unhealthy. If a Warnable is healthy, it will not be present in this map.
@@ -21,16 +30,56 @@ type State struct {
}
// UnhealthyState contains information to be shown to the user to inform them
// that a Warnable is currently unhealthy.
// that a [Warnable] is currently unhealthy or [tailcfg.DisplayMessage] is being
// sent from the control-plane.
type UnhealthyState struct {
WarnableCode WarnableCode
Severity Severity
Title string
Text string
BrokenSince *time.Time `json:",omitempty"`
Args Args `json:",omitempty"`
DependsOn []WarnableCode `json:",omitempty"`
ImpactsConnectivity bool `json:",omitempty"`
BrokenSince *time.Time `json:",omitempty"`
Args Args `json:",omitempty"`
DependsOn []WarnableCode `json:",omitempty"`
ImpactsConnectivity bool `json:",omitempty"`
PrimaryAction *UnhealthyStateAction `json:",omitempty"`
// ETag identifies a specific version of an UnhealthyState. If the contents
// of the other fields of two UnhealthyStates are the same, the ETags will
// be the same. If the contents differ, the ETags will also differ. The
// implementation is not defined and the value is opaque: it might be a
// hash, it might be a simple counter. Implementations should not rely on
// any specific implementation detail or format of the ETag string other
// than string (in)equality.
ETag string `json:",omitzero"`
}
// hash returns the SHA-256 digest of u's JSON encoding. It is meant to be
// stable across different runs of the same binary.
func (u UnhealthyState) hash() []byte {
	digest := sha256.New()
	// Writing to a hash.Hash never returns an error, so Encode can only fail
	// if u is not marshalable, in which case we have much bigger problems;
	// the error is therefore deliberately discarded.
	_ = json.NewEncoder(digest).Encode(u)
	return digest.Sum(nil)
}
// withETag returns a copy of u with its ETag set to the hex encoding of
// u.hash. The ETag field is cleared before hashing so that a previously set
// ETag does not feed into the new one; UnhealthyState values that are
// otherwise equal therefore receive the same ETag.
//
// NOTE(review): hash discards the JSON encoding error, so a failure there
// does not yield an empty ETag here; the ETag is still the hex digest of
// whatever was written to the hasher.
func (u UnhealthyState) withETag() UnhealthyState {
	u.ETag = "" // exclude any prior ETag from the digest
	sum := u.hash()
	u.ETag = hex.EncodeToString(sum)
	return u
}
// UnhealthyStateAction represents an action (a URL and its label) to be
// presented to the user in association with an [UnhealthyState]. Analogous to
// [tailcfg.DisplayMessageAction].
type UnhealthyStateAction struct {
// URL is the action's target. CurrentState fills it from the control
// message's PrimaryAction.URL.
URL string
// Label is the text shown for the action. CurrentState fills it from the
// control message's PrimaryAction.Label.
Label string
}
// unhealthyState returns the UnhealthyState of the Warnable given its current warningState.
@@ -72,7 +121,7 @@ func (w *Warnable) unhealthyState(ws *warningState) *UnhealthyState {
// The returned State is a snapshot of shared memory, and the caller should not
// mutate the returned value.
func (t *Tracker) CurrentState() *State {
if t.nil() {
if !buildfeatures.HasHealth || t.nil() {
return &State{}
}
@@ -95,7 +144,28 @@ func (t *Tracker) CurrentState() *State {
// that are unhealthy.
continue
}
wm[w.Code] = *w.unhealthyState(ws)
state := w.unhealthyState(ws)
wm[w.Code] = state.withETag()
}
for id, msg := range t.lastNotifiedControlMessages {
state := UnhealthyState{
WarnableCode: WarnableCode("control-health." + id),
Severity: severityFromTailcfg(msg.Severity),
Title: msg.Title,
Text: msg.Text,
ImpactsConnectivity: msg.ImpactsConnectivity,
// TODO(tailscale/corp#27759): DependsOn?
}
if msg.PrimaryAction != nil {
state.PrimaryAction = &UnhealthyStateAction{
URL: msg.PrimaryAction.URL,
Label: msg.PrimaryAction.Label,
}
}
wm[state.WarnableCode] = state.withETag()
}
return &State{
@@ -103,6 +173,17 @@ func (t *Tracker) CurrentState() *State {
}
}
// severityFromTailcfg converts a control-plane display message severity into
// the health package's Severity. High and low map to their counterparts;
// every other value maps to SeverityMedium.
func severityFromTailcfg(s tailcfg.DisplayMessageSeverity) Severity {
	if s == tailcfg.SeverityHigh {
		return SeverityHigh
	}
	if s == tailcfg.SeverityLow {
		return SeverityLow
	}
	return SeverityMedium
}
// isEffectivelyHealthyLocked reports whether w is effectively healthy.
// That means it's either actually healthy or it has a dependency
// that's unhealthy, so we should treat w as healthy to not spam users

52
vendor/tailscale.com/health/usermetrics.go generated vendored Normal file
View File

@@ -0,0 +1,52 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !ts_omit_health && !ts_omit_usermetrics
package health
import (
"expvar"
"tailscale.com/feature/buildfeatures"
"tailscale.com/util/usermetric"
)
// MetricLabelWarning is the Type label value under which SetMetricsRegistry
// reports the count of current health message strings.
const MetricLabelWarning = "warning"
// metricHealthMessageLabel is the label set used for the
// "tailscaled_health_messages" metric created by SetMetricsRegistry.
type metricHealthMessageLabel struct {
// TODO: break down by warnable.severity as well?
// Type is the kind of health message being counted; SetMetricsRegistry
// uses MetricLabelWarning.
Type string
}
// SetMetricsRegistry sets up the metrics for the Tracker by registering a
// "tailscaled_health_messages" gauge on reg.
//
// It is a no-op when the health feature is compiled out, when reg is nil, or
// when the Tracker already has a metric installed. The gauge's
// MetricLabelWarning entry is computed on demand: it refreshes the built-in
// warnables under t.mu and reports how many health strings are current.
func (t *Tracker) SetMetricsRegistry(reg *usermetric.Registry) {
	if !buildfeatures.HasHealth || reg == nil || t.metricHealthMessage != nil {
		return
	}

	healthMessages := usermetric.NewMultiLabelMapWithRegistry[metricHealthMessageLabel](
		reg,
		"tailscaled_health_messages",
		"gauge",
		"Number of health messages broken down by type.",
	)

	// warningCount is evaluated each time the metric is read.
	warningCount := expvar.Func(func() any {
		if t.nil() {
			return 0
		}
		t.mu.Lock()
		defer t.mu.Unlock()
		t.updateBuiltinWarnablesLocked()
		return int64(len(t.stringsLocked()))
	})
	healthMessages.Set(metricHealthMessageLabel{Type: MetricLabelWarning}, warningCount)

	t.metricHealthMessage = healthMessages
}

8
vendor/tailscale.com/health/usermetrics_omit.go generated vendored Normal file
View File

@@ -0,0 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ts_omit_health || ts_omit_usermetrics
package health
// SetMetricsRegistry is the stub used when the build is tagged ts_omit_health
// or ts_omit_usermetrics. It ignores its argument and does nothing.
func (t *Tracker) SetMetricsRegistry(_ any) {
}

View File

@@ -8,244 +8,279 @@ import (
"runtime"
"time"
"tailscale.com/feature/buildfeatures"
"tailscale.com/tsconst"
"tailscale.com/version"
)
// condRegister returns the Warnable produced by f when the health feature is
// compiled in, and nil otherwise. f is not called when the feature is
// compiled out.
//
// NOTE(review): as written this does not call Register on f's result, and it
// returns nil rather than &noopWarnable when health is omitted — confirm that
// both differences from Register are intended.
func condRegister(f func() *Warnable) *Warnable {
	if buildfeatures.HasHealth {
		return f()
	}
	return nil
}
/**
This file contains definitions for the Warnables maintained within this `health` package.
*/
// updateAvailableWarnable is a Warnable that warns the user that an update is available.
var updateAvailableWarnable = Register(&Warnable{
Code: "update-available",
Title: "Update available",
Severity: SeverityLow,
Text: func(args Args) string {
if version.IsMacAppStore() || version.IsAppleTV() || version.IsMacSys() || version.IsWindowsGUI() || runtime.GOOS == "android" {
return fmt.Sprintf("An update from version %s to %s is available.", args[ArgCurrentVersion], args[ArgAvailableVersion])
} else {
return fmt.Sprintf("An update from version %s to %s is available. Run `tailscale update` or `tailscale set --auto-update` to update now.", args[ArgCurrentVersion], args[ArgAvailableVersion])
}
},
var updateAvailableWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: tsconst.HealthWarnableUpdateAvailable,
Title: "Update available",
Severity: SeverityLow,
Text: func(args Args) string {
if version.IsMacAppStore() || version.IsAppleTV() || version.IsMacSys() || version.IsWindowsGUI() || runtime.GOOS == "android" {
return fmt.Sprintf("An update from version %s to %s is available.", args[ArgCurrentVersion], args[ArgAvailableVersion])
} else {
return fmt.Sprintf("An update from version %s to %s is available. Run `tailscale update` or `tailscale set --auto-update` to update now.", args[ArgCurrentVersion], args[ArgAvailableVersion])
}
},
}
})
// securityUpdateAvailableWarnable is a Warnable that warns the user that an important security update is available.
var securityUpdateAvailableWarnable = Register(&Warnable{
Code: "security-update-available",
Title: "Security update available",
Severity: SeverityMedium,
Text: func(args Args) string {
if version.IsMacAppStore() || version.IsAppleTV() || version.IsMacSys() || version.IsWindowsGUI() || runtime.GOOS == "android" {
return fmt.Sprintf("A security update from version %s to %s is available.", args[ArgCurrentVersion], args[ArgAvailableVersion])
} else {
return fmt.Sprintf("A security update from version %s to %s is available. Run `tailscale update` or `tailscale set --auto-update` to update now.", args[ArgCurrentVersion], args[ArgAvailableVersion])
}
},
var securityUpdateAvailableWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: tsconst.HealthWarnableSecurityUpdateAvailable,
Title: "Security update available",
Severity: SeverityMedium,
Text: func(args Args) string {
if version.IsMacAppStore() || version.IsAppleTV() || version.IsMacSys() || version.IsWindowsGUI() || runtime.GOOS == "android" {
return fmt.Sprintf("A security update from version %s to %s is available.", args[ArgCurrentVersion], args[ArgAvailableVersion])
} else {
return fmt.Sprintf("A security update from version %s to %s is available. Run `tailscale update` or `tailscale set --auto-update` to update now.", args[ArgCurrentVersion], args[ArgAvailableVersion])
}
},
}
})
// unstableWarnable is a Warnable that warns the user that they are using an unstable version of Tailscale
// so they won't be surprised by all the issues that may arise.
var unstableWarnable = Register(&Warnable{
Code: "is-using-unstable-version",
Title: "Using an unstable version",
Severity: SeverityLow,
Text: StaticMessage("This is an unstable version of Tailscale meant for testing and development purposes. Please report any issues to Tailscale."),
var unstableWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: tsconst.HealthWarnableIsUsingUnstableVersion,
Title: "Using an unstable version",
Severity: SeverityLow,
Text: StaticMessage("This is an unstable version of Tailscale meant for testing and development purposes. Please report any issues to Tailscale."),
}
})
// NetworkStatusWarnable is a Warnable that warns the user that the network is down.
var NetworkStatusWarnable = Register(&Warnable{
Code: "network-status",
Title: "Network down",
Severity: SeverityMedium,
Text: StaticMessage("Tailscale cannot connect because the network is down. Check your Internet connection."),
ImpactsConnectivity: true,
TimeToVisible: 5 * time.Second,
var NetworkStatusWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: tsconst.HealthWarnableNetworkStatus,
Title: "Network down",
Severity: SeverityMedium,
Text: StaticMessage("Tailscale cannot connect because the network is down. Check your Internet connection."),
ImpactsConnectivity: true,
TimeToVisible: 5 * time.Second,
}
})
// IPNStateWarnable is a Warnable that warns the user that Tailscale is stopped.
var IPNStateWarnable = Register(&Warnable{
Code: "wantrunning-false",
Title: "Tailscale off",
Severity: SeverityLow,
Text: StaticMessage("Tailscale is stopped."),
var IPNStateWarnable = condRegister(func() *Warnable {
return &Warnable{
Code: tsconst.HealthWarnableWantRunningFalse,
Title: "Tailscale off",
Severity: SeverityLow,
Text: StaticMessage("Tailscale is stopped."),
}
})
// localLogWarnable is a Warnable that warns the user that the local log is misconfigured.
// It is declared with SeverityLow; its Text function formats args[ArgError]
// into the message.
var localLogWarnable = Register(&Warnable{
Code:     "local-log-config-error",
Title:    "Local log misconfiguration",
Severity: SeverityLow,
Text: func(args Args) string {
return fmt.Sprintf("The local log is misconfigured: %v", args[ArgError])
},
var localLogWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:     tsconst.HealthWarnableLocalLogConfigError,
Title:    "Local log misconfiguration",
Severity: SeverityLow,
Text: func(args Args) string {
return fmt.Sprintf("The local log is misconfigured: %v", args[ArgError])
},
}
})
// LoginStateWarnable is a Warnable that warns the user that they are logged out,
// and provides the last login error if available.
// The Text function includes args[ArgError] in the message only when it is
// non-empty. The Warnable is declared with SeverityMedium and names
// IPNStateWarnable in its DependsOn list.
var LoginStateWarnable = Register(&Warnable{
Code:     "login-state",
Title:    "Logged out",
Severity: SeverityMedium,
Text: func(args Args) string {
if args[ArgError] != "" {
return fmt.Sprintf("You are logged out. The last login error was: %v", args[ArgError])
} else {
return "You are logged out."
}
},
DependsOn: []*Warnable{IPNStateWarnable},
var LoginStateWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:     tsconst.HealthWarnableLoginState,
Title:    "Logged out",
Severity: SeverityMedium,
Text: func(args Args) string {
if args[ArgError] != "" {
return fmt.Sprintf("You are logged out. The last login error was: %v", args[ArgError])
} else {
return "You are logged out."
}
},
DependsOn: []*Warnable{IPNStateWarnable},
}
})
// notInMapPollWarnable is a Warnable that warns the user that we are using a stale network map.
// It is declared with SeverityMedium, a StaticMessage text and a TimeToVisible
// of 8 minutes, and names NetworkStatusWarnable and IPNStateWarnable in its
// DependsOn list.
var notInMapPollWarnable = Register(&Warnable{
Code:      "not-in-map-poll",
Title:     "Out of sync",
Severity:  SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable},
Text:      StaticMessage("Unable to connect to the Tailscale coordination server to synchronize the state of your tailnet. Peer reachability might degrade over time."),
// 8 minutes reflects a maximum maintenance window for the coordination server.
TimeToVisible: 8 * time.Minute,
var notInMapPollWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:      tsconst.HealthWarnableNotInMapPoll,
Title:     "Out of sync",
Severity:  SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable},
Text:      StaticMessage("Unable to connect to the Tailscale coordination server to synchronize the state of your tailnet. Peer reachability might degrade over time."),
// 8 minutes reflects a maximum maintenance window for the coordination server.
TimeToVisible: 8 * time.Minute,
}
})
// noDERPHomeWarnable is a Warnable that warns the user that Tailscale doesn't have a home DERP.
// It is declared with SeverityMedium, ImpactsConnectivity set to true, a
// TimeToVisible of 10 seconds and a StaticMessage text, and names
// NetworkStatusWarnable in its DependsOn list.
var noDERPHomeWarnable = Register(&Warnable{
Code:                "no-derp-home",
Title:               "No home relay server",
Severity:            SeverityMedium,
DependsOn:           []*Warnable{NetworkStatusWarnable},
Text:                StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."),
ImpactsConnectivity: true,
TimeToVisible:       10 * time.Second,
var noDERPHomeWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:                tsconst.HealthWarnableNoDERPHome,
Title:               "No home relay server",
Severity:            SeverityMedium,
DependsOn:           []*Warnable{NetworkStatusWarnable},
Text:                StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."),
ImpactsConnectivity: true,
TimeToVisible:       10 * time.Second,
}
})
// noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server.
// The Text function names the relay by args[ArgDERPRegionName] when that is
// non-empty and falls back to args[ArgDERPRegionID] otherwise. The Warnable is
// declared with SeverityMedium, ImpactsConnectivity set to true and a
// TimeToVisible of 10 seconds.
var noDERPConnectionWarnable = Register(&Warnable{
Code:     "no-derp-connection",
Title:    "Relay server unavailable",
Severity: SeverityMedium,
DependsOn: []*Warnable{
NetworkStatusWarnable,
var noDERPConnectionWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:     tsconst.HealthWarnableNoDERPConnection,
Title:    "Relay server unavailable",
Severity: SeverityMedium,
DependsOn: []*Warnable{
NetworkStatusWarnable,
// Technically noDERPConnectionWarnable could be used to warn about
// failure to connect to a specific DERP server (e.g. your home is derp1
// but you're trying to connect to a peer's derp4 and are unable) but as
// of 2024-09-25 we only use this for connecting to your home DERP, so
// we depend on noDERPHomeWarnable which is the ability to figure out
// what your DERP home even is.
noDERPHomeWarnable,
},
Text: func(args Args) string {
if n := args[ArgDERPRegionName]; n != "" {
return fmt.Sprintf("Tailscale could not connect to the '%s' relay server. Your Internet connection might be down, or the server might be temporarily unavailable.", n)
} else {
return fmt.Sprintf("Tailscale could not connect to the relay server with ID '%s'. Your Internet connection might be down, or the server might be temporarily unavailable.", args[ArgDERPRegionID])
}
},
ImpactsConnectivity: true,
TimeToVisible:       10 * time.Second,
// Technically noDERPConnectionWarnable could be used to warn about
// failure to connect to a specific DERP server (e.g. your home is derp1
// but you're trying to connect to a peer's derp4 and are unable) but as
// of 2024-09-25 we only use this for connecting to your home DERP, so
// we depend on noDERPHomeWarnable which is the ability to figure out
// what your DERP home even is.
noDERPHomeWarnable,
},
Text: func(args Args) string {
if n := args[ArgDERPRegionName]; n != "" {
return fmt.Sprintf("Tailscale could not connect to the '%s' relay server. Your Internet connection might be down, or the server might be temporarily unavailable.", n)
} else {
return fmt.Sprintf("Tailscale could not connect to the relay server with ID '%s'. Your Internet connection might be down, or the server might be temporarily unavailable.", args[ArgDERPRegionID])
}
},
ImpactsConnectivity: true,
TimeToVisible:       10 * time.Second,
}
})
// derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't
// heard from the home DERP region for a while.
// The Text function reports args[ArgDuration] together with
// args[ArgDERPRegionName] when that is non-empty, or args[ArgDERPRegionID]
// otherwise. The Warnable is declared with SeverityMedium.
var derpTimeoutWarnable = Register(&Warnable{
Code:     "derp-timed-out",
Title:    "Relay server timed out",
Severity: SeverityMedium,
DependsOn: []*Warnable{
NetworkStatusWarnable,
noDERPConnectionWarnable, // don't warn about it being stalled if we're not connected
noDERPHomeWarnable,       // same reason as noDERPConnectionWarnable's dependency
},
Text: func(args Args) string {
if n := args[ArgDERPRegionName]; n != "" {
return fmt.Sprintf("Tailscale hasn't heard from the '%s' relay server in %v. The server might be temporarily unavailable, or your Internet connection might be down.", n, args[ArgDuration])
} else {
return fmt.Sprintf("Tailscale hasn't heard from the home relay server (region ID '%v') in %v. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgDERPRegionID], args[ArgDuration])
}
},
var derpTimeoutWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:     tsconst.HealthWarnableDERPTimedOut,
Title:    "Relay server timed out",
Severity: SeverityMedium,
DependsOn: []*Warnable{
NetworkStatusWarnable,
noDERPConnectionWarnable, // don't warn about it being stalled if we're not connected
noDERPHomeWarnable,       // same reason as noDERPConnectionWarnable's dependency
},
Text: func(args Args) string {
if n := args[ArgDERPRegionName]; n != "" {
return fmt.Sprintf("Tailscale hasn't heard from the '%s' relay server in %v. The server might be temporarily unavailable, or your Internet connection might be down.", n, args[ArgDuration])
} else {
return fmt.Sprintf("Tailscale hasn't heard from the home relay server (region ID '%v') in %v. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgDERPRegionID], args[ArgDuration])
}
},
}
})
// derpRegionErrorWarnable is a Warnable that warns the user that a DERP region is reporting an issue.
// It is declared with SeverityLow and names NetworkStatusWarnable in its
// DependsOn list; the Text function formats args[ArgDERPRegionID] and
// args[ArgError] into the message.
var derpRegionErrorWarnable = Register(&Warnable{
Code:      "derp-region-error",
Title:     "Relay server error",
Severity:  SeverityLow,
DependsOn: []*Warnable{NetworkStatusWarnable},
Text: func(args Args) string {
return fmt.Sprintf("The relay server #%v is reporting an issue: %v", args[ArgDERPRegionID], args[ArgError])
},
var derpRegionErrorWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:      tsconst.HealthWarnableDERPRegionError,
Title:     "Relay server error",
Severity:  SeverityLow,
DependsOn: []*Warnable{NetworkStatusWarnable},
Text: func(args Args) string {
return fmt.Sprintf("The relay server #%v is reporting an issue: %v", args[ArgDERPRegionID], args[ArgError])
},
}
})
// noUDP4BindWarnable is a Warnable that warns the user that Tailscale couldn't listen for incoming UDP connections.
// It is declared with SeverityMedium, ImpactsConnectivity set to true and a
// StaticMessage text, and names NetworkStatusWarnable and IPNStateWarnable in
// its DependsOn list.
var noUDP4BindWarnable = Register(&Warnable{
Code:                "no-udp4-bind",
Title:               "NAT traversal setup failure",
Severity:            SeverityMedium,
DependsOn:           []*Warnable{NetworkStatusWarnable, IPNStateWarnable},
Text:                StaticMessage("Tailscale couldn't listen for incoming UDP connections."),
ImpactsConnectivity: true,
var noUDP4BindWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:                tsconst.HealthWarnableNoUDP4Bind,
Title:               "NAT traversal setup failure",
Severity:            SeverityMedium,
DependsOn:           []*Warnable{NetworkStatusWarnable, IPNStateWarnable},
Text:                StaticMessage("Tailscale couldn't listen for incoming UDP connections."),
ImpactsConnectivity: true,
}
})
// mapResponseTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't received a network map from the coordination server in a while.
// It is declared with SeverityMedium and names NetworkStatusWarnable and
// IPNStateWarnable in its DependsOn list; the Text function formats
// args[ArgDuration] into the message.
var mapResponseTimeoutWarnable = Register(&Warnable{
Code:      "mapresponse-timeout",
Title:     "Network map response timeout",
Severity:  SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable},
Text: func(args Args) string {
return fmt.Sprintf("Tailscale hasn't received a network map from the coordination server in %s.", args[ArgDuration])
},
var mapResponseTimeoutWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:      tsconst.HealthWarnableMapResponseTimeout,
Title:     "Network map response timeout",
Severity:  SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable},
Text: func(args Args) string {
return fmt.Sprintf("Tailscale hasn't received a network map from the coordination server in %s.", args[ArgDuration])
},
}
})
// tlsConnectionFailedWarnable is a Warnable that warns the user that Tailscale could not establish an encrypted connection with a server.
// It is declared with SeverityMedium and names NetworkStatusWarnable in its
// DependsOn list. The Text function formats args[ArgServerName] with %q,
// which already wraps the name in double quotes, so the format string does
// not add quotes of its own.
var tlsConnectionFailedWarnable = Register(&Warnable{
Code:      "tls-connection-failed",
Title:     "Encrypted connection failed",
Severity:  SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable},
Text: func(args Args) string {
return fmt.Sprintf("Tailscale could not establish an encrypted connection with %q: %v", args[ArgServerName], args[ArgError])
},
var tlsConnectionFailedWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:      tsconst.HealthWarnableTLSConnectionFailed,
Title:     "Encrypted connection failed",
Severity:  SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable},
Text: func(args Args) string {
return fmt.Sprintf("Tailscale could not establish an encrypted connection with %q: %v", args[ArgServerName], args[ArgError])
},
}
})
// magicsockReceiveFuncWarnable is a Warnable that warns the user that one of the Magicsock functions is not running.
// It is declared with SeverityMedium; the Text function formats
// args[ArgMagicsockFunctionName] into the message.
var magicsockReceiveFuncWarnable = Register(&Warnable{
Code:     "magicsock-receive-func-error",
Title:    "MagicSock function not running",
Severity: SeverityMedium,
Text: func(args Args) string {
return fmt.Sprintf("The MagicSock function %s is not running. You might experience connectivity issues.", args[ArgMagicsockFunctionName])
},
var magicsockReceiveFuncWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:     tsconst.HealthWarnableMagicsockReceiveFuncError,
Title:    "MagicSock function not running",
Severity: SeverityMedium,
Text: func(args Args) string {
return fmt.Sprintf("The MagicSock function %s is not running. You might experience connectivity issues.", args[ArgMagicsockFunctionName])
},
}
})
// testWarnable is a Warnable that is used within this package for testing purposes only.
// It is declared with SeverityLow; the Text function returns args[ArgError]
// unchanged.
var testWarnable = Register(&Warnable{
Code:     "test-warnable",
Title:    "Test warnable",
Severity: SeverityLow,
Text: func(args Args) string {
return args[ArgError]
},
var testWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:     tsconst.HealthWarnableTestWarnable,
Title:    "Test warnable",
Severity: SeverityLow,
Text: func(args Args) string {
return args[ArgError]
},
}
})
// applyDiskConfigWarnable is a Warnable that warns the user that there was an error applying the envknob config stored on disk.
// It is declared with SeverityMedium; the Text function formats args[ArgError]
// into the message.
var applyDiskConfigWarnable = Register(&Warnable{
Code:     "apply-disk-config",
Title:    "Could not apply configuration",
Severity: SeverityMedium,
Text: func(args Args) string {
return fmt.Sprintf("An error occurred applying the Tailscale envknob configuration stored on disk: %v", args[ArgError])
},
})
// controlHealthWarnable is a Warnable that warns the user that the coordination server is reporting a health issue.
var controlHealthWarnable = Register(&Warnable{
Code:     "control-health",
Title:    "Coordination server reports an issue",
Severity: SeverityMedium,
Text: func(args Args) string {
return fmt.Sprintf("The coordination server is reporting an health issue: %v", args[ArgError])
},
var applyDiskConfigWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:     tsconst.HealthWarnableApplyDiskConfig,
Title:    "Could not apply configuration",
Severity: SeverityMedium,
Text: func(args Args) string {
return fmt.Sprintf("An error occurred applying the Tailscale envknob configuration stored on disk: %v", args[ArgError])
},
}
})
// warmingUpWarnableDuration is the duration for which the warmingUpWarnable is reported by the backend after the user
@@ -255,9 +290,11 @@ const warmingUpWarnableDuration = 5 * time.Second
// warmingUpWarnable is a Warnable that is reported by the backend when it is starting up, for a maximum time of
// warmingUpWarnableDuration. The GUIs use the presence of this Warnable to prevent showing any other warnings until
// the backend is fully started.
// It is declared with SeverityLow and a StaticMessage text.
var warmingUpWarnable = Register(&Warnable{
Code:     "warming-up",
Title:    "Tailscale is starting",
Severity: SeverityLow,
Text:     StaticMessage("Tailscale is starting. Please wait."),
var warmingUpWarnable = condRegister(func() *Warnable {
return &Warnable{
Code:     tsconst.HealthWarnableWarmingUp,
Title:    "Tailscale is starting",
Severity: SeverityLow,
Text:     StaticMessage("Tailscale is starting. Please wait."),
}
})