From a49e9ac28547b91163a67549c60a74eb12de5b28 Mon Sep 17 00:00:00 2001 From: jiuker Date: Wed, 10 Jul 2024 15:43:58 +0800 Subject: [PATCH 01/12] refactor refactor --- main.go | 235 +++++++++++++++++++++++++++++++++++------------------ metrics.go | 4 + 2 files changed, 159 insertions(+), 80 deletions(-) diff --git a/main.go b/main.go index a0ad09f..7e35b5e 100644 --- a/main.go +++ b/main.go @@ -31,11 +31,13 @@ import ( "net/http/pprof" "net/url" "os" + "os/signal" "sort" "strconv" "strings" "sync" "sync/atomic" + "syscall" "time" "github.com/dustin/go-humanize" @@ -63,17 +65,18 @@ const ( ) var ( - globalQuietEnabled bool - globalDebugEnabled bool - globalLoggingEnabled bool - globalTrace string - globalJSONEnabled bool - globalConsoleDisplay bool - globalErrorsOnly bool - globalStatusCodes []int - globalConnStats []*ConnStats - log2 *logrus.Logger - globalHostBalance string + globalQuietEnabled bool + globalDebugEnabled bool + globalLoggingEnabled bool + globalTrace string + globalJSONEnabled bool + globalConsoleDisplay bool + globalErrorsOnly bool + globalStatusCodes []int + globalConnStatsRWMutex sync.RWMutex + globalConnStats []*ConnStats + log2 *logrus.Logger + globalHostBalance string ) const ( @@ -305,14 +308,20 @@ func getHealthCheckURL(endpoint, healthCheckPath string, healthCheckPort int) (s } // healthCheck - background routine which checks if a backend is up or down. 
-func (b *Backend) healthCheck() { +func (b *Backend) healthCheck(ctxt context.Context) { + ticker := time.NewTicker(b.healthCheckDuration) + defer ticker.Stop() for { - err := b.doHealthCheck() - if err != nil { - console.Fatalln(err) - } - time.Sleep(b.healthCheckDuration) + select { + case <-ctxt.Done(): + return + case <-ticker.C: + err := b.doHealthCheck() + if err != nil { + console.Errorln(err) + } + } } } @@ -393,6 +402,9 @@ func (b *Backend) updateCallStats(t shortTraceMsg) { b.Stats.MinLatency = time.Duration(int64(math.Min(float64(b.Stats.MinLatency), float64(t.CallStats.Latency)))) b.Stats.Rx += int64(t.CallStats.Rx) b.Stats.Tx += int64(t.CallStats.Tx) + // automatically update the global stats + // Read/Write Lock is not required here + globalConnStatsRWMutex.RLock() for _, c := range globalConnStats { if c == nil { continue @@ -407,38 +419,105 @@ func (b *Backend) updateCallStats(t shortTraceMsg) { c.setTotalCalls(b.Stats.TotCalls) c.setTotalCallFailures(b.Stats.TotCallFailures) } + globalConnStatsRWMutex.RUnlock() } type multisite struct { - sites []*site + sites []*site + healthCanceler context.CancelFunc + rwLocker sync.RWMutex +} + +func (m *multisite) renewSite(ctx *cli.Context, healthCheckPath string, healthReadCheckPath string, healthCheckPort int, healthCheckDuration, healthCheckTimeout time.Duration) { + ctxt, cancel := context.WithCancel(context.Background()) + var sites []*site + for i, siteStrs := range ctx.Args() { + if i == len(ctx.Args())-1 { + healthCheckPath = healthReadCheckPath + } + site := configureSite(ctxt, ctx, i+1, strings.Split(siteStrs, ","), healthCheckPath, healthCheckPort, healthCheckDuration, healthCheckTimeout) + sites = append(sites, site) + } + m.rwLocker.Lock() + defer m.rwLocker.Unlock() + m.sites = sites + // cancel the previous health checker + if m.healthCanceler != nil { + m.healthCanceler() + } + m.healthCanceler = cancel +} +func (m *multisite) displayUI(show bool) { + if !show { + return + } + go func() { + 
// Clear screen before we start the table UI + clearScreen() + + ticker := time.NewTicker(500 * time.Millisecond) + for range ticker.C { + m.populate() + } + }() } -func (m *multisite) populate(cellText [][]string) { +func (m *multisite) populate() { + m.rwLocker.RLock() + defer m.rwLocker.RUnlock() + + dspOrder := []col{colGreen} // Header + for i := 0; i < len(m.sites); i++ { + for range m.sites[i].backends { + dspOrder = append(dspOrder, colGrey) + } + } + var printColors []*color.Color + for _, c := range dspOrder { + printColors = append(printColors, getPrintCol(c)) + } + + tbl := console.NewTable(printColors, []bool{ + false, false, false, false, false, false, + false, false, false, false, false, + }, 0) + + cellText := make([][]string, len(dspOrder)) + for i := range dspOrder { + cellText[i] = make([]string, len(headers)) + } + cellText[0] = headers for i, site := range m.sites { for j, b := range site.backends { + b.Stats.Lock() minLatency := "0s" maxLatency := "0s" if b.Stats.MaxLatency > 0 { minLatency = fmt.Sprintf("%2s", b.Stats.MinLatency.Round(time.Microsecond)) maxLatency = fmt.Sprintf("%2s", b.Stats.MaxLatency.Round(time.Microsecond)) } - cellText[i*len(site.backends)+j][0] = humanize.Ordinal(b.siteNumber) - cellText[i*len(site.backends)+j][1] = b.endpoint - cellText[i*len(site.backends)+j][2] = b.getServerStatus() - cellText[i*len(site.backends)+j][3] = strconv.FormatInt(b.Stats.TotCalls, 10) - cellText[i*len(site.backends)+j][4] = strconv.FormatInt(b.Stats.TotCallFailures, 10) - cellText[i*len(site.backends)+j][5] = humanize.IBytes(uint64(b.Stats.Rx)) - cellText[i*len(site.backends)+j][6] = humanize.IBytes(uint64(b.Stats.Tx)) - cellText[i*len(site.backends)+j][7] = b.Stats.CumDowntime.Round(time.Microsecond).String() - cellText[i*len(site.backends)+j][8] = b.Stats.LastDowntime.Round(time.Microsecond).String() - cellText[i*len(site.backends)+j][9] = minLatency - cellText[i*len(site.backends)+j][10] = maxLatency + 
cellText[i*len(site.backends)+j+1][0] = humanize.Ordinal(b.siteNumber) + cellText[i*len(site.backends)+j+1][1] = b.endpoint + cellText[i*len(site.backends)+j+1][2] = b.getServerStatus() + cellText[i*len(site.backends)+j+1][3] = strconv.FormatInt(b.Stats.TotCalls, 10) + cellText[i*len(site.backends)+j+1][4] = strconv.FormatInt(b.Stats.TotCallFailures, 10) + cellText[i*len(site.backends)+j+1][5] = humanize.IBytes(uint64(b.Stats.Rx)) + cellText[i*len(site.backends)+j+1][6] = humanize.IBytes(uint64(b.Stats.Tx)) + cellText[i*len(site.backends)+j+1][7] = b.Stats.CumDowntime.Round(time.Microsecond).String() + cellText[i*len(site.backends)+j+1][8] = b.Stats.LastDowntime.Round(time.Microsecond).String() + cellText[i*len(site.backends)+j+1][9] = minLatency + cellText[i*len(site.backends)+j+1][10] = maxLatency + b.Stats.Unlock() } } + console.RewindLines(len(cellText) + 2) + tbl.DisplayTable(cellText) } func (m *multisite) ServeHTTP(w http.ResponseWriter, r *http.Request) { w.Header().Set("Server", "SideKick") // indicate sidekick is serving + m.rwLocker.RLock() + defer m.rwLocker.RUnlock() for _, s := range m.sites { if s.Online() { if r.URL.Path == healthPath { @@ -766,7 +845,7 @@ func IsLoopback(addr string) bool { return net.ParseIP(host).IsLoopback() } -func configureSite(ctx *cli.Context, siteNum int, siteStrs []string, healthCheckPath string, healthCheckPort int, healthCheckDuration, healthCheckTimeout time.Duration) *site { +func configureSite(ctxt context.Context, ctx *cli.Context, siteNum int, siteStrs []string, healthCheckPath string, healthCheckPort int, healthCheckDuration, healthCheckTimeout time.Duration) *site { var endpoints []string if ellipses.HasEllipses(siteStrs...) 
{ @@ -790,6 +869,27 @@ func configureSite(ctx *cli.Context, siteNum int, siteStrs []string, healthCheck var backends []*Backend var prevScheme string var transport http.RoundTripper + globalConnStatsRWMutex.Lock() + defer globalConnStatsRWMutex.Unlock() + // reset connstats + globalConnStats = []*ConnStats{} + if len(endpoints) == 1 { + // guess it is LB config address + target, err := url.Parse(endpoints[0]) + if err != nil { + console.Fatalln(fmt.Errorf("Unable to parse input arg %s: %s", endpoints[0], err)) + } + hostName := target.Hostname() + ips, err := net.LookupHost(hostName) + if err != nil { + console.Fatalln(fmt.Errorf("Unable to lookup host %s", hostName)) + } + // set the new endpoints + endpoints = []string{} + for _, ip := range ips { + endpoints = append(endpoints, strings.Replace(target.String(), hostName, ip, 1)) + } + } for _, endpoint := range endpoints { endpoint = strings.TrimSuffix(endpoint, slashSeparator) target, err := url.Parse(endpoint) @@ -843,7 +943,7 @@ func configureSite(ctx *cli.Context, siteNum int, siteStrs []string, healthCheck backend := &Backend{siteNum, endpoint, proxy, &http.Client{ Transport: proxy.Transport, }, 0, healthCheckURL, healthCheckDuration, healthCheckTimeout, &stats} - go backend.healthCheck() + go backend.healthCheck(ctxt) proxy.ErrorHandler = backend.ErrorHandler backends = append(backends, backend) globalConnStats = append(globalConnStats, newConnStats(endpoint)) @@ -922,16 +1022,6 @@ func sidekickMain(ctx *cli.Context) { healthReadCheckPath = slashSeparator + healthReadCheckPath } - var sites []*site - for i, siteStrs := range ctx.Args() { - if i == len(ctx.Args())-1 { - healthCheckPath = healthReadCheckPath - } - - site := configureSite(ctx, i+1, strings.Split(siteStrs, ","), healthCheckPath, healthCheckPort, healthCheckDuration, healthCheckTimeout) - sites = append(sites, site) - } - if globalConsoleDisplay { console.SetColor("LogMsgType", color.New(color.FgHiMagenta)) console.SetColor("TraceMsgType", 
color.New(color.FgYellow)) @@ -960,42 +1050,9 @@ func sidekickMain(ctx *cli.Context) { console.Fatalln(err) } - m := &multisite{sites} - if !globalConsoleDisplay { - dspOrder := []col{colGreen} // Header - for i := 0; i < len(sites); i++ { - for range sites[i].backends { - dspOrder = append(dspOrder, colGrey) - } - } - var printColors []*color.Color - for _, c := range dspOrder { - printColors = append(printColors, getPrintCol(c)) - } - - tbl := console.NewTable(printColors, []bool{ - false, false, false, false, false, false, - false, false, false, false, false, - }, 0) - - cellText := make([][]string, len(dspOrder)) - for i := range dspOrder { - cellText[i] = make([]string, len(headers)) - } - cellText[0] = headers - - go func() { - // Clear screen before we start the table UI - clearScreen() - - ticker := time.NewTicker(500 * time.Millisecond) - for range ticker.C { - m.populate(cellText[1:]) - console.RewindLines(len(cellText) + 2) - tbl.DisplayTable(cellText) - } - }() - } + m := &multisite{} + m.renewSite(ctx, healthCheckPath, healthReadCheckPath, healthCheckPort, healthCheckDuration, healthCheckTimeout) + m.displayUI(!globalConsoleDisplay) router.PathPrefix(slashSeparator).Handler(m) server := &http.Server{ @@ -1017,8 +1074,26 @@ func sidekickMain(ctx *cli.Context) { } server.TLSConfig = tlsConfig } - if err := server.ListenAndServe(); err != nil { - console.Fatalln(err) + go func() { + if err := server.ListenAndServe(); err != nil { + console.Fatalln(err) + } + }() + osSignalChannel := make(chan os.Signal, 1) + signal.Notify( + osSignalChannel, + syscall.SIGTERM, + syscall.SIGINT, + syscall.SIGHUP, + ) + for signal := range osSignalChannel { + switch signal { + case syscall.SIGHUP: + m.renewSite(ctx, healthCheckPath, healthReadCheckPath, healthCheckPort, healthCheckDuration, healthCheckTimeout) + default: + console.Infof("caught signal '%s'\n", signal) + os.Exit(1) + } } } diff --git a/metrics.go b/metrics.go index 15a3baf..3c6a376 100644 --- a/metrics.go 
+++ b/metrics.go @@ -50,6 +50,10 @@ func (c *sidekickCollector) Describe(ch chan<- *prometheus.Desc) { // Collect is called by the Prometheus registry when collecting metrics. func (c *sidekickCollector) Collect(ch chan<- prometheus.Metric) { + // automatically read the global stats + // Read/Write Lock is not required here + globalConnStatsRWMutex.RLock() + defer globalConnStatsRWMutex.RUnlock() for _, c := range globalConnStats { if c == nil { continue From b0739f2524895f25662b4d20a4e8f6f6de856555 Mon Sep 17 00:00:00 2001 From: jiuker Date: Wed, 10 Jul 2024 15:48:14 +0800 Subject: [PATCH 02/12] lint lint --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index 7e35b5e..d70695b 100644 --- a/main.go +++ b/main.go @@ -312,7 +312,6 @@ func (b *Backend) healthCheck(ctxt context.Context) { ticker := time.NewTicker(b.healthCheckDuration) defer ticker.Stop() for { - select { case <-ctxt.Done(): return @@ -447,6 +446,7 @@ func (m *multisite) renewSite(ctx *cli.Context, healthCheckPath string, healthRe } m.healthCanceler = cancel } + func (m *multisite) displayUI(show bool) { if !show { return From 1a634ecf3fbbacb58759fc52c420f058231acfbd Mon Sep 17 00:00:00 2001 From: jiuker Date: Wed, 10 Jul 2024 22:29:15 +0800 Subject: [PATCH 03/12] apply suggestion apply suggestion --- main.go | 5 +++-- metrics.go | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/main.go b/main.go index d70695b..1216ef7 100644 --- a/main.go +++ b/main.go @@ -404,7 +404,9 @@ func (b *Backend) updateCallStats(t shortTraceMsg) { // automatically update the global stats // Read/Write Lock is not required here globalConnStatsRWMutex.RLock() - for _, c := range globalConnStats { + connStats := globalConnStats + globalConnStatsRWMutex.RUnlock() + for _, c := range connStats { if c == nil { continue } @@ -418,7 +420,6 @@ func (b *Backend) updateCallStats(t shortTraceMsg) { c.setTotalCalls(b.Stats.TotCalls) 
c.setTotalCallFailures(b.Stats.TotCallFailures) } - globalConnStatsRWMutex.RUnlock() } type multisite struct { diff --git a/metrics.go b/metrics.go index 3c6a376..278eb1c 100644 --- a/metrics.go +++ b/metrics.go @@ -53,8 +53,9 @@ func (c *sidekickCollector) Collect(ch chan<- prometheus.Metric) { // automatically read the global stats // Read/Write Lock is not required here globalConnStatsRWMutex.RLock() - defer globalConnStatsRWMutex.RUnlock() - for _, c := range globalConnStats { + connStats := globalConnStats + globalConnStatsRWMutex.RUnlock() + for _, c := range connStats { if c == nil { continue } From ccf6d6d83a99847d83cada08827fb311249d582e Mon Sep 17 00:00:00 2001 From: jiuker Date: Thu, 11 Jul 2024 09:01:44 +0800 Subject: [PATCH 04/12] add flags add flags --- main.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 1216ef7..8d3590d 100644 --- a/main.go +++ b/main.go @@ -739,7 +739,7 @@ func clientTransport(ctx *cli.Context, enableTLS bool) http.RoundTripper { tr.TLSClientConfig = &tls.Config{ RootCAs: getCertPool(ctx.GlobalString("cacert")), Certificates: getCertKeyPair(ctx.GlobalString("client-cert"), ctx.GlobalString("client-key")), - InsecureSkipVerify: ctx.GlobalBool("insecure"), + InsecureSkipVerify: ctx.GlobalBool("insecure") || ctx.GlobalBool("rr-dns-mode"), // Can't use SSLv3 because of POODLE and BEAST // Can't use TLSv1.0 because of POODLE and BEAST using CBC cipher // Can't use TLSv1.1 because of RC4 cipher usage @@ -874,7 +874,7 @@ func configureSite(ctxt context.Context, ctx *cli.Context, siteNum int, siteStrs defer globalConnStatsRWMutex.Unlock() // reset connstats globalConnStats = []*ConnStats{} - if len(endpoints) == 1 { + if len(endpoints) == 1 && ctx.GlobalBool("rr-dns-mode") { // guess it is LB config address target, err := url.Parse(endpoints[0]) if err != nil { @@ -1141,6 +1141,10 @@ func main() { Name: "insecure, i", Usage: "disable TLS certificate verification", }, + cli.BoolFlag{ + 
Name: "rr-dns-mode", + Usage: "enable round-robin DNS mode", + }, cli.BoolFlag{ Name: "log, l", Usage: "enable logging", From 38b291d78399ecabd1a9530c63355672cba782d8 Mon Sep 17 00:00:00 2001 From: jiuker Date: Thu, 11 Jul 2024 16:54:19 +0800 Subject: [PATCH 05/12] apply suggestion apply suggestion --- main.go | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/main.go b/main.go index 8d3590d..d3529fa 100644 --- a/main.go +++ b/main.go @@ -423,9 +423,8 @@ func (b *Backend) updateCallStats(t shortTraceMsg) { } type multisite struct { - sites []*site + sites atomic.Pointer[[]*site] healthCanceler context.CancelFunc - rwLocker sync.RWMutex } func (m *multisite) renewSite(ctx *cli.Context, healthCheckPath string, healthReadCheckPath string, healthCheckPort int, healthCheckDuration, healthCheckTimeout time.Duration) { @@ -438,9 +437,7 @@ func (m *multisite) renewSite(ctx *cli.Context, healthCheckPath string, healthRe site := configureSite(ctxt, ctx, i+1, strings.Split(siteStrs, ","), healthCheckPath, healthCheckPort, healthCheckDuration, healthCheckTimeout) sites = append(sites, site) } - m.rwLocker.Lock() - defer m.rwLocker.Unlock() - m.sites = sites + m.sites.Store(&sites) // cancel the previous health checker if m.healthCanceler != nil { m.healthCanceler() @@ -464,12 +461,11 @@ func (m *multisite) displayUI(show bool) { } func (m *multisite) populate() { - m.rwLocker.RLock() - defer m.rwLocker.RUnlock() + sites := *m.sites.Load() dspOrder := []col{colGreen} // Header - for i := 0; i < len(m.sites); i++ { - for range m.sites[i].backends { + for i := 0; i < len(sites); i++ { + for range sites[i].backends { dspOrder = append(dspOrder, colGrey) } } @@ -488,7 +484,7 @@ func (m *multisite) populate() { cellText[i] = make([]string, len(headers)) } cellText[0] = headers - for i, site := range m.sites { + for i, site := range sites { for j, b := range site.backends { b.Stats.Lock() minLatency := "0s" @@ -497,17 
+493,18 @@ func (m *multisite) populate() { minLatency = fmt.Sprintf("%2s", b.Stats.MinLatency.Round(time.Microsecond)) maxLatency = fmt.Sprintf("%2s", b.Stats.MaxLatency.Round(time.Microsecond)) } - cellText[i*len(site.backends)+j+1][0] = humanize.Ordinal(b.siteNumber) - cellText[i*len(site.backends)+j+1][1] = b.endpoint - cellText[i*len(site.backends)+j+1][2] = b.getServerStatus() - cellText[i*len(site.backends)+j+1][3] = strconv.FormatInt(b.Stats.TotCalls, 10) - cellText[i*len(site.backends)+j+1][4] = strconv.FormatInt(b.Stats.TotCallFailures, 10) - cellText[i*len(site.backends)+j+1][5] = humanize.IBytes(uint64(b.Stats.Rx)) - cellText[i*len(site.backends)+j+1][6] = humanize.IBytes(uint64(b.Stats.Tx)) - cellText[i*len(site.backends)+j+1][7] = b.Stats.CumDowntime.Round(time.Microsecond).String() - cellText[i*len(site.backends)+j+1][8] = b.Stats.LastDowntime.Round(time.Microsecond).String() - cellText[i*len(site.backends)+j+1][9] = minLatency - cellText[i*len(site.backends)+j+1][10] = maxLatency + index := i*len(site.backends) + j + 1 + cellText[index][0] = humanize.Ordinal(b.siteNumber) + cellText[index][1] = b.endpoint + cellText[index][2] = b.getServerStatus() + cellText[index][3] = strconv.FormatInt(b.Stats.TotCalls, 10) + cellText[index][4] = strconv.FormatInt(b.Stats.TotCallFailures, 10) + cellText[index][5] = humanize.IBytes(uint64(b.Stats.Rx)) + cellText[index][6] = humanize.IBytes(uint64(b.Stats.Tx)) + cellText[index][7] = b.Stats.CumDowntime.Round(time.Microsecond).String() + cellText[index][8] = b.Stats.LastDowntime.Round(time.Microsecond).String() + cellText[index][9] = minLatency + cellText[index][10] = maxLatency b.Stats.Unlock() } } @@ -517,9 +514,7 @@ func (m *multisite) populate() { func (m *multisite) ServeHTTP(w http.ResponseWriter, r *http.Request) { w.Header().Set("Server", "SideKick") // indicate sidekick is serving - m.rwLocker.RLock() - defer m.rwLocker.RUnlock() - for _, s := range m.sites { + for _, s := range *m.sites.Load() { if 
s.Online() { if r.URL.Path == healthPath { // Health check endpoint should return success From 96207534d7084a859178084a0947432a67c9113e Mon Sep 17 00:00:00 2001 From: jiuker Date: Thu, 11 Jul 2024 16:56:59 +0800 Subject: [PATCH 06/12] apply suggestion apply suggestion --- main.go | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/main.go b/main.go index d3529fa..050a811 100644 --- a/main.go +++ b/main.go @@ -493,18 +493,19 @@ func (m *multisite) populate() { minLatency = fmt.Sprintf("%2s", b.Stats.MinLatency.Round(time.Microsecond)) maxLatency = fmt.Sprintf("%2s", b.Stats.MaxLatency.Round(time.Microsecond)) } - index := i*len(site.backends) + j + 1 - cellText[index][0] = humanize.Ordinal(b.siteNumber) - cellText[index][1] = b.endpoint - cellText[index][2] = b.getServerStatus() - cellText[index][3] = strconv.FormatInt(b.Stats.TotCalls, 10) - cellText[index][4] = strconv.FormatInt(b.Stats.TotCallFailures, 10) - cellText[index][5] = humanize.IBytes(uint64(b.Stats.Rx)) - cellText[index][6] = humanize.IBytes(uint64(b.Stats.Tx)) - cellText[index][7] = b.Stats.CumDowntime.Round(time.Microsecond).String() - cellText[index][8] = b.Stats.LastDowntime.Round(time.Microsecond).String() - cellText[index][9] = minLatency - cellText[index][10] = maxLatency + cellText[i*len(site.backends)+j+1] = []string{ + humanize.Ordinal(b.siteNumber), + b.endpoint, + b.getServerStatus(), + strconv.FormatInt(b.Stats.TotCalls, 10), + strconv.FormatInt(b.Stats.TotCallFailures, 10), + humanize.IBytes(uint64(b.Stats.Rx)), + humanize.IBytes(uint64(b.Stats.Tx)), + b.Stats.CumDowntime.Round(time.Microsecond).String(), + b.Stats.LastDowntime.Round(time.Microsecond).String(), + minLatency, + maxLatency, + } b.Stats.Unlock() } } From d9e74582b5e294d4c2117ffbe44cf5f16fcf1efc Mon Sep 17 00:00:00 2001 From: jiuker Date: Thu, 11 Jul 2024 17:00:05 +0800 Subject: [PATCH 07/12] apply suggestion apply suggestion --- main.go | 3 --- 1 file changed, 3 deletions(-) diff 
--git a/main.go b/main.go index 050a811..8caf996 100644 --- a/main.go +++ b/main.go @@ -480,9 +480,6 @@ func (m *multisite) populate() { }, 0) cellText := make([][]string, len(dspOrder)) - for i := range dspOrder { - cellText[i] = make([]string, len(headers)) - } cellText[0] = headers for i, site := range sites { for j, b := range site.backends { From 73af42a0ceb1d587703a09f0bef43a4eea444b08 Mon Sep 17 00:00:00 2001 From: jiuker Date: Thu, 11 Jul 2024 23:18:33 +0800 Subject: [PATCH 08/12] apply suggestion apply suggestion --- main.go | 39 +++++++++++++++------------------------ metrics.go | 7 +------ 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/main.go b/main.go index 8caf996..fd21729 100644 --- a/main.go +++ b/main.go @@ -65,18 +65,17 @@ const ( ) var ( - globalQuietEnabled bool - globalDebugEnabled bool - globalLoggingEnabled bool - globalTrace string - globalJSONEnabled bool - globalConsoleDisplay bool - globalErrorsOnly bool - globalStatusCodes []int - globalConnStatsRWMutex sync.RWMutex - globalConnStats []*ConnStats - log2 *logrus.Logger - globalHostBalance string + globalQuietEnabled bool + globalDebugEnabled bool + globalLoggingEnabled bool + globalTrace string + globalJSONEnabled bool + globalConsoleDisplay bool + globalErrorsOnly bool + globalStatusCodes []int + globalConnStats atomic.Pointer[[]*ConnStats] + log2 *logrus.Logger + globalHostBalance string ) const ( @@ -401,12 +400,7 @@ func (b *Backend) updateCallStats(t shortTraceMsg) { b.Stats.MinLatency = time.Duration(int64(math.Min(float64(b.Stats.MinLatency), float64(t.CallStats.Latency)))) b.Stats.Rx += int64(t.CallStats.Rx) b.Stats.Tx += int64(t.CallStats.Tx) - // automatically update the global stats - // Read/Write Lock is not required here - globalConnStatsRWMutex.RLock() - connStats := globalConnStats - globalConnStatsRWMutex.RUnlock() - for _, c := range connStats { + for _, c := range *globalConnStats.Load() { if c == nil { continue } @@ -863,10 +857,7 @@ func 
configureSite(ctxt context.Context, ctx *cli.Context, siteNum int, siteStrs var backends []*Backend var prevScheme string var transport http.RoundTripper - globalConnStatsRWMutex.Lock() - defer globalConnStatsRWMutex.Unlock() - // reset connstats - globalConnStats = []*ConnStats{} + var connStats []*ConnStats if len(endpoints) == 1 && ctx.GlobalBool("rr-dns-mode") { // guess it is LB config address target, err := url.Parse(endpoints[0]) @@ -940,9 +931,9 @@ func configureSite(ctxt context.Context, ctx *cli.Context, siteNum int, siteStrs go backend.healthCheck(ctxt) proxy.ErrorHandler = backend.ErrorHandler backends = append(backends, backend) - globalConnStats = append(globalConnStats, newConnStats(endpoint)) + connStats = append(connStats, newConnStats(endpoint)) } - + globalConnStats.Store(&connStats) return &site{ backends: backends, } diff --git a/metrics.go b/metrics.go index 278eb1c..33fc200 100644 --- a/metrics.go +++ b/metrics.go @@ -50,12 +50,7 @@ func (c *sidekickCollector) Describe(ch chan<- *prometheus.Desc) { // Collect is called by the Prometheus registry when collecting metrics. func (c *sidekickCollector) Collect(ch chan<- prometheus.Metric) { - // automatically read the global stats - // Read/Write Lock is not required here - globalConnStatsRWMutex.RLock() - connStats := globalConnStats - globalConnStatsRWMutex.RUnlock() - for _, c := range connStats { + for _, c := range *globalConnStats.Load() { if c == nil { continue } From eb29bebcd267a566919ed483fea1a2209f4d5fb7 Mon Sep 17 00:00:00 2001 From: jiuker Date: Thu, 11 Jul 2024 23:24:19 +0800 Subject: [PATCH 09/12] apply suggestion --- main.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/main.go b/main.go index fd21729..807b19a 100644 --- a/main.go +++ b/main.go @@ -702,7 +702,7 @@ func newProxyDialContext(dialTimeout time.Duration) DialContext { // tlsClientSessionCacheSize is the cache size for TLS client sessions.
const tlsClientSessionCacheSize = 100 -func clientTransport(ctx *cli.Context, enableTLS bool) http.RoundTripper { +func clientTransport(ctx *cli.Context, enableTLS bool, hostName string) http.RoundTripper { tr := &http.Transport{ Proxy: http.ProxyFromEnvironment, DialContext: dialContextWithDNSCache(dnsCache, newProxyDialContext(10*time.Second)), @@ -726,13 +726,14 @@ func clientTransport(ctx *cli.Context, enableTLS bool) http.RoundTripper { tr.TLSClientConfig = &tls.Config{ RootCAs: getCertPool(ctx.GlobalString("cacert")), Certificates: getCertKeyPair(ctx.GlobalString("client-cert"), ctx.GlobalString("client-key")), - InsecureSkipVerify: ctx.GlobalBool("insecure") || ctx.GlobalBool("rr-dns-mode"), + InsecureSkipVerify: ctx.GlobalBool("insecure"), // Can't use SSLv3 because of POODLE and BEAST // Can't use TLSv1.0 because of POODLE and BEAST using CBC cipher // Can't use TLSv1.1 because of RC4 cipher usage MinVersion: tls.VersionTLS12, PreferServerCipherSuites: true, ClientSessionCache: tls.NewLRUClientSessionCache(tlsClientSessionCacheSize), + ServerName: hostName, } } @@ -858,13 +859,14 @@ func configureSite(ctxt context.Context, ctx *cli.Context, siteNum int, siteStrs var prevScheme string var transport http.RoundTripper var connStats []*ConnStats + var hostName string if len(endpoints) == 1 && ctx.GlobalBool("rr-dns-mode") { // guess it is LB config address target, err := url.Parse(endpoints[0]) if err != nil { console.Fatalln(fmt.Errorf("Unable to parse input arg %s: %s", endpoints[0], err)) } - hostName := target.Hostname() + hostName = target.Hostname() ips, err := net.LookupHost(hostName) if err != nil { console.Fatalln(fmt.Errorf("Unable to lookup host %s", hostName)) @@ -900,7 +902,7 @@ func configureSite(ctxt context.Context, ctx *cli.Context, siteNum int, siteStrs endpoint, ctx.App.Name)) } if transport == nil { - transport = clientTransport(ctx, target.Scheme == "https") + transport = clientTransport(ctx, target.Scheme == "https", hostName) } // this 
is only used if r.RemoteAddr is localhost which means that // sidekick endpoint being accessed is 127.0.0.x From de72264691256c833c0dd4b010cff45b4eb3017e Mon Sep 17 00:00:00 2001 From: jiuker Date: Tue, 16 Jul 2024 09:43:21 +0800 Subject: [PATCH 10/12] metric --- http-tracer.go | 11 +++++++---- main.go | 5 ++++- metrics.go | 35 ++++++++++++++++++++++++++++------- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/http-tracer.go b/http-tracer.go index bf9eea6..abcb07b 100644 --- a/http-tracer.go +++ b/http-tracer.go @@ -177,13 +177,13 @@ func (r *recordRequest) Data() []byte { return BodyPlaceHolder } -func traceHealthCheckReq(req *http.Request, resp *http.Response, reqTime, respTime time.Time, backend *Backend) { - ti := InternalTrace(req, resp, reqTime, respTime, backend.endpoint) +func traceHealthCheckReq(req *http.Request, resp *http.Response, reqTime, respTime time.Time, backend *Backend, err error) { + ti := InternalTrace(req, resp, reqTime, respTime, backend.endpoint, err) doTrace(ti, backend) } // InternalTrace returns trace for sidekick http requests -func InternalTrace(req *http.Request, resp *http.Response, reqTime, respTime time.Time, endpoint string) TraceInfo { +func InternalTrace(req *http.Request, resp *http.Response, reqTime, respTime time.Time, endpoint string, healthError error) TraceInfo { t := TraceInfo{} t.NodeName = endpoint reqHeaders := req.Header.Clone() @@ -216,7 +216,8 @@ func InternalTrace(req *http.Request, resp *http.Response, reqTime, respTime tim t.RespInfo = rs t.CallStats = traceCallStats{ - Latency: rs.Time.Sub(rq.Time), + Latency: rs.Time.Sub(rq.Time), + HealthError: healthError, } t.Type = TraceMsgType @@ -411,6 +412,7 @@ func shortTrace(t TraceInfo) shortTraceMsg { s.CallStats.Latency = t.CallStats.Latency s.CallStats.Rx = t.CallStats.Rx s.CallStats.Tx = t.CallStats.Tx + s.CallStats.HealthError = t.CallStats.HealthError s.Path = t.ReqInfo.Path s.Query = t.ReqInfo.RawQuery s.Method = t.ReqInfo.Method @@ -471,6 
+473,7 @@ type traceCallStats struct { Tx int `json:"tx"` Latency time.Duration `json:"latency"` TimeToFirstByte time.Duration `json:"timetofirstbyte"` + HealthError error `json:"healthError"` } // traceRequestInfo represents trace of http request diff --git a/main.go b/main.go index 807b19a..f6a9df0 100644 --- a/main.go +++ b/main.go @@ -374,7 +374,7 @@ func (b *Backend) doHealthCheck() error { } if globalTrace != "application" { if resp != nil { - traceHealthCheckReq(req, resp, reqTime, respTime, b) + traceHealthCheckReq(req, resp, reqTime, respTime, b, err) } } @@ -413,6 +413,9 @@ func (b *Backend) updateCallStats(t shortTraceMsg) { c.setOutputBytes(b.Stats.Tx) c.setTotalCalls(b.Stats.TotCalls) c.setTotalCallFailures(b.Stats.TotCallFailures) + if t.CallStats.HealthError != nil { + c.addHealthErrorCounts(1) + } } } diff --git a/metrics.go b/metrics.go index 33fc200..5f56f4e 100644 --- a/metrics.go +++ b/metrics.go @@ -92,6 +92,16 @@ func (c *sidekickCollector) Collect(ch chan<- prometheus.Metric) { float64(c.getTotalOutputBytes()), c.endpoint, ) + + ch <- prometheus.MustNewConstMetric( + prometheus.NewDesc( + prometheus.BuildFQName("sidekick", "health", "error_counts"), + "Total number of health check errors", + []string{"endpoint"}, nil), + prometheus.CounterValue, + float64(c.getHealthErrorCount()), + c.endpoint, + ) } } @@ -119,13 +129,14 @@ func metricsHandler() (http.Handler, error) { // ConnStats - statistics on backend type ConnStats struct { - endpoint string - totalInputBytes atomic.Uint64 - totalOutputBytes atomic.Uint64 - totalCalls atomic.Uint64 - totalFailedCalls atomic.Uint64 - minLatency atomic.Duration - maxLatency atomic.Duration + endpoint string + totalInputBytes atomic.Uint64 + totalOutputBytes atomic.Uint64 + totalCalls atomic.Uint64 + totalFailedCalls atomic.Uint64 + minLatency atomic.Duration + maxLatency atomic.Duration + healthErrorCounts atomic.Uint64 } // Store current total input bytes @@ -133,6 +144,11 @@ func (s *ConnStats) 
setInputBytes(n int64) { s.totalInputBytes.Store(uint64(n)) } +// Add current health error count +func (s *ConnStats) addHealthErrorCounts(n int64) { + s.healthErrorCounts.Add(uint64(n)) +} + // Store current total output bytes func (s *ConnStats) setOutputBytes(n int64) { s.totalOutputBytes.Store(uint64(n)) @@ -168,6 +184,11 @@ func (s *ConnStats) getTotalOutputBytes() uint64 { return s.totalOutputBytes.Load() } +// Return total health error count +func (s *ConnStats) getHealthErrorCount() uint64 { + return s.healthErrorCounts.Load() +} + // Prepare new ConnStats structure func newConnStats(endpoint string) *ConnStats { return &ConnStats{endpoint: endpoint} From 3de05cbcd370c6d2a78ecb03e435db2b519ffd03 Mon Sep 17 00:00:00 2001 From: jiuker Date: Tue, 16 Jul 2024 09:46:13 +0800 Subject: [PATCH 11/12] log log --- main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/main.go b/main.go index f6a9df0..1a028ed 100644 --- a/main.go +++ b/main.go @@ -864,6 +864,7 @@ func configureSite(ctxt context.Context, ctx *cli.Context, siteNum int, siteStrs var connStats []*ConnStats var hostName string if len(endpoints) == 1 && ctx.GlobalBool("rr-dns-mode") { + console.Infof("RR DNS mode enabled, using %s as hostname", endpoints[0]) // guess it is LB config address target, err := url.Parse(endpoints[0]) if err != nil { From c087ada1bced0cf5048e73ee2f5479aace742375 Mon Sep 17 00:00:00 2001 From: jiuker Date: Tue, 16 Jul 2024 09:55:27 +0800 Subject: [PATCH 12/12] add opts add opts --- main.go | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/main.go b/main.go index 1a028ed..10bc6f3 100644 --- a/main.go +++ b/main.go @@ -424,14 +424,22 @@ type multisite struct { healthCanceler context.CancelFunc } -func (m *multisite) renewSite(ctx *cli.Context, healthCheckPath string, healthReadCheckPath string, healthCheckPort int, healthCheckDuration, healthCheckTimeout time.Duration) { +type healthCheckOptions struct { + healthCheckPath string + 
healthReadCheckPath string + healthCheckPort int + healthCheckDuration time.Duration + healthCheckTimeout time.Duration +} + +func (m *multisite) renewSite(ctx *cli.Context, opts healthCheckOptions) { ctxt, cancel := context.WithCancel(context.Background()) var sites []*site for i, siteStrs := range ctx.Args() { if i == len(ctx.Args())-1 { - healthCheckPath = healthReadCheckPath + opts.healthCheckPath = opts.healthReadCheckPath } - site := configureSite(ctxt, ctx, i+1, strings.Split(siteStrs, ","), healthCheckPath, healthCheckPort, healthCheckDuration, healthCheckTimeout) + site := configureSite(ctxt, ctx, i+1, strings.Split(siteStrs, ","), opts) sites = append(sites, site) } m.sites.Store(&sites) @@ -837,7 +845,7 @@ func IsLoopback(addr string) bool { return net.ParseIP(host).IsLoopback() } -func configureSite(ctxt context.Context, ctx *cli.Context, siteNum int, siteStrs []string, healthCheckPath string, healthCheckPort int, healthCheckDuration, healthCheckTimeout time.Duration) *site { +func configureSite(ctxt context.Context, ctx *cli.Context, siteNum int, siteStrs []string, opts healthCheckOptions) *site { var endpoints []string if ellipses.HasEllipses(siteStrs...) 
{ @@ -927,13 +935,13 @@ func configureSite(ctxt context.Context, ctx *cli.Context, siteNum int, siteStrs ModifyResponse: modifyResponse(), } stats := BackendStats{MinLatency: 24 * time.Hour, MaxLatency: 0} - healthCheckURL, err := getHealthCheckURL(endpoint, healthCheckPath, healthCheckPort) + healthCheckURL, err := getHealthCheckURL(endpoint, opts.healthCheckPath, opts.healthCheckPort) if err != nil { console.Fatalln(err) } backend := &Backend{siteNum, endpoint, proxy, &http.Client{ Transport: proxy.Transport, - }, 0, healthCheckURL, healthCheckDuration, healthCheckTimeout, &stats} + }, 0, healthCheckURL, opts.healthCheckDuration, opts.healthCheckTimeout, &stats} go backend.healthCheck(ctxt) proxy.ErrorHandler = backend.ErrorHandler backends = append(backends, backend) @@ -1042,7 +1050,7 @@ func sidekickMain(ctx *cli.Context) { } m := &multisite{} - m.renewSite(ctx, healthCheckPath, healthReadCheckPath, healthCheckPort, healthCheckDuration, healthCheckTimeout) + m.renewSite(ctx, healthCheckOptions{healthCheckPath, healthReadCheckPath, healthCheckPort, healthCheckDuration, healthCheckTimeout}) m.displayUI(!globalConsoleDisplay) router.PathPrefix(slashSeparator).Handler(m) @@ -1080,7 +1088,7 @@ func sidekickMain(ctx *cli.Context) { for signal := range osSignalChannel { switch signal { case syscall.SIGHUP: - m.renewSite(ctx, healthCheckPath, healthReadCheckPath, healthCheckPort, healthCheckDuration, healthCheckTimeout) + m.renewSite(ctx, healthCheckOptions{healthCheckPath, healthReadCheckPath, healthCheckPort, healthCheckDuration, healthCheckTimeout}) default: console.Infof("caught signal '%s'\n", signal) os.Exit(1)