Skip to content

Commit

Permalink
DEBUG: Add recovery handling and instance put logging
Browse files Browse the repository at this point in the history
Signed-off-by: Mark Laing <[email protected]>
  • Loading branch information
markylaing committed Nov 14, 2024
1 parent 9b67811 commit 3dc0987
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ifeq "$(TAG_SQLITE3)" ""
endif

ifeq "$(GOCOVERDIR)" ""
CC="$(CC)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" go install -v -tags "$(TAG_SQLITE3)" -trimpath $(DEBUG) ./lxd ./lxc-to-lxd
CC="$(CC)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" go install -v -tags "$(TAG_SQLITE3)" -trimpath $(DEBUG) -gcflags="all=-N -l" ./lxd ./lxc-to-lxd
CGO_ENABLED=0 go install -v -tags netgo -trimpath $(DEBUG) ./lxd-user ./lxd-benchmark
else
CC="$(CC)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" go install -v -tags "$(TAG_SQLITE3)" -trimpath -cover $(DEBUG) ./lxd ./lxc-to-lxd
Expand Down
38 changes: 35 additions & 3 deletions lxd/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"os/exec"
"os/user"
"path/filepath"
"runtime"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -57,6 +58,7 @@ import (
"github.com/canonical/lxd/lxd/metrics"
networkZone "github.com/canonical/lxd/lxd/network/zone"
"github.com/canonical/lxd/lxd/node"
"github.com/canonical/lxd/lxd/recovery"
"github.com/canonical/lxd/lxd/request"
"github.com/canonical/lxd/lxd/response"
"github.com/canonical/lxd/lxd/rsync"
Expand Down Expand Up @@ -168,6 +170,8 @@ type Daemon struct {

// Ubuntu Pro settings
ubuntuPro *ubuntupro.Client

panics chan [2]any
}

// DaemonConfig holds configuration values for Daemon.
Expand Down Expand Up @@ -199,6 +203,7 @@ func newDaemon(config *DaemonConfig, os *sys.OS) *Daemon {
shutdownCtx: shutdownCtx,
shutdownCancel: shutdownCancel,
shutdownDoneCh: make(chan error),
panics: make(chan [2]any),
}

d.serverCert = func() *shared.CertInfo { return d.serverCertInt }
Expand Down Expand Up @@ -646,7 +651,7 @@ func (d *Daemon) UnixSocket() string {
return filepath.Join(d.os.VarDir, "unix.socket")
}

// createCmd creates API handlers for the provided endpoint including some useful behavior,
// createCmd creates API devLxdHandlers for the provided endpoint including some useful behavior,
// such as appropriate authentication, authorization and checking server availability.
//
// The created handler also keeps track of handled requests for the API metrics
Expand All @@ -661,7 +666,7 @@ func (d *Daemon) createCmd(restAPI *mux.Router, version string, c APIEndpoint) {
uri = fmt.Sprintf("/%s", c.Path)
}

route := restAPI.HandleFunc(uri, func(w http.ResponseWriter, r *http.Request) {
handler := func(w http.ResponseWriter, r *http.Request) {
// Only endpoints from the main API (version 1.0) should be counted for the metrics.
// This prevents internal endpoints from being included as well.
if version == "1.0" {
Expand Down Expand Up @@ -861,7 +866,9 @@ func (d *Daemon) createCmd(restAPI *mux.Router, version string, c APIEndpoint) {
logger.Warn("Failed writing error for HTTP response", logger.Ctx{"url": uri, "err": err, "writeErr": writeErr})
}
}
})
}

route := restAPI.Handle(uri, recoveryWrapper(handler))

// If the endpoint has a canonical name then record it so it can be used to build URLS
// and accessed in the context of the request by the handler function.
Expand All @@ -870,6 +877,31 @@ func (d *Daemon) createCmd(restAPI *mux.Router, version string, c APIEndpoint) {
}
}

func recoveryWrapper(h http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
defer func() {
rec := recover()
if rec == nil {
return
}

// If we panic, return an error to the user, then panic again to propagate the error back to the main
// go routine.
_ = response.InternalError(errors.New("Unexpected error occurred")).Render(w, r)

const size = 64 << 10
buf := make([]byte, size)
buf = buf[:runtime.Stack(buf, false)]
recovery.Panic <- recovery.PanicResult{
Err: fmt.Errorf("%v", rec),
Stacktrace: buf,
}
}()

h.ServeHTTP(w, r)
}
}

// have we setup shared mounts?
var sharedMountsLock sync.Mutex

Expand Down
4 changes: 2 additions & 2 deletions lxd/devlxd.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ func devlxdUbuntuProTokenPostHandler(d *Daemon, c instance.Instance, w http.Resp
return response.DevLxdResponse(http.StatusOK, tokenJSON, "json", c.Type() == instancetype.VM)
}

var handlers = []devLxdHandler{
var devLxdHandlers = []devLxdHandler{
{
path: "/",
handlerFunc: func(d *Daemon, c instance.Instance, w http.ResponseWriter, r *http.Request) response.Response {
Expand Down Expand Up @@ -398,7 +398,7 @@ func devLxdAPI(d *Daemon, f hoistFunc) http.Handler {
m := mux.NewRouter()
m.UseEncodedPath() // Allow encoded values in path segments.

for _, handler := range handlers {
for _, handler := range devLxdHandlers {
m.HandleFunc(handler.path, f(handler.handlerFunc, d))
}

Expand Down
11 changes: 11 additions & 0 deletions lxd/instance/drivers/driver_lxc.go
Original file line number Diff line number Diff line change
Expand Up @@ -4070,6 +4070,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error {
reverter := revert.New()
defer reverter.Fail()

logger.Info("Reached instance update backup file lock")
unlock, err := d.updateBackupFileLock(context.Background())
if err != nil {
return err
Expand Down Expand Up @@ -4122,6 +4123,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error {

var profiles []string

logger.Info("Reached instance update get profile names")
err = d.state.DB.Cluster.Transaction(context.TODO(), func(ctx context.Context, tx *db.ClusterTx) error {
// Validate the new profiles
profiles, err = tx.GetProfileNames(ctx, args.Project)
Expand All @@ -4132,6 +4134,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error {
return fmt.Errorf("Failed to get profiles: %w", err)
}

logger.Info("Reached instance update check profiles")
checkedProfiles := []string{}
for _, profile := range args.Profiles {
if !shared.ValueInSlice(profile.Name, profiles) {
Expand All @@ -4153,6 +4156,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error {
}
}

logger.Info("Reached instance update copy old config")
// Get a copy of the old configuration
oldDescription := d.Description()
oldArchitecture := 0
Expand Down Expand Up @@ -4254,6 +4258,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error {
}
}

logger.Info("Reached instance update device diff")
// Diff the devices
removeDevices, addDevices, updateDevices, allUpdatedKeys := oldExpandedDevices.Update(d.expandedDevices, func(oldDevice deviceConfig.Device, newDevice deviceConfig.Device) []string {
// This function needs to return a list of fields that are excluded from differences
Expand Down Expand Up @@ -4301,6 +4306,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error {
}

if userRequested {
logger.Info("Reached instance update config validation")
// Look for deleted idmap keys.
protectedKeys := []string{
"volatile.idmap.base",
Expand Down Expand Up @@ -4448,6 +4454,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error {

// Apply the live changes
if isRunning {
logger.Info("Reached instance update apply live changes")
cc, err := d.initLXC(false)
if err != nil {
return err
Expand All @@ -4460,6 +4467,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error {

// Live update the container config
for _, key := range changedConfig {
logger.Info("Reached instance update apply config key live", logger.Ctx{"key": key})
value := d.expandedConfig[key]

if key == "raw.apparmor" || key == "security.nesting" {
Expand Down Expand Up @@ -4785,6 +4793,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error {
}
}

logger.Info("Reached instance update apply database changes")
// Finally, apply the changes to the database
err = d.state.DB.Cluster.Transaction(context.TODO(), func(ctx context.Context, tx *db.ClusterTx) error {
// Snapshots should update only their descriptions and expiry date.
Expand Down Expand Up @@ -4833,13 +4842,15 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error {
return fmt.Errorf("Failed to update database: %w", err)
}

logger.Info("Reached instance update apply backup file changes")
err = d.UpdateBackupFile()
if err != nil && !os.IsNotExist(err) {
return fmt.Errorf("Failed to write backup file: %w", err)
}

// Send devlxd notifications
if isRunning {
logger.Info("Reached instance update send devlxd notifications")
// Config changes (only for user.* keys
for _, key := range changedConfig {
if !strings.HasPrefix(key, "user.") {
Expand Down
5 changes: 5 additions & 0 deletions lxd/instance_put.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/canonical/lxd/lxd/util"
"github.com/canonical/lxd/shared"
"github.com/canonical/lxd/shared/api"
"github.com/canonical/lxd/shared/logger"
"github.com/canonical/lxd/shared/osarch"
"github.com/canonical/lxd/shared/revert"
"github.com/canonical/lxd/shared/version"
Expand Down Expand Up @@ -61,6 +62,7 @@ import (
// "500":
// $ref: "#/responses/InternalServerError"
func instancePut(d *Daemon, r *http.Request) response.Response {
logger.Info("Reached instance put")
// Don't mess with instance while in setup mode.
<-d.waitReady.Done()

Expand Down Expand Up @@ -166,6 +168,9 @@ func instancePut(d *Daemon, r *http.Request) response.Response {

// Update container configuration
do = func(op *operations.Operation) error {
var eggs []string
_ = eggs[0]
logger.Info("Reached instance put operation")
defer unlock()

args := db.InstanceArgs{
Expand Down
2 changes: 1 addition & 1 deletion lxd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (c *cmdGlobal) rawArgs(cmd *cobra.Command) []string {
func main() {
// daemon command (main)
daemonCmd := cmdDaemon{}
app := daemonCmd.Command()
app := daemonCmd.command()
app.SilenceUsage = true
app.CompletionOptions = cobra.CompletionOptions{DisableDefaultCmd: true}

Expand Down
13 changes: 10 additions & 3 deletions lxd/main_daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"context"
"errors"
"fmt"
"os"
"os/exec"
Expand All @@ -10,6 +11,7 @@ import (
"github.com/spf13/cobra"
"golang.org/x/sys/unix"

"github.com/canonical/lxd/lxd/recovery"
"github.com/canonical/lxd/lxd/sys"
"github.com/canonical/lxd/shared/logger"
)
Expand All @@ -21,7 +23,7 @@ type cmdDaemon struct {
flagGroup string
}

func (c *cmdDaemon) Command() *cobra.Command {
func (c *cmdDaemon) command() *cobra.Command {
cmd := &cobra.Command{}
cmd.Use = "lxd"
cmd.Short = "The LXD container manager (daemon)"
Expand All @@ -34,13 +36,13 @@ func (c *cmdDaemon) Command() *cobra.Command {
There are however a number of subcommands that let you interact directly with
the local LXD daemon and which may not be performed through the REST API alone.
`
cmd.RunE = c.Run
cmd.RunE = c.run
cmd.Flags().StringVar(&c.flagGroup, "group", "", "The group of users that will be allowed to talk to LXD"+"``")

return cmd
}

func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {
func (c *cmdDaemon) run(cmd *cobra.Command, args []string) error {
if len(args) > 1 || (len(args) == 1 && args[0] != "daemon" && args[0] != "") {
return fmt.Errorf("unknown command \"%s\" for \"%s\"", args[0], cmd.CommandPath())
}
Expand Down Expand Up @@ -93,6 +95,11 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error {

case err = <-d.shutdownDoneCh:
return err

case p := <-recovery.Panic:
_, _ = os.Stderr.Write([]byte(fmt.Sprintf("panic: %v\n", p.Err)))
_, _ = os.Stderr.Write(p.Stacktrace)
return errors.New("Unexpected error occurred")
}
}
}
35 changes: 35 additions & 0 deletions lxd/operations/operations.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package operations

import (
"context"
"errors"
"fmt"
"net/http"
"runtime"
"sync"
"time"

Expand All @@ -12,6 +14,7 @@ import (
"github.com/canonical/lxd/lxd/auth"
"github.com/canonical/lxd/lxd/db/operationtype"
"github.com/canonical/lxd/lxd/events"
"github.com/canonical/lxd/lxd/recovery"
"github.com/canonical/lxd/lxd/request"
"github.com/canonical/lxd/lxd/response"
"github.com/canonical/lxd/lxd/state"
Expand Down Expand Up @@ -295,6 +298,38 @@ func (op *Operation) Start() error {

if op.onRun != nil {
go func(op *Operation) {
defer func() {
r := recover()
if r == nil {
return
}

if op != nil {
op.lock.Lock()
op.status = api.Failure
op.err = errors.New("Unexpected error occurred")
op.lock.Unlock()
op.done()

op.logger.Debug("Failure for operation", logger.Ctx{"err": op.err})
_, md, _ := op.Render()

op.lock.Lock()
op.sendEvent(md)
op.lock.Unlock()
time.Sleep(500 * time.Millisecond)
}

const size = 64 << 10
buf := make([]byte, size)
buf = buf[:runtime.Stack(buf, false)]

recovery.Panic <- recovery.PanicResult{
Err: fmt.Errorf("%v", r),
Stacktrace: buf,
}
}()

err := op.onRun(op)
if err != nil {
op.lock.Lock()
Expand Down
12 changes: 12 additions & 0 deletions lxd/recovery/recovery.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package recovery

// Panic is a channel that a single PanicResult can be sent on. This channel is listened to in the `lxd` daemon command
// and if received, the daemon exits. We are doing this rather than panicking directly in case any libraries implement
// their own panic recovery. For example, we need to bypass the recovery in the standard net/http library (see https://github.com/golang/go/issues/25245).
var Panic = make(chan PanicResult, 1)

// PanicResult contains the panic cause and a stacktrace.
type PanicResult struct {
Err error
Stacktrace []byte
}
1 change: 1 addition & 0 deletions test/godeps/lxd-agent.list
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ github.com/canonical/lxd/lxd/instance/instancetype
github.com/canonical/lxd/lxd/ip
github.com/canonical/lxd/lxd/metrics
github.com/canonical/lxd/lxd/operations
github.com/canonical/lxd/lxd/recovery
github.com/canonical/lxd/lxd/request
github.com/canonical/lxd/lxd/response
github.com/canonical/lxd/lxd/state
Expand Down

0 comments on commit 3dc0987

Please sign in to comment.