Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: remove wrapperd and launch processes directly #9489

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ linters-settings:
- golang.zx2c4.com/wireguard
- golang.zx2c4.com/wireguard/wgctrl
- cloud.google.com/go
- kernel.org/pub/linux/libs/security/libcap/cap
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: remove this entry, as it was only added for debugging.

# fd-leak related replacements: https://github.com/siderolabs/talos/issues/9412
- github.com/insomniacslk/dhcp
- github.com/safchain/ethtool
Expand Down
4 changes: 0 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -658,8 +658,6 @@ RUN <<END
# some extensions like qemu-guest agent will call '/sbin/shutdown'
ln /rootfs/sbin/init /rootfs/sbin/shutdown
chmod +x /rootfs/sbin/shutdown
ln /rootfs/sbin/init /rootfs/sbin/wrapperd
chmod +x /rootfs/sbin/wrapperd
ln /rootfs/sbin/init /rootfs/sbin/dashboard
chmod +x /rootfs/sbin/dashboard
END
Expand Down Expand Up @@ -728,8 +726,6 @@ RUN <<END
# some extensions like qemu-guest agent will call '/sbin/shutdown'
ln /rootfs/sbin/init /rootfs/sbin/shutdown
chmod +x /rootfs/sbin/shutdown
ln /rootfs/sbin/init /rootfs/sbin/wrapperd
chmod +x /rootfs/sbin/wrapperd
ln /rootfs/sbin/init /rootfs/sbin/dashboard
chmod +x /rootfs/sbin/dashboard
END
Expand Down
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ replace (
// forked go-yaml that introduces RawYAML interface, which can be used to populate YAML fields using bytes
// which are then encoded as valid YAML blocks with proper indentation
gopkg.in/yaml.v3 => github.com/unix4ever/yaml v0.0.0-20220527175918-f17b0f05cf2c

// improved error logging
kernel.org/pub/linux/libs/security/libcap/cap => github.com/dsseng/go-libcap/cap v0.0.0-20241015195416-c3ab072bd718
Comment on lines +27 to +29
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: remove this replacement before merging.

)

// fd-leak related replacements: https://github.com/siderolabs/talos/issues/9412
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/dsseng/go-libcap/cap v0.0.0-20241015195416-c3ab072bd718 h1:0rpLmrwXjXf7ySDkCDOmzlKFi8w2d1fhQ7Z9BCXXIQU=
github.com/dsseng/go-libcap/cap v0.0.0-20241015195416-c3ab072bd718/go.mod h1:/iBwcj9nbLejQitYvUm9caurITQ6WyNHibJk6Q9fiS4=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/ecks/uefi v0.0.0-20221116212947-caef65d070eb h1:LZBZtPpqHDydudNAs2sHmo4Zp9bxEyxHdGCk3Fr6tv8=
Expand Down Expand Up @@ -1065,8 +1067,6 @@ k8s.io/pod-security-admission v0.31.1 h1:j++ISpfQU0mWpKhoS4tY06Wm5EKdn65teL4lPJh
k8s.io/pod-security-admission v0.31.1/go.mod h1:0aE5T6MGm/50Nr/diBrC6+wwpxsT2E7NECe+TepUuEg=
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
kernel.org/pub/linux/libs/security/libcap/cap v1.2.70 h1:QnLPkuDWWbD5C+3DUA2IUXai5TK6w2zff+MAGccqdsw=
kernel.org/pub/linux/libs/security/libcap/cap v1.2.70/go.mod h1:/iBwcj9nbLejQitYvUm9caurITQ6WyNHibJk6Q9fiS4=
kernel.org/pub/linux/libs/security/libcap/psx v1.2.70 h1:HsB2G/rEQiYyo1bGoQqHZ/Bvd6x1rERQTNdPr1FyWjI=
kernel.org/pub/linux/libs/security/libcap/psx v1.2.70/go.mod h1:+l6Ee2F59XiJ2I6WR5ObpC1utCQJZ/VLsEbQCD8RG24=
rsc.io/qr v0.2.0 h1:6vBLea5/NRMVTz8V66gipeLycZMl/+UlFmk8DvqQ6WY=
Expand Down
5 changes: 0 additions & 5 deletions internal/app/machined/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import (
"github.com/siderolabs/talos/internal/app/maintenance"
"github.com/siderolabs/talos/internal/app/poweroff"
"github.com/siderolabs/talos/internal/app/trustd"
"github.com/siderolabs/talos/internal/app/wrapperd"
"github.com/siderolabs/talos/internal/pkg/mount"
"github.com/siderolabs/talos/pkg/httpdefaults"
"github.com/siderolabs/talos/pkg/machinery/api/common"
Expand Down Expand Up @@ -313,10 +312,6 @@ func main() {
case "poweroff", "shutdown":
poweroff.Main(os.Args)

return
case "wrapperd":
wrapperd.Main()

return
case "dashboard":
dashboard.Main()
Expand Down
213 changes: 169 additions & 44 deletions internal/app/machined/pkg/system/runner/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,19 @@ import (
"fmt"
"io"
"os"
"os/exec"
"slices"
"strings"
"syscall"
"time"

"github.com/containerd/cgroups/v3"
"github.com/containerd/cgroups/v3/cgroup1"
"github.com/containerd/cgroups/v3/cgroup2"
"github.com/containerd/containerd/v2/pkg/sys"
"github.com/siderolabs/gen/optional"
"github.com/siderolabs/gen/xslices"
"github.com/siderolabs/go-cmd/pkg/cmd/proc/reaper"
"kernel.org/pub/linux/libs/security/libcap/cap"

"github.com/siderolabs/talos/internal/app/machined/pkg/system/events"
"github.com/siderolabs/talos/internal/app/machined/pkg/system/runner"
Expand Down Expand Up @@ -78,27 +85,85 @@ func (p *processRunner) Close() error {
}

// commandWrapper bundles everything needed to start one process and to clean
// up around it. A pointer to it is passed to the launcher as callback data and
// is read back by beforeExecCallback.
type commandWrapper struct {
// NOTE(review): appears to be the pre-change exec.Cmd handle that launcher
// supersedes — confirm whether this field is still present after the change.
cmd *exec.Cmd
// launcher starts the process; build configures its UID, capabilities and
// before-exec callback.
launcher *cap.Launcher
// ctty, when set, is installed as the child's controlling terminal
// (together with Setsid and Setctty) by beforeExecCallback.
ctty optional.Optional[int]
// stdin, stdout and stderr are the descriptors placed, in that order, into
// the child's ProcAttr.Files by beforeExecCallback.
stdin uintptr
stdout uintptr
stderr uintptr
// afterStart runs the hooks collected by build (closing the parent's copies
// of any opened stdio files) once the process has been launched.
afterStart func()
// afterTermination is the cleanup hook; build sets it to the function that
// closes the log writer and both pipe ends.
afterTermination func() error
}

func dropCaps(droppedCapabilities []string, launcher *cap.Launcher) error {
droppedCaps := strings.Join(droppedCapabilities, ",")

if droppedCaps != "" {
caps := strings.Split(droppedCaps, ",")
dropCaps := xslices.Map(caps, func(c string) cap.Value {
capability, capErr := cap.FromName(c)
if capErr != nil {
fmt.Printf("failed to parse capability: %s", capErr)
}

return capability
})

iab := cap.IABGetProc()
if err := iab.SetVector(cap.Bound, true, dropCaps...); err != nil {
return fmt.Errorf("failed to set capabilities: %w", err)
}

launcher.SetIAB(iab)
}

return nil
}

// This callback is run in the thread before executing child process.
func beforeExecCallback(pa *syscall.ProcAttr, data interface{}) error {
wrapper, ok := data.(*commandWrapper)
if !ok {
return fmt.Errorf("failed to get command info")
}

ctty, cttySet := wrapper.ctty.Get()
if cttySet {
if pa.Sys == nil {
pa.Sys = &syscall.SysProcAttr{}
}

pa.Sys.Ctty = ctty
pa.Sys.Setsid = true
pa.Sys.Setctty = true
}

pa.Files = []uintptr{
wrapper.stdin,
wrapper.stdout,
wrapper.stderr,
}

// TODO: use pa.Sys.CgroupFD here when we can be sure clone3 is available
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's absolutely do this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So should we implement two different pathways? We use this code in container mode as well, and we cannot really expect containers to be run on kernel 5.5+, which has clone3.

return nil
}

//nolint:gocyclo
func (p *processRunner) build() (commandWrapper, error) {
args := []string{
fmt.Sprintf("-name=%s", p.args.ID),
fmt.Sprintf("-dropped-caps=%s", strings.Join(p.opts.DroppedCapabilities, ",")),
fmt.Sprintf("-cgroup-path=%s", cgroup.Path(p.opts.CgroupPath)),
fmt.Sprintf("-oom-score=%d", p.opts.OOMScoreAdj),
fmt.Sprintf("-uid=%d", p.opts.UID),
}
wrapper := commandWrapper{}

env := slices.Concat([]string{"PATH=" + constants.PATH}, p.opts.Env, os.Environ())
launcher := cap.NewLauncher(p.args.ProcessArgs[0], p.args.ProcessArgs, env)

args = append(args, p.args.ProcessArgs...)
if p.opts.UID > 0 {
launcher.SetUID(int(p.opts.UID))
}

cmd := exec.Command("/sbin/wrapperd", args...)
// reduce capabilities and assign them to launcher
if err := dropCaps(p.opts.DroppedCapabilities, launcher); err != nil {
return commandWrapper{}, err
}

// Set the environment for the service.
cmd.Env = append([]string{fmt.Sprintf("PATH=%s", constants.PATH)}, p.opts.Env...)
launcher.Callback(beforeExecCallback)

// Setup logging.
w, err := p.opts.LoggingManager.ServiceLog(p.args.ID).Writer()
Expand All @@ -113,12 +178,40 @@ func (p *processRunner) build() (commandWrapper, error) {
writer = w
}

// As MultiWriter is not a file, we need to create a pipe
// Pipe writer is passed to the child process while we read from the read side
pr, pw, err := os.Pipe()
if err != nil {
return commandWrapper{}, err
}

go io.Copy(writer, pr) //nolint:errcheck

// close the writer if we exit early due to an error
closeWriter := true

closeLogging := func() (e error) {
err := w.Close()
if err != nil {
e = err
}

err = pr.Close()
if err != nil {
e = err
}

err = pw.Close()
if err != nil {
e = err
}

return e
}

defer func() {
if closeWriter {
w.Close() //nolint:errcheck
closeLogging() //nolint:errcheck
}
}()

Expand All @@ -130,7 +223,7 @@ func (p *processRunner) build() (commandWrapper, error) {
return commandWrapper{}, err
}

cmd.Stdin = stdin
wrapper.stdin = stdin.Fd()

afterStartFuncs = append(afterStartFuncs, func() {
stdin.Close() //nolint:errcheck
Expand All @@ -143,13 +236,13 @@ func (p *processRunner) build() (commandWrapper, error) {
return commandWrapper{}, err
}

cmd.Stdout = stdout
wrapper.stdout = stdout.Fd()

afterStartFuncs = append(afterStartFuncs, func() {
stdout.Close() //nolint:errcheck
})
} else {
cmd.Stdout = writer
wrapper.stdout = pw.Fd()
}

if p.opts.StderrFile != "" {
Expand All @@ -158,37 +251,60 @@ func (p *processRunner) build() (commandWrapper, error) {
return commandWrapper{}, err
}

cmd.Stderr = stderr
wrapper.stderr = stderr.Fd()

afterStartFuncs = append(afterStartFuncs, func() {
stderr.Close() //nolint:errcheck
})
} else {
cmd.Stderr = writer
wrapper.stderr = pw.Fd()
}

ctty, cttySet := p.opts.Ctty.Get()
if cttySet {
cmd.SysProcAttr = &syscall.SysProcAttr{
Setsid: true,
Setctty: true,
Ctty: ctty,
closeWriter = false

wrapper.launcher = launcher
wrapper.afterStart = func() {
for _, f := range afterStartFuncs {
f()
}
}
wrapper.afterTermination = closeLogging
wrapper.ctty = p.opts.Ctty

closeWriter = false
return wrapper, nil
}

return commandWrapper{
cmd: cmd,
afterStart: func() {
for _, f := range afterStartFuncs {
f()
}
},
afterTermination: func() error {
return w.Close()
},
}, nil
// Apply cgroup and OOM score after the process is launched.
func applyProperties(p *processRunner, pid int) error {
path := cgroup.Path(p.opts.CgroupPath)

if cgroups.Mode() == cgroups.Unified {
cgv2, err := cgroup2.Load(path)
if err != nil {
return fmt.Errorf("failed to load cgroup %s: %w", path, err)
}

if err := cgv2.AddProc(uint64(pid)); err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we move to cgroupFD?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we also use this launch method when running in a container (thus not sure we have clone3 available)? Cgroup in clone3 is actually only available since kernel 5.7

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, let's skip it for now

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we optionally have two paths? like auto-detect?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unsure whether that's going to be beneficial, but it's possible for sure

return fmt.Errorf("failed to move process %s to cgroup: %w", p, err)
}
} else {
cgv1, err := cgroup1.Load(cgroup1.StaticPath(path))
if err != nil {
return fmt.Errorf("failed to load cgroup %s: %w", path, err)
}

if err := cgv1.Add(cgroup1.Process{
Pid: pid,
}); err != nil {
return fmt.Errorf("failed to move process %s to cgroup: %w", p, err)
}
}

if err := sys.AdjustOOMScore(pid, p.opts.OOMScoreAdj); err != nil {
return fmt.Errorf("failed to change OOMScoreAdj of process %s to %d: %w", p, p.opts.OOMScoreAdj, err)
}

return nil
}

func (p *processRunner) run(eventSink events.Recorder) error {
Expand All @@ -206,20 +322,29 @@ func (p *processRunner) run(eventSink events.Recorder) error {
defer reaper.Stop(notifyCh)
}

err = cmdWrapper.cmd.Start()
pid, err := cmdWrapper.launcher.Launch(&cmdWrapper)
if err != nil {
return fmt.Errorf("error starting process: %w", err)
}

if err := applyProperties(p, pid); err != nil {
return err
}

cmdWrapper.afterStart()

eventSink(events.StateRunning, "Process %s started with PID %d", p, pid)

process, err := os.FindProcess(pid)
if err != nil {
return fmt.Errorf("error starting process: %w", err)
return fmt.Errorf("could not find process: %w", err)
}

eventSink(events.StateRunning, "Process %s started with PID %d", p, cmdWrapper.cmd.Process.Pid)

waitCh := make(chan error)

go func() {
waitCh <- reaper.WaitWrapper(usingReaper, notifyCh, cmdWrapper.cmd)
_, err := process.Wait()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should use reaper, don't change this part please

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

waitCh <- err
}()

select {
Expand All @@ -231,7 +356,7 @@ func (p *processRunner) run(eventSink events.Recorder) error {
eventSink(events.StateStopping, "Sending SIGTERM to %s", p)

//nolint:errcheck
_ = cmdWrapper.cmd.Process.Signal(syscall.SIGTERM)
_ = process.Signal(syscall.SIGTERM)
}

select {
Expand All @@ -243,7 +368,7 @@ func (p *processRunner) run(eventSink events.Recorder) error {
eventSink(events.StateStopping, "Sending SIGKILL to %s", p)

//nolint:errcheck
_ = cmdWrapper.cmd.Process.Signal(syscall.SIGKILL)
_ = process.Signal(syscall.SIGKILL)
}

// wait for process to terminate
Expand Down
Loading
Loading