From 4f9c07d1786c2e0e2233f644da28980194fa8616 Mon Sep 17 00:00:00 2001 From: "changxin.dong" Date: Mon, 10 Oct 2022 17:31:10 +0800 Subject: [PATCH] feat: linux control group version 2 API support cgroup v2 (#1262) --- .../util/platforms/unix/linux/CgroupV2.java | 74 +++++ .../unix/linux/CgroupV2FreezerState.java | 26 ++ .../platforms/unix/linux/LinuxPlatform.java | 20 +- .../LinuxSystemResourceControllerV2.java | 273 ++++++++++++++++++ .../LinuxSystemResourceControllerV2Test.java | 109 +++++++ 5 files changed, 501 insertions(+), 1 deletion(-) create mode 100644 src/main/java/com/aws/greengrass/util/platforms/unix/linux/CgroupV2.java create mode 100644 src/main/java/com/aws/greengrass/util/platforms/unix/linux/CgroupV2FreezerState.java create mode 100644 src/main/java/com/aws/greengrass/util/platforms/unix/linux/LinuxSystemResourceControllerV2.java create mode 100644 src/test/java/com/aws/greengrass/util/platforms/unix/linux/LinuxSystemResourceControllerV2Test.java diff --git a/src/main/java/com/aws/greengrass/util/platforms/unix/linux/CgroupV2.java b/src/main/java/com/aws/greengrass/util/platforms/unix/linux/CgroupV2.java new file mode 100644 index 0000000000..ff98909911 --- /dev/null +++ b/src/main/java/com/aws/greengrass/util/platforms/unix/linux/CgroupV2.java @@ -0,0 +1,74 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +package com.aws.greengrass.util.platforms.unix.linux; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; + +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * Represents Linux cgroup v2. 
+ */ +@SuppressFBWarnings(value = "DMI_HARDCODED_ABSOLUTE_FILENAME", justification = "Cgroup virtual filesystem path " + + "cannot be relative") +public final class CgroupV2 { + + private static final String CGROUP_ROOT = "/sys/fs/cgroup"; + private static final String GG_NAMESPACE = "greengrass"; + private static final String CPU_MAX = "cpu.max"; + private static final String MEMORY_MAX = "memory.max"; + private static final String CGROUP_PROCS = "cgroup.procs"; + private static final String CGROUP_SUBTREE_CONTROL = "cgroup.subtree_control"; + private static final String CGROUP_FREEZE = "cgroup.freeze"; + + private CgroupV2() { + } + + public static Path getRootPath() { + return Paths.get(CGROUP_ROOT); + } + + public static String rootMountCmd() { + return String.format("mount -t cgroup2 none %s", CGROUP_ROOT); + } + + public static Path getSubsystemRootPath() { + return Paths.get(CGROUP_ROOT); + } + + public static Path getRootSubTreeControlPath() { + return getSubsystemRootPath().resolve(CGROUP_SUBTREE_CONTROL); + } + + public static Path getSubsystemGGPath() { + return getSubsystemRootPath().resolve(GG_NAMESPACE); + } + + public static Path getGGSubTreeControlPath() { + return getSubsystemGGPath().resolve(CGROUP_SUBTREE_CONTROL); + } + + public static Path getSubsystemComponentPath(String componentName) { + return getSubsystemGGPath().resolve(componentName); + } + + public static Path getComponentCpuMaxPath(String componentName) { + return getSubsystemComponentPath(componentName).resolve(CPU_MAX); + } + + public static Path getComponentMemoryMaxPath(String componentName) { + return getSubsystemComponentPath(componentName).resolve(MEMORY_MAX); + } + + public static Path getCgroupProcsPath(String componentName) { + return getSubsystemComponentPath(componentName).resolve(CGROUP_PROCS); + } + + public static Path getCgroupFreezePath(String componentName) { + return getSubsystemComponentPath(componentName).resolve(CGROUP_FREEZE); + } +} diff --git 
// File: src/main/java/com/aws/greengrass/util/platforms/unix/linux/CgroupV2FreezerState.java (new file)
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

package com.aws.greengrass.util.platforms.unix.linux;

/**
 * States that can be written to a component's {@code cgroup.freeze} file.
 */
public enum CgroupV2FreezerState {
    THAWED(0),
    FROZEN(1);

    // Immutable: the value is fixed per constant.
    private final int index;

    CgroupV2FreezerState(int index) {
        this.index = index;
    }

    /**
     * Get the index value associated with this CgroupV2FreezerState.
     *
     * @return the integer index value associated with this CgroupV2FreezerState.
     */
    public int getIndex() {
        return index;
    }
}

// File: src/main/java/com/aws/greengrass/util/platforms/unix/linux/LinuxPlatform.java (modified)
// NOTE(review): the license header and package line sit above the visible hunk; the package is taken from the path.

package com.aws.greengrass.util.platforms.unix.linux;

import com.aws.greengrass.util.platforms.SystemResourceController;
import com.aws.greengrass.util.platforms.unix.UnixPlatform;

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class LinuxPlatform extends UnixPlatform {
    private static final String CGROUP_ROOT = "/sys/fs/cgroup";
    private static final String CGROUP_CONTROLLERS = "cgroup.controllers";

    // Created on first use and then reused, so every caller shares one controller per platform.
    SystemResourceController systemResourceController;

    /**
     * Get the resource controller matching the cgroup version of this host.
     *
     * <p>The version is detected on the first call only and the resulting controller is cached.
     *
     * @return the cgroup v2 controller when {@code /sys/fs/cgroup/cgroup.controllers} exists, otherwise the
     *         cgroup v1 controller
     */
    @Override
    public SystemResourceController getSystemResourceController() {
        if (systemResourceController == null) {
            // If the file exists, identify the host as cgroup v2, otherwise identify it as cgroup v1.
            if (Files.exists(getControllersRootPath())) {
                systemResourceController = new LinuxSystemResourceControllerV2(this);
            } else {
                systemResourceController = new LinuxSystemResourceController(this);
            }
        }
        return systemResourceController;
    }

    private Path getControllersRootPath() {
        return Paths.get(CGROUP_ROOT).resolve(CGROUP_CONTROLLERS);
    }
}

// File: src/main/java/com/aws/greengrass/util/platforms/unix/linux/LinuxSystemResourceControllerV2.java (new file)
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

package com.aws.greengrass.util.platforms.unix.linux;

import com.aws.greengrass.lifecyclemanager.GreengrassService;
import com.aws.greengrass.logging.api.Logger;
import com.aws.greengrass.logging.impl.LogManager;
import com.aws.greengrass.util.Coerce;
import com.aws.greengrass.util.Utils;
import com.aws.greengrass.util.platforms.SystemResourceController;
import org.zeroturnaround.process.PidUtil;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import static org.apache.commons.io.FileUtils.ONE_KB;

/**
 * {@link SystemResourceController} that applies component limits through the cgroup v2 files exposed by
 * {@link CgroupV2}.
 */
public class LinuxSystemResourceControllerV2 implements SystemResourceController {

    private static final Logger logger = LogManager.getLogger(LinuxSystemResourceControllerV2.class);
    private static final String COMPONENT_NAME = "componentName";
    private static final String MEMORY_KEY = "memory";
    private static final String CPUS_KEY = "cpus";

    // Spaces in mount paths are escaped as the octal sequence \040 in /proc/self/mounts.
    private static final String UNICODE_SPACE = "\\040";
    private static final String CGROUP_SUBTREE_CONTROL_CONTENT = "+cpuset +cpu +io +memory +pids";
    // Period used when cpu.max cannot be read back.
    // NOTE(review): 100000 is the documented kernel default period (microseconds) - confirm against cgroup-v2 docs.
    private static final long DEFAULT_CPU_PERIOD = 100_000L;

    protected final LinuxPlatform platform;

    public LinuxSystemResourceControllerV2(LinuxPlatform platform) {
        this.platform = platform;
    }

    /**
     * Delete the component's cgroup directory if it exists. Failures are logged, not thrown.
     *
     * @param component component whose cgroup is removed
     */
    @Override
    public void removeResourceController(GreengrassService component) {
        try {
            // Assumes processes belonging to cgroups would already be terminated/killed.
            Files.deleteIfExists(CgroupV2.getSubsystemComponentPath(component.getServiceName()));
        } catch (IOException e) {
            logger.atError().setCause(e).kv(COMPONENT_NAME, component.getServiceName())
                    .log("Failed to remove the resource controller");
        }
    }

    /**
     * Apply the {@code memory} (in KB) and {@code cpus} limits found in {@code resourceLimit}, creating the
     * component's cgroup first when it does not exist. Non-positive values are logged and skipped; I/O failures
     * are logged, not thrown.
     *
     * @param component     component the limits apply to
     * @param resourceLimit limits keyed by {@code memory} and/or {@code cpus}
     */
    @Override
    public void updateResourceLimits(GreengrassService component, Map<String, Object> resourceLimit) {
        String componentName = component.getServiceName();
        try {
            if (!Files.exists(CgroupV2.getSubsystemComponentPath(componentName))) {
                initializeCgroup(component);
            }

            if (resourceLimit.containsKey(MEMORY_KEY)) {
                applyMemoryLimit(componentName, Coerce.toLong(resourceLimit.get(MEMORY_KEY)));
            }

            if (resourceLimit.containsKey(CPUS_KEY)) {
                applyCpuLimit(componentName, Coerce.toDouble(resourceLimit.get(CPUS_KEY)));
            }
        } catch (IOException e) {
            logger.atError().setCause(e).kv(COMPONENT_NAME, componentName)
                    .log("Failed to apply resource limits");
        }
    }

    // Writes the memory limit, converted from KB to bytes, to the component's memory.max file.
    private void applyMemoryLimit(String componentName, long memoryLimitInKB) throws IOException {
        if (memoryLimitInKB <= 0) {
            logger.atWarn().kv(COMPONENT_NAME, componentName).kv(MEMORY_KEY, memoryLimitInKB)
                    .log("The provided memory limit is invalid");
            return;
        }
        String memoryLimit = Long.toString(memoryLimitInKB * ONE_KB);
        Files.write(CgroupV2.getComponentMemoryMaxPath(componentName),
                memoryLimit.getBytes(StandardCharsets.UTF_8));
    }

    // Writes "<quota> <period>" to the component's cpu.max file, where quota = period * cpus.
    private void applyCpuLimit(String componentName, double cpus) throws IOException {
        // Written as !(cpus > 0) so that NaN is rejected too.
        if (!(cpus > 0)) {
            logger.atWarn().kv(COMPONENT_NAME, componentName).kv(CPUS_KEY, cpus)
                    .log("The provided cpu limit is invalid");
            return;
        }
        Path cpuMaxPath = CgroupV2.getComponentCpuMaxPath(componentName);
        long period = readCpuPeriod(componentName, cpuMaxPath);
        // Always derive the quota from the period, so the limit is applied even when the current file
        // content could not be parsed. Computed as long to avoid int overflow.
        long quota = (long) (period * cpus);
        String cpuMaxContent = quota + " " + period;
        Files.write(cpuMaxPath, cpuMaxContent.getBytes(StandardCharsets.UTF_8));
    }

    // Reads the period (second field) from cpu.max; falls back to DEFAULT_CPU_PERIOD when absent or invalid.
    private long readCpuPeriod(String componentName, Path cpuMaxPath) throws IOException {
        String content = new String(Files.readAllBytes(cpuMaxPath), StandardCharsets.UTF_8).trim();
        String[] fields = content.split("\\s+");
        if (fields.length >= 2) {
            try {
                long period = Long.parseLong(fields[1]);
                if (period > 0) {
                    return period;
                }
            } catch (NumberFormatException e) {
                logger.atWarn().setCause(e).kv(COMPONENT_NAME, componentName)
                        .log("Unable to parse the cpu period, using the default period");
            }
        }
        return DEFAULT_CPU_PERIOD;
    }

    /**
     * Reset the component's limits by deleting and re-creating its cgroup directory, if it exists.
     * Failures are logged, not thrown.
     *
     * @param component component whose limits are reset
     */
    @Override
    public void resetResourceLimits(GreengrassService component) {
        try {
            Path componentPath = CgroupV2.getSubsystemComponentPath(component.getServiceName());
            if (Files.exists(componentPath)) {
                Files.delete(componentPath);
                Files.createDirectory(componentPath);
            }
        } catch (IOException e) {
            logger.atError().setCause(e).kv(COMPONENT_NAME, component.getServiceName())
                    .log("Failed to reset the resource limits");
        }
    }

    /**
     * Add the process and its child processes to the component's cgroup, then repeat once after one second.
     * Failures are logged, not thrown.
     *
     * @param component component owning the process
     * @param process   process to place under resource control
     */
    @Override
    public void addComponentProcess(GreengrassService component, Process process) {
        try {
            addComponentProcessToCgroup(component.getServiceName(), process);

            // Child processes of a process in a cgroup are auto-added to the same cgroup by the linux kernel.
            // But in case of a race between a child process starting and us adding pids to the cgroup, neither
            // we nor the linux kernel will add it to the cgroup. To account for this, re-list all pids for the
            // component after 1 second and add them to the cgroup again so that all component processes are
            // resource controlled.
            component.getContext().get(ScheduledExecutorService.class).schedule(() -> {
                try {
                    addComponentProcessToCgroup(component.getServiceName(), process);
                } catch (IOException e) {
                    handleErrorAddingPidToCgroup(e, component.getServiceName());
                }
            }, 1, TimeUnit.SECONDS);

        } catch (IOException e) {
            handleErrorAddingPidToCgroup(e, component.getServiceName());
        }
    }

    /**
     * Freeze the component: make sure its cgroup exists, add the given processes to it, then write the
     * {@link CgroupV2FreezerState#FROZEN} value to its {@code cgroup.freeze} file.
     *
     * @param component component to pause
     * @param processes processes of the component
     * @throws IOException if the cgroup cannot be set up or written
     */
    @Override
    public void pauseComponentProcesses(GreengrassService component, List<Process> processes) throws IOException {
        initializeCgroup(component);

        for (Process process : processes) {
            addComponentProcessToCgroup(component.getServiceName(), process);
        }

        Files.write(freezerCgroupStateFile(component.getServiceName()),
                String.valueOf(CgroupV2FreezerState.FROZEN.getIndex()).getBytes(StandardCharsets.UTF_8),
                StandardOpenOption.TRUNCATE_EXISTING);
    }

    /**
     * Thaw the component by writing the {@link CgroupV2FreezerState#THAWED} value to its
     * {@code cgroup.freeze} file.
     *
     * @param component component to resume
     * @throws IOException if the freeze file cannot be written
     */
    @Override
    public void resumeComponentProcesses(GreengrassService component) throws IOException {
        Files.write(freezerCgroupStateFile(component.getServiceName()),
                String.valueOf(CgroupV2FreezerState.THAWED.getIndex()).getBytes(StandardCharsets.UTF_8),
                StandardOpenOption.TRUNCATE_EXISTING);
    }

    // Writes the pid of the process and of each of its children to the component's cgroup.procs file.
    // Does nothing when the component has no cgroup directory or when the pids are already all present.
    private void addComponentProcessToCgroup(String component, Process process)
            throws IOException {

        if (!Files.exists(CgroupV2.getSubsystemComponentPath(component))) {
            logger.atDebug().kv(COMPONENT_NAME, component)
                    .log("Resource controller is not enabled");
            return;
        }

        if (process == null) {
            return;
        }

        try {
            Set<Integer> childProcesses = platform.getChildPids(process);
            childProcesses.add(PidUtil.getPid(process));
            Set<Integer> pidsInCgroup = pidsInComponentCgroup(component);
            if (childProcesses.equals(pidsInCgroup)) {
                return;
            }

            // Once a process is added to a cgroup, its forked child processes inherit its (parent's) settings
            for (Integer pid : childProcesses) {
                if (pid == null) {
                    logger.atError().log("The process doesn't exist and is skipped");
                    continue;
                }

                Files.write(CgroupV2.getCgroupProcsPath(component),
                        Integer.toString(pid).getBytes(StandardCharsets.UTF_8));
            }
        } catch (InterruptedException e) {
            logger.atWarn().setCause(e)
                    .log("Interrupted while getting processes to add to system limit controller");
            Thread.currentThread().interrupt();
        }
    }

    private void handleErrorAddingPidToCgroup(IOException e, String component) {
        // The process might have exited (if it's a short running process).
        // Check the exception message here to avoid the exception stacktrace failing the tests.
        if (e.getMessage() != null && e.getMessage().contains("No such process")) {
            logger.atWarn().kv(COMPONENT_NAME, component)
                    .log("Failed to add pid to the cgroupv2 because the process doesn't exist anymore");
        } else {
            logger.atError().setCause(e).kv(COMPONENT_NAME, component)
                    .log("Failed to add pid to the cgroupv2");
        }
    }

    // Collects the mount points listed in /proc/self/mounts.
    private Set<String> getMountedPaths() throws IOException {
        Set<String> mountedPaths = new HashSet<>();

        Path procMountsPath = Paths.get("/proc/self/mounts");
        List<String> mounts = Files.readAllLines(procMountsPath);
        for (String mount : mounts) {
            String[] split = mount.split(" ");
            // As reported in fstab(5) manpage, struct is:
            // 1st field is volume name
            // 2nd field is path with spaces escaped as \040
            // 3rd field is fs type
            // 4th field is mount options
            // 5th field is used by dump(8) (ignored)
            // 6th field is fsck order (ignored)
            if (split.length < 6) {
                continue;
            }

            // We only need the path of the mounts to verify whether cgroup is mounted
            String path = split[1].replace(UNICODE_SPACE, " ");
            mountedPaths.add(path);
        }
        return mountedPaths;
    }

    // Mounts cgroup2 on the root path when it is not mounted yet, enables the controllers for the root and
    // greengrass groups, and creates the component's group directory.
    private void initializeCgroup(GreengrassService component) throws IOException {
        Set<String> mounts = getMountedPaths();

        if (!mounts.contains(CgroupV2.getRootPath().toString())) {
            platform.runCmd(CgroupV2.rootMountCmd(), o -> {
            }, "Failed to mount cgroup2 root");
            Utils.createPaths(CgroupV2.getSubsystemRootPath());
        }

        //Enable controllers for root group
        Files.write(CgroupV2.getRootSubTreeControlPath(),
                CGROUP_SUBTREE_CONTROL_CONTENT.getBytes(StandardCharsets.UTF_8));

        Utils.createPaths(CgroupV2.getSubsystemGGPath());
        //Enable controllers for gg group
        Files.write(CgroupV2.getGGSubTreeControlPath(),
                CGROUP_SUBTREE_CONTROL_CONTENT.getBytes(StandardCharsets.UTF_8));
        Utils.createPaths(CgroupV2.getSubsystemComponentPath(component.getServiceName()));
    }

    // Reads the pids currently listed in the component's cgroup.procs file; blank lines are ignored.
    private Set<Integer> pidsInComponentCgroup(String component) throws IOException {
        return Files.readAllLines(CgroupV2.getCgroupProcsPath(component))
                .stream()
                .map(String::trim)
                .filter(line -> !line.isEmpty())
                .map(Integer::parseInt)
                .collect(Collectors.toSet());
    }

    private Path freezerCgroupStateFile(String component) {
        return CgroupV2.getCgroupFreezePath(component);
    }
}

// File: src/test/java/com/aws/greengrass/util/platforms/unix/linux/LinuxSystemResourceControllerV2Test.java (new file)
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

package com.aws.greengrass.util.platforms.unix.linux;

import com.aws.greengrass.lifecyclemanager.GreengrassService;
import com.aws.greengrass.testcommons.testutilities.GGExtension;
import com.aws.greengrass.util.platforms.SystemResourceController;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.DisabledOnOs;
import org.junit.jupiter.api.condition.OS;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.Mock;
import org.mockito.MockedStatic;
import org.mockito.junit.jupiter.MockitoExtension;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mockStatic;

/**
 * Tests that {@link LinuxSystemResourceControllerV2#updateResourceLimits} writes the expected content to the
 * limit files. The {@link CgroupV2} path helpers are mocked so that the files live in a JUnit-managed temporary
 * directory; the tests therefore need no root privileges and leave nothing behind when an assertion fails.
 */
@ExtendWith({MockitoExtension.class, GGExtension.class})
@DisabledOnOs(OS.WINDOWS)
class LinuxSystemResourceControllerV2Test {
    private final SystemResourceController systemResourceController =
            new LinuxSystemResourceControllerV2(new LinuxPlatform());
    @Mock
    GreengrassService component;

    private static final String CGROUP_MEMORY_LIMIT_FILE_NAME = "memory.txt";
    private static final String CGROUP_CPU_LIMIT_FILE_NAME = "cpu.txt";
    private static final long MEMORY_IN_KB = 2048000;
    private static final double CPU_TIME = 0.5;
    private static final String COMPONENT_NAME = "testComponentName";

    @Test
    void GIVEN_cgroupv2_WHEN_memory_limit_updated_THEN_memory_limit_file_updated(@TempDir Path componentDir)
            throws IOException {
        Map<String, Object> resourceLimit = new HashMap<>();
        resourceLimit.put("memory", String.valueOf(MEMORY_IN_KB));
        doReturn(COMPONENT_NAME).when(component).getServiceName();

        Path memoryMaxFile = Files.createFile(componentDir.resolve(CGROUP_MEMORY_LIMIT_FILE_NAME));

        try (MockedStatic<CgroupV2> cgroupV2 = mockStatic(CgroupV2.class)) {
            cgroupV2.when(() -> CgroupV2.getComponentMemoryMaxPath(COMPONENT_NAME))
                    .thenReturn(memoryMaxFile);
            // The component directory already exists, so the controller does not try to initialize a cgroup.
            cgroupV2.when(() -> CgroupV2.getSubsystemComponentPath(COMPONENT_NAME))
                    .thenReturn(componentDir);
            systemResourceController.updateResourceLimits(component, resourceLimit);
        }

        List<String> lines = Files.readAllLines(memoryMaxFile);
        assertEquals(String.valueOf(MEMORY_IN_KB * 1024), lines.get(0));
    }

    @Test
    void GIVEN_cgroupv2_WHEN_cpu_limit_updated_THEN_cpu_limit_file_updated(@TempDir Path componentDir)
            throws IOException {
        Map<String, Object> resourceLimit = new HashMap<>();
        resourceLimit.put("cpus", String.valueOf(CPU_TIME));
        doReturn(COMPONENT_NAME).when(component).getServiceName();

        // Seed the file with an unlimited quota and a period of 100000, which the controller reads back.
        Path cpuMaxFile = componentDir.resolve(CGROUP_CPU_LIMIT_FILE_NAME);
        Files.write(cpuMaxFile, "max 100000".getBytes(StandardCharsets.UTF_8));

        try (MockedStatic<CgroupV2> cgroupV2 = mockStatic(CgroupV2.class)) {
            cgroupV2.when(() -> CgroupV2.getComponentCpuMaxPath(COMPONENT_NAME))
                    .thenReturn(cpuMaxFile);
            // The component directory already exists, so the controller does not try to initialize a cgroup.
            cgroupV2.when(() -> CgroupV2.getSubsystemComponentPath(COMPONENT_NAME))
                    .thenReturn(componentDir);
            systemResourceController.updateResourceLimits(component, resourceLimit);
        }

        List<String> lines = Files.readAllLines(cpuMaxFile);
        assertEquals((int) (CPU_TIME * 100000) + " 100000", lines.get(0));
    }
}