Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Linux Control Group version 2 API support cgroup v2 #1329

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
58f4ec1
feat: linux control group version 2 API support cgroup v2
ChangxinDong Oct 10, 2022
52800bd
Merge branch 'aws-greengrass:main' into main
ChangxinDong Oct 21, 2022
1abc7cc
feat: linux control group version 2 API support cgroup v2
yiwenTS Oct 25, 2022
5832fb2
feat: linux control group version 2 API support cgroup v2
ChangxinDong Oct 25, 2022
8132b86
Merge branch 'aws-greengrass:main' into main
ChangxinDong Oct 27, 2022
1b95b7d
feat: linux control group version 2 API support cgroup v2
ChangxinDong Nov 3, 2022
b8cd781
feat: linux control group version 2 API support cgroup v2
ChangxinDong Nov 4, 2022
901efe1
Merge branch 'aws-greengrass:main' into main
ChangxinDong Nov 8, 2022
58441a7
feat: linux control group version 2 API support cgroup v2
ChangxinDong Nov 8, 2022
8ef0143
feat: linux control group version 2 API support cgroup v2
ChangxinDong Nov 11, 2022
ec2348b
feat: linux control group version 2 API support cgroup v2
yiwenTS Nov 11, 2022
3db48d7
feat: linux control group version 2 API support cgroup v2
ChangxinDong Nov 11, 2022
996d87c
Merge branch 'aws-greengrass:main' into main
ChangxinDong Nov 18, 2022
c1626ce
feat: linux control group version 2 API support cgroup v2
ChangxinDong Nov 18, 2022
acfe1eb
Merge branch 'aws-greengrass:main' into main
ChangxinDong Nov 18, 2022
6004a64
feat: linux control group version 2 API support cgroup v2
ChangxinDong Nov 18, 2022
936e8eb
Merge branch 'main' of ssh://github.com/ChangxinDong/aws-greengrass-n…
ChangxinDong Nov 18, 2022
1a35f5d
Merge branch 'main' into main
junfuchen99 Aug 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import com.aws.greengrass.lifecyclemanager.Kernel;
import com.aws.greengrass.testcommons.testutilities.GGExtension;
import com.aws.greengrass.testcommons.testutilities.TestUtils;
import com.aws.greengrass.util.platforms.unix.linux.Cgroup;
import com.aws.greengrass.util.platforms.unix.linux.CGroupV1;
import com.aws.greengrass.util.platforms.unix.linux.LinuxSystemResourceController;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
Expand All @@ -19,6 +19,7 @@
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.extension.ExtensionContext;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.junit.jupiter.MockitoExtension;
import software.amazon.awssdk.aws.greengrass.GreengrassCoreIPCClient;
import software.amazon.awssdk.aws.greengrass.model.PauseComponentRequest;
import software.amazon.awssdk.aws.greengrass.model.ResumeComponentRequest;
Expand All @@ -30,6 +31,7 @@
import java.nio.file.FileSystemException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Optional;
import java.util.concurrent.TimeUnit;

Expand All @@ -42,8 +44,9 @@
import static org.hamcrest.Matchers.is;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assumptions.assumeTrue;

@ExtendWith({GGExtension.class})
@ExtendWith({GGExtension.class, MockitoExtension.class})
class IPCHibernateTest {
private static final String TARGET_COMPONENT_NAME = "HibernateTarget";
private static final String CONTROLLER_COMPONENT_NAME = "HibernateController";
Expand Down Expand Up @@ -92,6 +95,7 @@ void beforeEach(ExtensionContext context) throws Exception {
@Test
void GIVEN_LifeCycleEventStreamClient_WHEN_pause_resume_component_THEN_target_service_paused_and_resumed()
throws Exception {
assumeTrue(!ifCgroupV2(), "skip this test case if v2 is enabled.");
GenericExternalService component = (GenericExternalService) kernel.locate(TARGET_COMPONENT_NAME);

PauseComponentRequest pauseRequest = new PauseComponentRequest();
Expand All @@ -115,8 +119,12 @@ void GIVEN_LifeCycleEventStreamClient_WHEN_pause_resume_component_THEN_target_se
private LinuxSystemResourceController.CgroupFreezerState getCgroupFreezerState(String serviceName)
throws IOException {
return LinuxSystemResourceController.CgroupFreezerState.valueOf(
new String(Files.readAllBytes(Cgroup.Freezer.getCgroupFreezerStateFilePath(serviceName)),
new String(Files.readAllBytes(CGroupV1.Freezer.getCgroupFreezerStateFilePath(serviceName)),
StandardCharsets.UTF_8).trim());
}

/**
 * Reports whether this host appears to use cgroup v2, judged solely by the presence of
 * {@code /sys/fs/cgroup/cgroup.controllers}.
 *
 * @return {@code true} when that file exists, {@code false} otherwise
 */
private boolean ifCgroupV2() {
    final Path controllersFile = Paths.get("/sys/fs/cgroup/cgroup.controllers");
    return Files.exists(controllersFile);
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,31 @@
import com.aws.greengrass.logging.impl.GreengrassLogMessage;
import com.aws.greengrass.logging.impl.Slf4jLogAdapter;
import com.aws.greengrass.status.model.ComponentStatusDetails;
import com.aws.greengrass.testcommons.testutilities.GGExtension;
import com.aws.greengrass.testcommons.testutilities.NoOpPathOwnershipHandler;
import com.aws.greengrass.util.Pair;
import com.aws.greengrass.util.platforms.unix.linux.Cgroup;
import com.aws.greengrass.util.platforms.unix.linux.CGroupV1;
import com.aws.greengrass.util.platforms.unix.linux.CGroupV2;
import com.aws.greengrass.util.platforms.unix.linux.LinuxSystemResourceController;
import org.apache.commons.lang3.SystemUtils;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledOnOs;
import org.junit.jupiter.api.condition.OS;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.extension.ExtensionContext;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.junit.jupiter.MockitoExtension;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystemException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;
Expand Down Expand Up @@ -81,11 +86,13 @@
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;


@ExtendWith({GGExtension.class, MockitoExtension.class})
class GenericExternalServiceIntegTest extends BaseITCase {

private Kernel kernel;

private final static String ROOT_PATH_STRING = "/sys/fs/cgroup";
private final static String GG_PATH_STRING = "greengrass";

static Stream<Arguments> posixTestUserConfig() {
return Stream.of(
arguments("config_run_with_user.yaml", "nobody", "nobody"),
Expand Down Expand Up @@ -536,7 +543,7 @@ void GIVEN_posix_default_user_WHEN_runs_THEN_runs_with_default_user(String file,
String messageOnStdout = m.getMessage();
if (STDOUT.equals(m.getEventType()) && messageOnStdout != null
&& (messageOnStdout.contains("run as")
|| messageOnStdout.contains("install as") )) {
|| messageOnStdout.contains("install as") )) {
stdouts.add(messageOnStdout);
countDownLatch.countDown();
}
Expand Down Expand Up @@ -578,6 +585,7 @@ void GIVEN_posix_default_user_WHEN_runs_THEN_runs_with_default_user(String file,
@EnabledOnOs({OS.LINUX})
@Test
void GIVEN_linux_resource_limits_WHEN_it_changes_THEN_component_runs_with_new_resource_limits() throws Exception {
assumeTrue(!ifCgroupV2(), "skip this test case if v2 is enabled.");
String componentName = "echo_service";
// Run with no resource limit
ConfigPlatformResolver.initKernelWithMultiPlatformConfig(kernel,
Expand Down Expand Up @@ -624,6 +632,48 @@ void GIVEN_linux_resource_limits_WHEN_it_changes_THEN_component_runs_with_new_re
assertResourceLimits(componentName, 10240l * 1024, 1.5);
}

/**
 * On a cgroup v2 host, checks that the limits written for {@code echo_service} follow the
 * component's configuration: the initial limits, then updated memory/cpus values, then the
 * initial limits again once the component-level limits are removed.
 *
 * @throws Exception if the kernel cannot be configured or the cgroup files cannot be read
 */
@EnabledOnOs({OS.LINUX})
@Test
void GIVEN_linux_resource_limits_WHEN_it_changes_THEN_component_runs_with_new_resource_limits_V2() throws Exception {
    assumeTrue(ifCgroupV2(), "skip this test case if v1 is enabled.");

    String echoComponentName = "echo_service";
    // Run with no resource limit
    ConfigPlatformResolver.initKernelWithMultiPlatformConfig(kernel,
            getClass().getResource("config_run_with_user.yaml"));
    CountDownLatch service = new CountDownLatch(1);
    kernel.getContext().addGlobalStateChangeListener((s, oldState, newState) -> {
        if (s.getName().equals(echoComponentName) && newState.equals(State.RUNNING)) {
            service.countDown();
        }
    });

    kernel.launch();
    // Block until the component has reached RUNNING; without this the first assertion races
    // the component start-up and the latch above would never be consulted.
    assertTrue(service.await(60, TimeUnit.SECONDS), "service running");
    assertResourceLimitsCgroupV2(10240L * 1024, 1.5);

    // Run with updated component resource limit
    kernel.getConfig().lookup(SERVICES_NAMESPACE_TOPIC, echoComponentName, RUN_WITH_NAMESPACE_TOPIC,
            SYSTEM_RESOURCE_LIMITS_TOPICS, "memory").withValue(51200L);
    kernel.getConfig().lookup(SERVICES_NAMESPACE_TOPIC, echoComponentName, RUN_WITH_NAMESPACE_TOPIC,
            SYSTEM_RESOURCE_LIMITS_TOPICS, "cpus").withValue(0.35);
    kernel.getConfig().lookup(SERVICES_NAMESPACE_TOPIC, echoComponentName, VERSION_CONFIG_KEY).withValue("2.0.0");
    // Block until events are completed
    kernel.getContext().waitForPublishQueueToClear();

    assertResourceLimitsCgroupV2(51200L * 1024, 0.35);

    // Remove component resource limit, should fall back to default
    kernel.getConfig().lookupTopics(SERVICES_NAMESPACE_TOPIC, echoComponentName, RUN_WITH_NAMESPACE_TOPIC,
            SYSTEM_RESOURCE_LIMITS_TOPICS).remove();
    kernel.getContext().waitForPublishQueueToClear();

    assertResourceLimitsCgroupV2(10240L * 1024, 1.5);
}

/**
 * Reports whether this host appears to use cgroup v2, judged solely by the presence of
 * {@code /sys/fs/cgroup/cgroup.controllers}.
 *
 * @return {@code true} when that file exists, {@code false} otherwise
 */
private boolean ifCgroupV2() {
    final Path controllersFile = Paths.get("/sys/fs/cgroup/cgroup.controllers");
    return Files.exists(controllersFile);
}

@Test
void GIVEN_service_starts_up_WHEN_service_breaks_THEN_status_details_persisted_for_errored_and_broken_states()
throws Exception {
Expand Down Expand Up @@ -754,21 +804,80 @@ void GIVEN_service_starts_up_WHEN_startup_times_out_THEN_timeout_error_code_pers
assertThat(statusB.get().getStatusReason(), containsString(ComponentStatusCode.RUN_TIMEOUT.getDescription()));
}

/**
 * On a cgroup v2 host, pauses and resumes the {@code sleeperA} component and checks that the
 * component's freezer state file reads "1" while paused and "0" after resuming.
 *
 * @param context JUnit extension context, used to ignore {@link FileSystemException}s
 * @throws Exception if the kernel cannot be configured or the freezer file cannot be read
 */
@Test
@EnabledOnOs({OS.LINUX})
void GIVEN_running_service_WHEN_pause_resume_requested_THEN_pause_resume_Service_and_freeze_thaw_cgroup_V2(
        ExtensionContext context) throws Exception {
    assumeTrue(ifCgroupV2(), "skip this test case if v1 is enabled.");
    ignoreExceptionOfType(context, FileSystemException.class);
    ConfigPlatformResolver.initKernelWithMultiPlatformConfig(kernel,
            getClass().getResource("long_running_services.yaml"));
    kernel.launch();

    // NOTE(review): the listener is registered after launch(); if main could already be running
    // by this point the transition would be missed and the await below would time out - confirm.
    CountDownLatch mainRunningLatch = new CountDownLatch(1);
    kernel.getContext().addGlobalStateChangeListener((service, oldState, newState) -> {
        if (kernel.getMain().equals(service) && newState.isRunning()) {
            mainRunningLatch.countDown();
        }
    });

    // wait for main to run
    assertTrue(mainRunningLatch.await(60, TimeUnit.SECONDS), "main running");

    GenericExternalService component = (GenericExternalService) kernel.locate("sleeperA");
    assertThat(component.getState(), is(State.RUNNING));

    component.pause();
    assertTrue(component.isPaused());
    // Expected value first, actual second, so a failure message reports them the right way round.
    assertEquals("1", getCgroupFreezerStateV2(component.getServiceName()));

    component.resume();
    assertFalse(component.isPaused());
    assertEquals("0", getCgroupFreezerStateV2(component.getServiceName()));
}

/**
 * Reads the cgroup v2 freezer state file of the given service.
 *
 * @param serviceName name of the service whose freezer state file is read
 * @return the file content decoded as UTF-8 with surrounding whitespace trimmed
 * @throws IOException if the file cannot be read
 */
private String getCgroupFreezerStateV2(String serviceName) throws IOException {
    final Path freezerStateFile = CGroupV2.Freezer.getCgroupFreezerStateFilePath(serviceName);
    final byte[] rawState = Files.readAllBytes(freezerStateFile);
    final String decoded = new String(rawState, StandardCharsets.UTF_8);
    return decoded.trim();
}

private void assertResourceLimits(String componentName, long memory, double cpus) throws Exception {
byte[] buf1 = Files.readAllBytes(Cgroup.Memory.getComponentMemoryLimitPath(componentName));
byte[] buf1 = Files.readAllBytes(CGroupV1.Memory.getComponentMemoryLimitPath(componentName));
assertThat(memory, equalTo(Long.parseLong(new String(buf1, StandardCharsets.UTF_8).trim())));

byte[] buf2 = Files.readAllBytes(Cgroup.CPU.getComponentCpuQuotaPath(componentName));
byte[] buf3 = Files.readAllBytes(Cgroup.CPU.getComponentCpuPeriodPath(componentName));
byte[] buf2 = Files.readAllBytes(CGroupV1.CPU.getComponentCpuQuotaPath(componentName));
byte[] buf3 = Files.readAllBytes(CGroupV1.CPU.getComponentCpuPeriodPath(componentName));

int quota = Integer.parseInt(new String(buf2, StandardCharsets.UTF_8).trim());
int period = Integer.parseInt(new String(buf3, StandardCharsets.UTF_8).trim());
int expectedQuota = (int) (cpus * period);
assertThat(expectedQuota, equalTo(quota));
}

/**
 * Asserts the cgroup v2 limits currently written for the {@code echo_service} component.
 *
 * <p>{@code memory.max} must equal {@code memory}. {@code cpu.max} is read as two
 * whitespace-separated integers, quota then period, and the quota must equal
 * {@code (int) (cpus * period)}.
 *
 * @param memory expected memory limit, compared directly against the content of memory.max
 * @param cpus   expected CPU share expressed as a multiple of the period
 * @throws Exception if a cgroup file cannot be read or does not contain integers
 */
private void assertResourceLimitsCgroupV2(long memory, double cpus) throws Exception {
    Path componentCgroup = Paths.get(ROOT_PATH_STRING, GG_PATH_STRING, "echo_service");

    String memoryMaxContent =
            new String(Files.readAllBytes(componentCgroup.resolve("memory.max")), StandardCharsets.UTF_8).trim();
    // Hamcrest takes the actual value first; the reversed order produced misleading failure messages.
    assertThat(Long.parseLong(memoryMaxContent), equalTo(memory));

    String cpuMaxContent =
            new String(Files.readAllBytes(componentCgroup.resolve("cpu.max")), StandardCharsets.UTF_8).trim();
    String[] cpuMaxFields = cpuMaxContent.split("\\s+");
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException on unexpected content.
    assertEquals(2, cpuMaxFields.length, "unexpected cpu.max content: " + cpuMaxContent);

    int quota = Integer.parseInt(cpuMaxFields[0]);
    int period = Integer.parseInt(cpuMaxFields[1]);
    int expectedQuota = (int) (cpus * period);
    assertThat(quota, equalTo(expectedQuota));
}

@Test
@EnabledOnOs({OS.LINUX})
void GIVEN_running_service_WHEN_pause_resume_requested_THEN_pause_resume_Service_and_freeze_thaw_cgroup(
ExtensionContext context) throws Exception {
assumeTrue(!ifCgroupV2(), "skip this test case if v2 is enabled.");
ignoreExceptionOfType(context, FileSystemException.class);
ConfigPlatformResolver.initKernelWithMultiPlatformConfig(kernel,
getClass().getResource("long_running_services.yaml"));
Expand Down Expand Up @@ -803,7 +912,7 @@ void GIVEN_running_service_WHEN_pause_resume_requested_THEN_pause_resume_Service
private LinuxSystemResourceController.CgroupFreezerState getCgroupFreezerState(String serviceName)
throws IOException {
return LinuxSystemResourceController.CgroupFreezerState
.valueOf(new String(Files.readAllBytes(Cgroup.Freezer.getCgroupFreezerStateFilePath(serviceName))
.valueOf(new String(Files.readAllBytes(CGroupV1.Freezer.getCgroupFreezerStateFilePath(serviceName))
, StandardCharsets.UTF_8).trim());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/

package com.aws.greengrass.util.platforms.unix.linux;

import com.aws.greengrass.lifecyclemanager.GreengrassService;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Path layout and shared initialization logic for a cgroup subsystem used to manage
 * Greengrass components.
 *
 * <p>Implementations supply the subsystem-specific paths and operations; the default methods
 * derive the per-component directories ({@code <subsystem root>/greengrass/<component>}) and
 * create them on demand.
 */
@SuppressFBWarnings(value = "DMI_HARDCODED_ABSOLUTE_FILENAME",
        justification = "CGroupSubSystemPath virtual filesystem path cannot be relative")
public interface CGroupSubSystemPaths {
    /** Directory returned by {@link #getRootPath()}. */
    Path CGROUP_ROOT = Paths.get("/sys/fs/cgroup");
    /** Name of the directory, under the subsystem root, that holds all component cgroups. */
    String GG_NAMESPACE = "greengrass";

    // File names of cgroup control files, resolved against a component's cgroup directory.
    String CGROUP_MEMORY_LIMITS = "memory.limit_in_bytes";
    String CPU_CFS_PERIOD_US = "cpu.cfs_period_us";
    String CPU_CFS_QUOTA_US = "cpu.cfs_quota_us";
    String CGROUP_PROCS = "cgroup.procs";
    String FREEZER_STATE_FILE = "freezer.state";
    String CPU_MAX = "cpu.max";
    String MEMORY_MAX = "memory.max";
    String CGROUP_SUBTREE_CONTROL = "cgroup.subtree_control";
    String CGROUP_FREEZE = "cgroup.freeze";

    /** File listing the current mounts, parsed by {@link #getMountedPaths()}. */
    String MOUNT_PATH = "/proc/self/mounts";
    /** Escape sequence (backslash followed by 040) that stands for a space in mount paths. */
    String UNICODE_SPACE = "\\040";

    /**
     * Returns the cgroup root directory.
     *
     * @return {@link #CGROUP_ROOT}
     */
    default Path getRootPath() {
        return CGROUP_ROOT;
    }

    /**
     * Returns the command that {@link #initializeCgroupCore} runs when the cgroup root is not mounted.
     *
     * @return the mount command
     */
    String rootMountCmd();

    /**
     * Returns the root directory of this subsystem.
     *
     * @return subsystem root path
     */
    Path getSubsystemRootPath();

    /**
     * Returns the Greengrass directory of this subsystem.
     *
     * @return {@link #GG_NAMESPACE} resolved against the subsystem root
     */
    default Path getSubsystemGGPath() {
        return getSubsystemRootPath().resolve(GG_NAMESPACE);
    }

    /**
     * Returns the cgroup directory of a component within this subsystem.
     *
     * @param componentName component name
     * @return {@code componentName} resolved against the subsystem's Greengrass directory
     */
    default Path getSubsystemComponentPath(String componentName) {
        return getSubsystemGGPath().resolve(componentName);
    }

    /**
     * Returns the path of the component's memory limit file.
     *
     * @param componentName component name
     * @return path of the memory limit file
     */
    Path getComponentMemoryLimitPath(String componentName);

    /**
     * Returns the path of the component's {@code cgroup.procs} file.
     *
     * @param componentName component name
     * @return {@link #CGROUP_PROCS} resolved against the component's cgroup directory
     */
    default Path getCgroupProcsPath(String componentName) {
        return getSubsystemComponentPath(componentName).resolve(CGROUP_PROCS);
    }

    /**
     * Returns the path of the component's freezer state file.
     *
     * @param componentName component name
     * @return path of the freezer state file
     */
    Path getCgroupFreezerStateFilePath(String componentName);

    /**
     * Initializes the cgroup of a component for this subsystem.
     *
     * @param component component
     * @param platform  platform
     * @throws IOException IOException
     */
    void initializeCgroup(GreengrassService component, LinuxPlatform platform)
            throws IOException;

    /**
     * Initialize cgroup core method.
     *
     * <p>Mounts the cgroup root and the subsystem when they are missing from the mount table, then
     * makes sure the Greengrass directory and the component's directory exist.
     *
     * @param component      component
     * @param platform       platform
     * @param mountSubSystem mount subsystem method
     * @throws IOException IOException
     */
    default void initializeCgroupCore(GreengrassService component, LinuxPlatform platform,
                                      InitializeCgroup mountSubSystem) throws IOException {
        // Snapshot taken once up front; both mount checks below consult this same snapshot.
        Set<String> mounts = getMountedPaths();

        if (!mounts.contains(getRootPath().toString())) {
            platform.runCmd(rootMountCmd(), o -> {
            }, "Failed to mount cgroup root");
            // createDirectories (unlike createDirectory) does not fail when the directory is already
            // present, e.g. when the subsystem root is the freshly mounted root itself.
            Files.createDirectories(getSubsystemRootPath());
        }

        if (!mounts.contains(getSubsystemRootPath().toString())) {
            mountSubSystem.add();
        }

        // The existence checks are kept as a fast path, but the creation itself must tolerate another
        // component creating the same directory between the check and the call; createDirectory would
        // throw FileAlreadyExistsException in that window.
        Path ggPath = getSubsystemGGPath();
        if (!Files.exists(ggPath)) {
            Files.createDirectories(ggPath);
        }
        Path componentPath = getSubsystemComponentPath(component.getServiceName());
        if (!Files.exists(componentPath)) {
            Files.createDirectories(componentPath);
        }
    }

    /**
     * Applies a CPU limit to the component.
     *
     * @param component component
     * @param cpu       CPU limit value
     * @throws IOException IOException
     */
    void handleCpuLimits(GreengrassService component, double cpu) throws IOException;

    /**
     * Pauses the processes of the component.
     *
     * @param component component
     * @throws IOException IOException
     */
    void pauseComponentProcessesCore(GreengrassService component) throws IOException;

    /**
     * Resumes the processes of the component.
     *
     * @param component component
     * @throws IOException IOException
     */
    void resumeComponentProcesses(GreengrassService component) throws IOException;

    /**
     * Get mounted paths.
     *
     * <p>Reads {@link #MOUNT_PATH} and collects the mount point (second field) of every line that
     * has at least six space-separated fields; the {@code \040} escape is decoded to a space.
     *
     * @return A set of String
     * @throws IOException IOException
     */
    default Set<String> getMountedPaths() throws IOException {
        Set<String> mountedPaths = new HashSet<>();

        Path procMountsPath = Paths.get(MOUNT_PATH);
        List<String> mounts = Files.readAllLines(procMountsPath);
        for (String mount : mounts) {
            String[] split = mount.split(" ");
            // As reported in fstab(5) manpage, struct is:
            // 1st field is volume name
            // 2nd field is path with spaces escaped as \040
            // 3rd field is fs type
            // 4th field is mount options
            // 5th field is used by dump(8) (ignored)
            // 6th field is fsck order (ignored)
            if (split.length < 6) {
                continue;
            }

            // We only need the path of the mounts to verify whether cgroup is mounted
            String path = split[1].replace(UNICODE_SPACE, " ");
            mountedPaths.add(path);
        }
        return mountedPaths;
    }
}
Loading
Loading