Skip to content

Commit

Permalink
feat: pipe system logs to nucleus log path
Browse files Browse the repository at this point in the history
  • Loading branch information
alter-mage committed Oct 31, 2024
1 parent 7bb2213 commit 499419f
Show file tree
Hide file tree
Showing 22 changed files with 181 additions and 23 deletions.
2 changes: 1 addition & 1 deletion scripts/greengrass.service.template
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ PIDFile=REPLACE_WITH_GG_LOADER_PID_FILE
RemainAfterExit=no
Restart=on-failure
RestartSec=10
ExecStart=/bin/sh REPLACE_WITH_GG_LOADER_FILE
ExecStart=/bin/sh -c "REPLACE_WITH_GG_LOADER_FILE >> REPLACE_WITH_NUCLEUS_LOG_FILE 2>&1"
KillMode=mixed

[Install]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ public class DeviceConfiguration {
public static final String FALLBACK_VERSION = "0.0.0";
private final Configuration config;
private final KernelCommandLine kernelCommandLine;

private final Validator deTildeValidator;
private final Validator regionValidator;
private final AtomicBoolean rootCA3Downloaded = new AtomicBoolean(false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@
import com.aws.greengrass.lifecyclemanager.Kernel;
import com.aws.greengrass.lifecyclemanager.KernelAlternatives;
import com.aws.greengrass.logging.api.Logger;
import com.aws.greengrass.util.NucleusLogsSummarizer;
import com.aws.greengrass.util.Pair;
import com.aws.greengrass.util.Utils;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CancellationException;
Expand All @@ -46,6 +49,7 @@ public class KernelUpdateDeploymentTask implements DeploymentTask {
private final Deployment deployment;
private final ComponentManager componentManager;
private final CompletableFuture<DeploymentResult> deploymentResultCompletableFuture;
private final Path nucleusLogsPath;

/**
* Constructor for KernelUpdateDeploymentTask.
Expand All @@ -62,6 +66,7 @@ public KernelUpdateDeploymentTask(Kernel kernel, Logger logger, Deployment deplo
this.logger = logger.dfltKv(DEPLOYMENT_ID_LOG_KEY, deployment.getGreengrassDeploymentId());
this.componentManager = componentManager;
this.deploymentResultCompletableFuture = new CompletableFuture<>();
this.nucleusLogsPath = kernel.getNucleusPaths().nucleusLogsPath();
}

@SuppressWarnings({"PMD.AvoidDuplicateLiterals"})
Expand Down Expand Up @@ -138,6 +143,7 @@ private void waitForServicesToStart() {
getDeploymentStatusDetails());
}
}

deploymentResultCompletableFuture.complete(result);
}

Expand All @@ -156,9 +162,20 @@ private DeploymentException getDeploymentStatusDetails() {
if (Files.deleteIfExists(
kernel.getNucleusPaths().workPath(DEFAULT_NUCLEUS_COMPONENT_NAME)
.resolve(RESTART_PANIC_FILE_NAME).toAbsolutePath())) {
return new DeploymentException(
"Nucleus update workflow failed to restart Nucleus. See loader logs for more details",
String nucleusLogs;
try {
nucleusLogs = new String(Files.readAllBytes(this.nucleusLogsPath), StandardCharsets.UTF_8);
return new DeploymentException(
String.format("Nucleus update workflow failed to restart Nucleus.%n%s",
NucleusLogsSummarizer.summarizeLogs(nucleusLogs)),
DeploymentErrorCode.NUCLEUS_RESTART_FAILURE);
} catch (IOException e) {
logger.atWarn().log("Unable to read Nucleus logs for restart failure", e);
return new DeploymentException(
"Nucleus update workflow failed to restart Nucleus. Please look at the device and loader "
+ "logs for more info.",
DeploymentErrorCode.NUCLEUS_RESTART_FAILURE);
}
} else {
return new DeploymentException("Nucleus update workflow failed to restart Nucleus due to an "
+ "unexpected device IO error",
Expand All @@ -170,7 +187,7 @@ private DeploymentException getDeploymentStatusDetails() {
DeploymentErrorCode.IO_WRITE_ERROR);
}
}

List<DeploymentErrorCode> errorStack = deployment.getErrorStack() == null ? Collections.emptyList()
: deployment.getErrorStack().stream().map(DeploymentErrorCode::valueOf).collect(Collectors.toList());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,9 @@ public void updateKernelConfigWithIotConfiguration(Kernel kernel, ThingInfo thin
Path certFilePath = certPath.resolve("thingCert.crt");
Files.write(certFilePath, thing.certificatePem.getBytes(StandardCharsets.UTF_8));

new DeviceConfiguration(kernel.getConfig(), kernel.getKernelCommandLine(), thing.thingName, thing.dataEndpoint,
thing.credEndpoint, privKeyFilePath.toString(), certFilePath.toString(), caFilePath.toString(),
awsRegion, roleAliasName);
new DeviceConfiguration(kernel.getConfig(), kernel.getKernelCommandLine(),
thing.thingName, thing.dataEndpoint, thing.credEndpoint, privKeyFilePath.toString(),
certFilePath.toString(), caFilePath.toString(), awsRegion, roleAliasName);
// Make sure tlog persists the device configuration
kernel.getContext().waitForPublishQueueToClear();
outStream.println("Created device configuration");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,8 @@ void performSetup() throws IOException, DeviceConfigurationException, URISyntaxE
if (setupSystemService) {
kernel.getContext().get(KernelLifecycle.class).softShutdown(30);
boolean ok = kernel.getContext().get(SystemServiceUtilsFactory.class).getInstance()
.setupSystemService(kernel.getContext().get(KernelAlternatives.class), kernelStart);
.setupSystemService(kernel.getContext().get(KernelAlternatives.class), kernel.getNucleusPaths(),
kernelStart);
if (ok) {
outStream.println("Successfully set up Nucleus as a system service");
// Nucleus will be launched by OS as a service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ public Kernel() {
this.shutdown(-1);
}));

nucleusPaths = new NucleusPaths();
nucleusPaths = new NucleusPaths(Platform.getPlatformLoaderLogsFileName());
context.put(NucleusPaths.class, nucleusPaths);
kernelCommandLine = new KernelCommandLine(this);
kernelLifecycle = new KernelLifecycle(this, kernelCommandLine, nucleusPaths);
Expand Down Expand Up @@ -215,6 +215,7 @@ public static String findServiceForNode(Node node) {
*/
@SuppressWarnings("PMD.MissingBreakInSwitch")
public Kernel launch() {
logger.atInfo().log("We are logging from a good Nucleus version");
try {
Platform.getInstance().getRunWithGenerator()
.validateDefaultConfiguration(context.get(DeviceConfiguration.class));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
Expand Down Expand Up @@ -394,6 +397,37 @@ public void prepareBootstrap(String deploymentId) throws IOException {
setupLinkToDirectory(getCurrentDir(), newLaunchDir);
Files.delete(getNewDir());
logger.atInfo().log("Finished setup of launch directory for new Nucleus");

cleanupNucleusLogs();
}

/**
 * Empties the Nucleus log file returned by {@link #getNucleusLogPath()}.
 *
 * <p>The file is opened for writing WITHOUT truncating it (it is created when absent), a lock on it is
 * attempted, and the content is discarded only once that lock has been obtained. When the lock cannot be
 * obtained the file is left untouched and a warning is logged. I/O failures are logged and not propagated.
 *
 * <p>The lock-then-truncate sequence exists because, per the original author, the Windows file system does not
 * allow a brute force truncate.
 */
protected void cleanupNucleusLogs() {
    final Path logPath = getNucleusLogPath().toAbsolutePath();
    logger.atDebug().kv("logs-path", logPath).log("Cleaning up Nucleus logs");
    // A FileOutputStream opened in non-append mode truncates on open, i.e. before any lock could be taken.
    // FileChannel.open with WRITE + CREATE keeps the existing content until truncate() is called explicitly.
    try (FileChannel channel = FileChannel.open(logPath, java.nio.file.StandardOpenOption.WRITE,
            java.nio.file.StandardOpenOption.CREATE)) {
        // tryLock() returns null when another process holds the lock; try-with-resources skips null resources
        try (FileLock lock = channel.tryLock()) {
            if (lock == null) {
                logger.atWarn().log("Cannot clean Nucleus logs, the log file is locked by another process");
                return;
            }
            channel.truncate(0);
        }
        // Reached only when the truncation succeeded and the lock has been released
        logger.atDebug().log("Finished cleaning up Nucleus logs");
    } catch (IOException e) {
        logger.atError().setCause(e).log("Error while cleaning the Nucleus logs file");
    }
}

/**
Expand Down Expand Up @@ -529,4 +563,8 @@ private void cleanupLaunchDirectorySingleLevel(File filePath) throws IOException
}
Files.deleteIfExists(filePath.toPath());
}

/**
 * Returns the location of the Nucleus log file.
 *
 * @return the path reported by {@code nucleusPaths.nucleusLogsPath()}, converted to an absolute path
 */
public Path getNucleusLogPath() {
    final Path nucleusLogFile = nucleusPaths.nucleusLogsPath();
    return nucleusLogFile.toAbsolutePath();
}
}
58 changes: 58 additions & 0 deletions src/main/java/com/aws/greengrass/util/NucleusLogsSummarizer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/

package com.aws.greengrass.util;

import java.util.Scanner;
import java.util.regex.Pattern;

/**
 * Extracts the portion of the loader output that lies between the second and the third Nucleus restart attempt.
 */
public final class NucleusLogsSummarizer {
    /** Matches the line announcing the second attempt; everything up to and including it is discarded. */
    public static final String STARTING_SUBSEQUENCE_REGEX =
            "^Nucleus exited ([0-9])*\\.\\s*(Attempt 2 out of 3|Retrying 2 times)$";
    /** Matches the line announcing the third attempt; it is the last line kept in the summary. */
    public static final String ENDING_SUBSEQUENCE_REGEX =
            "^Nucleus exited ([0-9])*\\.\\s*(Attempt 3 out of 3|Retrying 3 times)$";

    // Compiled once: String.matches would recompile the expression for every scanned line.
    private static final Pattern STARTING_SUBSEQUENCE_PATTERN = Pattern.compile(STARTING_SUBSEQUENCE_REGEX);
    private static final Pattern ENDING_SUBSEQUENCE_PATTERN = Pattern.compile(ENDING_SUBSEQUENCE_REGEX);

    // Lines with this prefix are dropped from the summary.
    // NOTE(review): presumably shell trace output of the loader script - confirm.
    private static final String SKIPPED_LINE_PREFIX = "+";

    private NucleusLogsSummarizer() {
    }

    /**
     * Summarizes loader logs that can be published as part of the deployment status FSS message when deployment fails
     * with NRF.
     *
     * <p>Lines up to and including the first one matching {@link #STARTING_SUBSEQUENCE_REGEX} are skipped. The
     * following lines are collected, each terminated with the platform line separator, except lines starting with
     * {@code +}, which are omitted. Collection stops at the first line matching {@link #ENDING_SUBSEQUENCE_REGEX};
     * that line is appended without a trailing separator. If no starting line is present the result is empty; if no
     * ending line is present everything after the starting line is returned.
     *
     * @param blob string blob containing loader logs; {@code null} is treated like an empty string
     * @return string containing summarized logs, never {@code null}
     */
    public static String summarizeLogs(String blob) {
        if (blob == null || blob.isEmpty()) {
            return "";
        }

        StringBuilder summary = new StringBuilder();
        try (Scanner scanner = new Scanner(blob)) {
            // Consume input up to and including the first "second attempt" marker
            while (scanner.hasNextLine()) {
                if (STARTING_SUBSEQUENCE_PATTERN.matcher(scanner.nextLine()).matches()) {
                    break;
                }
            }

            // Collect what was logged until the "third attempt" marker shows up
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();

                if (ENDING_SUBSEQUENCE_PATTERN.matcher(line).matches()) {
                    summary.append(line);
                    break;
                }

                if (line.startsWith(SKIPPED_LINE_PREFIX)) {
                    continue;
                }

                summary.append(line).append(System.lineSeparator());
            }
        }
        return summary.toString();
    }
}
11 changes: 11 additions & 0 deletions src/main/java/com/aws/greengrass/util/NucleusPaths.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package com.aws.greengrass.util;

import com.aws.greengrass.componentmanager.models.ComponentIdentifier;
import com.aws.greengrass.logging.impl.LogManager;

import java.io.IOException;
import java.nio.file.Path;
Expand All @@ -16,6 +17,7 @@

@SuppressWarnings("checkstyle:MissingJavadocMethod")
public class NucleusPaths {
private final String loaderLogFileName;
private Path rootPath;
private Path workPath;
private Path componentStorePath;
Expand All @@ -25,6 +27,10 @@ public class NucleusPaths {
private Path cliIpcInfoPath;
private Path binPath;

/**
 * Creates a path holder that remembers the file name used for the loader log.
 *
 * @param loaderLogFileName file name later resolved against the log store directory by {@code nucleusLogsPath()}
 */
public NucleusPaths(String loaderLogFileName) {
    this.loaderLogFileName = loaderLogFileName;
}

public void initPaths(Path root, Path workPath, Path componentStorePath, Path configPath, Path kernelAlts,
Path deployment, Path cliIpcInfo, Path binPath) throws IOException {
setRootPath(root);
Expand Down Expand Up @@ -191,4 +197,9 @@ public static void setLoggerPath(Path p) throws IOException {
Utils.createPaths(p);
Permissions.setLoggerPermission(p);
}

/**
 * Resolves the loader log file inside the root logger's store directory.
 *
 * @return absolute path of the file named by {@code loaderLogFileName} within the directory reported by
 *         {@code LogManager.getRootLogConfiguration().getStoreDirectory()}
 */
public Path nucleusLogsPath() {
    final Path logDirectory = LogManager.getRootLogConfiguration().getStoreDirectory();
    final Path loaderLogFile = logDirectory.resolve(this.loaderLogFileName);
    return loaderLogFile.toAbsolutePath();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
import com.aws.greengrass.lifecyclemanager.KernelAlternatives;
import com.aws.greengrass.logging.api.Logger;
import com.aws.greengrass.logging.impl.LogManager;
import com.aws.greengrass.util.NucleusPaths;

public class InitUtils implements SystemServiceUtils {
protected static final Logger logger = LogManager.getLogger(InitUtils.class);

@Override
public boolean setupSystemService(KernelAlternatives kernelAlternatives, boolean start) {
public boolean setupSystemService(KernelAlternatives kernelAlternatives, NucleusPaths nucleusPaths, boolean start) {
logger.atError().log("System service registration is not implemented for this device");
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.aws.greengrass.lifecyclemanager.KernelAlternatives;
import com.aws.greengrass.logging.api.Logger;
import com.aws.greengrass.logging.impl.LogManager;
import com.aws.greengrass.util.NucleusPaths;

import java.io.BufferedReader;
import java.io.BufferedWriter;
Expand All @@ -30,7 +31,7 @@ public class ProcdUtils implements SystemServiceUtils {
private static final String PROCD_SERVICE_TEMPLATE = "greengrass.service.procd.template";

@Override
public boolean setupSystemService(KernelAlternatives kernelAlternatives, boolean start) {
public boolean setupSystemService(KernelAlternatives kernelAlternatives, NucleusPaths nucleusPaths, boolean start) {
logger.atInfo(LOG_EVENT_NAME).log("Start procd setup");
try {
kernelAlternatives.setupInitLaunchDirIfAbsent();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.aws.greengrass.lifecyclemanager.KernelAlternatives;
import com.aws.greengrass.logging.api.Logger;
import com.aws.greengrass.util.Exec;
import com.aws.greengrass.util.NucleusPaths;
import com.aws.greengrass.util.platforms.Platform;

import java.io.IOException;
Expand All @@ -17,10 +18,11 @@ public interface SystemServiceUtils {
* Setup Greengrass as a system service.
*
* @param kernelAlternatives KernelAlternatives instance which manages launch directory
* @param nucleusPaths NucleusPaths instance which manages Nucleus root paths
* @param start Whether or not to start the service right away
* @return true if setup is successful, false otherwise
*/
boolean setupSystemService(KernelAlternatives kernelAlternatives, boolean start);
boolean setupSystemService(KernelAlternatives kernelAlternatives, NucleusPaths nucleusPaths, boolean start);

/**
* Simply run a command with privileges.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.aws.greengrass.lifecyclemanager.KernelAlternatives;
import com.aws.greengrass.logging.api.Logger;
import com.aws.greengrass.logging.impl.LogManager;
import com.aws.greengrass.util.NucleusPaths;

import java.io.BufferedReader;
import java.io.BufferedWriter;
Expand All @@ -22,13 +23,14 @@ public class SystemdUtils implements SystemServiceUtils {
protected static final Logger logger = LogManager.getLogger(SystemdUtils.class);
private static final String PID_FILE_PARAM = "REPLACE_WITH_GG_LOADER_PID_FILE";
private static final String LOADER_FILE_PARAM = "REPLACE_WITH_GG_LOADER_FILE";
private static final String NUCLEUS_LOG_FILE_PARAM = "REPLACE_WITH_NUCLEUS_LOG_FILE";
private static final String SERVICE_CONFIG_FILE_PATH = "/etc/systemd/system/greengrass.service";
private static final String LOG_EVENT_NAME = "systemd-setup";
private static final String SYSTEMD_SERVICE_FILE = "greengrass.service";
private static final String SYSTEMD_SERVICE_TEMPLATE = "greengrass.service.template";

@Override
public boolean setupSystemService(KernelAlternatives kernelAlternatives, boolean start) {
public boolean setupSystemService(KernelAlternatives kernelAlternatives, NucleusPaths nucleusPaths, boolean start) {
logger.atDebug(LOG_EVENT_NAME).log("Start systemd setup");
try {
kernelAlternatives.setupInitLaunchDirIfAbsent();
Expand Down Expand Up @@ -72,7 +74,8 @@ private void interpolateServiceTemplate(Path src, Path dst, KernelAlternatives k
String line = r.readLine();
while (line != null) {
w.write(line.replace(PID_FILE_PARAM, kernelAlternatives.getLoaderPidPath().toString())
.replace(LOADER_FILE_PARAM, kernelAlternatives.getLoaderPath().toString()));
.replace(LOADER_FILE_PARAM, kernelAlternatives.getLoaderPath().toString())
.replace(NUCLEUS_LOG_FILE_PARAM, kernelAlternatives.getNucleusLogPath().toString()));
w.newLine();
line = r.readLine();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public WinswUtils(NucleusPaths nucleusPaths) {
}

@Override
public boolean setupSystemService(KernelAlternatives kernelAlternatives, boolean start) {
public boolean setupSystemService(KernelAlternatives kernelAlternatives, NucleusPaths nucleusPaths, boolean start) {
logger.atDebug(LOG_EVENT_NAME).log("Start Windows service setup");
try {
kernelAlternatives.setupInitLaunchDirIfAbsent();
Expand Down
13 changes: 13 additions & 0 deletions src/main/java/com/aws/greengrass/util/platforms/Platform.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,19 @@ public static Platform getInstance() {
}
}

/**
 * Selects the loader log file name that belongs to the platform this process runs on.
 *
 * @return {@code WindowsPlatform.LOADER_LOGS_FILE_NAME} when {@code PlatformResolver.isWindows} is set,
 *         {@code UnixPlatform.LOADER_LOGS_FILE_NAME} otherwise
 */
public static String getPlatformLoaderLogsFileName() {
    return PlatformResolver.isWindows
            ? WindowsPlatform.LOADER_LOGS_FILE_NAME
            : UnixPlatform.LOADER_LOGS_FILE_NAME;
}

public abstract Set<Integer> killProcessAndChildren(Process process, boolean force, Set<Integer> additionalPids,
UserDecorator decorator)
throws IOException, InterruptedException;
Expand Down
Loading

0 comments on commit 499419f

Please sign in to comment.