From 2c670d5ae615648d5dc7539cf27c00461f6b78dc Mon Sep 17 00:00:00 2001 From: kqhzz Date: Mon, 22 Jul 2024 10:08:10 +0800 Subject: [PATCH] [#1924] feat(dashboard): Show Thread Dump, Conf and Metrics in DashBoard (#1927) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Add jetty_port in message ShuffleServerId. Modify dashboard, add some link in dashboard 企业微信截图_ede78628-56ac-4b7e-86ab-84b224da7ce4 企业微信截图_88b2c855-68e4-4054-94a3-d730f074a4b9 ### Why are the changes needed? Enhance dashboard capabilities Fix: #1924 ### Does this PR introduce _any_ user-facing change? No. --- .../uniffle/common/util/ThreadUtils.java | 18 ++++ .../uniffle/common/web/JettyServer.java | 4 + .../coordinator/CoordinatorGrpcService.java | 3 +- .../uniffle/coordinator/ServerNode.java | 40 +++++++++ .../resource/CoordinatorServerResource.java | 21 +++++ .../dashboard/web/proxy/WebProxyServlet.java | 14 ++-- dashboard/src/main/webapp/src/api/api.js | 78 ++++++++++++++++++ .../src/pages/CoordinatorServerPage.vue | 35 +++++++- .../src/pages/serverstatus/NodeListPage.vue | 54 +++++++++++- dashboard/src/main/webapp/src/utils/http.js | 7 +- .../impl/grpc/CoordinatorGrpcClient.java | 7 +- .../request/RssSendHeartBeatRequest.java | 9 +- proto/src/main/proto/Rss.proto | 1 + .../uniffle/server/RegisterHeartBeat.java | 9 +- .../apache/uniffle/server/ShuffleServer.java | 11 ++- .../server/web/resource/ServerResource.java | 82 +++++++++++++++++++ .../uniffle/server/web/vo/ServerConfVO.java | 44 ++++++++++ 17 files changed, 415 insertions(+), 22 deletions(-) create mode 100644 server/src/main/java/org/apache/uniffle/server/web/resource/ServerResource.java create mode 100644 server/src/main/java/org/apache/uniffle/server/web/vo/ServerConfVO.java diff --git a/common/src/main/java/org/apache/uniffle/common/util/ThreadUtils.java b/common/src/main/java/org/apache/uniffle/common/util/ThreadUtils.java index eb20039953..d84e6dc232 100644 --- a/common/src/main/java/org/apache/uniffle/common/util/ThreadUtils.java +++ b/common/src/main/java/org/apache/uniffle/common/util/ThreadUtils.java @@ -17,6 +17,9 @@ package org.apache.uniffle.common.util; +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadInfo; +import java.lang.management.ThreadMXBean; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -39,6 +42,7 @@ public class ThreadUtils { private static final Logger LOGGER = LoggerFactory.getLogger(ThreadUtils.class); + private static final ThreadMXBean THREAD_BEAN = ManagementFactory.getThreadMXBean(); /** Provide a general method to create a thread factory to make the code more standardized */ public static ThreadFactory getThreadFactory(String factoryName) { @@ -183,4 +187,18 @@ public static List executeTasks( String taskMsg) { return executeTasks(executorService, items, task, timeoutMs, taskMsg, future -> null); } + + public static synchronized void printThreadInfo(StringBuilder builder, String title) { + builder.append("Process Thread Dump: " + title + "\n"); + builder.append(THREAD_BEAN.getThreadCount() + " active threads\n"); + long[] threadIds = THREAD_BEAN.getAllThreadIds(); + for (long id : threadIds) { + ThreadInfo info = THREAD_BEAN.getThreadInfo(id, Integer.MAX_VALUE); + if (info == null) { + // The thread is no longer active, ignore + continue; + } + builder.append(info + "\n"); + } + } } diff --git a/common/src/main/java/org/apache/uniffle/common/web/JettyServer.java b/common/src/main/java/org/apache/uniffle/common/web/JettyServer.java index 87e52cbc67..0ace09414e 100644 --- a/common/src/main/java/org/apache/uniffle/common/web/JettyServer.java +++ b/common/src/main/java/org/apache/uniffle/common/web/JettyServer.java @@ -175,4 +175,8 @@ public void start() throws Exception { public void stop() throws Exception { server.stop(); } + + public int getHttpPort() { + return httpPort; + } } diff --git a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorGrpcService.java b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorGrpcService.java index ddca8a1e33..b3df63db58 100644 --- a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorGrpcService.java +++ b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorGrpcService.java @@ -433,6 +433,7 @@ private ServerNode toServerNode(ShuffleServerHeartBeatRequest request) { Sets.newHashSet(request.getTagsList()), serverStatus, StorageInfoUtils.fromProto(request.getStorageInfoMap()), - request.getServerId().getNettyPort()); + request.getServerId().getNettyPort(), + request.getServerId().getJettyPort()); } } diff --git a/coordinator/src/main/java/org/apache/uniffle/coordinator/ServerNode.java b/coordinator/src/main/java/org/apache/uniffle/coordinator/ServerNode.java index ec33d262a7..a9723e00bf 100644 --- a/coordinator/src/main/java/org/apache/uniffle/coordinator/ServerNode.java +++ b/coordinator/src/main/java/org/apache/uniffle/coordinator/ServerNode.java @@ -42,6 +42,7 @@ public class ServerNode implements Comparable { private ServerStatus status; private Map storageInfo; private int nettyPort = -1; + private int jettyPort = -1; public ServerNode(String id) { this(id, "", 0, 0, 0, 0, 0, Sets.newHashSet(), ServerStatus.EXCLUDED); @@ -115,6 +116,7 @@ public ServerNode( tags, status, storageInfoMap, + -1, -1); } @@ -130,6 +132,34 @@ public ServerNode( ServerStatus status, Map storageInfoMap, int nettyPort) { + this( + id, + ip, + grpcPort, + usedMemory, + preAllocatedMemory, + availableMemory, + eventNumInFlush, + tags, + status, + storageInfoMap, + nettyPort, + -1); + } + + public ServerNode( + String id, + String ip, + int grpcPort, + long usedMemory, + long preAllocatedMemory, + long availableMemory, + int eventNumInFlush, + Set tags, + ServerStatus status, + Map storageInfoMap, + int nettyPort, + int jettyPort) { this.id = id; this.ip = ip; this.grpcPort = grpcPort; @@ -145,6 +175,9 @@ public ServerNode( if (nettyPort > 0) { this.nettyPort = nettyPort; } + if (jettyPort > 0) { + this.jettyPort = jettyPort; + } } public ShuffleServerId convertToGrpcProto() { @@ -153,6 +186,7 @@ public ShuffleServerId convertToGrpcProto() { .setIp(ip) .setPort(grpcPort) .setNettyPort(nettyPort) + .setJettyPort(jettyPort) .build(); } @@ -214,6 +248,8 @@ public String toString() { + grpcPort + "], netty port[" + nettyPort + + "], jettyPort[" + + jettyPort + "], usedMemory[" + usedMemory + "], preAllocatedMemory[" @@ -277,4 +313,8 @@ public long getTotalMemory() { public int getNettyPort() { return nettyPort; } + + public int getJettyPort() { + return jettyPort; + } } diff --git a/coordinator/src/main/java/org/apache/uniffle/coordinator/web/resource/CoordinatorServerResource.java b/coordinator/src/main/java/org/apache/uniffle/coordinator/web/resource/CoordinatorServerResource.java index e09d3dbdcd..788c224617 100644 --- a/coordinator/src/main/java/org/apache/uniffle/coordinator/web/resource/CoordinatorServerResource.java +++ b/coordinator/src/main/java/org/apache/uniffle/coordinator/web/resource/CoordinatorServerResource.java @@ -31,7 +31,10 @@ import org.apache.hbase.thirdparty.javax.ws.rs.core.MediaType; import org.apache.uniffle.common.util.RssUtils; +import org.apache.uniffle.common.util.ThreadUtils; import org.apache.uniffle.common.web.resource.BaseResource; +import org.apache.uniffle.common.web.resource.MetricResource; +import org.apache.uniffle.common.web.resource.PrometheusMetricResource; import org.apache.uniffle.common.web.resource.Response; import org.apache.uniffle.coordinator.CoordinatorConf; import org.apache.uniffle.coordinator.CoordinatorServer; @@ -85,4 +88,22 @@ private CoordinatorServer getCoordinatorServer() { return (CoordinatorServer) servletContext.getAttribute(CoordinatorServer.class.getCanonicalName()); } + + @Path("/metrics") + public Class getMetricResource() { + return MetricResource.class; + } + + @Path("/prometheus/metrics") + public Class getPrometheusMetricResource() { + return PrometheusMetricResource.class; + } + + @GET + @Path("/stacks") + public String getCoordinatorStacks() { + StringBuilder builder = new StringBuilder(); + ThreadUtils.printThreadInfo(builder, ""); + return builder.toString(); + } } diff --git a/dashboard/src/main/java/org/apache/uniffle/dashboard/web/proxy/WebProxyServlet.java b/dashboard/src/main/java/org/apache/uniffle/dashboard/web/proxy/WebProxyServlet.java index 64fab2eaf4..084478030e 100644 --- a/dashboard/src/main/java/org/apache/uniffle/dashboard/web/proxy/WebProxyServlet.java +++ b/dashboard/src/main/java/org/apache/uniffle/dashboard/web/proxy/WebProxyServlet.java @@ -45,11 +45,15 @@ protected String rewriteTarget(HttpServletRequest clientRequest) { if (!validateDestination(clientRequest.getServerName(), clientRequest.getServerPort())) { return null; } - String targetAddress = - coordinatorServerAddressesMap.get(clientRequest.getHeader("targetAddress")); - if (targetAddress == null) { - // Get random one from coordinatorServerAddressesMap - targetAddress = coordinatorServerAddressesMap.values().iterator().next(); + String targetAddress; + if (clientRequest.getHeader("serverType").equals("coordinator")) { + targetAddress = coordinatorServerAddressesMap.get(clientRequest.getHeader("targetAddress")); + if (targetAddress == null) { + // Get random one from coordinatorServerAddressesMap + targetAddress = coordinatorServerAddressesMap.values().iterator().next(); + } + } else { + targetAddress = clientRequest.getHeader("targetAddress"); } StringBuilder target = new StringBuilder(); if (targetAddress.endsWith("/")) { diff --git a/dashboard/src/main/webapp/src/api/api.js b/dashboard/src/main/webapp/src/api/api.js index 08277f0c98..b837658ce6 100644 --- a/dashboard/src/main/webapp/src/api/api.js +++ b/dashboard/src/main/webapp/src/api/api.js @@ -26,6 +26,84 @@ export function getCoordinatorConf(params, headers) { return http.get('/coordinator/conf', params, headers, 0) } +export async function getShuffleServerConf(address, params, headers) { + if (typeof headers === 'undefined') { + headers = {}; + } + headers.targetAddress = address; + const response = await http.get('/shuffleServer/conf', params, headers, 0); + const newWindow = window.open('', '_blank'); + let tableHTML = ` + + + + + + + `; + for (const item of response.data.data) { + tableHTML += ``; + } + tableHTML += '
KeyValue
${item.argumentKey}${item.argumentValue}
'; + newWindow.document.write(tableHTML); +} + +export async function getCoordinatorMetrics(params, headers) { + const response = await http.get('/coordinator/metrics', params, headers, 0) + const newWindow = window.open('', '_blank'); + newWindow.document.write('
' + JSON.stringify(response.data, null, 2) + '
'); +} + +export async function getShuffleServerMetrics(address, params, headers) { + if (typeof headers === 'undefined') { + headers = {} + } + headers.targetAddress = address + const response = await http.get('/shuffleServer/metrics', params, headers, 0) + const newWindow = window.open('', '_blank'); + newWindow.document.write('
' + JSON.stringify(response.data, null, 2) + '
'); +} + +export async function getCoordinatorPrometheusMetrics(params, headers) { + const response = await http.get('/coordinator/prometheus/metrics/all', params, headers, 0) + const newWindow = window.open('', '_blank'); + newWindow.document.write('
' + response.data + '
'); +} + +export async function getShuffleServerPrometheusMetrics(address, params, headers) { + if (typeof headers === 'undefined') { + headers = {} + } + headers.targetAddress = address + const response = await http.get('/shuffleServer/prometheus/metrics/all', params, headers, 0) + const newWindow = window.open('', '_blank'); + newWindow.document.write('
' + response.data + '
'); +} + +export async function getCoordinatorStacks(params, headers) { + const response = await http.get('/coordinator/stacks', params, headers, 0) + const newWindow = window.open('', '_blank'); + newWindow.document.write('
' + response.data + '
'); +} + +export async function getShuffleServerStacks(address, params, headers) { + if (typeof headers === 'undefined') { + headers = {} + } + headers.targetAddress = address + const response = await http.get('/shuffleServer/stacks', params, headers, 0) + const newWindow = window.open('', '_blank'); + newWindow.document.write('
' + response.data + '
'); +} + // Create an interface for the total number of nodes export function getShufflegetStatusTotal(params, headers) { return http.get('/server/nodes/summary', params, headers, 0) diff --git a/dashboard/src/main/webapp/src/pages/CoordinatorServerPage.vue b/dashboard/src/main/webapp/src/pages/CoordinatorServerPage.vue index a13be3bde7..aac7cf317f 100644 --- a/dashboard/src/main/webapp/src/pages/CoordinatorServerPage.vue +++ b/dashboard/src/main/webapp/src/pages/CoordinatorServerPage.vue @@ -74,19 +74,50 @@ + + + + + + metrics + + + + + + + + prometheus metrics + + + + + + + + stacks + +