Skip to content

Commit

Permalink
optimization: 灰度回滚逻辑优化 (closed #1893)
Browse files Browse the repository at this point in the history
  • Loading branch information
wyyalt authored and ZhuoZhuoCrayon committed Nov 10, 2023
1 parent bf82c22 commit 5439253
Show file tree
Hide file tree
Showing 4 changed files with 286 additions and 27 deletions.
125 changes: 104 additions & 21 deletions apps/core/gray/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,34 @@ def update_host_ap_by_host_ids(

# 切换接入点
update_kwargs: typing.Dict[str, typing.Any] = {"updated_at": timezone.now()}
partial_host_ids: typing.List[int] = [host_node["bk_host_id"] for host_node in partial_host_nodes]
gse_v1_ap_id_is_none_host_count: int = 0
# 若需要更新gse_v1_ap_id使用F方式,顺序不可以变
if rollback:
# 先更新gse_v1_ap_id 为None的主机更新成映射对应的v1_ap_id(需要保证映射为一对一关系)
gse_v1_ap_id_is_none_host_ids: typing.List[int] = list(
node_man_models.Host.objects.filter(
bk_biz_id__in=bk_biz_ids,
bk_host_id__in=partial_host_ids,
gse_v1_ap_id=None,
).values_list("bk_host_id", flat=True)
)

gse_v1_ap_id_is_none_host_count: int = node_man_models.Host.objects.filter(
bk_host_id__in=gse_v1_ap_id_is_none_host_ids
).update(ap_id=v1_ap_id)

logger.info(
f"[update_host_ap_by_host_ids][rollback={rollback}] "
f"gse_v1_ap_id_is_none_host_ids -> {gse_v1_ap_id_is_none_host_ids}, "
f"Replacement AP ID -> {v1_ap_id}"
)

need_update_host_ids: typing.List[int] = list(
set(partial_host_ids) - set(gse_v1_ap_id_is_none_host_ids)
)

# 更新gse_v1_ap_id不为None的主机
update_kwargs.update(
ap_id=F("gse_v1_ap_id"),
gse_v1_ap_id=None,
Expand All @@ -173,13 +199,48 @@ def update_host_ap_by_host_ids(
gse_v1_ap_id=F("ap_id"),
ap_id=v2_ap_id,
)
need_update_host_ids: typing.List[int] = partial_host_ids

update_count: int = node_man_models.Host.objects.filter(
bk_biz_id__in=bk_biz_ids, bk_host_id__in=[host_node["bk_host_id"] for host_node in partial_host_nodes]
bk_biz_id__in=bk_biz_ids, bk_host_id__in=need_update_host_ids
).update(**update_kwargs)

update_count: int = update_count + gse_v1_ap_id_is_none_host_count
if all(
[
update_count,
not is_biz_gray,
rollback,
]
):
# 如果按业务回滚在上层已进行了回滚,此处不做处理
# 将与回滚主机关联的业务和管控区域回滚
rollback_host_infos: typing.List[typing.Dict[str, int]] = list(
node_man_models.Host.objects.filter(bk_host_id__in=partial_host_ids)
.values("bk_biz_id", "bk_cloud_id")
.distinct()
.order_by("bk_biz_id")
)

bk_biz_ids: typing.Set[int] = set()
bk_cloud_ids: typing.Set[int] = set()
for host_info in rollback_host_infos:
bk_biz_ids.add(host_info["bk_biz_id"])
bk_cloud_ids.add(host_info["bk_cloud_id"])

logger.info(
f"[update_host_ap_by_host_ids][rollback={rollback}] "
f"bk_biz_ids -> {bk_biz_ids}, bk_cloud_ids -> {bk_cloud_ids}"
)

cls.update_cloud_ap_id(
validated_data={"bk_biz_ids": bk_biz_ids}, cloud_ids=list(bk_cloud_ids), rollback=True
)
cls.update_gray_scope_list(validated_data={"bk_biz_ids": bk_biz_ids}, rollback=True)

logger.info(
f"[update_host_ap_by_host_ids][rollback={rollback}] Update count -> {update_count}, "
f"[update_host_ap_by_host_ids][rollback={rollback}] "
f"Update count -> {update_count}, "
f"expect count -> {len(partial_host_nodes)}"
)

Expand Down Expand Up @@ -222,39 +283,45 @@ def update_gray_scope_list(cls, validated_data: typing.Dict[str, typing.List[typ
logger.info("[update_gray_scope_list][rollback={rollback}] flush cache")

@classmethod
def update_cloud_ap_id(cls, validated_data: typing.Dict[str, typing.List[typing.Any]], rollback: bool = False):
def update_cloud_ap_id(
cls,
validated_data: typing.Dict[str, typing.List[typing.Any]],
cloud_ids: typing.Optional[typing.List[int]] = None,
rollback: bool = False,
):
gray_ap_map: typing.Dict[int, int] = cls.get_gray_ap_map()
gray_scope_list: typing.List[int] = GrayTools.get_or_create_gse2_gray_scope_list(get_cache=False)

clouds = (
node_man_models.Host.objects.filter(bk_biz_id__in=validated_data["bk_biz_ids"])
.values("bk_cloud_id")
.distinct()
.order_by("bk_cloud_id")
cloud_ids: typing.List[int] = cloud_ids or list(
set(
node_man_models.Host.objects.filter(bk_biz_id__in=validated_data["bk_biz_ids"]).values_list(
"bk_cloud_id", flat=True
)
)
)

ap_id_obj_map: typing.Dict[int, node_man_models.AccessPoint] = node_man_models.AccessPoint.ap_id_obj_map()

for cloud in clouds:
for cloud_id in cloud_ids:
cloud_obj: typing.Optional[node_man_models.Cloud] = node_man_models.Cloud.objects.filter(
bk_cloud_id=cloud["bk_cloud_id"]
bk_cloud_id=cloud_id
).first()

# 跳过管控区域不存在的情况
if not cloud_obj:
continue

cloud_bizs = (
node_man_models.Host.objects.filter(bk_cloud_id=cloud["bk_cloud_id"])
.values("bk_biz_id")
.distinct()
.order_by("bk_biz_id")
cloud_bk_biz_ids: typing.List[int] = list(
set(node_man_models.Host.objects.filter(bk_cloud_id=cloud_id).values_list("bk_biz_id", flat=True))
)
cloud_bk_biz_ids: typing.List[int] = [cloud_biz["bk_biz_id"] for cloud_biz in cloud_bizs]

if ap_id_obj_map[cloud_obj.ap_id].gse_version == GseVersion.V2.value and rollback:
# 当管控区域覆盖的业务(cloud_bk_biz_ids)完全包含于灰度业务集(gray_scope_list)时,需要操作回滚
if not set(cloud_bk_biz_ids) - set(gray_scope_list):
logger.info(
f"update_cloud_ap_id[rollback]: bk_cloud_id -> {cloud_obj.bk_cloud_id}",
f"cloud_ap_id -> {cloud_obj.ap_id}, gse_v1_ap_id -> {cloud_obj.gse_v1_ap_id}",
)
cloud_obj.ap_id = cloud_obj.gse_v1_ap_id
cloud_obj.gse_v1_ap_id = None
cloud_obj.save()
Expand Down Expand Up @@ -314,7 +381,7 @@ def rollback(cls, validated_data: typing.Dict[str, typing.List[typing.Any]]):
# 更新管控区域接入点
cls.update_cloud_ap_id(validated_data, rollback=True)

# 更新灰度业务范围
# 更新灰度业务范围, 无论是按业务还是ip回滚都去掉业务灰度标记
cls.update_gray_scope_list(validated_data, rollback=True)

# 更新主机ap
Expand All @@ -330,9 +397,10 @@ def list_biz_ids(cls) -> typing.Set[int]:
return set(GrayTools.get_or_create_gse2_gray_scope_list(get_cache=False))

@classmethod
def upgrade_or_rollback_agent_id(
def generate_upgrade_to_agent_id_request_params(
cls, validated_data: typing.Dict[str, typing.List[typing.Any]], rollback: bool = False
) -> typing.Dict[str, typing.List[typing.List[str]]]:
) -> typing.Dict[str, typing.Any]:

is_biz_gray: bool = cls.is_biz_gray(validated_data)
if is_biz_gray:
host_query_params: typing.Dict[str, typing.List[int]] = {"bk_biz_id__in": validated_data["bk_biz_ids"]}
Expand Down Expand Up @@ -363,9 +431,24 @@ def upgrade_or_rollback_agent_id(
}
)

return {
"request_hosts": request_hosts,
"no_bk_agent_id_hosts": no_bk_agent_id_hosts,
}

@classmethod
def upgrade_or_rollback_agent_id(
    cls, validated_data: typing.Dict[str, typing.List[typing.Any]], rollback: bool = False
) -> typing.Dict[str, typing.List[typing.List[str]]]:
    """Build the AgentID request parameters and submit them to the GSE V2 API.

    :param validated_data: validated request data, forwarded unchanged to
        ``generate_upgrade_to_agent_id_request_params``
    :param rollback: forwarded to the parameter generator; this method itself
        always calls ``upgrade_to_agent_id`` on the V2 helper
    :return: a dict holding ``no_bk_agent_id_hosts`` from the generated parameters,
        merged with the mapping returned by the GSE helper
        (NOTE(review): the helper's keys are not visible here -- confirm against the helper)
    """
    request_params: typing.Dict[str, typing.Any] = cls.generate_upgrade_to_agent_id_request_params(
        validated_data=validated_data, rollback=rollback
    )

    # Ask GSE to update the AgentID configuration. Only the generated parameters are
    # used here: the host lists are no longer local names in this method.
    result: typing.Dict[str, typing.List[typing.Any]] = {
        "no_bk_agent_id_hosts": request_params["no_bk_agent_id_hosts"]
    }
    result.update(**get_gse_api_helper(GseVersion.V2.value).upgrade_to_agent_id(request_params["request_hosts"]))
    return result

@classmethod
Expand Down
135 changes: 132 additions & 3 deletions apps/core/gray/tests/test_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,141 @@
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
from typing import Any, Dict, List, Type

import mock
from django.test import override_settings

from apps.mock_data import utils as mock_data_utils
from apps.node_man import models
from apps.node_man.tests.utils import CcApi, NodeApi
from apps.utils.unittest import testcase

# from .. import constants, handlers
# from . import utils
from .. import handlers
from .utils import GrayTestObjFactory


class GrayHandlerTestCase(testcase.CustomBaseTestCase):
    """Tests for GrayHandler: gray build, rollback and AgentID request-parameter generation."""

    OBJ_FACTORY_CLASS: Type[GrayTestObjFactory] = GrayTestObjFactory
    TEST_IP: str = "0.0.0.1"
    TEST_CLOUD_IP: str = f"0:{TEST_IP}"

    def setUp(self) -> None:
        # A patcher started with start() stays active until it is stopped, so register
        # stop() as a cleanup; otherwise the substituted APIs leak into later tests.
        # NOTE(review): the parent setUp is not invoked here (it never was) -- confirm
        # whether CustomBaseTestCase.setUp needs to run.
        for target, replacement in (
            ("apps.node_man.handlers.job.NodeApi", NodeApi),
            ("apps.core.ipchooser.query.resource.CCApi", CcApi),
        ):
            patcher = mock.patch(target, replacement)
            patcher.start()
            self.addCleanup(patcher.stop)

    @classmethod
    def setUpClass(cls):
        # The factory is created before super().setUpClass() and is read by setUpTestData.
        cls.obj_factory = cls.OBJ_FACTORY_CLASS()
        super().setUpClass()

    @classmethod
    def setUpTestData(cls):
        cls.obj_factory.init_db()

    def _assert_host_ap_state(self, ap_id: Any, gse_v1_ap_id: Any) -> None:
        """Assert the ap_id / gse_v1_ap_id stored on the test host."""
        host = models.Host.objects.get(inner_ip=self.TEST_IP)
        self.assertEqual(host.ap_id, ap_id)
        self.assertEqual(host.gse_v1_ap_id, gse_v1_ap_id)

    def _assert_biz_in_gray_scope(self, expected: bool) -> None:
        """Assert whether the default business is present in the GSE2 gray scope list."""
        gray_scope_list = handlers.GrayTools.get_or_create_gse2_gray_scope_list()
        if expected:
            self.assertIn(mock_data_utils.DEFAULT_BK_BIZ_ID, gray_scope_list)
        else:
            self.assertNotIn(mock_data_utils.DEFAULT_BK_BIZ_ID, gray_scope_list)

    @override_settings(BK_BACKEND_CONFIG=True)
    def test_biz_gray_build(self):
        validated_data: Dict[str, List[Any]] = {"bk_biz_ids": [mock_data_utils.DEFAULT_BK_BIZ_ID]}
        handlers.GrayHandler.build(validated_data=validated_data)
        # The host has been switched to the 2.0 access point and remembers its 1.0 one
        self._assert_host_ap_state(
            ap_id=self.OBJ_FACTORY_CLASS.GSE_V2_AP_ID, gse_v1_ap_id=self.OBJ_FACTORY_CLASS.GSE_V1_AP_ID
        )
        # The business has been written to the gray scope list
        self._assert_biz_in_gray_scope(True)

    @override_settings(BK_BACKEND_CONFIG=True)
    def test_cloud_ips_gray_build(self):
        validated_data: Dict[str, List[Any]] = {
            "bk_biz_ids": [mock_data_utils.DEFAULT_BK_BIZ_ID],
            "cloud_ips": [self.TEST_CLOUD_IP],
        }
        handlers.GrayHandler.build(validated_data=validated_data)
        # The host has been switched to the 2.0 access point and remembers its 1.0 one
        self._assert_host_ap_state(
            ap_id=self.OBJ_FACTORY_CLASS.GSE_V2_AP_ID, gse_v1_ap_id=self.OBJ_FACTORY_CLASS.GSE_V1_AP_ID
        )
        # The business has NOT been written to the gray scope list
        self._assert_biz_in_gray_scope(False)

    @override_settings(BK_BACKEND_CONFIG=True)
    def test_biz_gray_rollback(self):
        self.OBJ_FACTORY_CLASS.structure_biz_gray_data()
        validated_data: Dict[str, List[Any]] = {"bk_biz_ids": [mock_data_utils.DEFAULT_BK_BIZ_ID]}
        handlers.GrayHandler.rollback(validated_data=validated_data)
        # The host is back on the 1.0 access point and the saved 1.0 id has been cleared
        self._assert_host_ap_state(ap_id=self.OBJ_FACTORY_CLASS.GSE_V1_AP_ID, gse_v1_ap_id=None)
        # The business is no longer in the gray scope list
        self._assert_biz_in_gray_scope(False)

    @override_settings(BK_BACKEND_CONFIG=True)
    def test_cloud_ips_rollback(self):
        self.OBJ_FACTORY_CLASS.structure_biz_gray_data()
        validated_data: Dict[str, List[Any]] = {
            "bk_biz_ids": [mock_data_utils.DEFAULT_BK_BIZ_ID],
            "cloud_ips": [self.TEST_CLOUD_IP],
        }
        handlers.GrayHandler.rollback(validated_data=validated_data)
        # The host is back on the 1.0 access point and the saved 1.0 id has been cleared
        self._assert_host_ap_state(ap_id=self.OBJ_FACTORY_CLASS.GSE_V1_AP_ID, gse_v1_ap_id=None)
        # The business is no longer in the gray scope list
        self._assert_biz_in_gray_scope(False)

    def upgrade_to_agent_id_checker(self, validated_data: Dict[str, List[Any]]):
        """Check the generated upgrade parameters before and after hosts get a bk_agent_id."""
        result: Dict[str, Any] = handlers.GrayHandler.generate_upgrade_to_agent_id_request_params(
            validated_data=validated_data
        )
        # Before any bk_agent_id is set, the test host is listed in no_bk_agent_id_hosts
        self.assertEqual(result["no_bk_agent_id_hosts"][0], self.TEST_CLOUD_IP)

        models.Host.objects.update(bk_agent_id="test_agent_id")
        # Once a bk_agent_id is set, the host shows up in the request list with that id
        result = handlers.GrayHandler.generate_upgrade_to_agent_id_request_params(validated_data=validated_data)
        self.assertEqual(result["request_hosts"][0]["bk_agent_id"], "test_agent_id")

    def rollback_agent_id_checker(self, validated_data: Dict[str, List[Any]]):
        """Check that rollback parameters carry the cloud-ip string as bk_agent_id."""
        result: Dict[str, Any] = handlers.GrayHandler.generate_upgrade_to_agent_id_request_params(
            validated_data=validated_data, rollback=True
        )
        self.assertEqual(result["request_hosts"][0]["bk_agent_id"], self.TEST_CLOUD_IP)

    def test_biz_upgrade_to_agent_id(self):
        self.OBJ_FACTORY_CLASS.structure_biz_gray_data()
        validated_data: Dict[str, List[Any]] = {"bk_biz_ids": [mock_data_utils.DEFAULT_BK_BIZ_ID]}
        self.upgrade_to_agent_id_checker(validated_data=validated_data)

    def test_cloud_ip_upgrade_to_agent_id(self):
        self.OBJ_FACTORY_CLASS.structure_biz_gray_data()
        validated_data: Dict[str, List[Any]] = {
            "bk_biz_ids": [mock_data_utils.DEFAULT_BK_BIZ_ID],
            "cloud_ips": [self.TEST_CLOUD_IP],
        }
        self.upgrade_to_agent_id_checker(validated_data=validated_data)

    def test_biz_rollback_agent_id(self):
        self.OBJ_FACTORY_CLASS.structure_biz_gray_data()
        validated_data: Dict[str, List[Any]] = {
            "bk_biz_ids": [mock_data_utils.DEFAULT_BK_BIZ_ID],
        }
        self.rollback_agent_id_checker(validated_data=validated_data)

    def test_cloud_ip_rollback_agent_id(self):
        self.OBJ_FACTORY_CLASS.structure_biz_gray_data()
        validated_data: Dict[str, List[Any]] = {
            "bk_biz_ids": [mock_data_utils.DEFAULT_BK_BIZ_ID],
            "cloud_ips": [self.TEST_CLOUD_IP],
        }
        self.rollback_agent_id_checker(validated_data=validated_data)
39 changes: 38 additions & 1 deletion apps/core/gray/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,41 @@
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
# import random
from typing import List

from apps.backend.tests.components.collections.agent_new.utils import (
AgentTestObjFactory,
)
from apps.mock_data import utils
from apps.node_man import models


class GrayTestObjFactory(AgentTestObjFactory):
    """Test-data factory for the gray tests, layered on top of AgentTestObjFactory."""

    # Access point ids used as the v1 -> v2 pair throughout the gray tests
    GSE_V1_AP_ID: int = -1
    GSE_V2_AP_ID: int = 2

    def structure_gse2_gray_ap_map(self) -> None:
        """Store the v1 -> v2 access point mapping under the GSE2_GRAY_AP_MAP setting."""
        ap_map = {str(self.GSE_V1_AP_ID): self.GSE_V2_AP_ID}
        models.GlobalSettings.set_config(key=models.GlobalSettings.KeyEnum.GSE2_GRAY_AP_MAP.value, value=ap_map)

    def structure_process_status(self):
        """Create one RUNNING ``gseagent`` ProcessStatus row for every existing host."""
        host_ids: List[int] = list(models.Host.objects.values_list("bk_host_id", flat=True))
        models.ProcessStatus.objects.bulk_create(
            objs=[
                models.ProcessStatus(bk_host_id=host_id, status="RUNNING", name="gseagent") for host_id in host_ids
            ]
        )

    def init_db(self):
        """Run the parent initialisation, then add the gray AP map and process statuses."""
        super().init_db()
        self.structure_gse2_gray_ap_map()
        self.structure_process_status()

    @classmethod
    def structure_biz_gray_data(cls):
        """Put every host on the v2 access point and mark the default business as gray."""
        models.Host.objects.update(ap_id=cls.GSE_V2_AP_ID, gse_v1_ap_id=cls.GSE_V1_AP_ID)
        scope_key = models.GlobalSettings.KeyEnum.GSE2_GRAY_SCOPE_LIST.value
        models.GlobalSettings.set_config(key=scope_key, value=[utils.DEFAULT_BK_BIZ_ID])
Loading

0 comments on commit 5439253

Please sign in to comment.