Merge pull request #1 from triton-inference-server/main
Merge main from tif
CGranger-sorenson authored May 17, 2024
2 parents 2f848ee + 0089bb7 commit 2585eb9
Showing 27 changed files with 405 additions and 144 deletions.
12 changes: 11 additions & 1 deletion include/triton/core/tritonserver.h
@@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily;
 /// }
 ///
 #define TRITONSERVER_API_VERSION_MAJOR 1
-#define TRITONSERVER_API_VERSION_MINOR 30
+#define TRITONSERVER_API_VERSION_MINOR 31
 
 /// Get the TRITONBACKEND API version supported by the Triton shared
 /// library. This value can be compared against the
@@ -1828,6 +1828,16 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
 TRITONSERVER_ServerOptionsSetStrictModelConfig(
 struct TRITONSERVER_ServerOptions* options, bool strict);
 
+/// Set the custom model configuration name to load for all models.
+/// Fall back to default config file if empty.
+///
+/// \param options The server options object.
+/// \param config_name The name of the config file to load for all models.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
+TRITONSERVER_ServerOptionsSetModelConfigName(
+struct TRITONSERVER_ServerOptions* options, const char* model_config_name);
+
 /// Set the rate limit mode in a server options.
 ///
 /// TRITONSERVER_RATE_LIMIT_EXEC_COUNT: The rate limiting prioritizes the
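The new entry point extends the server options for custom model configuration (the C API minor version bump above reflects the addition). Below is a minimal sketch of setting it through the in-process C API; the repository path and the config name "custom_config" are illustrative only, and error handling is collapsed into a single check macro.

```cpp
#include <cstdlib>
#include <iostream>

#include "triton/core/tritonserver.h"

// Print and abort on any TRITONSERVER_Error returned by an API call.
#define CHECK_TRITON(expr)                                        \
  do {                                                            \
    TRITONSERVER_Error* err__ = (expr);                           \
    if (err__ != nullptr) {                                       \
      std::cerr << TRITONSERVER_ErrorMessage(err__) << std::endl; \
      TRITONSERVER_ErrorDelete(err__);                            \
      std::exit(1);                                               \
    }                                                             \
  } while (false)

int
main()
{
  TRITONSERVER_ServerOptions* options = nullptr;
  CHECK_TRITON(TRITONSERVER_ServerOptionsNew(&options));
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetModelRepositoryPath(
      options, "/workspace/models"));  // illustrative repository path

  // New in API version 1.31: load "custom_config" for every model instead
  // of the default config.pbtxt; an empty string keeps the default.
  CHECK_TRITON(
      TRITONSERVER_ServerOptionsSetModelConfigName(options, "custom_config"));

  TRITONSERVER_Server* server = nullptr;
  CHECK_TRITON(TRITONSERVER_ServerNew(&server, options));
  CHECK_TRITON(TRITONSERVER_ServerOptionsDelete(options));

  // ... run inference via TRITONSERVER_ServerInferAsync, etc. ...

  CHECK_TRITON(TRITONSERVER_ServerStop(server));
  CHECK_TRITON(TRITONSERVER_ServerDelete(server));
  return 0;
}
```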
2 changes: 1 addition & 1 deletion python/test/test_api.py
@@ -346,7 +346,7 @@ def test_ready(self):
 self.assertTrue(server.ready())
 
 @pytest.mark.xfail(
-tritonserver.__version__ <= "2.43.0",
+tritonserver.__version__ <= "2.46.0",
 reason="Known issue on stop: Exit timeout expired. Exiting immediately",
 raises=tritonserver.InternalError,
 )
6 changes: 3 additions & 3 deletions python/tritonserver/_api/_server.py
@@ -137,7 +137,7 @@ class Options:
 List of models to load at startup. Only relevant with ModelControlMode.EXPLICIT.
 See :c:func:`TRITONSERVER_ServerOptionsSetStartupModel`
-strict_model_config : bool, default True
+strict_model_config : bool, default False
 Enable or disable strict model configuration.
 See :c:func:`TRITONSERVER_ServerOptionsSetStrictModelConfig`
@@ -275,7 +275,7 @@ class Options:
 server_id: str = "triton"
 model_control_mode: ModelControlMode = ModelControlMode.NONE
 startup_models: list[str] = field(default_factory=list[str])
-strict_model_config: bool = True
+strict_model_config: bool = False
 
 rate_limiter_mode: RateLimitMode = RateLimitMode.OFF
 rate_limiter_resources: list[RateLimiterResource] = field(
@@ -507,7 +507,7 @@ def __init__(
 Options(server_id='triton', model_repository='/workspace/models',
 model_control_mode=<TRITONSERVER_ModelControlMode.NONE: 0>,
-startup_models=[], strict_model_config=True,
+startup_models=[], strict_model_config=False,
 rate_limiter_mode=<TRITONSERVER_RateLimitMode.OFF: 0>,
 rate_limiter_resources=[], pinned_memory_pool_size=268435456,
 cuda_memory_pool_sizes={}, cache_config={},
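The Python `strict_model_config` option wraps the C API setter named in the docstring above, and the new default of `False` means model configurations may be auto-completed. As a small illustration only, reusing the `options` handle built in the earlier sketch, the equivalent C API call would be:

```cpp
// strict = false lets Triton auto-complete missing model configuration
// fields, matching the new Python default of strict_model_config=False.
// `options` is a TRITONSERVER_ServerOptions* as built in the earlier sketch.
TRITONSERVER_Error* err =
    TRITONSERVER_ServerOptionsSetStrictModelConfig(options, false);
if (err != nullptr) {
  std::cerr << TRITONSERVER_ErrorMessage(err) << std::endl;
  TRITONSERVER_ErrorDelete(err);
}
```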
17 changes: 10 additions & 7 deletions src/backend_model.cc
@@ -61,8 +61,9 @@ TritonModel::Create(
 InferenceServer* server, const std::string& model_path,
 const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
 const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
-const int64_t version, inference::ModelConfig model_config,
-const bool is_config_provided, std::unique_ptr<TritonModel>* model)
+const ModelIdentifier& model_id, const int64_t version,
+inference::ModelConfig model_config, const bool is_config_provided,
+std::unique_ptr<TritonModel>* model)
 {
 model->reset();
 
@@ -143,8 +144,8 @@ TritonModel::Create(
 
 // Create and initialize the model.
 std::unique_ptr<TritonModel> local_model(new TritonModel(
-server, localized_model_dir, backend, min_compute_capability, version,
-model_config, auto_complete_config, backend_cmdline_config_map,
+server, localized_model_dir, backend, min_compute_capability, model_id,
+version, model_config, auto_complete_config, backend_cmdline_config_map,
 host_policy_map));
 
 TritonModel* raw_local_model = local_model.get();
@@ -929,12 +930,14 @@ TritonModel::TritonModel(
 InferenceServer* server,
 const std::shared_ptr<LocalizedPath>& localized_model_dir,
 const std::shared_ptr<TritonBackend>& backend,
-const double min_compute_capability, const int64_t version,
-const inference::ModelConfig& config, const bool auto_complete_config,
+const double min_compute_capability, const ModelIdentifier& model_id,
+const int64_t version, const inference::ModelConfig& config,
+const bool auto_complete_config,
 const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
 const triton::common::HostPolicyCmdlineConfigMap& host_policy_map)
 : Model(
-min_compute_capability, localized_model_dir->Path(), version, config),
+min_compute_capability, localized_model_dir->Path(), model_id,
+version, config),
 server_(server), min_compute_capability_(min_compute_capability),
 auto_complete_config_(auto_complete_config),
 backend_cmdline_config_map_(backend_cmdline_config_map),
10 changes: 6 additions & 4 deletions src/backend_model.h
@@ -61,8 +61,9 @@ class TritonModel : public Model {
 InferenceServer* server, const std::string& model_path,
 const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
 const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
-const int64_t version, inference::ModelConfig model_config,
-const bool is_config_provided, std::unique_ptr<TritonModel>* model);
+const ModelIdentifier& model_id, const int64_t version,
+inference::ModelConfig model_config, const bool is_config_provided,
+std::unique_ptr<TritonModel>* model);
 ~TritonModel();
 
 // Return path to the localized model directory.
@@ -118,8 +119,9 @@ class TritonModel : public Model {
 InferenceServer* server,
 const std::shared_ptr<LocalizedPath>& localized_model_dir,
 const std::shared_ptr<TritonBackend>& backend,
-const double min_compute_capability, const int64_t version,
-const inference::ModelConfig& config, const bool auto_complete_config,
+const double min_compute_capability, const ModelIdentifier& model_id,
+const int64_t version, const inference::ModelConfig& config,
+const bool auto_complete_config,
 const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
 const triton::common::HostPolicyCmdlineConfigMap& host_policy_map);
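The recurring new parameter is a `ModelIdentifier`, which the `Model` base class now receives alongside the version and exposes through `ModelId()` (as used by the metric reporter below). Its definition is not part of this diff; the following is only a hypothetical sketch, assuming it pairs a model namespace (cf. the new "namespace" metric label added in constants.h) with a model name.

```cpp
#include <string>

// Hypothetical sketch only: the real ModelIdentifier is defined elsewhere
// in Triton core and may differ in members and helpers.
struct ModelIdentifier {
  std::string namespace_;  // empty when model namespacing is disabled
  std::string name_;

  bool operator==(const ModelIdentifier& rhs) const
  {
    return (namespace_ == rhs.namespace_) && (name_ == rhs.name_);
  }

  std::string str() const
  {
    return namespace_.empty() ? name_ : (namespace_ + "::" + name_);
  }
};
```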

2 changes: 1 addition & 1 deletion src/backend_model_instance.cc
@@ -190,7 +190,7 @@ TritonModelInstance::TritonModelInstance(
 model_->ResponseCacheEnabled() &&
 model_->Server()->ResponseCacheEnabled();
 MetricModelReporter::Create(
-model_->Name(), model_->Version(), id, response_cache_enabled,
+model_->ModelId(), model_->Version(), id, response_cache_enabled,
 model_->Config().metric_tags(), &reporter_);
 }
 #endif // TRITON_ENABLE_METRICS
5 changes: 4 additions & 1 deletion src/constants.h
@@ -1,4 +1,4 @@
-// Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -71,7 +71,10 @@ constexpr char kAutoMixedPrecisionExecutionAccelerator[] =
 "auto_mixed_precision";
 
 constexpr char kModelConfigPbTxt[] = "config.pbtxt";
+constexpr char kPbTxtExtension[] = ".pbtxt";
+constexpr char kModelConfigFolder[] = "configs";
 
+constexpr char kMetricsLabelModelNamespace[] = "namespace";
 constexpr char kMetricsLabelModelName[] = "model";
 constexpr char kMetricsLabelModelVersion[] = "version";
 constexpr char kMetricsLabelGpuUuid[] = "gpu_uuid";
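The added constants support the custom model configuration lookup: a named config is presumably resolved under a per-model `configs/` folder with a `.pbtxt` extension, falling back to `config.pbtxt` when no name is set. The sketch below shows that path composition under this assumed layout; the `ModelConfigPath` helper is purely illustrative, and the real resolution logic lives elsewhere in the repository-management code.

```cpp
#include <string>

constexpr char kModelConfigPbTxt[] = "config.pbtxt";
constexpr char kPbTxtExtension[] = ".pbtxt";
constexpr char kModelConfigFolder[] = "configs";

// Hypothetical helper: pick the config file to read for one model directory.
// An empty config_name falls back to the default config.pbtxt.
std::string
ModelConfigPath(const std::string& model_dir, const std::string& config_name)
{
  if (config_name.empty()) {
    return model_dir + "/" + kModelConfigPbTxt;
  }
  return model_dir + "/" + kModelConfigFolder + "/" + config_name +
      kPbTxtExtension;
}
```

Under these assumptions, `ModelConfigPath("/repo/resnet50", "custom_config")` yields `/repo/resnet50/configs/custom_config.pbtxt`.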
35 changes: 2 additions & 33 deletions src/dynamic_batch_scheduler.cc
@@ -39,14 +39,6 @@
 
 namespace triton { namespace core {
 
-uint64_t
-CaptureTimeNs()
-{
-return std::chrono::duration_cast<std::chrono::nanoseconds>(
-std::chrono::steady_clock::now().time_since_epoch())
-.count();
-}
-
 bool
 IsStaleState(Payload::State payload_state)
 {
@@ -753,32 +745,9 @@ DynamicBatchScheduler::CacheLookUp(
 std::unique_ptr<InferenceRequest>& request,
 std::unique_ptr<InferenceResponse>& cached_response)
 {
-Status status;
 auto cache = model_->Server()->CacheManager()->Cache();
-std::unique_ptr<InferenceResponse> local_response;
-request->ResponseFactory()->CreateResponse(&local_response);
-// Hash request into cache key
-std::string key = "";
-if (!request->CacheKeyIsSet()) {
-status = cache->Hash(*request, &key);
-if (!status.IsOk()) {
-LOG_ERROR << "Failed to hash request: " << status.Message();
-return;
-}
-request->SetCacheKey(key);
-} else {
-key = request->CacheKey();
-}
-
-// Lookup and capture timestamps
-{
-request->CaptureCacheLookupStartNs();
-status = cache->Lookup(local_response.get(), key);
-request->CaptureCacheLookupEndNs();
-}
-
-if (status.IsOk() && (local_response != nullptr)) {
-cached_response = std::move(local_response);
+bool is_lookup_success = CacheLookUpUtil(request, cached_response, cache);
+if (is_lookup_success) {
 #ifdef TRITON_ENABLE_STATS
 // Update model metrics/stats on cache hits
 // Backends will update metrics as normal on cache misses
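The request hashing, key handling, and timed lookup that used to live inline here are folded into a shared `CacheLookUpUtil` helper that returns whether a cached response was found. Its definition is outside this hunk, so the following is only a sketch reconstructed from the removed lines, with the signature inferred from the call site and the cache handle type assumed.

```cpp
// Sketch only: reconstructed from the removed inline code. The real helper
// is defined elsewhere (so it can be shared by other schedulers) and its
// exact signature and cache handle type may differ.
bool
CacheLookUpUtil(
    std::unique_ptr<InferenceRequest>& request,
    std::unique_ptr<InferenceResponse>& cached_response,
    std::shared_ptr<TritonCache> cache)
{
  std::unique_ptr<InferenceResponse> local_response;
  request->ResponseFactory()->CreateResponse(&local_response);

  // Hash the request into a cache key unless one is already attached.
  std::string key = "";
  if (!request->CacheKeyIsSet()) {
    Status status = cache->Hash(*request, &key);
    if (!status.IsOk()) {
      LOG_ERROR << "Failed to hash request: " << status.Message();
      return false;
    }
    request->SetCacheKey(key);
  } else {
    key = request->CacheKey();
  }

  // Look up the key, capturing timestamps around the lookup.
  request->CaptureCacheLookupStartNs();
  const Status status = cache->Lookup(local_response.get(), key);
  request->CaptureCacheLookupEndNs();

  if (status.IsOk() && (local_response != nullptr)) {
    cached_response = std::move(local_response);
    return true;
  }
  return false;
}
```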
12 changes: 7 additions & 5 deletions src/ensemble_scheduler/ensemble_model.cc
@@ -1,4 +1,4 @@
-// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -37,19 +37,21 @@ namespace triton { namespace core {
 
 Status
 EnsembleModel::Create(
-InferenceServer* server, const std::string& path, const int64_t version,
+InferenceServer* server, const std::string& path,
+const ModelIdentifier& model_id, const int64_t version,
 const inference::ModelConfig& model_config, const bool is_config_provided,
 const double min_compute_capability, std::unique_ptr<Model>* model)
 {
 // Create the ensemble model.
-std::unique_ptr<EnsembleModel> local_model(
-new EnsembleModel(min_compute_capability, path, version, model_config));
+std::unique_ptr<EnsembleModel> local_model(new EnsembleModel(
+min_compute_capability, path, model_id, version, model_config));
 
 RETURN_IF_ERROR(local_model->Init(is_config_provided));
 
 std::unique_ptr<Scheduler> scheduler;
 RETURN_IF_ERROR(EnsembleScheduler::Create(
-local_model->MutableStatsAggregator(), server, model_config, &scheduler));
+local_model->MutableStatsAggregator(), server, local_model->ModelId(),
+model_config, &scheduler));
 RETURN_IF_ERROR(local_model->SetScheduler(std::move(scheduler)));
 
 LOG_VERBOSE(1) << "ensemble model for " << local_model->Name() << std::endl;
10 changes: 6 additions & 4 deletions src/ensemble_scheduler/ensemble_model.h
@@ -1,4 +1,4 @@
-// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -39,7 +39,8 @@ class EnsembleModel : public Model {
 EnsembleModel(EnsembleModel&&) = default;
 
 static Status Create(
-InferenceServer* server, const std::string& path, const int64_t version,
+InferenceServer* server, const std::string& path,
+const ModelIdentifier& model_id, const int64_t version,
 const inference::ModelConfig& model_config, const bool is_config_provided,
 const double min_compute_capability, std::unique_ptr<Model>* model);
 
@@ -48,8 +49,9 @@ class EnsembleModel : public Model {
 
 explicit EnsembleModel(
 const double min_compute_capability, const std::string& model_dir,
-const int64_t version, const inference::ModelConfig& config)
-: Model(min_compute_capability, model_dir, version, config)
+const ModelIdentifier& model_id, const int64_t version,
+const inference::ModelConfig& config)
+: Model(min_compute_capability, model_dir, model_id, version, config)
 {
 }
 friend std::ostream& operator<<(std::ostream&, const EnsembleModel&);
(The diffs for the remaining 17 of the 27 changed files are not shown here.)
