From da2b6d144f8cae043e20bc15ec7756278cb53ebe Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Wed, 17 Jul 2024 12:33:07 +0200 Subject: [PATCH 1/7] revise ml processes and add mlm extension --- meta/subtype-schemas.json | 8 ++++---- proposals/load_ml_model.json | 10 +++++----- proposals/ml_fit_class_random_forest.json | 16 +++++----------- proposals/ml_fit_regr_random_forest.json | 18 ++++++------------ proposals/ml_predict.json | 4 ++-- proposals/save_ml_model.json | 8 ++++---- 6 files changed, 26 insertions(+), 38 deletions(-) diff --git a/meta/subtype-schemas.json b/meta/subtype-schemas.json index 83ce72ba..a880847e 100644 --- a/meta/subtype-schemas.json +++ b/meta/subtype-schemas.json @@ -232,11 +232,11 @@ } } }, - "ml-model": { + "mlm-model": { "type": "object", - "subtype": "ml-model", + "subtype": "mlm-model", "title": "Machine Learning Model", - "description": "A machine learning model, accompanied with STAC metadata that implements the the STAC ml-model extension." + "description": "A machine learning model, accompanied with STAC metadata that implements the Machine Learning Model STAC mlm-model extension." }, "output-format": { "type": "string", @@ -426,4 +426,4 @@ "description": "Year as integer, can be any number of digits and can be negative." } } -} +} \ No newline at end of file diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json index 7fa86d89..3e44a515 100644 --- a/proposals/load_ml_model.json +++ b/proposals/load_ml_model.json @@ -10,7 +10,7 @@ "parameters": [ { "name": "uri", - "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the `ml-model` extension.", + "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the `mlm-model` extension.", "schema": [ { "title": "URL", @@ -32,15 +32,15 @@ "description": "A machine learning model to be used with machine learning processes such as ``ml_predict()``.", "schema": { "type": "object", - "subtype": "ml-model" + "subtype": "mlm-model" } }, "links": [ { - "href": "https://github.com/stac-extensions/ml-model", - "title": "STAC ml-model extension", + "href": "https://github.com/crim-ca/mlm-extension", + "title": "Machine Learning Model STAC extension", "type": "text/html", "rel": "about" } ] -} +} \ No newline at end of file diff --git a/proposals/ml_fit_class_random_forest.json b/proposals/ml_fit_class_random_forest.json index 63da48a1..e4f718ba 100644 --- a/proposals/ml_fit_class_random_forest.json +++ b/proposals/ml_fit_class_random_forest.json @@ -8,8 +8,8 @@ "experimental": true, "parameters": [ { - "name": "predictors", - "description": "The predictors for the classification model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", + "name": "training_set", + "description": "The training set for the Random Forest classification model, provided as a vector data cube. This set contains both the independent variables and dependent variable that the Random Forest algorithm analyzes to learn patterns and relationships within the data.", "schema": [ { "type": "object", @@ -39,15 +39,9 @@ }, { "name": "target", - "description": "The training sites for the classification model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to associated with a value to predict (e.g. fractional forest canopy cover).", + "description": "The column name in the training set that represents the dependent variable for Random Forest classification.", "schema": { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - } - ] + "type": "string" } }, { @@ -107,4 +101,4 @@ "rel": "about" } ] -} +} \ No newline at end of file diff --git a/proposals/ml_fit_regr_random_forest.json b/proposals/ml_fit_regr_random_forest.json index 39207324..dc774b01 100644 --- a/proposals/ml_fit_regr_random_forest.json +++ b/proposals/ml_fit_regr_random_forest.json @@ -8,8 +8,8 @@ "experimental": true, "parameters": [ { - "name": "predictors", - "description": "The predictors for the regression model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", + "name": "training_set", + "description": "The training set for the Random Forest regression model, provided as a vector data cube. This set contains both the independent variables and dependent variable that the Random Forest algorithm analyzes to learn patterns and relationships within the data.", "schema": [ { "type": "object", @@ -39,15 +39,9 @@ }, { "name": "target", - "description": "The training sites for the regression model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to associated with a value to predict (e.g. fractional forest canopy cover).", + "description": "The column name in the training set that represents the dependent variable for Random Forest regression.", "schema": { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - } - ] + "type": "string" } }, { @@ -96,7 +90,7 @@ "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", "schema": { "type": "object", - "subtype": "ml-model" + "subtype": "mlm-model" } }, "links": [ @@ -107,4 +101,4 @@ "rel": "about" } ] -} +} \ No newline at end of file diff --git a/proposals/ml_predict.json b/proposals/ml_predict.json index 87cd2500..8b49054a 100644 --- a/proposals/ml_predict.json +++ b/proposals/ml_predict.json @@ -20,7 +20,7 @@ "description": "A ML model that was trained with one of the ML training processes such as ``ml_fit_regr_random_forest()``.", "schema": { "type": "object", - "subtype": "ml-model" + "subtype": "mlm-model" } }, { @@ -46,4 +46,4 @@ ] } } -} +} \ No newline at end of file diff --git a/proposals/save_ml_model.json b/proposals/save_ml_model.json index 5e9ea8b0..c92b79cf 100644 --- a/proposals/save_ml_model.json +++ b/proposals/save_ml_model.json @@ -1,7 +1,7 @@ { "id": "save_ml_model", "summary": "Save a ML model", - "description": "Saves a machine learning model as part of a batch job.\n\nThe model will be accompanied by a separate STAC Item that implements the [ml-model extension](https://github.com/stac-extensions/ml-model).", + "description": "Saves a machine learning model as part of a batch job.\n\nThe model will be accompanied by a separate STAC Item that implements the [mlm-model extension](https://github.com/crim-ca/mlm-extension).", "categories": [ "machine learning", "import" @@ -13,7 +13,7 @@ "description": "The data to store as a machine learning model.", "schema": { "type": "object", - "subtype": "ml-model" + "subtype": "mlm-model" } }, { @@ -35,8 +35,8 @@ }, "links": [ { - "href": "https://github.com/stac-extensions/ml-model", - "title": "STAC ml-model extension", + "href": "https://github.com/crim-ca/mlm-extension", + "title": "Machine Learning Model STAC extension", "type": "text/html", "rel": "about" } From 927bf300012823e11eb736257bc3e4a275bf0a55 Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Wed, 17 Jul 2024 15:32:38 +0200 Subject: [PATCH 2/7] word updates --- proposals/ml_fit_class_random_forest.json | 2 +- proposals/ml_fit_regr_random_forest.json | 2 +- tests/.words | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/proposals/ml_fit_class_random_forest.json b/proposals/ml_fit_class_random_forest.json index e4f718ba..a8f361f4 100644 --- a/proposals/ml_fit_class_random_forest.json +++ b/proposals/ml_fit_class_random_forest.json @@ -9,7 +9,7 @@ "parameters": [ { "name": "training_set", - "description": "The training set for the Random Forest classification model, provided as a vector data cube. This set contains both the independent variables and dependent variable that the Random Forest algorithm analyzes to learn patterns and relationships within the data.", + "description": "The training set for the Random Forest classification model, provided as a vector data cube. This set contains both the independent variables and dependent variable that the Random Forest algorithm analyses to learn patterns and relationships within the data.", "schema": [ { "type": "object", diff --git a/proposals/ml_fit_regr_random_forest.json b/proposals/ml_fit_regr_random_forest.json index dc774b01..1af510b0 100644 --- a/proposals/ml_fit_regr_random_forest.json +++ b/proposals/ml_fit_regr_random_forest.json @@ -9,7 +9,7 @@ "parameters": [ { "name": "training_set", - "description": "The training set for the Random Forest regression model, provided as a vector data cube. This set contains both the independent variables and dependent variable that the Random Forest algorithm analyzes to learn patterns and relationships within the data.", + "description": "The training set for the Random Forest regression model, provided as a vector data cube. This set contains both the independent variables and dependent variable that the Random Forest algorithm analyses to learn patterns and relationships within the data.", "schema": [ { "type": "object", diff --git a/tests/.words b/tests/.words index a50285ba..86c5e340 100644 --- a/tests/.words +++ b/tests/.words @@ -47,3 +47,4 @@ Hyndman date1 date2 favor +mlm-model From e2826ec16538f54efc16e50459c21987b553e3ef Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Wed, 17 Jul 2024 15:35:42 +0200 Subject: [PATCH 3/7] mlm-model return value --- proposals/ml_fit_class_random_forest.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/ml_fit_class_random_forest.json b/proposals/ml_fit_class_random_forest.json index a8f361f4..a9007828 100644 --- a/proposals/ml_fit_class_random_forest.json +++ b/proposals/ml_fit_class_random_forest.json @@ -90,7 +90,7 @@ "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", "schema": { "type": "object", - "subtype": "ml-model" + "subtype": "mlm-model" } }, "links": [ From 521e8c61686a26b5310aa317a4206410218af288 Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Sat, 24 Aug 2024 11:42:21 +0200 Subject: [PATCH 4/7] Update meta/subtype-schemas.json Co-authored-by: Matthias Mohr --- meta/subtype-schemas.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meta/subtype-schemas.json b/meta/subtype-schemas.json index a880847e..00238654 100644 --- a/meta/subtype-schemas.json +++ b/meta/subtype-schemas.json @@ -236,7 +236,7 @@ "type": "object", "subtype": "mlm-model", "title": "Machine Learning Model", - "description": "A machine learning model, accompanied with STAC metadata that implements the Machine Learning Model STAC mlm-model extension." + "description": "A machine learning model, accompanied with STAC metadata that implements the Machine Learning Model STAC extension (mlm-model)." }, "output-format": { "type": "string", From 38981f0e2f886a3ded1b4cc58f32ba637207c3c9 Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Sat, 24 Aug 2024 11:42:32 +0200 Subject: [PATCH 5/7] Update meta/subtype-schemas.json Co-authored-by: Matthias Mohr --- meta/subtype-schemas.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meta/subtype-schemas.json b/meta/subtype-schemas.json index 00238654..29a569ae 100644 --- a/meta/subtype-schemas.json +++ b/meta/subtype-schemas.json @@ -234,7 +234,7 @@ }, "mlm-model": { "type": "object", - "subtype": "mlm-model", + "subtype": "ml-model", "title": "Machine Learning Model", "description": "A machine learning model, accompanied with STAC metadata that implements the Machine Learning Model STAC extension (mlm-model)." }, From fc4b04f35e4fad02d6de9fee34002577a1e26a79 Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Sat, 24 Aug 2024 11:42:47 +0200 Subject: [PATCH 6/7] Update proposals/load_ml_model.json Co-authored-by: Matthias Mohr --- proposals/load_ml_model.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json index 3e44a515..1dd02de0 100644 --- a/proposals/load_ml_model.json +++ b/proposals/load_ml_model.json @@ -32,7 +32,7 @@ "description": "A machine learning model to be used with machine learning processes such as ``ml_predict()``.", "schema": { "type": "object", - "subtype": "mlm-model" + "subtype": "ml-model" } }, "links": [ From c618a87ab5db8e1a4d989b39c2b9f26462ed2254 Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Sat, 24 Aug 2024 11:42:55 +0200 Subject: [PATCH 7/7] Update proposals/ml_fit_class_random_forest.json Co-authored-by: Matthias Mohr --- proposals/ml_fit_class_random_forest.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/ml_fit_class_random_forest.json b/proposals/ml_fit_class_random_forest.json index a9007828..a8f361f4 100644 --- a/proposals/ml_fit_class_random_forest.json +++ b/proposals/ml_fit_class_random_forest.json @@ -90,7 +90,7 @@ "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", "schema": { "type": "object", - "subtype": "mlm-model" + "subtype": "ml-model" } }, "links": [