From b4123443a0e2d399a9c34494e065ff4b2e84a4e0 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Tue, 28 May 2024 18:22:25 +0200 Subject: [PATCH 1/5] Investigate fixing AD issues Definitely AutoZygote is required on the outside instead of AutoReverseDiff. Investigating what's going on with Enzyme internally ```julia using PyCall certifi = PyCall.pyimport("certifi") ENV["SSL_CERT_FILE"] = certifi.where() using ODINN working_dir = joinpath(homedir(), "OGGM/ODINN_tests") MB = true fast = true atol = 2.0 params = Parameters(OGGM = OGGMparameters(working_dir=working_dir, multiprocessing=true), simulation = SimulationParameters(working_dir=working_dir, use_MB=MB, velocities=true, tspan=(2010.0, 2015.0), multiprocessing=false, workers=5, test_mode=true), hyper = Hyperparameters(batch_size=4, epochs=4, optimizer=ODINN.ADAM(0.01)), UDE = UDEparameters(target = "A", sensealg = ODINN.GaussAdjoint(autojacvec=ODINN.EnzymeVJP())) ) rgi_ids = ["RGI60-11.03638"] model = Model(iceflow = SIA2Dmodel(params), mass_balance = mass_balance = TImodel1(params; DDF=6.0/1000.0, acc_factor=1.2/1000.0), machine_learning = NN(params)) glaciers = initialize_glaciers(rgi_ids, params) functional_inversion = FunctionalInversion(model, glaciers, params) @time run!(functional_inversion) ``` --- Project.toml | 1 + src/ODINN.jl | 6 ++++++ .../functional_inversion_utils.jl | 10 ++++------ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Project.toml b/Project.toml index 90d9b6f..dd31f0f 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ Cthulhu = "f68482b8-f384-11e8-15f7-abe071a5a75f" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" GeoStats = "dcc97b0b-8ce5-5539-9008-bb190f959ef6" diff --git a/src/ODINN.jl b/src/ODINN.jl index 
eba402c..ddfe602 100644 --- a/src/ODINN.jl +++ b/src/ODINN.jl @@ -31,6 +31,12 @@ using Downloads using TimerOutputs using GeoStats using ImageFiltering +using EnzymeCore + +# This is equivalent to `@ignore_derivatives` +EnzymeCore.EnzymeRules.inactive(::typeof(Huginn.define_callback_steps), args...) = nothing +EnzymeCore.EnzymeRules.inactive(::typeof(MB_timestep!), args...) = nothing +EnzymeCore.EnzymeRules.inactive(::typeof(apply_MB_mask!), args...) = nothing # ############################################## # ############ PARAMETERS ############### diff --git a/src/simulations/functional_inversions/functional_inversion_utils.jl b/src/simulations/functional_inversions/functional_inversion_utils.jl index 35ab25f..872a31b 100644 --- a/src/simulations/functional_inversions/functional_inversion_utils.jl +++ b/src/simulations/functional_inversions/functional_inversion_utils.jl @@ -28,7 +28,7 @@ function train_UDE!(simulation::FunctionalInversion) train_batches = generate_batches(simulation) θ = simulation.model.machine_learning.θ - optf = OptimizationFunction((θ, _, batch_ids, rgi_ids)->loss_iceflow(θ, batch_ids, simulation), Optimization.AutoReverseDiff()) + optf = OptimizationFunction((θ, _, batch_ids, rgi_ids)->loss_iceflow(θ, batch_ids, simulation), Optimization.AutoZygote()) optprob = OptimizationProblem(optf, θ) if simulation.parameters.UDE.target == "A" @@ -127,15 +127,13 @@ function batch_iceflow_UDE(θ, simulation::FunctionalInversion, batch_id::I) whe # Initialize glacier ice flow model initialize_iceflow_model(model.iceflow[batch_id], batch_id, glacier, params) - params.solver.tstops = @ignore_derivatives Huginn.define_callback_steps(params.simulation.tspan, params.solver.step) + params.solver.tstops = Huginn.define_callback_steps(params.simulation.tspan, params.solver.step) stop_condition(u,t,integrator) = Sleipnir.stop_condition_tstops(u,t,integrator, params.solver.tstops) #closure function action!(integrator) if params.simulation.use_MB # Compute mass 
balance - @ignore_derivatives begin - MB_timestep!(model, glacier, params.solver.step, integrator.t; batch_id = batch_id) - apply_MB_mask!(integrator.u, glacier, model.iceflow[batch_id]) - end + MB_timestep!(model, glacier, params.solver.step, integrator.t; batch_id = batch_id) + apply_MB_mask!(integrator.u, glacier, model.iceflow[batch_id]) end # Apply parametrization apply_UDE_parametrization!(θ, simulation, integrator, batch_id) From bf241da68858e6209c5284f02dd390812c639cfd Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Wed, 29 May 2024 04:05:24 +0200 Subject: [PATCH 2/5] Setup with Lux --- Project.toml | 3 ++- src/ODINN.jl | 2 +- src/helpers/utils.jl | 22 ------------------ src/models/machine_learning/ML_utils.jl | 23 +++++++++---------- src/models/machine_learning/MLmodel.jl | 21 +++++++++-------- src/parameters/Hyperparameters.jl | 4 ++-- .../functional_inversion_utils.jl | 4 ++-- 7 files changed, 29 insertions(+), 50 deletions(-) diff --git a/Project.toml b/Project.toml index dd31f0f..2b004ba 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "1.0.0" [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" Cthulhu = "f68482b8-f384-11e8-15f7-abe071a5a75f" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" @@ -21,6 +22,7 @@ Infiltrator = "5903a43b-9cc3-4c30-8d17-598619ec4e9b" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Lux = "b2108857-7c20-44ae-9111-449ecde12c47" Muninn = "4b816528-16ba-4e32-9a2e-3c1bc2049d3a" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" @@ -49,7 +51,6 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" BenchmarkTools = "1.3.2" ChainRules = "1.50" Downloads = "1" -Flux = "0.13, 0.14" GR = 
"0.71, 0.72, 0.73" Huginn = "0.6" IJulia = "1.2" diff --git a/src/ODINN.jl b/src/ODINN.jl index ddfe602..d827917 100644 --- a/src/ODINN.jl +++ b/src/ODINN.jl @@ -18,7 +18,7 @@ using IterTools: ncycle using Zygote using ChainRules: @ignore_derivatives using Base: @kwdef -using Flux +using Flux, Lux, ComponentArrays using Tullio using Infiltrator, Cthulhu using Plots, PlotThemes diff --git a/src/helpers/utils.jl b/src/helpers/utils.jl index b9d144b..59d037c 100644 --- a/src/helpers/utils.jl +++ b/src/helpers/utils.jl @@ -217,28 +217,6 @@ function generate_batches(batch_size, UA, gdirs, gdir_refs, tspan::Tuple; shuffl return train_loader end - -""" - get_NN() - -Generates a neural network. -""" -function get_NN(θ_trained) - UA = Chain( - Dense(1,3, x->softplus.(x)), - Dense(3,10, x->softplus.(x)), - Dense(10,3, x->softplus.(x)), - Dense(3,1, sigmoid_A) - ) - UA = Flux.f64(UA) - # See if parameters need to be retrained or not - θ, UA_f = Flux.destructure(UA) - if !isempty(θ_trained) - θ = θ_trained - end - return UA_f, θ -end - function get_NN_inversion(θ_trained, target) if target == "D" U, θ = get_NN_inversion_D(θ_trained) diff --git a/src/models/machine_learning/ML_utils.jl b/src/models/machine_learning/ML_utils.jl index 062d6f5..ce3b5f6 100644 --- a/src/models/machine_learning/ML_utils.jl +++ b/src/models/machine_learning/ML_utils.jl @@ -5,19 +5,18 @@ get_NN() Generates a neural network. 
""" function get_NN(θ_trained) - UA = Flux.Chain( - Dense(1,3, x->softplus.(x)), - Dense(3,10, x->softplus.(x)), - Dense(10,3, x->softplus.(x)), - Dense(3,1, sigmoid_A) + UA = Lux.Chain( + Lux.Dense(1,3, x->Lux.softplus.(x)), + Lux.Dense(3,10, x->Lux.softplus.(x)), + Lux.Dense(10,3, x->Lux.softplus.(x)), + Lux.Dense(3,1, sigmoid_A) ) - UA = Flux.f64(UA) - # See if parameters need to be retrained or not - θ, UA_f = Flux.destructure(UA) + θ, st = Lux.setup(Random.default_rng(), UA) + θ = ComponentArray{Float64}(θ) if !isnothing(θ_trained) θ = θ_trained end - return UA, θ, UA_f + return UA, θ, st end """ @@ -26,7 +25,7 @@ end Predicts the value of A with a neural network based on the long-term air temperature. """ function predict_A̅(U, temp) - return U(temp) .* 1e-18 + return U(temp)[1] .* 1e-18 end function sigmoid_A(x) @@ -83,12 +82,12 @@ function build_D_features(H::Matrix, temp, ∇S) ∇S_flat = ∇S[inn1(H) .!= 0.0] # flatten H_flat = H[H .!= 0.0] # flatten T_flat = repeat(temp,length(H_flat)) - X = Flux.normalise(hcat(H_flat,T_flat,∇S_flat))' # build feature matrix + X = Lux.normalise(hcat(H_flat,T_flat,∇S_flat))' # build feature matrix return X end function build_D_features(H::Float64, temp::Float64, ∇S::Float64) - X = Flux.normalise(hcat([H],[temp],[∇S]))' # build feature matrix + X = Lux.normalise(hcat([H],[temp],[∇S]))' # build feature matrix return X end diff --git a/src/models/machine_learning/MLmodel.jl b/src/models/machine_learning/MLmodel.jl index 6f22fcc..71503e9 100644 --- a/src/models/machine_learning/MLmodel.jl +++ b/src/models/machine_learning/MLmodel.jl @@ -27,35 +27,36 @@ function Model(; return model end -mutable struct NN{F <: AbstractFloat} <: MLmodel - architecture::Flux.Chain - NN_f::Optimisers.Restructure - θ::Vector{F} +mutable struct NN{T1, T2, T3} <: MLmodel + architecture::T1 + st::T2 + θ::T3 end +(f::NN)(u) = f.architecture(u, f.θ, f.st) """ NN(params::Parameters; - architecture::Union{Flux.Chain, Nothing} = nothing, + 
architecture::Union{Lux.Chain, Nothing} = nothing, θ::Union{Vector{AbstractFloat}, Nothing} = nothing) Feed-forward neural network. Keyword arguments ================= - - `architecture`: `Flux.Chain` neural network architecture + - `architecture`: `Lux.Chain` neural network architecture - `θ`: Neural network parameters """ function NN(params::Sleipnir.Parameters; - architecture::Union{Flux.Chain, Nothing} = nothing, - θ::Union{Vector{F}, Nothing} = nothing) where {F <: AbstractFloat} + architecture::Union{Lux.Chain, Nothing} = nothing, + θ::Union{ComponentArray{F}, Nothing} = nothing) where {F <: AbstractFloat} if isnothing(architecture) - architecture, θ, NN_f = get_NN(θ) + architecture, θ, st = get_NN(θ) end # Build the simulation parameters based on input values ft = params.simulation.float_type - neural_net = NN{ft}(architecture, NN_f, θ) + neural_net = NN(architecture, st, θ) return neural_net end diff --git a/src/parameters/Hyperparameters.jl b/src/parameters/Hyperparameters.jl index 19365dd..ade57d7 100644 --- a/src/parameters/Hyperparameters.jl +++ b/src/parameters/Hyperparameters.jl @@ -4,7 +4,7 @@ export Hyperparameters current_epoch::I current_minibatch::I loss_history::Vector{F} - optimizer::Union{Optim.FirstOrderOptimizer, Flux.Optimise.AbstractOptimiser, Optimisers.AbstractRule} + optimizer::Union{Optim.FirstOrderOptimizer, Optimisers.AbstractRule} loss_epoch::F epochs::I batch_size::I @@ -33,7 +33,7 @@ function Hyperparameters(; current_epoch::Int64 = 1, current_minibatch::Int64 = 1, loss_history::Vector{Float64} = zeros(Float64, 0), - optimizer::Union{Optim.FirstOrderOptimizer, Flux.Optimise.AbstractOptimiser, Optimisers.AbstractRule} = BFGS(initial_stepnorm=0.001), + optimizer::Union{Optim.FirstOrderOptimizer, Optimisers.AbstractRule} = BFGS(initial_stepnorm=0.001), loss_epoch::Float64 = 0.0, epochs::Int64 = 50, batch_size::Int64 = 15 diff --git a/src/simulations/functional_inversions/functional_inversion_utils.jl 
b/src/simulations/functional_inversions/functional_inversion_utils.jl index 872a31b..e23fc5f 100644 --- a/src/simulations/functional_inversions/functional_inversion_utils.jl +++ b/src/simulations/functional_inversions/functional_inversion_utils.jl @@ -216,7 +216,7 @@ end function apply_UDE_parametrization!(θ, simulation::FunctionalInversion, integrator, batch_id::I) where {I <: Integer} # We load the ML model with the parameters - U = simulation.model.machine_learning.NN_f(θ) + U = NN(simulation.model.machine_learning.architecture, simulation.model.machine_learning.st, convert(typeof(simulation.model.machine_learning.θ),θ)) # We generate the ML parametrization based on the target if simulation.parameters.UDE.target == "A" A = predict_A̅(U, [mean(simulation.glaciers[batch_id].climate.longterm_temps)])[1] @@ -242,7 +242,7 @@ callback_plots_A = function (θ, l, simulation) # callback function to observe t p = sortperm(avg_temps) avg_temps = avg_temps[p] # We load the ML model with the parameters - U = simulation.model.machine_learning.NN_f(θ) + U = NN(simulation.model.machine_learning.architecture, simulation.model.machine_learning.st, convert(typeof(simulation.model.machine_learning.θ),θ)) pred_A = predict_A̅(U, collect(-23.0:1.0:0.0)') pred_A = Float64[pred_A...] # flatten true_A = A_fake(avg_temps, true) From 73b9d1a5c3163bc07e664839bc86f5998f99d9b1 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Wed, 29 May 2024 09:25:44 +0200 Subject: [PATCH 3/5] Comment mass balance for now --- src/ODINN.jl | 2 +- src/helpers/mass_balance.jl | 6 ------ .../functional_inversions/functional_inversion_utils.jl | 4 ++-- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/ODINN.jl b/src/ODINN.jl index d827917..b62ac0e 100644 --- a/src/ODINN.jl +++ b/src/ODINN.jl @@ -35,7 +35,7 @@ using EnzymeCore # This is equivalent to `@ignore_derivatives` EnzymeCore.EnzymeRules.inactive(::typeof(Huginn.define_callback_steps), args...) 
= nothing -EnzymeCore.EnzymeRules.inactive(::typeof(MB_timestep!), args...) = nothing +EnzymeCore.EnzymeRules.inactive(::typeof(Muninn.MB_timestep!), args...) = nothing EnzymeCore.EnzymeRules.inactive(::typeof(apply_MB_mask!), args...) = nothing # ############################################## diff --git a/src/helpers/mass_balance.jl b/src/helpers/mass_balance.jl index c3d6522..59d919a 100644 --- a/src/helpers/mass_balance.jl +++ b/src/helpers/mass_balance.jl @@ -43,16 +43,10 @@ end function MB_timestep!(MB, mb_model::MB_model, climate, S, S_coords, t, step) # First we get the dates of the current time and the previous step period = partial_year(Day, t - step):Day(1):partial_year(Day, t) - @timeit to "Climate step" begin get_cumulative_climate!(climate, period) - end # Convert climate dataset to 2D based on the glacier's DEM - @timeit to "Climate 2D step" begin downscale_2D_climate!(climate, S, S_coords) - end - @timeit to "Compute MB" begin MB .= compute_MB(mb_model, climate.climate_2D_step[]) - end end function apply_MB_mask!(H, MB, MB_total, context::Tuple) diff --git a/src/simulations/functional_inversions/functional_inversion_utils.jl b/src/simulations/functional_inversions/functional_inversion_utils.jl index e23fc5f..3d67922 100644 --- a/src/simulations/functional_inversions/functional_inversion_utils.jl +++ b/src/simulations/functional_inversions/functional_inversion_utils.jl @@ -132,8 +132,8 @@ function batch_iceflow_UDE(θ, simulation::FunctionalInversion, batch_id::I) whe function action!(integrator) if params.simulation.use_MB # Compute mass balance - MB_timestep!(model, glacier, params.solver.step, integrator.t; batch_id = batch_id) - apply_MB_mask!(integrator.u, glacier, model.iceflow[batch_id]) + # MB_timestep!(model, glacier, params.solver.step, integrator.t; batch_id = batch_id) + # apply_MB_mask!(integrator.u, glacier, model.iceflow[batch_id]) end # Apply parametrization apply_UDE_parametrization!(θ, simulation, integrator, batch_id) From 
e522b1ae00c4a3044be501251751215c15b6518f Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sun, 16 Jun 2024 09:30:52 -0400 Subject: [PATCH 4/5] Update src/ODINN.jl --- src/ODINN.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ODINN.jl b/src/ODINN.jl index b62ac0e..d661eb5 100644 --- a/src/ODINN.jl +++ b/src/ODINN.jl @@ -34,9 +34,9 @@ using ImageFiltering using EnzymeCore # This is equivalent to `@ignore_derivatives` -EnzymeCore.EnzymeRules.inactive(::typeof(Huginn.define_callback_steps), args...) = nothing -EnzymeCore.EnzymeRules.inactive(::typeof(Muninn.MB_timestep!), args...) = nothing -EnzymeCore.EnzymeRules.inactive(::typeof(apply_MB_mask!), args...) = nothing +EnzymeCore.EnzymeRules.inactive(::typeof(Huginn.define_callback_steps), args...; kwargs...) = nothing +EnzymeCore.EnzymeRules.inactive(::typeof(Muninn.MB_timestep!), args...; kwargs...) = nothing +EnzymeCore.EnzymeRules.inactive(::typeof(apply_MB_mask!), args...; kwargs...) = nothing # ############################################## # ############ PARAMETERS ############### From 01d3cccba264b84ceed6d2a7b1a12350c128d7c5 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sun, 16 Jun 2024 09:57:12 -0400 Subject: [PATCH 5/5] use in-place --- .../functional_inversions/functional_inversion_utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulations/functional_inversions/functional_inversion_utils.jl b/src/simulations/functional_inversions/functional_inversion_utils.jl index 3d67922..20a403f 100644 --- a/src/simulations/functional_inversions/functional_inversion_utils.jl +++ b/src/simulations/functional_inversions/functional_inversion_utils.jl @@ -142,7 +142,7 @@ function batch_iceflow_UDE(θ, simulation::FunctionalInversion, batch_id::I) whe cb_MB = DiscreteCallback(stop_condition, action!) # Run iceflow UDE for this glacier - du = params.simulation.use_iceflow ? Huginn.SIA2D : Huginn.noSIA2D + du = params.simulation.use_iceflow ? 
Huginn.SIA2D! : Huginn.noSIA2D! iceflow_sol = simulate_iceflow_UDE!(θ, simulation, model, params, cb_MB, batch_id; du = du) println("simulation finished for $batch_id")