Various tutorial fixes

ptiede · Sep 12, 2024 · 121e2ed · 121e2ed
1 parent 44d2e39
commit 121e2ed
Show file tree

Hide file tree

Showing 7 changed files with 99 additions and 104 deletions.
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -163,6 +163,7 @@ Comrade.dirty_image
 Comrade.dirty_beam
 Comrade.beamsize
 Comrade.apply_fluctuations
+Comrade.corr_image_prior
 Comrade.rmap
 ```
 

diff --git a/docs/src/base_api.md b/docs/src/base_api.md
@@ -88,8 +88,6 @@ ComradeBase.UnstructuredMap
 ComradeBase.baseimage
 ComradeBase.centroid
 ComradeBase.second_moment
-ComradeBase.load
-ComradeBase.save
 ComradeBase.stokes
 ```
 

diff --git a/examples/advanced/HybridImaging/main.jl b/examples/advanced/HybridImaging/main.jl
@@ -123,29 +123,30 @@ g      = imagepixels(fovxy, fovxy, npix, npix)
 
 # Part of hybrid imaging is to force a scale separation between
 # the different model components to make them identifiable.
-# To enforce this we will set the
-# length scale of the raster component equal to the beam size of the telescope in units of
-# pixel length, which is given by
+# To enforce this we will set the raster component to have a 
+# correlation length of 5 times the beam size.
 beam = beamsize(dvis)
 rat = (beam/(step(g.X)))
-cprior = GaussMarkovRandomField(rat, size(g); order=2)
+cprior = GaussMarkovRandomField(5*rat, size(g))
 
-# additionlly we will fix the standard deviation of the field to unity and instead
-# use a pseudo non-centered parameterization for the field.
-# GaussMarkovRandomField(meanpr, 0.1*rat, 1.0, crcache)
 
-
-# Finally we can put form the total model prior
+# For the other parameters we use a uniform priors for the ring fractional flux `f`
+# ring radius `r`, ring width `σ`, and the flux fraction of the Gaussian component `fg`
+# and the amplitude for the ring brightness modes. For the angular variables `ξτ` and `ξ`
+# we use the von Mises prior with concentration parameter `inv(π^2)` which is essentially
+# a uniform prior on the circle. Finally for the standard deviation of the MRF we use a
+# half-normal distribution. This is to ensure that the MRF has small differences from the 
+# mean image.
 skyprior = (
           c  = cprior,
-          σimg = truncated(Normal(0.0, 1.0); lower=0.01),
+          σimg = truncated(Normal(0.0, 0.1); lower=0.01),
           f  = Uniform(0.0, 1.0),
           r  = Uniform(μas2rad(10.0), μas2rad(30.0)),
           σ  = Uniform(μas2rad(0.1), μas2rad(10.0)),
           τ  = truncated(Normal(0.0, 0.1); lower=0.0, upper=1.0),
-          ξτ = Uniform(-π/2, π/2),
+          ξτ = DiagonalVonMises(0.0, inv(π^2)),
           ma = ntuple(_->Uniform(0.0, 0.5), 2),
-          mp = ntuple(_->Uniform(0.0, 2π), 2),
+          mp = ntuple(_->DiagonalVonMises(0.0, inv(π^2)), 2),
           fg = Uniform(0.0, 1.0),
         )
 

diff --git a/examples/intermediate/ClosureImaging/main.jl b/examples/intermediate/ClosureImaging/main.jl
@@ -111,26 +111,20 @@ mpr = modify(Gaussian(), Stretch(μas2rad(50.0)./fwhmfac))
 imgpr = intensitymap(mpr, grid)
 skymeta = (;mimg = imgpr./flux(imgpr));
 
-# In addition we want a reasonable guess for what the resolution of our image should be.
-# For radio astronomy this is given by roughly the longest baseline in the image. To put this
-# into pixel space we then divide by the pixel size.
-beam = beamsize(dlcamp)
-rat = (beam/(step(grid.X)))
-
-# To make the Gaussian Markov random field efficient we first precompute a bunch of quantities
-# that allow us to scale things linearly with the number of image pixels. This drastically improves
-# the usual N^3 scaling you get from usual Gaussian Processes.
-crcache = ConditionalMarkov(GMRF, grid; order=1)
 
 # Now we can finally form our image prior. For this we use a heirarchical prior where the
-# correlation length is given by a inverse gamma prior to prevent overfitting.
-# Gaussian Markov random fields are extremly flexible models.
-# To prevent overfitting it is common to use priors that penalize complexity. Therefore, we
-# want to use priors that enforce similarity to our mean image, and prefer smoothness.
-cprior = HierarchicalPrior(crcache, truncated(InverseGamma(1.0, -log(0.1)*rat); upper=2*npix))
-prior = (c = cprior, σimg = Exponential(0.5), fg=Uniform(0.0, 1.0))
-
-# Putting this all together we can define our sky model.
+# direct log-ratio image prior is a Gaussian Markov Random Field. The correlation length
+# of the GMRF is a hyperparameter that is fit during imaging. We pass the data to the prior
+# to estimate what the maximumal resolutoin of the array is and prevent the prior from allowing
+# correlation lengths that are much small than the telescope beam size. Note that this GMRF prior
+# has unit variance. For more information on the GMRF prior see the [corr_image_prior](@ref) doc string.
+cprior = corr_image_prior(grid, dlcamp)
+
+# Putting everything together the total prior is then our image prior, a prior on the
+# standard deviation of the MRF, and a prior on the fractional flux of the Gaussian component.
+prior = (c = cprior, σimg = Exponential(0.1), fg=Uniform(0.0, 1.0))
+
+# We can then define our sky model.
 skym = SkyModel(sky, prior, grid; metadata=skymeta)
 
 # Since we are fitting closures we do not need to include an instrument model, since

diff --git a/examples/intermediate/PolarizedImaging/main.jl b/examples/intermediate/PolarizedImaging/main.jl
@@ -187,23 +187,13 @@ grid = imagepixels(fovx, fovy, nx, ny)
 skymeta = (;ftot=1.0, grid)
 
 
-# For our image prior we will use a simpler prior than
-#   - We use again use a GMRF prior. For more information see the [Imaging a Black Hole using only Closure Quantities](@ref) tutorial.
-#   - For the total polarization fraction, `p`, we assume an uncorrelated uniform prior `ImageUniform` for each pixel.
-#   - To specify the orientation of the polarization, `angparams`, on the Poincare sphere,
-#     we use a uniform spherical distribution, `ImageSphericalUniform`.
-#-
-rat = beamsize(dvis)/step(grid.X)
-cmarkov = ConditionalMarkov(GMRF, grid; order=1)
-dρ = truncated(InverseGamma(1.0, -log(0.1)*rat); lower=2.0, upper=max(nx, ny))
-cprior = HierarchicalPrior(cmarkov, dρ)
-
-
-# For all the calibration parameters, we use a helper function `CalPrior` which builds the
-# prior given the named tuple of station priors and a `JonesCache`
-# that specifies the segmentation scheme. For the gain products, we use the `scancache`, while
-# for every other quantity, we use the `trackcache`.
-fwhmfac = 2.0*sqrt(2.0*log(2.0))
+# We use again use a GMRF prior similar to the [Imaging a Black Hole using only Closure Quantities](@ref) tutorial
+# for the log-ratio transformed image. We use the same correlated image prior for the inverse-logit transformed 
+# total polarization. The mean total polarization fraction `p0` is centered at -2.0 with a standard deviation of 2.0
+# which logit transformed puts most of the prior mass < 0.8 fractional polarization. The standard deviation of the
+# total polarization fraction `pσ` again uses a Half-normal process. The angular parameters of the polarizaton are 
+# given by a uniform prior on the sphere.
+cprior = corr_image_prior(grid, dvis)
 skyprior = (
     c = cprior,
     σ  = truncated(Normal(0.0, 0.1); lower=0.0),
@@ -239,12 +229,6 @@ function fgain(x)
     return gR, gL
 end
 G = JonesG(fgain)
-# Note that we are using the Julia `do` syntax here to define an anonymous function. This
-# could've also been written as
-# ```julia
-# fgain(x) = (exp(x.lgR + 1im*x.gpR), exp(x.lgR + x.lgrat + 1im*(x.gpR + x.gprat)))
-# G = JonesG(fgain)
-# ```
 
 
 # Similarly we provide a `JonesD` function for the leakage terms. Since we assume that we
@@ -258,7 +242,6 @@ function fdterms(x)
     dL = complex(x.dLx, x.dLy)
     return dR, dL
 end
-
 D = JonesD(fdterms)
 
 # Finally we define our response Jones matrix. This matrix is a basis transform matrix
@@ -273,10 +256,18 @@ R = JonesR(;add_fr=true)
 # so we could've removed the * argument in this case.
 J = JonesSandwich(*, G, D, R)
 
-
+# For the instrument prior, we will use a simple IID prior for the complex gains and d-terms.
+# The `IIDSitePrior` function specifies that each site has the same prior and each value is independent
+# on some time segment. The current time segments are 
+#  - `ScanSeg()` which specifies each scan has an independent value
+#  - `TrackSeg()` which says that the value is constant over the track.
+#  - `IntegSeg()` which says that the value changes each integration time
+# For the released EHT data, the calibration procedure makes gains stable over each scan
+# so we use `ScanSeg` for those quantities. The d-terms are typically stable over the track
+# so we use `TrackSeg` for those.
 intprior = (
     lgR  = ArrayPrior(IIDSitePrior(ScanSeg(), Normal(0.0, 0.1))),
-    gpR  = ArrayPrior(IIDSitePrior(ScanSeg(), DiagonalVonMises(0.0, inv(π  ^2))); refant=SEFDReference(0.0), phase=false),
+    gpR  = ArrayPrior(IIDSitePrior(ScanSeg(), DiagonalVonMises(0.0, inv(π  ^2))); refant=SEFDReference(0.0)),
     lgrat= ArrayPrior(IIDSitePrior(ScanSeg(), Normal(0.0, 0.1))),
     gprat= ArrayPrior(IIDSitePrior(ScanSeg(), Normal(0.0, 0.1)); refant = SingleReference(:AA, 0.0)),
     dRx  = ArrayPrior(IIDSitePrior(TrackSeg(), Normal(0.0, 0.2))),
@@ -285,6 +276,8 @@ intprior = (
     dLy  = ArrayPrior(IIDSitePrior(TrackSeg(), Normal(0.0, 0.2))),
 )
 
+# Finally, we can build our instrument model which takes a model for the Jones matrix `J`
+# and priors for each term in the Jones matrix.
 intmodel = InstrumentModel(J, intprior)
 
 
@@ -306,18 +299,6 @@ tpost = asflat(post)
 #     of the parameter space to make sampling easier.
 #-
 
-ndim = dimension(tpost)
-
-using Enzyme
-Enzyme.API.runtimeActivity!(true)
-x = prior_sample(rng, tpost)
-dx = zero(x)
-autodiff(Enzyme.Reverse, logdensityof, Active, Const(tpost), Duplicated(x, dx))
-
-using BenchmarkTools
-@benchmark autodiff($Enzyme.Reverse, $logdensityof,$Active, $(Const(tpost)), Duplicated($x, fill!($dx, 0)))
-
-
 # Now we optimize. Unlike other imaging examples, we move straight to gradient optimizers
 # due to the higher dimension of the space. In addition the only AD package that can currently
 # work with the polarized Comrade posterior is Enzyme.

diff --git a/examples/intermediate/StokesIImaging/main.jl b/examples/intermediate/StokesIImaging/main.jl
@@ -93,7 +93,7 @@ grid = imagepixels(fovx, fovy, npix, npix)
 using VLBIImagePriors
 using Distributions, DistributionsAD
 fwhmfac = 2*sqrt(2*log(2))
-mpr  = modify(Gaussian(), Stretch(μas2rad(200.0)./fwhmfac))
+mpr  = modify(Gaussian(), Stretch(μas2rad(50.0)./fwhmfac))
 mimg = intensitymap(mpr, grid)
 
 
@@ -104,50 +104,24 @@ mimg = intensitymap(mpr, grid)
 skymeta = (;ftot = 1.1, mimg = mimg./flux(mimg))
 
 
-
-# In addition we want a reasonable guess for what the resolution of our image should be.
-# For radio astronomy this is given by roughly the longest baseline in the image. To put this
-# into pixel space we then divide by the pixel size.
-beam = beamsize(dvis)
-rat = (beam/(step(grid.X)))
-
 # To make the Gaussian Markov random field efficient we first precompute a bunch of quantities
 # that allow us to scale things linearly with the number of image pixels. The returns a
 # functional that accepts a single argument related to the correlation length of the field.
 # The second argument defines the underlying random field of the Markov process. Here
-# we are using a zero mean and unit variance Gaussian Markov random field. The keyword
-# argument specifies the order of the Gaussian field. Currently, we recommend using order
-#  - 1 which is identical to TSV variation and L₂ regularization
-#  - 2 which is identical to a Matern 1 process in 2D and is really the convolution of two
-#    order 1 processes
+# we are using a zero mean and unit variance Gaussian Markov random field. 
 # For this tutorial we will use the first order random field
-crcache = ConditionalMarkov(GMRF, grid; order=1)
-
-# To demonstrate the prior let create a few random realizations
-
-
-
-# Now we can finally form our image prior. For this we use a heirarchical prior where the
-# inverse correlation length is given by a Half-Normal distribution whose peak is at zero and
-# standard deviation is `0.1/rat` where recall `rat` is the beam size per pixel.
-# For the variance of the random field we use another
-# half normal prior with standard deviation 0.1. The reason we use the half-normal priors is
-# to prefer "simple" structures. Gaussian Markov random fields are extremly flexible models,
-# and to prevent overfitting it is common to use priors that penalize complexity. Therefore, we
-# want to use priors that enforce similarity to our mean image. If the data wants more complexity
-# then it will drive us away from the prior.
-cprior = HierarchicalPrior(crcache, truncated(InverseGamma(1.0, -log(0.01)*rat); lower=1.0, upper=2*npix))
+cprior = corr_image_prior(grid, dlcamp)
 
 
-# We can now form our model parameter priors. Like our other imaging examples, we use a
-# Dirichlet prior for our image pixels. For the log gain amplitudes, we use the `CalPrior`
-# which automatically constructs the prior for the given jones cache `gcache`.
+# Putting everything together the total prior is then our image prior, a prior on the
+# standard deviation of the MRF, and a prior on the fractional flux of the Gaussian component.
 prior = (
          c = cprior,
+         σimg = Exponential(0.1),
          fg = Uniform(0.0, 1.0),
-         σimg = truncated(Normal(0.0, 0.5), lower=0.0),
         )
 
+# Now we can construct our sky model.
 skym = SkyModel(sky, prior, grid; metadata=skymeta)
 
 # Unlike other imaging examples

diff --git a/src/mrf_image.jl b/src/mrf_image.jl
@@ -1,4 +1,4 @@
-export apply_fluctuations
+export apply_fluctuations, corr_image_prior
 
 
 """
@@ -50,3 +50,49 @@ function _apply_fluctuations(t::VLBIImagePriors.LogRatioTransform, mimg::Abstrac
     r .= r./_fastsum(r)
     return r
 end
+
+
+"""
+    corr_image_prior(grid::AbstractRectiGrid, corr_length::Real; base=GMRF, order=1, lower=1.0, upper=2*max(size(grid)...))
+    corr_image_prior(grid::AbstractRectiGrid, obs::EHTObservationTable; base=GMRF, order=1, lower=1.0, upper=2*max(size(grid)...))
+
+Construct a correlated image prior, for the image with grid `grid`, and using the observation `dvis`. 
+The correlation will be a Markov Random Field (MRF) of order `order`, with the base distribution `base`. For 
+`base` you can choose any of the Markov random fields defined in `VLBIImagePriors`, the default is `GMRF` which
+is a Gaussian MRF. 
+
+As part the prior will be a hierarchical prior with the correlation length as a hyperparameter. By default the correlation
+parameter uses a first order inverse gamma distribution for its prior. The `frac_below_beam` parameter is the fraction of the
+correlation prior mass that is below the beam size of the observation `dvis`. The `lower` and `upper` parameters are the lower
+and upper bounds of the correlation length, we don't let the correlation length to be too small or large for numerical reasons.
+
+## Arguments
+ - `grid::AbstractRectiGrid`: The grid of the image to be reconstructed.
+ - `corr_length`: The correlation length of the MRF. If this is an `EHTObservationTable` then the corr_length 
+    will be the approximate beam size of the observation. 
+
+## Keyword Arguments
+ - `base`: The base distribution of the MRF. Options include `GMRF`, `EMRF`, and `CMRF`
+ - `order`: The order of the MRF. Default is first order
+ - `frac_below_beam`: The fraction of the correlation prior mass that is below the beam size of the observation `dvis`. 
+    the default is `0.01` which means only 1% of the log-image correlation length is below the beam size.
+ - `lower`: The lower bound of the correlation length. Default is `1.0`
+ - `upper`: The upper bound of the correlation length. Default is `2*max(size(grid)...)`
+
+
+!!! warn
+    An order > 2 will be slow since we switch to a sparse matrix representation of the MRF.
+"""
+function corr_image_prior(grid::AbstractRectiGrid, corr_length::Real; 
+                         base=GMRF, order=1,
+                         frac_below_beam=0.01, 
+                         lower=1.0, upper=2*max(size(grid)...)
+                        )
+    rat = corr_length/step(grid.X)
+    cmarkov = ConditionalMarkov(base, grid; order=order)
+    dρ = truncated(InverseGamma(1.0, -log(frac_below_beam)*rat); lower, upper)
+    cprior = HierarchicalPrior(cmarkov, dρ)
+    return cprior
+end
+
+corr_image_prior(grid::AbstractRectiGrid, obs::EHTObservationTable; kwargs...) = corr_image_prior(grid, beamsize(obs); kwargs...)