Replies: 33 comments
-
Hi, does a simple model run? Like the one in our tutorial? Thanks!
-
You will need to specify priors and init vals for convergence of
hierarchical models (this is not uncommon for MCMC).
See below for an example of generative data (similar to yours) and then
hierarchical models with priors that do fit and recover parameters well.
We are working on doing this more systematically and updating tutorials but
these should be helpful for now.
1. For generating hierarchical data:
n_subjects = 25 # number of subjects
n_trials = 50 # number of trials per subject - vary from low to high values to check shrinkage
sd_v = 0.3 # sd for v-intercept (also used for a)
mean_v = 1.25 # mean for v-intercept
mean_vx = 0.8 # mean for slope of x onto v
mean_vy = 0.2 # mean for slope of y onto v
sd_tz = 0.1
mean_a = 1.5
mean_t = 0.5
mean_z = 0.5
data_list = []
param_list = []
for i in range(n_subjects):
# Make parameters for subject i
intercept = np.random.normal(mean_v, sd_v, size=1)
x = np.random.uniform(-1, 1, size=n_trials)
y = np.random.uniform(-1, 1, size=n_trials)
v_x = np.random.normal(mean_vx, sd_v, size=1)
v_y = np.random.normal(mean_vy, sd_v, size=1)
v = intercept + (v_x * x) + (v_y * y)
a = np.random.normal(mean_a, sd_v, size=1)
z = np.random.normal(mean_z, sd_tz, size=1)
t = np.random.normal(mean_t, sd_tz, size=1)
# v is a vector which differs over trials by x and y, so we have a different v for every trial - other params are the same for all trials
true_values = np.column_stack(
[v, np.repeat(a, axis=0, repeats=n_trials), np.repeat(z, axis=0, repeats=n_trials), np.repeat(t, axis=0, repeats=n_trials)]
)
# Simulate data
obs_ddm_reg_v = hssm.simulate_data(model="ddm", theta=true_values, size=1)
# store ground truth params
param_list.append(
pd.DataFrame(
{
"intercept": intercept,
"v_x": v_x,
"v_y": v_y,
"a": a,
"z": z,
"t": t,
}
)
)
# Append simulated data to list
data_list.append(
pd.DataFrame(
{
"rt": obs_ddm_reg_v["rt"],
"response": obs_ddm_reg_v["response"],
"x": x,
"y": y,
"subject": i,
}
)
)
# Make single dataframe out of subject-wise datasets
dataset_reg_v_hier = pd.concat(data_list)
dataset_reg_v_hier
# can also create datasets for individual subjects so that you can later fit to just that subject to compare recovery to hierarchical inference e.g.:
dataset_reg_v_s0 = dataset_reg_v_hier[dataset_reg_v_hier["subject"]==0]
2. Model specification (note some of these priors will change, but this works for now):
model_reg_v_ddm_hier1A = hssm.HSSM(
data=dataset_reg_v_hier,
# loglik_kind="approx_differentiable", # commented out - this model works best with the analytic likelihood
prior_settings = "safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + x + y + (1 + x + y| subject)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"y": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"1|subject": {"name": "Normal",
"mu": 0, # using non-centered approach so mu's of indiv subject offsets
should be 0
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.5
},
"x|subject": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5,
}, "initval": 0.5
},
"y|subject": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5,
}, "initval": 0.5
},
},
"link": "identity",
},
{
"name": "t",
"formula": "t ~ 1 + (1 | subject)",
"prior": {
"Intercept": {"name": "Normal", "mu": 0.5, "sigma": 0.4, "initval": 0.3},
"1|subject": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5, "initval": .1
},
},
},
"link": "identity",
},
{
"name": "z",
"formula": "z ~ 1 + (1 | subject)",
"prior": {
# "Intercept": {"name": "HalfNormal", "sigma": 1, "initval": .5},
"1|subject": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.05, "initval": .01
},
},
},
},
{
"name": "a",
"formula": "a ~ 1 + (1 | subject)",
"prior": {
"Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 1.75, "initval": 1},
"1|subject": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1, "initval": 0.3
},
},
},
},
],
noncentered = True,
# p_outlier=0
)
model_reg_v_ddm_hier1A
3. Sampling (using just 200 draws and tunes to show that it can happen quite fast when working well). When it converges you should also see that it spends some time on initial samples during burn-in and then starts sampling much faster. For this dataset each chain should finish in less than a minute. If it gets stuck you would see that it takes longer, but also that the number of steps the sampler takes keeps maxing out at 1023 with very small step sizes.
samples_model_reg_v_ddm_hier1A = model_reg_v_ddm_hier1A.sample(
sampler="nuts_numpyro", # type of sampler to choose, 'nuts_numpyro',
'nuts_blackjax' of default pymc nuts sampler
cores=1, # how many cores to use
chains=3, # how many chains to run
draws=200, # number of draws from the markov chain
tune=200, # number of burn-in samples
idata_kwargs=dict(log_likelihood=True), # return log likelihood
) # mp_ctx="forkserver")
4. Example to get model output and check some of the params against the ground truth (sigmas should also approach the true generative sigmas). Of course you can also look at posteriors and forest plots etc. (sketched after the summary call below).
az.summary(samples_model_reg_v_ddm_hier1A, var_names=["v_Intercept", "v_x", "v_y", "v_1|subject_sigma", "v_x|subject_sigma", "v_y|subject_sigma", "a_1|subject_sigma", "t_Intercept", "z_Intercept", "t_1|subject_sigma", "z_1|subject_sigma"])
5. For fitting the empirical Cavanagh data with the angle model (for now no random effects of stim; also note some of the priors could be improved - I adjusted *a* to have a gamma prior like HDDM but left z and t as halfnormal for now):
model_angle_cav = hssm.HSSM(
data= hssm.load_data('cavanagh_theta'),
model = "angle",
# link_settings = "log_logit",
prior_settings = "safe",
loglik_kind="approx_differentiable",
include=[
{
"name": "v",
"formula": "v ~ 1 + stim + (1 | participant_id)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"stim": {"name": "Normal", "mu": 0, "sigma": 1},#, "initval": 0.1},
"1|participant_id": {"name": "Normal",
"mu": 0, # using non-centered approach so mu's of indiv subject offsets
should be 0
"sigma": {"name": "HalfNormal",
"sigma": 2,
}, "initval": 0
},
},
},
{
"name": "t",
"formula": "t ~ 1 + (1 | participant_id)",
"prior": {
"Intercept": {"name": "HalfNormal", "sigma": 0.4, "initval": 0.3},
"1|participant_id": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1, "initval": .1
},
},
},
"link": "identity",
},
{
"name": "z",
"formula": "z ~ 1 + (1 | participant_id)",
"prior": {
"Intercept": {"name": "HalfNormal", "sigma": 1, "initval": 0.5},
"1|participant_id": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.05, "initval": .01
},
},
},
},
{
"name": "theta",
"formula": "theta ~ 1 + (1 | participant_id)",
"prior": {
"Intercept": {"name": "HalfNormal", "sigma": 0.4, "initval": 0.2},
"1|participant_id": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": .2, "initval": .05
},
},
},
"link": "identity",
},
{
"name": "a",
"formula": "a ~ 1 + stim + theta+ dbs + theta:dbs + (1 + theta +dbs
+theta:dbs| participant_id)",
"prior": {
"Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 1.75, "initval": 1},
"stim": {"name": "Normal", "mu": 0, "sigma": 1},#, "initval": 0.1},
"theta": {"name": "Normal", "mu": 0, "sigma": 1},#, "initval": 0.1},
"dbs": {"name": "Normal", "mu": 0, "sigma": 1},#, "initval": 0.1},
"theta:dbs": {"name": "Normal", "mu": 0, "sigma": 1},#, "initval": 0.1},
"1|participant_id": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1, "initval": .3
},
},
"theta|participant_id": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": .2, "initval": .05
},
},
},
},
],
noncentered = True,
)
model_angle_cav
6. For model selection:
az.compare(
{
"ddm": model_ddm_cav.traces,
"angle": model_angle_cav.traces
}
)
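(Note that az.compare needs the pointwise log likelihood, which is why idata_kwargs=dict(log_likelihood=True) was passed to .sample() above; model_ddm_cav here is assumed to be a corresponding DDM fit to the same data, not shown in this thread.)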
On Fri, Jun 21, 2024 at 12:50 PM hyang336 wrote:
*Describe the bug*
When fitting simulated data, all samples are divergent, and it returns the
arviz warning that "Your data appears to have a single value or no finite
values". Am I missing something obvious?
*HSSM version*
0.2.2
*To Reproduce*
```python
v_slope=0.45
a_slope=0.3
z_slope=0.1
t_slope=0.2
v_intercept=0
a_intercept=2
z_intercept=0
t_intercept=0.05
n_subjects=30 #number of subjects
n_trials=200 #number of trials per subject
param_sv=0.2 #standard deviation of the subject-level parameters
# Save trial-level parameters for each subject
subject_params={
"v": np.array([]),
"a": np.array([]),
"z": np.array([]),
"t": np.array([]),
"simneural": np.array([]),
"subID": np.array([])
}
# simulated data list
sim_data=[]
# Generate subject-level parameters
for i in range(n_subjects):
# set the seed for each subject deterministically so all models are based on the same data
np.random.seed(i)
# generate neural data, standard normal as the real data
simneural=np.random.normal(size=n_trials)
# generate v, a, z, t
v=np.random.normal(v_intercept, param_sv) + np.random.normal(v_slope, param_sv)*simneural
a=np.random.normal(a_intercept, param_sv) + np.random.normal(a_slope, param_sv)*simneural
z=np.random.normal(z_intercept, param_sv) + np.random.normal(z_slope, param_sv)*simneural
t=np.random.normal(t_intercept, param_sv) + np.random.normal(t_slope, param_sv)*simneural
# clip parameters to stay within default bounds
v = np.clip(v, -3, 3)
a = np.clip(a, 0.3, 2.5)
z = np.clip(z, 0, 1)
t = np.clip(t, 0, 2)
# save to subject_params
subject_params["v"]=np.append(subject_params["v"],v)
subject_params["a"]=np.append(subject_params["a"],a)
subject_params["z"]=np.append(subject_params["z"],z)
subject_params["t"]=np.append(subject_params["t"],t)
subject_params["simneural"]=np.append(subject_params["simneural"],simneural)
subject_params["subID"]=np.append(subject_params["subID"],np.repeat(i,len(simneural)))
# simulate RT and choices
true_values = np.column_stack([v,a,z,t])
# Get model simulations
ddm_all = simulator.simulator(true_values, model="ddm", n_samples=1)
# Random regressor as control
rand_x = np.random.normal(size=len(simneural))
sim_data.append(
pd.DataFrame(
{
"rt": ddm_all["rts"].flatten(),
"response": ddm_all["choices"].flatten(),
"x": simneural,
"rand_x": rand_x,
"subID": i
}
)
)
#make a single dataframe of subject-wise simulated data
sim_data_concat=pd.concat(sim_data)
model_ddm_null = hssm.HSSM(
data=sim_data_concat,
prior_settings="safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + (1|subID)",
"link": "identity"
},
{
"name": "a",
"formula": "a ~ 1 + (1|subID)",
"link": "identity"
},
{
"name": "z",
"formula": "z ~ 1 + (1|subID)",
"link": "identity"
},
{
"name": "t",
"formula": "t ~ 1 + (1|subID)",
"link": "identity"
}
],
)
infer_data_ddm_null = model_ddm_null.sample(sampler="nuts_numpyro", chains=4, cores=4, draws=5000, tune=5000, idata_kwargs={'log_likelihood': True}, target_accept=TA)
az.to_netcdf(infer_data_ddm_null, outdir+'sample_' + str(5000) + '_' + str(5000) + '_trace_ParamInbound_ddm_NutsNumpyro_null.nc4')
az.plot_trace( infer_data_ddm_null, var_names="~log_likelihood", )
plt.savefig(outdir+'posterior_diagnostic_' + str(5000) + '_' + str(5000) + '_trace_ParamInbound_ddm_NutsNumpyro_null.png')
res_sum_null=az.summary(model_ddm_null.traces)
res_sum_null.to_csv(outdir+'summary_' + str(5000) + '_' + str(5000) + '_trace_ParamInbound_ddm_NutsNumpyro_null.csv')
```
*Screenshots*: image.png <https://github.com/lnccbrown/HSSM/assets/26169163/a88d1056-3938-4c05-8c2b-cf84c1605aec>
-
Thanks for the reply, I will try setting the priors and refit the model. Just a quick question though: I thought that by setting prior_settings="safe", a default set of good-enough priors was chosen. What does that option do, and why do I still need to set priors when prior_settings="safe"?
-
prior_settings="safe" just helps the sampler not propose values that would be outside the range of the LAN training. But it can still be the case that the sampler proposes allowed values that are far away from what would be realistically expected for the design (e.g. a covariate effect on a parameter would often not be likely to affect that parameter by much more than 1 for a or v, and less for t; similarly, the offsets of individuals from the group mean are not likely to vary by a huge amount). The sketch below illustrates the scale of the issue.
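A quick scale check of that point (a sketch; any stats library would do):

```python
from scipy import stats

# ~95% of the mass of a Normal(0, 2) effect prior lies within about +/-4
# of the mean - far wider than plausible effects on a or v, let alone t.
print(stats.norm(0, 2).interval(0.95))  # approximately (-3.92, 3.92)
```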
-
I added priors and initval to my models, but the behavior is the same
To test whether this is due to the model or the data being too complex, I also ran a simpler simulation where only v depends on the regressor in both the ground truth data and in the regression structure of the models. Then I fit both the ground-truth model and a random-intercept only model:
**All these models resulted in the same behavior: the acceptance probability is always 0 and all samples are divergent. I also ran the DDM model Dr. Frank posted in the reply above, which, despite having some 7000 divergences, has a non-zero acceptance probability. The posterior and R-hat all look good and the parameter recovery is good as well. I am really confused why there is such a big difference given how similar the models and priors are.**
-
A few things:
* Try using more informative priors - your sigmas are all quite high (note that sigma = 2 will allow the sampler to propose values up to ~4 away from the mean, which is highly unlikely). Especially for the priors on the random effects (subID), these should be very unlikely to be that far from the group mean. There are other approaches in the works that will make some of this more robust to wider, less informative priors (for example using link functions), but this should work better for now.
* Also, in your case even your sigma for z is 2, which doesn't make sense (z can only go between 0 and 1). Try using values closer to what I had in my example. (In theory we should use something like a beta prior for z, but the halfnormal works for now when in a reasonable range.)
* Your z intercept in the simulated data is 0, and this means that the starting point would begin on one of the boundaries. It makes more sense for the z intercept to be 0.5 (i.e. unbiased, in between the two boundaries - z is a fraction of a). Before fitting you should probably look at the simulated data to make sure it is reasonable in terms of RT distributions and choice proportions (a quick sketch follows below) - I suspect with intercept z=0 you might be generating a lot of data that are very fast, with choices all toward the lower boundary.
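A quick pre-fit sanity check along those lines (a sketch; it assumes the sim_data_concat dataframe from the reproduction script above, with matplotlib.pyplot imported as plt):

```python
print(sim_data_concat["rt"].describe())  # RT distribution summary
print(sim_data_concat["response"].value_counts(normalize=True))  # choice proportions
sim_data_concat["rt"].hist(bins=100)  # with intercept z=0, expect many very fast trials
plt.show()
```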
On Fri, Jun 21, 2024 at 9:12 PM hyang336 wrote:
*I added priors and initval to my models, but the behavior is the same*
v_slope=0.45
a_slope=0.3
z_slope=0.1
t_slope=0.2
v_intercept=0
a_intercept=2
z_intercept=0
t_intercept=0.05
n_subjects=30 #number of subjects
n_trials=200 #number of trials per subject
param_sv=0.2 #standard deviation of the subject-level parameters
# Save trial-level parameters for each subject
subject_params={
"v": np.array([]),
"a": np.array([]),
"z": np.array([]),
"t": np.array([]),
"simneural": np.array([]),
"subID": np.array([])
}
# simulated data list
sim_data=[]
# Generate subject-level parameters
for i in range(n_subjects):
# set the seed for each subject deterministically so all models are based on the same data
np.random.seed(i)
# generate neural data, standard normal as the real data
simneural=np.random.normal(size=n_trials)
# generate v, a, z, t
v=np.random.normal(v_intercept, param_sv) + np.random.normal(v_slope, param_sv)*simneural
a=np.random.normal(a_intercept, param_sv) + np.random.normal(a_slope, param_sv)*simneural
z=np.random.normal(z_intercept, param_sv) + np.random.normal(z_slope, param_sv)*simneural
t=np.random.normal(t_intercept, param_sv) + np.random.normal(t_slope, param_sv)*simneural
# clip parameters to stay within default bounds
v = np.clip(v, -3, 3)
a = np.clip(a, 0.3, 2.5)
z = np.clip(z, 0, 1)
t = np.clip(t, 0, 2)
# save to subject_params
subject_params["v"]=np.append(subject_params["v"],v)
subject_params["a"]=np.append(subject_params["a"],a)
subject_params["z"]=np.append(subject_params["z"],z)
subject_params["t"]=np.append(subject_params["t"],t)
subject_params["simneural"]=np.append(subject_params["simneural"],simneural)
subject_params["subID"]=np.append(subject_params["subID"],np.repeat(i,len(simneural)))
# simulate RT and choices
true_values = np.column_stack([v,a,z,t])
# Get model simulations
ddm_all = simulator.simulator(true_values, model="ddm", n_samples=1)
# Random regressor as control
rand_x = np.random.normal(size=len(simneural))
sim_data.append(
pd.DataFrame(
{
"rt": ddm_all["rts"].flatten(),
"response": ddm_all["choices"].flatten(),
"x": simneural,
"rand_x": rand_x,
"subID": i
}
)
)
#make a single dataframe of subject-wise simulated data
sim_data_concat=pd.concat(sim_data)
model_ddm_null = hssm.HSSM(
data=sim_data_concat,
prior_settings="safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "a",
"formula": "a ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 2, "initval": 1},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "z",
"formula": "z ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 0, "sigma": 2, "initval": 0},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "t",
"formula": "t ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 0, "sigma": 2, "initval": 0.3},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
}
],
)
#infer_data_race4nba_v_null = model_race4nba_v_null.sample(step=pm.Slice(model=model_race4nba_v_null.pymc_model), sampler="mcmc", chains=4, cores=4, draws=5000, tune=10000,idata_kwargs = {'log_likelihood': True})
infer_data_ddm_null = model_ddm_null.sample(sampler="nuts_numpyro", chains=4, cores=ncores, draws=samples, tune=burnin,idata_kwargs = {'log_likelihood': True}, target_accept=TA)
# az.to_netcdf(infer_data_race4nba_v_null,outdir+'sample_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_null.nc4')
az.to_netcdf(infer_data_ddm_null,outdir+'sample_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_null.nc4')
az.plot_trace(
infer_data_ddm_null,
var_names="~log_likelihood", # we exclude the log_likelihood traces here
)
# plt.savefig(outdir+'posterior_diagnostic_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_null.png')
plt.savefig(outdir+'posterior_diagnostic_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_null.png')
res_sum_null=az.summary(model_ddm_null.traces)
# res_sum_null.to_csv(outdir+'summary_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_null.csv')
res_sum_null.to_csv(outdir+'summary_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_null.csv')
*To test whether this is due to the model or the data being too complex, I
also ran a simpler simulation where only v depends on the regressor in both
the ground truth data and in the regression structure of the models. Then I
fit both the ground-truth model and a random-intercept only model:*
v_slope=0.45
v_intercept=0
a_intercept=2
z_intercept=0
t_intercept=0.05
n_subjects=30 #number of subjects
n_trials=200 #number of trials per subject
param_sv=0.2 #standard deviation of the subject-level parameters
# Save trial-level parameters for each subject
subject_params={
"v": np.array([]),
"a": np.array([]),
"z": np.array([]),
"t": np.array([]),
"simneural": np.array([]),
"subID": np.array([])
}
# simulated data list
sim_data=[]
# Generate subject-level parameters
for i in range(n_subjects):
# set the seed for each subject deterministically so all models are based on the same data
np.random.seed(i)
# generate neural data, standard normal as the real data
simneural=np.random.normal(size=n_trials)
# generate v, a, z, t
v=np.random.normal(v_intercept, param_sv) + np.random.normal(v_slope, param_sv)*simneural
a=np.random.normal(a_intercept, param_sv)
z=np.random.normal(z_intercept, param_sv)
t=np.random.normal(t_intercept, param_sv)
# clip parameters to stay within default bounds
v = np.clip(v, -3, 3)
a = np.clip(a, 0.3, 2.5)
z = np.clip(z, 0, 1)
t = np.clip(t, 0, 2)
# save to subject_params
subject_params["v"]=np.append(subject_params["v"],v)
subject_params["a"]=np.append(subject_params["a"],a)
subject_params["z"]=np.append(subject_params["z"],z)
subject_params["t"]=np.append(subject_params["t"],t)
subject_params["simneural"]=np.append(subject_params["simneural"],simneural)
subject_params["subID"]=np.append(subject_params["subID"],np.repeat(i,len(simneural)))
# simulate RT and choices
true_values = np.column_stack([v,np.repeat([[a,z,t]], axis=0, repeats=len(simneural))])
# Get model simulations
ddm_all = simulator.simulator(true_values, model="ddm", n_samples=1)
# Random regressor as control
rand_x = np.random.normal(size=len(simneural))
sim_data.append(
pd.DataFrame(
{
"rt": ddm_all["rts"].flatten(),
"response": ddm_all["choices"].flatten(),
"x": simneural,
"rand_x": rand_x,
"subID": i
}
)
)
#make a single dataframe of subject-wise simulated data
sim_data_concat=pd.concat(sim_data)
model_ddm_true = hssm.HSSM(
data=sim_data_concat,
prior_settings="safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + x + (1 + x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"x": {"name": "Normal", "mu": 0, "sigma": 2, "initval": 0},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
},
"x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "a",
"formula": "a ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 2, "initval": 1},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "z",
"formula": "z ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 0, "sigma": 2, "initval": 0},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "t",
"formula": "t ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 0, "sigma": 2, "initval": 0.3},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
}
],
)
#sample from the model
#infer_data_race4nba_v_true = model_race4nba_v_true.sample(step=pm.Slice(model=model_race4nba_v_true.pymc_model), sampler="mcmc", chains=4, cores=4, draws=5000, tune=10000,idata_kwargs = {'log_likelihood': True})
infer_data_ddm_true = model_ddm_true.sample(sampler="nuts_numpyro", chains=4, cores=ncores, draws=samples, tune=burnin,idata_kwargs = {'log_likelihood': True}, target_accept=TA)
#save trace
az.to_netcdf(infer_data_ddm_true,outdir+'sample_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_simple_NutsNumpyro_true.nc4')
#save trace plot
az.plot_trace(
infer_data_ddm_true,
var_names="~log_likelihood", # we exclude the log_likelihood traces here
)
plt.savefig(outdir+'posterior_diagnostic_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_simple_NutsNumpyro_true.png')
#save summary
res_sum_true=az.summary(model_ddm_true.traces)
res_sum_true.to_csv(outdir+'summary_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_simple_NutsNumpyro_true.csv')
model_ddm_null = hssm.HSSM(
data=sim_data_concat,
prior_settings="safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "a",
"formula": "a ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 2, "initval": 1},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "z",
"formula": "z ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 0, "sigma": 2, "initval": 0},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "t",
"formula": "t ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 0, "sigma": 2, "initval": 0.3},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 2
}, "initval": 0.5
}
},
"link": "identity"
}
],
)
#infer_data_race4nba_v_null = model_race4nba_v_null.sample(step=pm.Slice(model=model_race4nba_v_null.pymc_model), sampler="mcmc", chains=4, cores=4, draws=5000, tune=10000,idata_kwargs = {'log_likelihood': True})
infer_data_ddm_null = model_ddm_null.sample(sampler="nuts_numpyro", chains=4, cores=ncores, draws=samples, tune=burnin,idata_kwargs = {'log_likelihood': True}, target_accept=TA)
# az.to_netcdf(infer_data_race4nba_v_null,outdir+'sample_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_null.nc4')
az.to_netcdf(infer_data_ddm_null,outdir+'sample_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_simple_NutsNumpyro_null.nc4')
az.plot_trace(
infer_data_ddm_null,
var_names="~log_likelihood", # we exclude the log_likelihood traces here
)
# plt.savefig(outdir+'posterior_diagnostic_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_null.png')
plt.savefig(outdir+'posterior_diagnostic_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_simple_NutsNumpyro_null.png')
res_sum_null=az.summary(model_ddm_null.traces)
# res_sum_null.to_csv(outdir+'summary_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_null.csv')
res_sum_null.to_csv(outdir+'summary_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_simple_NutsNumpyro_null.csv')
**All these models resulted in the same behavior, the acceptance
probability is always 0 and all samples are divergent. I also run your
model which despite having some 7000 divergence, has non-zero acc.prob. The
posterior and R-hat all look good and the parameter recovery is good as
well. I am really confused why there is such a big difference given how
similar the models and priors are. **
-
Thanks for the quick reply! I did not realize how the proposed values scale with the sigma. I also thought z=0 corresponded to the unbiased setting. I will try these new priors.
-
There is probably something obvious that I have missed. I changed the sigma and all the intercepts to the level of your model for all parameters, but the behavior is still the same... I think that all samples having an acceptance probability of 0 is probably caused by something more drastically wrong than wide priors.
-
Update: removing everything about t in "include=[]" resulted in behavior similar to Dr. Frank's model, so I guess that parameter was the issue.
-
OK, so that's some progress, but note this means that you will not be estimating t hierarchically in this case. Indeed there are some issues re: t in some cases, especially when some of the t's are low. I confirmed that I can replicate your problem, but if I clip the lower bound of t to be 0.3 it is fixed (i.e. you can add t back into include and estimate it hierarchically and it should converge; see the sketch below). We are aware of some issues with low t's and are looking into it.
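For reference, the clip described there, applied in the data-generation loop (the same np.clip call that appears in the full test script later in this thread):

```python
t = np.clip(t, 0.3, 2)  # keep generated non-decision times away from 0
```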
-
Thanks for confirming. What is the current recommendation when modeling real data regarding the t parameter? Does the same issue exist in other models (e.g. race) or is it unique to the DDM?
-
Looking into it. Alex and Paul may weigh in later this week.
For some real datasets (like the cav_data) t might be larger than that min value, but those were patients.
For now, a hack that works (but clearly not what we want in the long run) is to just artificially add a constant RT to all values in the dataset (sketched below) - i.e. in your simulated data, if you add 0.3 s to all RTs you should be able to fit your original model and all parameters should recover, except you would then want to subtract 0.3 from the fitted t_intercept to get the true value... it's odd behavior.
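A minimal sketch of that hack, assuming the sim_data_concat dataframe from your script:

```python
# Shift every RT by a constant before fitting (work-around for low t)...
sim_data_concat["rt"] = sim_data_concat["rt"] + 0.3
# ...then correct the fitted non-decision time when interpreting results:
# t_true ~= fitted t_intercept - 0.3
```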
-
Ps. In case it wasn't obvious, the reason this hack works (and is mathematically equivalent to the original model) is that t just shifts the entire RT distribution by a constant, without affecting the shape or choices (hence why it is the non-decision time).
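In equation form (just restating the point above): writing the observed response time as non-decision time plus decision time DT,

$$\mathrm{RT} = t + \mathrm{DT} \qquad\Rightarrow\qquad \mathrm{RT} + c = (t + c) + \mathrm{DT},$$

so adding a constant c to every RT is fit equally well by the same model with non-decision time t + c, leaving the decision-time distribution and the choices untouched.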
-
Thanks!
Strategies 1 and 2 should be essentially equivalent (with just small differences in what the actual min RT seen would be), so there also isn't really a need to combine them (it will just make the min RT seen by the sampler 0.6).
But a couple of issues:
* When it did actually work but sample slowly, did it converge and recover the true parameters? A separate pathological behavior is that instead of never accepting, the sampler can get stuck in a range where it accepts but takes very tiny steps (e.g. < 1e-8, hitting the max_steps = 1023). This will make it very slow to sample and it will not have converged when it finishes. So it would be good to know if you were in that situation or if it did actually find the true posterior, albeit slowly.
* Regardless, both of these issues might relate again to the range of values simulated and to priors that lead the sampler to propose combinations of values that are not plausible, so that it just never accepts. These come up both in generation (producing some very uncommon values that might make it hard to capture when mixed with a range of other values) and in fitting (priors that consider very unlikely values). My suggestion is then to rerun the simulations with a more realistic range of values that could produce plausible data (e.g. from the experiment that you care about) and to consider priors that are also somewhat more informed by that range.
**Generation**
* You are generating data with e.g. mean_a = 2 and a_slope = 0.3, and then a standard deviation of 0.2 for both of those.
* This means that in the generative data the mean a will range from ~1.5 to 2.5 and the slope will range from ~ -0.3 to 0.9 (see the sketch after this list).
-- On the higher end this means someone can have a = 2.5 + 0.9*(simneural), and when simneural itself is on its higher range (around 2), that would lead to a trialwise a of almost 4.5. Not completely impossible, but I don't think I've ever seen an a that high for real data.
-- On the lower end a can even go negative - but you clip it at 0.3 - which is still very low.
* This might be worse for z (sometimes it will generate data close to z=0 or 1, given simneural can go between -2 and 2) and t (sometimes it can generate t close to 0).
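A quick sanity check (sketch) of the implied trialwise a under the generative settings discussed above (mean_a = 2, a_slope = 0.3, both with sd 0.2):

```python
import numpy as np

rng = np.random.default_rng(0)
n = 100_000
a_int = rng.normal(2.0, 0.2, n)      # subject intercepts: roughly 1.5 to 2.5
a_slp = rng.normal(0.3, 0.2, n)      # subject slopes: roughly -0.3 to 0.9
x = rng.normal(size=n)               # standard-normal covariate, mostly -2 to 2
a_trial = a_int + a_slp * x          # implied trialwise boundary separation
print(a_trial.min(), a_trial.max())  # tails reach implausibly low and high a values
```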
**Fitting**
The priors consider a values up to ~9 at the upper range on the group mean (intercept + x*simneural + sigmas), and then individuals can add another ~3 on top of that at the max simneural (so a trialwise a of 12! - and note it doesn't have to get that extreme for proposals to never be accepted for a given subject). At the minimum range it could even propose a *very negative* a (or just a very low one that gets clipped by the safe prior) when combining all those things, and that would never be accepted. Same for the other params.
Of course, eventually MCMC should be able to move away from unlikely values, but when you scale up the number of subjects with these random slopes and fairly wide priors, it just becomes likely that some of them on each sample will propose values that are quite unlikely.
On Sun, Jun 23, 2024 at 2:21 AM hyang336 wrote:
Just want to add some more testing results that may be helpful for pinpointing the issue. To compare the effectiveness of
1. clipping t to be >0.3 during data generation
2. adding 0.3 to rt after data generation
3. removing the random effect and prior of t in the model
I ran several versions of a more complex model where the simulated data were generated with a random slope and intercept on every parameter. Long story short, any of the 3 strategies alone doesn't work for these more complex data and resulted in drastic failure (i.e. acc.prob always 0 and all samples divergent) (see figure below).
image.png <https://github.com/lnccbrown/HSSM/assets/26169163/36a68e93-fb8e-4be7-a404-934c8cabf8a3>
Combining clipping with removing the t specification from the model seemed to work (acc.prob != 0 and no divergences), but the model samples much more slowly (2 hrs per 10000 samples, vs. 1 min per 10000 samples previously). I haven't tested all combinations of the 3 strategies and I hacked up the code pretty quickly, so there may be errors, but hopefully these will be somewhat useful. Below is the full script I use to run these tests on a Linux cluster:
#generate data using ddm model
from ssms.basic_simulators import simulator
import numpy as np
import pandas as pd
from scipy.special import softmax
from scipy.special import beta
import hssm
import bambi as bmb
import arviz as az
from matplotlib import pyplot as plt
import pymc as pm
import multiprocessing as mp
import os
import argparse
if __name__ == '__main__':
mp.freeze_support()
mp.set_start_method('spawn', force=True)
#parse arguments
parser = argparse.ArgumentParser(description='Simulate data and fit HSSM model')
parser.add_argument('--samples', type=str, help='how many samples to draw from MCMC chains',default=5000)
parser.add_argument('--burnin', type=str, help='how many samples to burn in from MCMC chains',default=5000)
parser.add_argument('--cores', type=str, help='how many CPU/GPU cores to use for sampling',default=4)
parser.add_argument('--model', type=str, help='which model to run')
parser.add_argument('--outdir', type=str, help='output directory to save results',default='/scratch/hyang336/working_dir/HDDM_HSSM/DDM_simulations/')
parser.add_argument('--TA', type=str, help='target_accept for NUTS sampler',default=0.8)
parser.add_argument('--tstrat', type=str, help='how to handle issue on the t paramter',default='clip')
args = parser.parse_args()
model=args.model
outdir=args.outdir
samples=int(args.samples)
burnin=int(args.burnin)
ncores=int(args.cores)
TA=float(args.TA)
tstrat=args.tstrat # 'clip' or 'RThack' or 'norandom' or 'clipRThack' or 'clipnorandom' or 'RThacknorandom' or 'clipRThacknorandom'
# print out the arguments for debugging
print('model:',model)
print('outdir:',outdir)
print('samples:',samples)
print('burnin:',burnin)
print('ncores:',ncores)
print('TA:',TA)
print('tstrat:',tstrat)
# make the output directory if it doesn't exist
if not os.path.exists(outdir):
os.makedirs(outdir,exist_ok=True)
#--------------------------------------We can try several generative model--------------------------------###
v_slope=0.45
a_slope=0.3
z_slope=0.1
t_slope=0.2
v_intercept=0.5
a_intercept=2
z_intercept=0.5
t_intercept=0.5
n_subjects=30 #number of subjects
n_trials=200 #number of trials per subject
param_sv=0.2 #standard deviation of the subject-level parameters
# Save trial-level parameters for each subject
subject_params={
"v": np.array([]),
"a": np.array([]),
"z": np.array([]),
"t": np.array([]),
"simneural": np.array([]),
"subID": np.array([])
}
# simulated data list
sim_data=[]
# Generate subject-level parameters
for i in range(n_subjects):
# set the seed for each subject deterministically so all models are based on the same data
np.random.seed(i)
# generate neural data, standard normal as the real data
simneural=np.random.normal(size=n_trials)
# generate v, a, z, t
v=np.random.normal(v_intercept, param_sv) + np.random.normal(v_slope, param_sv)*simneural
a=np.random.normal(a_intercept, param_sv) + np.random.normal(a_slope, param_sv)*simneural
z=np.random.normal(z_intercept, param_sv) + np.random.normal(z_slope, param_sv)*simneural
t=np.random.normal(t_intercept, param_sv) + np.random.normal(t_slope, param_sv)*simneural
# clip parameters to stay within default bounds
v = np.clip(v, -3, 3)
a = np.clip(a, 0.3, 2.5)
z = np.clip(z, 0, 1)
if tstrat=='clip' or tstrat=='clipRThack' or tstrat=='clipnorandom' or tstrat=='clipRThacknorandom':
t = np.clip(t, 0.3, 2)
else:
t = np.clip(t, 0, 2)
print('min t:',np.min(t))
print('max t:',np.max(t))
# save to subject_params
subject_params["v"]=np.append(subject_params["v"],v)
subject_params["a"]=np.append(subject_params["a"],a)
subject_params["z"]=np.append(subject_params["z"],z)
subject_params["t"]=np.append(subject_params["t"],t)
subject_params["simneural"]=np.append(subject_params["simneural"],simneural)
subject_params["subID"]=np.append(subject_params["subID"],np.repeat(i,len(simneural)))
# simulate RT and choices
true_values = np.column_stack([v,a,z,t])
# Get model simulations
ddm_all = simulator.simulator(true_values, model="ddm", n_samples=1)
# Random regressor as control
rand_x = np.random.normal(size=len(simneural))
if tstrat=='RThack' or tstrat=='clipRThack' or tstrat=='RThacknorandom' or tstrat=='clipRThacknorandom':
sim_data.append(
pd.DataFrame(
{
"rt": ddm_all["rts"].flatten() + 0.3, # hack to work around the issue on parameter t in HSSM 0.2.2
"response": ddm_all["choices"].flatten(),
"x": simneural,
"rand_x": rand_x,
"subID": i
}
)
)
else:
sim_data.append(
pd.DataFrame(
{
"rt": ddm_all["rts"].flatten(),
"response": ddm_all["choices"].flatten(),
"x": simneural,
"rand_x": rand_x,
"subID": i
}
)
)
#make a single dataframe of subject-wise simulated data
sim_data_concat=pd.concat(sim_data)
#save subject-wise parameters
param_df=pd.DataFrame(subject_params)
param_df.to_csv(outdir+'simulation_binary_022_subject_params.csv')
####################################################################################### Define models ################################################################################################
match model:
case 'true':
# True model
if tstrat=='norandom' or tstrat=='clipnorandom' or tstrat=='RThacknorandom' or tstrat=='clipRThacknorandom':
model_ddm_true = hssm.HSSM(
data=sim_data_concat,
prior_settings="safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + x + (1 + x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "a",
"formula": "a ~ 1 + x + (1 + x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 1.75, "initval": 1},
"x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.3
}
},
"link": "identity"
},
{
"name": "z",
"formula": "z ~ 1 + x + (1 + x|subID)",
"prior": {
"Intercept": {"name": "HalfNormal", "sigma": 1, "initval": .5},
"x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.05
}, "initval": 0.01
}
},
"link": "identity"
}
],
)
else:
model_ddm_true = hssm.HSSM(
data=sim_data_concat,
prior_settings="safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + x + (1 + x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "a",
"formula": "a ~ 1 + x + (1 + x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 1.75, "initval": 1},
"x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.3
}
},
"link": "identity"
},
{
"name": "z",
"formula": "z ~ 1 + x + (1 + x|subID)",
"prior": {
"Intercept": {"name": "HalfNormal", "sigma": 1, "initval": .5},
"x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.05
}, "initval": 0.01
}
},
"link": "identity"
},
{
"name": "t",
"formula": "t ~ 1 + x + (1 + x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 0.5, "sigma": 0.4, "initval": 0.3},
"x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5, "initval": 0.1
},
}
},
"link": "identity"
}
],
)
#sample from the model
#infer_data_race4nba_v_true = model_race4nba_v_true.sample(step=pm.Slice(model=model_race4nba_v_true.pymc_model), sampler="mcmc", chains=4, cores=4, draws=5000, tune=10000,idata_kwargs = {'log_likelihood': True})
infer_data_ddm_true = model_ddm_true.sample(sampler="nuts_numpyro", chains=4, cores=ncores, draws=samples, tune=burnin,idata_kwargs = {'log_likelihood': True}, target_accept=TA)
#save trace
az.to_netcdf(infer_data_ddm_true,outdir+'sample_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_true' + 't-strat_' + str(tstrat) + '.nc4')
#save trace plot
az.plot_trace(
infer_data_ddm_true,
var_names="~log_likelihood", # we exclude the log_likelihood traces here
)
plt.savefig(outdir+'posterior_diagnostic_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_true_' + 't-strat_' + str(tstrat) + '.png')
#save summary
res_sum_true=az.summary(model_ddm_true.traces)
res_sum_true.to_csv(outdir+'summary_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_true_' + 't-strat_' + str(tstrat) + '.csv')
case 'null':
# model with no relationship between v and neural data
if tstrat=='norandom' or tstrat=='clipnorandom' or tstrat=='RThacknorandom' or tstrat=='clipRThacknorandom':
model_ddm_null = hssm.HSSM(
data=sim_data_concat,
prior_settings="safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "a",
"formula": "a ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 1.75, "initval": 1},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.3
}
},
"link": "identity"
},
{
"name": "z",
"formula": "z ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "HalfNormal", "sigma": 1, "initval": .5},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.05
}, "initval": 0.01
}
},
"link": "identity"
}
],
)
else:
model_ddm_null = hssm.HSSM(
data=sim_data_concat,
prior_settings="safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "a",
"formula": "a ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 1.75, "initval": 1},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.3
}
},
"link": "identity"
},
{
"name": "z",
"formula": "z ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "HalfNormal", "sigma": 1, "initval": .5},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.05
}, "initval": 0.01
}
},
"link": "identity"
},
{
"name": "t",
"formula": "t ~ 1 + (1|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 0.5, "sigma": 0.4, "initval": 0.3},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5, "initval": 0.1
},
}
},
"link": "identity"
}
],
)
#infer_data_race4nba_v_null = model_race4nba_v_null.sample(step=pm.Slice(model=model_race4nba_v_null.pymc_model), sampler="mcmc", chains=4, cores=4, draws=5000, tune=10000,idata_kwargs = {'log_likelihood': True})
infer_data_ddm_null = model_ddm_null.sample(sampler="nuts_numpyro", chains=4, cores=ncores, draws=samples, tune=burnin,idata_kwargs = {'log_likelihood': True}, target_accept=TA)
# az.to_netcdf(infer_data_race4nba_v_null,outdir+'sample_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_null.nc4')
az.to_netcdf(infer_data_ddm_null,outdir+'sample_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_null_' + 't-strat_' + str(tstrat) + '.nc4')
az.plot_trace(
infer_data_ddm_null,
var_names="~log_likelihood", # we exclude the log_likelihood traces here
)
# plt.savefig(outdir+'posterior_diagnostic_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_null.png')
plt.savefig(outdir+'posterior_diagnostic_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_null_' + 't-strat_' + str(tstrat) + '.png')
res_sum_null=az.summary(model_ddm_null.traces)
# res_sum_null.to_csv(outdir+'summary_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_null.csv')
res_sum_null.to_csv(outdir+'summary_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_null_' + 't-strat_' + str(tstrat) + '.csv')
case 'rand':
# model with regression on random vectors (i.e. fake neural data that has the same distribution but was not involved in generating the parameters)
if tstrat=='norandom' or tstrat=='clipnorandom' or tstrat=='RThacknorandom' or tstrat=='clipRThacknorandom':
model_ddm_rand = hssm.HSSM(
data=sim_data_concat,
prior_settings="safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + rand_x + (1 + rand_x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"rand_x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"rand_x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "a",
"formula": "a ~ 1 + rand_x + (1 + rand_x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 1.75, "initval": 1},
"rand_x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"rand_x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.3
}
},
"link": "identity"
},
{
"name": "z",
"formula": "z ~ 1 + rand_x + (1 + rand_x|subID)",
"prior": {
"Intercept": {"name": "HalfNormal", "sigma": 1, "initval": .5},
"rand_x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"rand_x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.05
}, "initval": 0.01
}
},
"link": "identity"
}
],
)
else:
model_ddm_rand = hssm.HSSM(
data=sim_data_concat,
prior_settings="safe",
include=[
{
"name": "v",
"formula": "v ~ 1 + rand_x + (1 + rand_x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
"rand_x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"rand_x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.5
}
},
"link": "identity"
},
{
"name": "a",
"formula": "a ~ 1 + rand_x + (1 + rand_x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 1, "sigma": 1.75, "initval": 1},
"rand_x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"rand_x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 1
}, "initval": 0.3
}
},
"link": "identity"
},
{
"name": "z",
"formula": "z ~ 1 + rand_x + (1 + rand_x|subID)",
"prior": {
"Intercept": {"name": "HalfNormal", "sigma": 1, "initval": .5},
"rand_x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"rand_x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.05
}, "initval": 0.01
}
},
"link": "identity"
},
{
"name": "t",
"formula": "t ~ 1 + rand_x + (1 + rand_x|subID)",
"prior": {
"Intercept": {"name": "Normal", "mu": 0.5, "sigma": 0.4, "initval": 0.3},
"rand_x": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
"rand_x|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5
}, "initval": 0.5
},
"1|subID": {"name": "Normal",
"mu": 0,
"sigma": {"name": "HalfNormal",
"sigma": 0.5, "initval": 0.1
},
}
},
"link": "identity"
}
],
)
# infer_data_race4nba_v_rand = model_race4nba_v_rand.sample(step=pm.Slice(model=model_race4nba_v_rand.pymc_model), sampler="mcmc", chains=4, cores=4, draws=5000, tune=10000,idata_kwargs = {'log_likelihood': True})
infer_data_ddm_rand = model_ddm_rand.sample(sampler="nuts_numpyro", chains=4, cores=ncores, draws=samples, tune=burnin,idata_kwargs = {'log_likelihood': True}, target_accept=TA)
# az.to_netcdf(infer_data_race4nba_v_rand,outdir+'sample_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_rand.nc4')
az.to_netcdf(infer_data_ddm_rand,outdir+'sample_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_rand_' + 't-strat_' + str(tstrat) + '.nc4')
az.plot_trace(
infer_data_ddm_rand,
var_names="~log_likelihood", # we exclude the log_likelihood traces here
)
# plt.savefig(outdir+'posterior_diagnostic_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_rand.png')
plt.savefig(outdir+'posterior_diagnostic_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_rand_' + 't-strat_' + str(tstrat) + '.png')
res_sum_rand=az.summary(model_ddm_rand.traces)
# res_sum_rand.to_csv(outdir+'summary_5000_10000_trace_ParamInbound_Fixed_az_SliceSampler_rand.csv')
res_sum_rand.to_csv(outdir+'summary_' + str(burnin) + '_' + str(samples) + '_trace_ParamInbound_ddm_NutsNumpyro_rand_' + 't-strat_' + str(tstrat) + '.csv')
-
Just came back to this: it looks like the issue might be that when the true t is low (say 0.2), the prior sigma on the values across the population has to be low enough that it doesn't propose negative t's for any individual. I confirmed that things work fine without the hack or clipping using true t=0.2, but just allowing the sigma for t to come from a HalfNormal(0.03). Seems small, but that would discourage negative t's from being proposed (and those would also likely be a source of divergences when they are rejected, since there is a sudden boundary at t=0). In your case the same would need to be done for the random slope effect on t (i.e. reduce the sigma on it) so that none of the trialwise proposed t's are negative (see the sketch below).
We will look into a more robust fix and/or a set of recommendations in case one is in this low-t regime. But thanks, this has helped track down one of the sources of issues people have had! And for now the RT hack I think is fine, and it might also fix your full model with the slope effect on t if you add a yet larger value to the RTs, given there are multiple ways to bring it very low (as long as you correct for it in your interpretation of the posterior t value, i.e. subtract the corresponding amount).
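For concreteness, a minimal sketch of the tightened t specification described above (the HalfNormal(0.03) sigma is the value from this comment; the intercept prior is the one used earlier in the thread - see the follow-up below about replacing it with a Gamma):

```python
{
    "name": "t",
    "formula": "t ~ 1 + (1|subID)",
    "prior": {
        "Intercept": {"name": "Normal", "mu": 0.5, "sigma": 0.4, "initval": 0.3},
        "1|subID": {
            "name": "Normal",
            "mu": 0,
            # tight group-level sigma so individual offsets cannot push t below 0
            "sigma": {"name": "HalfNormal", "sigma": 0.03},
        },
    },
    "link": "identity",
},
```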
On Sun, Jun 23, 2024 at 1:09 PM Michael J Frank wrote:
Ah right, so the clipping solves the extreme-value problem, but then indeed
the linear model will only apply in a small range of data, which might have
funny effects on fitting: it will force the regression coefficients to
shrink a lot based on the many values that would have been out of range and
so were clipped -- and that might also lead to collinearity as the other
parameters adjust for the part that is in range.
Re: the sampler, I think it is potentially a problem that can lead to
acc.prob of 0, because the sampler doesn't propose a single value at a time;
it proposes the whole vector, and across all participants' means and
slopes it is likely for any given proposal to encounter a very unlikely
value and then be rejected. In general in MCMC this can also be
addressed with reparametrization, e.g. link functions, so that the
sampler can operate across a wide range of parameters while the
transformation squashes their effects. We do have some of those
implemented (you can try link_settings = "log_logit"), but not so
systematically yet, and they do require back-transforming the parameters, so
they are less easy to interpret -- so I think if one can get away with not
using them and just using informative priors it should be fine.
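As a minimal sketch, the flag goes into the model constructor (the dataframe name here is illustrative; estimates then live on the transformed scale and need back-transforming):

model_linked = hssm.HSSM(
    data=dataset_reg_v_hier,  # any trial-level dataframe with rt/response columns
    link_settings="log_logit",  # log links for positive parameters, logit for z
)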
On Sun, Jun 23, 2024 at 11:30 AM hyang336 wrote:
> Thanks for pointing out the issues in my simulation!
>
> The "true" model for the combined strategy (with slope terms on v, a, and
> z, no random effect on t) did get stuck. The "null" model (only random
> intercept on v, a, and z) recovered parameters somewhat okay:
> a_intercept: mean=1.759, sd=0.082
> t: mean=0.19, sd=0.007
> v_intercept: mean=0.555, sd=0.053
> z_intercept: mean=0.427, sd=0.027
>
> Since I always clip a to be in [0.3, 2.5], v in [-3, 3], and z in [0, 1]
> in the generation process, there shouldn't be any trial with a above 2.5 in
> the data. This does mean that the generated data does not conform to the
> linear model exactly since out-of-bound values are replaced. And I guess
> that could be problematic too.
>
> In terms of the MCMC sampler proposing unlikely values, if that only
> happens to some samples, it wouldn't explain why all the samples on all
> chains have acc.prob of 0, right?
>
> I will experiment with a similar set of tests with only 1 parameter
> depending on the regressor, both in generation and fitting, using parameters
> that are less likely to be at the boundaries.
>
|
Beta Was this translation helpful? Give feedback.
-
Along the same lines of avoiding negative or really small t being proposed, should the prior on the intercept be something other than normal? For now it is N(0.5, 0.4), which has a negative portion. Or is the prior on the intercept less important than the sigma on the random effects? |
Beta Was this translation helpful? Give feedback.
-
Yes, the prior on the intercept should not include 0. I use a gamma prior,
which does not contain 0 (I realize the first code I sent at the beginning of
this thread was still an old bit of code using a normal - in principle the
sampler would still reject those values and it can still work, but as the
model gets more complex and the actual values are low, this becomes
more of an issue - also likely causing some of the divergences).
This is what I am using now, the same prior we used in HDDM for the
intercept:
{"name": "Gamma", "mu": 0.4, "sigma": 0.2, "initval": 0.3}
(Although if one uses the RT hack and adds a significant value to the RTs,
like 1 s, then they would want to adjust the intercept prior accordingly,
because now we expect a t of at least 1 s; e.g. {"name": "Gamma", "alpha": 20,
"beta": 20, "initval": 1.0} is centered around 1 s, since the mean of a gamma
is alpha/beta. But for adding 0.3 the original prior is probably still fine.)
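Side by side, a sketch of the two intercept priors just described (both are standard Gamma parameterizations; values as above):

t_intercept_prior = {"name": "Gamma", "mu": 0.4, "sigma": 0.2, "initval": 0.3}
# if ~1 s was added to every RT, shift the prior so its mass sits near 1 s (mean = alpha/beta = 1):
t_intercept_prior_shifted = {"name": "Gamma", "alpha": 20, "beta": 20, "initval": 1.0}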
|
Beta Was this translation helpful? Give feedback.
-
@frankmj @hyang336 thank you so much for this discussion, it was super helpful. I was running into the same issue as @hyang336 with a real dataset. The chains would just blow through (<1 min) and would be completely flat. Fitting works much better when I remove t altogether. However, I still can't get decent convergence without the RT-shift trick (adding 0.3). Based on your discussion here, I thought that this prior should work well, but it seems that's not the case. I just wanted to confirm that there's nothing obviously wrong in this prior that I'm missing.
|
Beta Was this translation helpful? Give feedback.
-
Hi @igrahek, you can try reducing the sigma of the random intercept on t to 0.03 instead of 0.5 (and the initval to 0.01 instead of 0.1), as Dr. Frank suggested. However, I wasn't able to get the fitting to work consistently with any of the strategies on t (other than removing its random effect), on simulated data or a real dataset. But that could be due to my simulation or dataset being weird. I am working on some prior/posterior predictive checks using simulated data that are more constrained, and testing on the dataset that comes with HSSM, to separate issues of the data from issues of the model. |
Beta Was this translation helpful? Give feedback.
-
@hyang336 thank you! I was trying out different values of the random intercept on t, going to very small values of sigma, but I'm still getting chains that look very much like the ones you showed before. |
Beta Was this translation helpful? Give feedback.
-
My experience is that this can happen more generally if some of the priors
/ initvals are off, e.g. if you set the z parameter to have an initval of 0
(also for its sigma), it could lead to proposals that will always be rejected.
If you look through each param and its priors and initvals, it usually
helps resolve the issue. We should dig more deeply into this, though,
to catch such things before sampling.
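A quick hedged sketch of such a pre-sampling check, where model is an hssm.HSSM instance and pymc_model exposes the underlying PyMC model (as used earlier in this thread):

# log-probability of each variable at its initial point; -inf flags an initval/prior mismatch
print(model.pymc_model.initial_point())
print(model.pymc_model.point_logps())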
|
Beta Was this translation helpful? Give feedback.
-
Beta Was this translation helpful? Give feedback.
-
Thanks for this. Great that it all converges. Divergences are a bit of a
different issue, related to the gradient being unstable. Often some
divergences are not a big problem if you have enough samples and the chains
are clearly converging and recovering parameters from synthetic data. This
does seem like quite a few divergences, though, so it might reflect some
difficulty for the sampler - you could look at the ESS stats as well as the
r-hat to ensure there are enough effective samples.
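A small sketch of those checks, assuming infer_data_ddm is the InferenceData returned by model.sample() in the script below:

summ = az.summary(infer_data_ddm)
print(summ[["ess_bulk", "ess_tail", "r_hat"]].describe())  # effective sample sizes and r-hat
print(infer_data_ddm.sample_stats["diverging"].sum(dim="draw").values)  # divergent transitions per chain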
But also a few thoughts that might be relevant:
- While in general the generative parameters look fine, there are a few
cases that might impact this - you have a few generative a's that are very
close to 0 and very unlikely in real, reasonable data - and then when the
sampler estimates the a's as coming from a normal, it might run up against
that bound at 0, which can lead to a non-smooth gradient.
- For the prior on a, I see you have a Gamma(0.5, 1.75) -- maybe you meant to
do the opposite, which is the HDDM prior of Gamma(1.5, 0.75).
- Looking at the prior predictive, I am not sure why you have some values of z
going up to 3 and some negative values.
It could be better to use a Beta distribution for z - e.g. try Beta(5, 5) to
center it at 0.5 and constrain it between 0 and 1. (I know I might have had
that half normal in some previous post, but it really makes more sense this
way.)
My guess is that ultimately you will get the same posteriors and that you just
overcame these issues with enough burn-in, but they might still contribute
to divergences.
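For concreteness, a hedged sketch of the two suggested intercept priors in the dict format used throughout this thread (the grouping terms would stay as in the script below):

a_prior = {"Intercept": {"name": "Gamma", "mu": 1.5, "sigma": 0.75, "initval": 1.0}}  # HDDM-style prior on a
z_prior = {"Intercept": {"name": "Beta", "alpha": 5, "beta": 5, "initval": 0.5}}  # keeps z in (0, 1), centered at 0.5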
On Tue, Jul 2, 2024 at 1:35 PM hyang336 wrote:
Just as an update, I simulated some data with parameters more constrained
in the "good" ranges (v depends on a regressor, while the others have random
intercepts; 30 simulated subjects with 100 trials each):
[image: param_distribution_regressor_v_t-strat_none.png]
<https://github.com/lnccbrown/HSSM/assets/26169163/048a0cb0-bd70-4783-9325-3c0efc27da68>
These parameters generated the following RT distribution per response
category:
[image: RT_distribution_regressor_v_t-strat_none.png]
<https://github.com/lnccbrown/HSSM/assets/26169163/58065731-40a0-4181-abdd-f55f84e19a4f>
With none of the t hacks (clipping, adding a constant to RT, or removing the
random effect in the model), the "true" model (v depends on the regressor
and the other parameters have random intercepts only) fits okay with 4
chains, 5000 burn-in and 5000 samples per chain. *However, there were
11287 divergences despite the trace plots looking okay and all R-hats around
1~1.01. Should I care about the divergences?*
[image: posterior_diagnostic_5000_5000TA_0.8_trace_ParamInbound_ddm_simple_NutsNumpyro_trueregress_v_t-strat_none.png]
<https://github.com/lnccbrown/HSSM/assets/26169163/d681db0c-02df-48c1-badf-b74773214c4c>
The priors, other than perhaps some higher values on z, all looked
reasonable (figures showing v, a, z, t sampled using
HSSM.sample_prior_predictive()):
[image: prior_v_hist.png]
<https://github.com/lnccbrown/HSSM/assets/26169163/55b9dc48-e7a0-4fd0-a3c2-5a8645328335>
[image: prior_a_hist.png]
<https://github.com/lnccbrown/HSSM/assets/26169163/77f112cc-705a-4505-bf0e-5a061f72e159>
[image: prior_z_hist.png]
<https://github.com/lnccbrown/HSSM/assets/26169163/cb9e1713-c069-42d5-a074-777150e17ff5>
[image: prior_t_hist.png]
<https://github.com/lnccbrown/HSSM/assets/26169163/6f2ae046-6648-4751-8187-5860e757db79>
Below is the full script I used to generate these:
# generate data using the ddm model: depending on the --regressor argument, one of v/a/z/t
# varies by trial via a simulated neural regressor; the other parameters get random intercepts
#from ssms.basic_simulators import simulator
import numpy as np
import pandas as pd
from scipy.special import softmax
from scipy.special import beta
import hssm
import bambi as bmb
import arviz as az
from matplotlib import pyplot as plt
import pymc as pm
import multiprocessing as mp
import os
import argparse
if __name__ == '__main__':
    mp.freeze_support()
    mp.set_start_method('spawn', force=True)
    ##---------------------------------------------- parse arguments ----------------------------------------------##
    parser = argparse.ArgumentParser(description='Simulate data and fit HSSM model')
    parser.add_argument('--samples', type=str, help='how many samples to draw from MCMC chains', default=5000)
    parser.add_argument('--burnin', type=str, help='how many samples to burn in from MCMC chains', default=5000)
    parser.add_argument('--cores', type=str, help='how many CPU/GPU cores to use for sampling', default=4)
    parser.add_argument('--model', type=str, help='which model to run')
    parser.add_argument('--regressor', type=str, help='which parameter to regress on', default='v')
    parser.add_argument('--outdir', type=str, help='output directory to save results', default='/scratch/hyang336/working_dir/HDDM_HSSM/DDM_simulations/')
    parser.add_argument('--TA', type=str, help='target_accept for NUTS sampler', default=0.8)
    parser.add_argument('--run', type=str, help='whether to run the sampler or just plot data distribution and prior predictive', default='sample')
    parser.add_argument('--tstrat', type=str, help='how to handle issues on the t parameter', default='clip')
    args = parser.parse_args()
    model_type = args.model  # 'true' or 'null'; the random seed setting means the rand model was the same as the true model...
    regressor = args.regressor
    outdir = args.outdir
    samples = int(args.samples)
    burnin = int(args.burnin)
    ncores = int(args.cores)
    TA = float(args.TA)
    tstrat = args.tstrat  # 'clip' or 'RThack' or 'norandom' or 'clipnorandom' or 'RThacknorandom'
    run = args.run  # 'sample' or 'prior_predict'
    # print out the arguments for debugging
    print('model:', model_type)
    print('outdir:', outdir)
    print('samples:', samples)
    print('burnin:', burnin)
    print('ncores:', ncores)
    print('TA:', TA)
    print('regressor:', regressor)
    print('tstrat:', tstrat)
    print('run:', run)
    # make the output directory if it doesn't exist
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    #-------------------------------- Generate parameters and simulate data --------------------------------##
    # v in [-3, 3]
    v_intercept_mu = 1.25  # normal
    v_slope_mu = 0.3  # normal
    v_sigma = 0.2
    # a in [0.3, 2.5]
    a_intercept_a = 1.5  # gamma with mean at 1.5
    a_intercept_b = 1
    a_slope_mu = 0.3  # normal
    a_slope_sigma = 0.2
    # z in [0, 1]
    z_intercept_a = 10  # beta with mean at 0.5
    z_intercept_b = 10
    z_slope_mu = 0.1  # normal
    z_slope_sigma = 0.01
    # t in [0, 2]
    t_intercept_a = 60  # gamma with mean at 0.5, variance at 0.0042
    t_intercept_b = 120
    t_slope_mu = 0.3  # normal
    t_slope_sigma = 0.1
    n_subjects = 30  # number of subjects
    n_trials = 100  # number of trials per subject
    # Save trial-level parameters for each subject
    subject_params = {
        "v": np.array([]),
        "a": np.array([]),
        "z": np.array([]),
        "t": np.array([]),
        "simneural": np.array([]),
        "subID": np.array([])
    }
    # simulated data list
    sim_data = []
    # Generate subject-level parameters
    for i in range(n_subjects):
        # set the seed for each subject deterministically so all models are based on the same data
        np.random.seed(i)
        # generate neural data, standard normal like the real data
        simneural = np.random.normal(size=n_trials)
        # generate v, a, z, t based on the regressor argument
        if regressor == 'v':
            v_intercept = np.random.normal(loc=v_intercept_mu, scale=v_sigma, size=1)
            v_slope = np.random.normal(loc=v_slope_mu, scale=v_sigma, size=1)
            a_intercept = np.random.gamma(shape=a_intercept_a, scale=1/a_intercept_b, size=1)  # numpy uses the scale parameterization
            z_intercept = np.random.beta(a=z_intercept_a, b=z_intercept_b, size=1)
            t_intercept = np.random.gamma(shape=t_intercept_a, scale=1/t_intercept_b, size=1)
            v = v_intercept + v_slope*simneural
            a = np.repeat(a_intercept, n_trials)
            z = np.repeat(z_intercept, n_trials)
            t = np.repeat(t_intercept, n_trials)
        elif regressor == 'a':
            v_intercept = np.random.normal(loc=v_intercept_mu, scale=v_sigma, size=1)
            a_intercept = np.random.gamma(shape=a_intercept_a, scale=1/a_intercept_b, size=1)
            a_slope = np.random.normal(loc=a_slope_mu, scale=a_slope_sigma, size=1)
            z_intercept = np.random.beta(a=z_intercept_a, b=z_intercept_b, size=1)
            t_intercept = np.random.gamma(shape=t_intercept_a, scale=1/t_intercept_b, size=1)
            v = np.repeat(v_intercept, n_trials)
            a = a_intercept + a_slope*simneural
            z = np.repeat(z_intercept, n_trials)
            t = np.repeat(t_intercept, n_trials)
        elif regressor == 'z':
            v_intercept = np.random.normal(loc=v_intercept_mu, scale=v_sigma, size=1)
            a_intercept = np.random.gamma(shape=a_intercept_a, scale=1/a_intercept_b, size=1)
            z_intercept = np.random.beta(a=z_intercept_a, b=z_intercept_b, size=1)
            z_slope = np.random.normal(loc=z_slope_mu, scale=z_slope_sigma, size=1)
            t_intercept = np.random.gamma(shape=t_intercept_a, scale=1/t_intercept_b, size=1)
            v = np.repeat(v_intercept, n_trials)
            a = np.repeat(a_intercept, n_trials)
            z = z_intercept + z_slope*simneural
            t = np.repeat(t_intercept, n_trials)
        elif regressor == 't':
            v_intercept = np.random.normal(loc=v_intercept_mu, scale=v_sigma, size=1)
            a_intercept = np.random.gamma(shape=a_intercept_a, scale=1/a_intercept_b, size=1)
            z_intercept = np.random.beta(a=z_intercept_a, b=z_intercept_b, size=1)
            t_intercept = np.random.gamma(shape=t_intercept_a, scale=1/t_intercept_b, size=1)
            t_slope = np.random.normal(loc=t_slope_mu, scale=t_slope_sigma, size=1)
            v = np.repeat(v_intercept, n_trials)
            a = np.repeat(a_intercept, n_trials)
            z = np.repeat(z_intercept, n_trials)
            t = t_intercept + t_slope*simneural
        # no clipping, adjust generating parameters to avoid extreme values
        # v = np.clip(v, -3, 3)
        # a = np.clip(a, 0.3, 2.5)
        # z = np.clip(z, 0, 1)
        if tstrat == 'clip' or tstrat == 'clipnorandom':
            t = np.clip(t, 0.3, 2)
        # simulate RT and choices
        true_values = np.column_stack([v, a, z, t])
        # save to subject_params
        subject_params["v"] = np.append(subject_params["v"], v)
        subject_params["a"] = np.append(subject_params["a"], a)
        subject_params["z"] = np.append(subject_params["z"], z)
        subject_params["t"] = np.append(subject_params["t"], t)
        subject_params["simneural"] = np.append(subject_params["simneural"], simneural)
        subject_params["subID"] = np.append(subject_params["subID"], np.repeat(i, len(simneural)))
        # Get model simulations
        ddm_all = hssm.simulate_data(model="ddm", theta=true_values, size=1)
        if tstrat == 'RThack' or tstrat == 'RThacknorandom':
            sim_data.append(
                pd.DataFrame(
                    {
                        "rt": ddm_all["rt"] + 0.3,
                        "response": ddm_all["response"],
                        "x": simneural,
                        "subID": i
                    }
                )
            )
        else:
            sim_data.append(
                pd.DataFrame(
                    {
                        "rt": ddm_all["rt"],
                        "response": ddm_all["response"],
                        "x": simneural,
                        "subID": i
                    }
                )
            )
    # make a single dataframe of subject-wise simulated data
    sim_data_concat = pd.concat(sim_data)
    # save subject-wise parameters
    param_df = pd.DataFrame(subject_params)
    param_df.to_csv(outdir + 'simulation_binary022_simple' + '_regressor_' + str(regressor) + '_t-strat_' + str(tstrat) + '_subject_params.csv')
    # Plot the RT distributions in the simulated data for each type of response
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    ax[0].hist(sim_data_concat.query("response==-1")["rt"], bins=100, alpha=0.5, label="response -1")
    ax[0].legend()
    ax[1].hist(sim_data_concat.query("response==1")["rt"], bins=100, alpha=0.5, label="response 1")
    ax[1].legend()
    plt.tight_layout()
    fig.suptitle("RT distribution")
    plt.savefig(outdir + 'RT_distribution_' + 'regressor_' + str(regressor) + '_t-strat_' + str(tstrat) + '.png')
    # plot the distribution of the parameters and the regressor
    fig, ax = plt.subplots(1, 4, figsize=(12, 6))
    ax[0].hist(subject_params["v"], bins=100)
    ax[0].set_title("v distribution")
    ax[1].hist(subject_params["a"], bins=100)
    ax[1].set_title("a distribution")
    ax[2].hist(subject_params["z"], bins=100)
    ax[2].set_title("z distribution")
    ax[3].hist(subject_params["t"], bins=100)
    ax[3].set_title("t distribution")
    plt.tight_layout()
    plt.savefig(outdir + 'param_distribution_' + 'regressor_' + str(regressor) + '_t-strat_' + str(tstrat) + '.png')
    plt.close()
    ##-------------------- Specify formula and prior based on model and regressor --------------------##
    if model_type == 'true':
        reg_key = "x"
    elif model_type == 'null':
        reg_key = None
    if reg_key is not None:
        match regressor:
            case 'v':
                v_form = f"v ~ 1 + {reg_key} + (1 + {reg_key}|subID)"
                a_form = "a ~ 1 + (1|subID)"
                z_form = "z ~ 1 + (1|subID)"
                t_form = "t ~ 1 + (1|subID)"
                v_prior = {
                    "Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
                    f"{reg_key}": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
                    f"{reg_key}|subID": {"name": "Normal",
                                         "mu": 0,
                                         "sigma": {"name": "HalfNormal", "sigma": 0.5},
                                         "initval": 0.5},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 1},
                                "initval": 0.5}
                }
                a_prior = {
                    "Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 1.75, "initval": 1},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 1},
                                "initval": 0.3}
                }
                z_prior = {
                    "Intercept": {"name": "HalfNormal", "sigma": 1, "initval": 0.5},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 0.05},
                                "initval": 0.01}
                }
                t_prior = {
                    "Intercept": {"name": "Gamma", "mu": 0.4, "sigma": 0.2, "initval": 0.3},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 0.03, "initval": 0.01}},
                }
                link_func = "identity"
            case 'a':
                v_form = "v ~ 1 + (1|subID)"
                a_form = f"a ~ 1 + {reg_key} + (1 + {reg_key}|subID)"
                z_form = "z ~ 1 + (1|subID)"
                t_form = "t ~ 1 + (1|subID)"
                v_prior = {
                    "Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 1},
                                "initval": 0.5}
                }
                a_prior = {
                    "Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 1.75, "initval": 1},
                    f"{reg_key}": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
                    f"{reg_key}|subID": {"name": "Normal",
                                         "mu": 0,
                                         "sigma": {"name": "HalfNormal", "sigma": 0.5},
                                         "initval": 0.5},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 1},
                                "initval": 0.3}
                }
                z_prior = {
                    "Intercept": {"name": "HalfNormal", "sigma": 1, "initval": 0.5},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 0.05},
                                "initval": 0.01}
                }
                t_prior = {
                    "Intercept": {"name": "Gamma", "mu": 0.4, "sigma": 0.2, "initval": 0.3},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 0.03, "initval": 0.01}},
                }
                link_func = "identity"
            case 'z':
                v_form = "v ~ 1 + (1|subID)"
                a_form = "a ~ 1 + (1|subID)"
                z_form = f"z ~ 1 + {reg_key} + (1 + {reg_key}|subID)"
                t_form = "t ~ 1 + (1|subID)"
                v_prior = {
                    "Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 1},
                                "initval": 0.5}
                }
                a_prior = {
                    "Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 1.75, "initval": 1},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 1},
                                "initval": 0.3}
                }
                z_prior = {
                    "Intercept": {"name": "HalfNormal", "sigma": 1, "initval": 0.5},
                    f"{reg_key}": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
                    f"{reg_key}|subID": {"name": "Normal",
                                         "mu": 0,
                                         "sigma": {"name": "HalfNormal", "sigma": 0.5},
                                         "initval": 0.5},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 0.05},
                                "initval": 0.01}
                }
                t_prior = {
                    "Intercept": {"name": "Gamma", "mu": 0.4, "sigma": 0.2, "initval": 0.3},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 0.03, "initval": 0.01}},
                }
                link_func = "identity"
            case 't':
                v_form = "v ~ 1 + (1|subID)"
                a_form = "a ~ 1 + (1|subID)"
                z_form = "z ~ 1 + (1|subID)"
                t_form = f"t ~ 1 + {reg_key} + (1 + {reg_key}|subID)"
                v_prior = {
                    "Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 1},
                                "initval": 0.5}
                }
                a_prior = {
                    "Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 1.75, "initval": 1},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 1},
                                "initval": 0.3}
                }
                z_prior = {
                    "Intercept": {"name": "HalfNormal", "sigma": 1, "initval": 0.5},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 0.05},
                                "initval": 0.01}
                }
                t_prior = {
                    "Intercept": {"name": "Gamma", "mu": 0.4, "sigma": 0.2, "initval": 0.3},
                    f"{reg_key}": {"name": "Normal", "mu": 0, "sigma": 1, "initval": 0},
                    f"{reg_key}|subID": {"name": "Normal",
                                         "mu": 0,
                                         "sigma": {"name": "HalfNormal", "sigma": 0.5},
                                         "initval": 0.5},
                    "1|subID": {"name": "Normal",
                                "mu": 0,
                                "sigma": {"name": "HalfNormal", "sigma": 0.03, "initval": 0.01}},
                }
                link_func = "identity"
    else:
        v_form = "v ~ 1 + (1|subID)"
        a_form = "a ~ 1 + (1|subID)"
        z_form = "z ~ 1 + (1|subID)"
        t_form = "t ~ 1 + (1|subID)"
        v_prior = {
            "Intercept": {"name": "Normal", "mu": 1, "sigma": 2, "initval": 1},
            "1|subID": {"name": "Normal",
                        "mu": 0,
                        "sigma": {"name": "HalfNormal", "sigma": 1},
                        "initval": 0.5}
        }
        a_prior = {
            "Intercept": {"name": "Gamma", "mu": 0.5, "sigma": 1.75, "initval": 1},
            "1|subID": {"name": "Normal",
                        "mu": 0,
                        "sigma": {"name": "HalfNormal", "sigma": 1},
                        "initval": 0.3}
        }
        z_prior = {
            "Intercept": {"name": "HalfNormal", "sigma": 1, "initval": 0.5},
            "1|subID": {"name": "Normal",
                        "mu": 0,
                        "sigma": {"name": "HalfNormal", "sigma": 0.05},
                        "initval": 0.01}
        }
        t_prior = {
            "Intercept": {"name": "Gamma", "mu": 0.4, "sigma": 0.2, "initval": 0.3},
            "1|subID": {"name": "Normal",
                        "mu": 0,
                        "sigma": {"name": "HalfNormal", "sigma": 0.03, "initval": 0.01}},
        }
        link_func = "identity"
    ##-------------------------------- Define the models --------------------------------##
    if tstrat == 'norandom' or tstrat == 'clipnorandom' or tstrat == 'RThacknorandom':
        # t is left out of include, so it gets HSSM's default (non-hierarchical) treatment
        model = hssm.HSSM(
            data=sim_data_concat,
            prior_settings="safe",
            include=[
                {"name": "v", "formula": v_form, "prior": v_prior, "link": link_func},
                {"name": "a", "formula": a_form, "prior": a_prior, "link": link_func},
                {"name": "z", "formula": z_form, "prior": z_prior, "link": link_func}
            ],
        )
    else:
        model = hssm.HSSM(
            data=sim_data_concat,
            prior_settings="safe",
            include=[
                {"name": "v", "formula": v_form, "prior": v_prior, "link": link_func},
                {"name": "a", "formula": a_form, "prior": a_prior, "link": link_func},
                {"name": "z", "formula": z_form, "prior": z_prior, "link": link_func},
                {"name": "t", "formula": t_form, "prior": t_prior, "link": link_func}
            ],
        )
    #-------------------------------- Fit the model --------------------------------##
    if run == 'sample':
        # fit the model
        infer_data_ddm = model.sample(sampler="nuts_numpyro", chains=4, cores=ncores, draws=samples, tune=burnin, idata_kwargs={'log_likelihood': True}, target_accept=TA)
        az.to_netcdf(infer_data_ddm, outdir + 'sample_' + str(burnin) + '_' + str(samples) + 'TA_' + str(TA) + '_trace_ParamInbound_ddm_simple_NutsNumpyro_' + str(model_type) + 'regress_' + str(regressor) + '_t-strat_' + str(tstrat) + '.nc4')
        az.plot_trace(
            infer_data_ddm,
            var_names="~log_likelihood",  # we exclude the log_likelihood traces here
        )
        plt.savefig(outdir + 'posterior_diagnostic_' + str(burnin) + '_' + str(samples) + '_TA_' + str(TA) + '_trace_ParamInbound_ddm_simple_NutsNumpyro_' + str(model_type) + 'regress_' + str(regressor) + '_t-strat_' + str(tstrat) + '.png')
        res_sum = az.summary(model.traces)
        res_sum.to_csv(outdir + 'summary_' + str(burnin) + '_' + str(samples) + 'TA_' + str(TA) + '_trace_ParamInbound_ddm_simple_NutsNumpyro_' + str(model_type) + 'regress_' + str(regressor) + '_t-strat_' + str(tstrat) + '.csv')
    elif run == 'prior_predict':
        # HSSM prior predictive method
        prior_predict = model.sample_prior_predictive(draws=1000, omit_offsets=False)
        az.to_netcdf(prior_predict, outdir + 'prior_predict_ddm_simple_' + str(model_type) + 'regress_' + str(regressor) + '_t-strat_' + str(tstrat) + '.nc4')
|
Beta Was this translation helpful? Give feedback.
-
Beta Was this translation helpful? Give feedback.
-
Beta Was this translation helpful? Give feedback.
-
To answer your first question: yes, it must have been a typo in my first
message re: the prior on the a intercept - sorry about that. But yes, we do
specify it in terms of mu and sigma in HDDM, so I just imported that
prior over, and this should still produce only positive values as it is a
gamma distribution (the mean of a gamma is alpha/beta and sigma^2 is
alpha/beta^2). So this prior should look like:
[image: image.png]
But you are right that when you look at all the priors for a across
subjects, they could include negative values, because there are offsets that
come from a normal distribution (the half normal is just for the sigma of that
normal). In theory these negative proposed values should still just be
rejected and not be that common (depending on sampling from the low end of the
intercept and negative values of the offset), but I suppose it is possible
that this also gives some gradient issues occasionally. We will
investigate.
Re: your real dataset, it is hard to diagnose from afar. The main thing I
notice in the real data is that responses are equally distributed and the v
intercept is near 0 - possibly, if subjects are around chance accuracy, there
might be some collinearity in the parameters that can explain the
combination of choices and RTs, which then makes convergence difficult if
that collinearity is not of a simple form.
* You could try recovering a model from synthetic data where the parameters
simulated in the data are from the posterior means and sigmas you
estimate from the real data (even if it is not converging - the mean across
the chains should be reasonable).
* You could try fitting a simpler model without the v ~ x regression to see
if that works.
* You might be having some issues with the funnel in hierarchical models,
and you could try using a centered parameterization instead of non-centered
(which is what we use by default, because it can often help
<https://twiecki.io/blog/2017/02/08/bayesian-hierchical-non-centered/>). But
there is no free lunch, and sometimes centered ones work better (Alex
has a reference for that); in practice there are a couple of times I've
seen it work better (the centered version is also what we did by default in
HDDM, only later making non-centered an option).
If you try the centered version you have to specify noncentered=False in
your model description, and then instead of modeling regressions as e.g. v
~ 1 + x + (1 + x|subj_idx) you would remove the intercepts but use a
hyperprior on mu for the random effects, e.g. for v below. (You can replace
the mu's and sigma's to accord with your centered version, i.e. the
hyperprior for mu should correspond to the prior for the intercept, the
hyperprior for x|subj_idx should be the prior for the common effect of x,
etc., but the sigma's for each param are the same.)
"formula": "v ~ 0 +(1+x|subj_idx)",
"prior": {
"1|subj_idx": {"name": "Normal",
"mu": {"name": "Normal",
"mu": 1,
"sigma": 2, "initval": 0.5},
"sigma": {"name": "HalfNormal",
"sigma": 1, "initval": 0.5},
}, "x|subj_idx": {"name":
"Normal",
"mu": {"name": "Normal", "mu":
0.5, "sigma": 0.5, "initval": 0.1},
"sigma": {"name": "HalfNormal",
"sigma": .2, "initval": 0.1},
},
},
},
Then, to look at a given group-level parameter, instead of looking at the
intercept you would look at e.g. *v_x|subj_idx_mu*,
and you would do the same for the other parameters.
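A minimal sketch of where the flag goes (v_spec stands for the include entry shown just above; data is your trial-level dataframe):

# v_spec = the include entry above: formula "v ~ 0 + (1 + x|subj_idx)" plus its prior dict
model_centered = hssm.HSSM(data=data, noncentered=False, include=[v_spec])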
On Tue, Jul 2, 2024 at 11:34 PM hyang336 wrote:
With updated priors on a and z, as well as generating a further away from
0, the simulation now converges well with only ~100 divergences out of 4
chains of 5000 samples. But on a real dataset (with all RT > 0.2 s), the
sampler still struggles to converge when the random effect of t is included.
Distribution of RT in the real data:
[image: RT_distribution_median-binarized.png]
<https://github.com/lnccbrown/HSSM/assets/26169163/220655d5-298c-4adf-8151-54b0b582fde0>
Posterior of the model fit:
[image: posterior_diagnostic_10000_10000_TA_0.8_trace_median-binarized_t-strat_null_recent_v_on_PrC_neg.png]
<https://github.com/lnccbrown/HSSM/assets/26169163/42b2cd61-22bd-4d8b-bd94-c847ee440c52>
|
Beta Was this translation helpful? Give feedback.
-
Beta Was this translation helpful? Give feedback.
-
Thanks, I agree that a 0 drift rate per se is not a problem - indeed we
can recover that from synthetic data when the data are generated by the DDM
and the other parameters are independent. My point was that there are some
data regimes in which there might be collinearity, e.g. higher v might be
associated with lower a and/or lower t, and if the posterior across these
parameters is multimodal it can lead to convergence issues (for any
MCMC approach). And re: whether your real data are well captured by the DDM,
you can look at posterior predictives to see how well the model captures
your RT distributions/choice proportions (and ultimately how they vary with
neural activity, but even without that, just to see if the behavior is well
fit).
Re: random effects on *t* impacting convergence - this could again relate
to the funnel, and/or these random effects could be collinear with other
parameters. You could look at posterior pair plots to investigate these
collinearities (at least among any two parameters, at both group and
individual levels); see the sketch below. You can also plot the divergences
on those plots, which might give some insight into where they occur -- but
when there are so many, this won't be helpful.
Also, I just looked at your converged posteriors, and there is one peculiarity
where *t* estimated across the group is hitting a hard upper bound of
0.542... which is odd?
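A hedged sketch of such a pair plot (the variable names here are assumptions; check az.summary(idata) for the exact labels in your trace):

az.plot_pair(
    infer_data_ddm,
    var_names=["t_Intercept", "a_Intercept"],  # any two group-level parameters of interest
    divergences=True,  # overlay divergent transitions to see where they cluster
)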
On Wed, Jul 3, 2024 at 2:47 PM hyang336 wrote:
Thanks for these insights. I will try parameter recovery using posteriors
of the real data, and a centered parameterization. Regarding the near-0
intercept of v in my real data: it is from a task in which accuracy is not
well defined. The two boundaries represent two types of responses, and the
research question is whether certain neural correlates are involved in the
decision process (e.g., whether trials with higher neural activity are more
likely to have faster responses of one type). I know this deviates from the
typical usage of the DDM, but this seems to me just a case of stimulus-coding
the responses, and the DDM should be able to handle that.
In addition:
1. When generating simulated data with a v_intercept of 0, the model
(with random intercept on t) converged and parameter recovery was
acceptable. This suggests that the near-0 value of v is unlikely to cause
the convergence issue.
2. When fitting the same data without the random effect on t, the traces
look much better and the R-hats are <= 1.05, despite all samples being
divergent (4 chains of 10000 samples, 40000 divergences).
[image: posterior_diagnostic_10000_10000_TA_0.8_trace_median-binarized_t-strat_norandom_recent_v_on_PrC_neg.png]
<https://github.com/lnccbrown/HSSM/assets/26169163/3094328e-1d8d-4fae-b504-06841d778574>
|
Beta Was this translation helpful? Give feedback.
-
Just a short update: I have since tried a few different models with this data (e.g. linking the regressor to different parameters), but all of them have t hitting the bound at 0.542. The RTs in these data are certainly longer than that, so I am not sure what is causing it... |
Beta Was this translation helpful? Give feedback.
-
Describe the bug
When fitting simulated data, all samples are divergent, and ArviZ returns the warning "Your data appears to have a single value or no finite values". Am I missing something obvious?
HSSM version
0.2.2
To Reproduce
`
v_slope = 0.45
a_slope = 0.3
z_slope = 0.1
t_slope = 0.2
v_intercept = 0
a_intercept = 2
z_intercept = 0
t_intercept = 0.05
n_subjects = 30   # number of subjects
n_trials = 200    # number of trials per subject
param_sv = 0.2    # standard deviation of the subject-level parameters
subject_params = {
    "v": np.array([]),
    "a": np.array([]),
    "z": np.array([]),
    "t": np.array([]),
    "simneural": np.array([]),
    "subID": np.array([])
}
sim_data = []
for i in range(n_subjects):
    # (per-subject simulation code was elided in the original report)
    ...
sim_data_concat = pd.concat(sim_data)
model_ddm_null = hssm.HSSM(
    data=sim_data_concat,
    prior_settings="safe",
    include=[
        {"name": "v", "formula": "v ~ 1 + (1|subID)", "link": "identity"},
        {"name": "a", "formula": "a ~ 1 + (1|subID)", "link": "identity"},
        {"name": "z", "formula": "z ~ 1 + (1|subID)", "link": "identity"},
        {"name": "t", "formula": "t ~ 1 + (1|subID)", "link": "identity"}
    ],
)
# TA and outdir are defined elsewhere in the original script
infer_data_ddm_null = model_ddm_null.sample(sampler="nuts_numpyro", chains=4, cores=4, draws=5000, tune=5000, idata_kwargs={'log_likelihood': True}, target_accept=TA)
az.to_netcdf(infer_data_ddm_null, outdir + 'sample_' + str(5000) + '_' + str(5000) + '_trace_ParamInbound_ddm_NutsNumpyro_null.nc4')
az.plot_trace(
    infer_data_ddm_null,
    var_names="~log_likelihood",
)
plt.savefig(outdir + 'posterior_diagnostic_' + str(5000) + '_' + str(5000) + '_trace_ParamInbound_ddm_NutsNumpyro_null.png')
res_sum_null = az.summary(model_ddm_null.traces)
res_sum_null.to_csv(outdir + 'summary_' + str(5000) + '_' + str(5000) + '_trace_ParamInbound_ddm_NutsNumpyro_null.csv')
`
Beta Was this translation helpful? Give feedback.