Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GLM(alpha_search=True, l1_ratio=1, family="binomial").fit(X, np.ones(n)) raises uninformative error message #874

Closed
mlondschien opened this issue Oct 31, 2024 · 0 comments · Fixed by #880

Comments

@mlondschien
Copy link
Contributor

import numpy as np
import tabmat
from glum import GeneralizedLinearRegressor

rng = np.random.default_rng(0)
X = rng.uniform(size=(1000, 1))
y = np.ones(1000)

GeneralizedLinearRegressor(alpha_search=True, l1_ratio=1, family="binomial").fit(X, y)

raises

Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:35:25) [Clang 16.0.6 ]
Type 'copyright', 'credits' or 'license' for more information
IPython 8.14.0 -- An enhanced Interactive Python. Type '?' for help.

In [1]: import numpy as np
   ...: import tabmat
   ...: from glum import GeneralizedLinearRegressor
   ...: 
   ...: rng = np.random.default_rng(0)
   ...: X = rng.uniform(size=(1000, 1))
   ...: y = np.ones(1000)
   ...: 
   ...: GeneralizedLinearRegressor(alpha_search=True, l1_ratio=1, family="binomial").fit(X, y)
/Users/mlondschien/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_distribution.py:1561: RuntimeWarning: divide by zero encountered in log
  log_odds = np.log(avg_y) - np.log(1 - avg_y)
/Users/mlondschien/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_link.py:241: UserWarning: Sigmoid function too close to 0 or 1. Clipping.
  warnings.warn("Sigmoid function too close to 0 or 1. Clipping.")
/Users/mlondschien/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_glm.py:1007: RuntimeWarning: divide by zero encountered in log
  np.log(max_alpha),
/Users/mlondschien/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_glm.py:1008: RuntimeWarning: divide by zero encountered in log
  np.log(min_alpha),
/Users/mlondschien/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/numpy/_core/function_base.py:145: RuntimeWarning: invalid value encountered in subtract
  delta = np.subtract(stop, start, dtype=type(dt))
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[1], line 9
      6 X = rng.uniform(size=(1000, 1))
      7 y = np.ones(1000)
----> 9 GeneralizedLinearRegressor(alpha_search=True, l1_ratio=1, family="binomial").fit(X, y)

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_glm.py:3206, in GeneralizedLinearRegressor.fit(self, X, y, sample_weight, offset, store_covariance_matrix, clusters, weights_sum, context)
   3201     if self.min_alpha is not None or self.min_alpha_ratio is not None:
   3202         warnings.warn(
   3203             "`alpha` is set. Ignoring `min_alpha` and `min_alpha_ratio`."
   3204         )
-> 3206 coef = self._solve_regularization_path(
   3207     X=X,
   3208     y=y,
   3209     sample_weight=sample_weight,
   3210     P2_no_alpha=P2_no_alpha,
   3211     P1_no_alpha=P1_no_alpha,
   3212     alphas=self._alphas,
   3213     coef=coef,
   3214     offset=offset,
   3215     lower_bounds=lower_bounds,
   3216     upper_bounds=upper_bounds,
   3217     A_ineq=A_ineq,
   3218     b_ineq=b_ineq,
   3219 )
   3221 # intercept_ and coef_ return the last estimated alpha
   3222 if self.fit_intercept:

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_glm.py:1178, in GeneralizedLinearRegressorBase._solve_regularization_path(self, X, y, sample_weight, alphas, P2_no_alpha, P1_no_alpha, coef, offset, lower_bounds, upper_bounds, A_ineq, b_ineq)
   1175     P1 = P1_no_alpha * alpha
   1176     P2 = P2_no_alpha * alpha
-> 1178     coef = self._solve(
   1179         X=X,
   1180         y=y,
   1181         sample_weight=sample_weight,
   1182         P2=P2,
   1183         P1=P1,
   1184         coef=coef,
   1185         offset=offset,
   1186         lower_bounds=lower_bounds,
   1187         upper_bounds=upper_bounds,
   1188         A_ineq=A_ineq,
   1189         b_ineq=b_ineq,
   1190     )
   1192     self.coef_path_[k, :] = coef
   1194 return self.coef_path_

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_glm.py:1113, in GeneralizedLinearRegressorBase._solve(self, X, y, sample_weight, P2, P1, coef, offset, lower_bounds, upper_bounds, A_ineq, b_ineq)
   1111     # 4.2 coordinate descent ##############################################
   1112     elif self._solver == "irls-cd":
-> 1113         coef, self.n_iter_, self._n_cycles, self.diagnostics_ = _irls_solver(
   1114             _cd_solver, coef, irls_data
   1115         )
   1116 # 4.3 L-BFGS ##########################################################
   1117 elif self._solver == "lbfgs":

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_solvers.py:279, in _irls_solver(inner_solver, coef, data)
    229 """
    230 Solve GLM with L1 and L2 penalty by IRLS.
    231 
   (...)
    275 https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf
    276 """
    277 state = IRLSState(coef, data)
--> 279 state.eta, state.mu, state.obj_val, coef_P2 = _update_predictions(
    280     state, data, state.coef
    281 )
    282 state.gradient_rows, state.score, state.hessian_rows = update_quadratic(
    283     state, data, coef_P2
    284 )
    285 (
    286     state.converged,
    287     state.norm_min_subgrad,
    288     state.max_min_subgrad,
    289     state.inner_tol,
    290 ) = check_convergence(state, data)

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_solvers.py:629, in _update_predictions(state, data, coef, X_dot_step, factor)
    627 if X_dot_step is None:
    628     X_dot_step = _safe_lin_pred(data.X, coef, data.offset)
--> 629 return eta_mu_objective(
    630     data.family,
    631     data.link,
    632     X_dot_step,
    633     factor,
    634     coef,
    635     state.eta,
    636     data.y,
    637     data.sample_weight,
    638     data.P1,
    639     data.P2,
    640     data.intercept_offset,
    641 )

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_solvers.py:662, in eta_mu_objective(family, link, X_dot_step, factor, coef, cur_eta, y, sample_weight, P1, P2, intercept_offset)
    658 eta, mu, deviance = family.eta_mu_deviance(
    659     link, factor, cur_eta, X_dot_step, y, sample_weight
    660 )
    661 obj_val = 0.5 * deviance
--> 662 obj_val += linalg.norm(P1 * coef[intercept_offset:], ord=1)
    663 coef_P2 = _make_coef_P2(intercept_offset, P2, coef)
    664 obj_val += 0.5 * (coef_P2 @ coef)

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/scipy/linalg/_misc.py:146, in norm(a, ord, axis, keepdims, check_finite)
    144 # Differs from numpy only in non-finite handling and the use of blas.
    145 if check_finite:
--> 146     a = np.asarray_chkfinite(a)
    147 else:
    148     a = np.asarray(a)

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/numpy/lib/_function_base_impl.py:649, in asarray_chkfinite(a, dtype, order)
    647 a = asarray(a, dtype=dtype, order=order)
    648 if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
--> 649     raise ValueError(
    650         "array must not contain infs or NaNs")
    651 return a

ValueError: array must not contain infs or NaNs

Essentially, `guess_intercept` returns inf here: since every entry of `y` is 1, `avg_y == 1` and `np.log(1 - avg_y)` evaluates to `log(0) = -inf` (the first RuntimeWarning in the traceback), which then propagates into the alpha grid and finally fails scipy's finiteness check. I am not sure what the "correct" behaviour would be for a degenerate all-ones response, but perhaps glum could raise a more informative error message?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging a pull request may close this issue.

1 participant