Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GLM(alpha_search=True, l1_ratio=1, family="binomial").fit(X, np.ones(n)) raises uninformative error message #874

Closed
mlondschien opened this issue Oct 31, 2024 · 0 comments · Fixed by #880

Comments

@mlondschien
Copy link
Contributor

import numpy as np
import tabmat
from glum import GeneralizedLinearRegressor

rng = np.random.default_rng(0)
X = rng.uniform(size=(1000, 1))
y = np.ones(1000)

GeneralizedLinearRegressor(alpha_search=True, l1_ratio=1, family="binomial").fit(X, y)

raises

Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:35:25) [Clang 16.0.6 ]
Type 'copyright', 'credits' or 'license' for more information
IPython 8.14.0 -- An enhanced Interactive Python. Type '?' for help.

In [1]: import numpy as np
   ...: import tabmat
   ...: from glum import GeneralizedLinearRegressor
   ...: 
   ...: rng = np.random.default_rng(0)
   ...: X = rng.uniform(size=(1000, 1))
   ...: y = np.ones(1000)
   ...: 
   ...: GeneralizedLinearRegressor(alpha_search=True, l1_ratio=1, family="binomial").fit(X, y)
/Users/mlondschien/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_distribution.py:1561: RuntimeWarning: divide by zero encountered in log
  log_odds = np.log(avg_y) - np.log(1 - avg_y)
/Users/mlondschien/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_link.py:241: UserWarning: Sigmoid function too close to 0 or 1. Clipping.
  warnings.warn("Sigmoid function too close to 0 or 1. Clipping.")
/Users/mlondschien/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_glm.py:1007: RuntimeWarning: divide by zero encountered in log
  np.log(max_alpha),
/Users/mlondschien/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_glm.py:1008: RuntimeWarning: divide by zero encountered in log
  np.log(min_alpha),
/Users/mlondschien/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/numpy/_core/function_base.py:145: RuntimeWarning: invalid value encountered in subtract
  delta = np.subtract(stop, start, dtype=type(dt))
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[1], line 9
      6 X = rng.uniform(size=(1000, 1))
      7 y = np.ones(1000)
----> 9 GeneralizedLinearRegressor(alpha_search=True, l1_ratio=1, family="binomial").fit(X, y)

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_glm.py:3206, in GeneralizedLinearRegressor.fit(self, X, y, sample_weight, offset, store_covariance_matrix, clusters, weights_sum, context)
   3201     if self.min_alpha is not None or self.min_alpha_ratio is not None:
   3202         warnings.warn(
   3203             "`alpha` is set. Ignoring `min_alpha` and `min_alpha_ratio`."
   3204         )
-> 3206 coef = self._solve_regularization_path(
   3207     X=X,
   3208     y=y,
   3209     sample_weight=sample_weight,
   3210     P2_no_alpha=P2_no_alpha,
   3211     P1_no_alpha=P1_no_alpha,
   3212     alphas=self._alphas,
   3213     coef=coef,
   3214     offset=offset,
   3215     lower_bounds=lower_bounds,
   3216     upper_bounds=upper_bounds,
   3217     A_ineq=A_ineq,
   3218     b_ineq=b_ineq,
   3219 )
   3221 # intercept_ and coef_ return the last estimated alpha
   3222 if self.fit_intercept:

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_glm.py:1178, in GeneralizedLinearRegressorBase._solve_regularization_path(self, X, y, sample_weight, alphas, P2_no_alpha, P1_no_alpha, coef, offset, lower_bounds, upper_bounds, A_ineq, b_ineq)
   1175     P1 = P1_no_alpha * alpha
   1176     P2 = P2_no_alpha * alpha
-> 1178     coef = self._solve(
   1179         X=X,
   1180         y=y,
   1181         sample_weight=sample_weight,
   1182         P2=P2,
   1183         P1=P1,
   1184         coef=coef,
   1185         offset=offset,
   1186         lower_bounds=lower_bounds,
   1187         upper_bounds=upper_bounds,
   1188         A_ineq=A_ineq,
   1189         b_ineq=b_ineq,
   1190     )
   1192     self.coef_path_[k, :] = coef
   1194 return self.coef_path_

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_glm.py:1113, in GeneralizedLinearRegressorBase._solve(self, X, y, sample_weight, P2, P1, coef, offset, lower_bounds, upper_bounds, A_ineq, b_ineq)
   1111     # 4.2 coordinate descent ##############################################
   1112     elif self._solver == "irls-cd":
-> 1113         coef, self.n_iter_, self._n_cycles, self.diagnostics_ = _irls_solver(
   1114             _cd_solver, coef, irls_data
   1115         )
   1116 # 4.3 L-BFGS ##########################################################
   1117 elif self._solver == "lbfgs":

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_solvers.py:279, in _irls_solver(inner_solver, coef, data)
    229 """
    230 Solve GLM with L1 and L2 penalty by IRLS.
    231 
   (...)
    275 https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf
    276 """
    277 state = IRLSState(coef, data)
--> 279 state.eta, state.mu, state.obj_val, coef_P2 = _update_predictions(
    280     state, data, state.coef
    281 )
    282 state.gradient_rows, state.score, state.hessian_rows = update_quadratic(
    283     state, data, coef_P2
    284 )
    285 (
    286     state.converged,
    287     state.norm_min_subgrad,
    288     state.max_min_subgrad,
    289     state.inner_tol,
    290 ) = check_convergence(state, data)

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_solvers.py:629, in _update_predictions(state, data, coef, X_dot_step, factor)
    627 if X_dot_step is None:
    628     X_dot_step = _safe_lin_pred(data.X, coef, data.offset)
--> 629 return eta_mu_objective(
    630     data.family,
    631     data.link,
    632     X_dot_step,
    633     factor,
    634     coef,
    635     state.eta,
    636     data.y,
    637     data.sample_weight,
    638     data.P1,
    639     data.P2,
    640     data.intercept_offset,
    641 )

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/glum/_solvers.py:662, in eta_mu_objective(family, link, X_dot_step, factor, coef, cur_eta, y, sample_weight, P1, P2, intercept_offset)
    658 eta, mu, deviance = family.eta_mu_deviance(
    659     link, factor, cur_eta, X_dot_step, y, sample_weight
    660 )
    661 obj_val = 0.5 * deviance
--> 662 obj_val += linalg.norm(P1 * coef[intercept_offset:], ord=1)
    663 coef_P2 = _make_coef_P2(intercept_offset, P2, coef)
    664 obj_val += 0.5 * (coef_P2 @ coef)

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/scipy/linalg/_misc.py:146, in norm(a, ord, axis, keepdims, check_finite)
    144 # Differs from numpy only in non-finite handling and the use of blas.
    145 if check_finite:
--> 146     a = np.asarray_chkfinite(a)
    147 else:
    148     a = np.asarray(a)

File ~/mambaforge/envs/icu-experiments/lib/python3.10/site-packages/numpy/lib/_function_base_impl.py:649, in asarray_chkfinite(a, dtype, order)
    647 a = asarray(a, dtype=dtype, order=order)
    648 if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
--> 649     raise ValueError(
    650         "array must not contain infs or NaNs")
    651 return a

ValueError: array must not contain infs or NaNs

Essentially, `guess_intercept` returns inf here: since every entry of `y` is 1, `avg_y == 1` and `np.log(1 - avg_y)` evaluates to `log(0) = -inf` (the first RuntimeWarning in the traceback), which then propagates into the alpha grid and finally fails scipy's finiteness check. I am not sure what the "correct" behaviour would be for a degenerate all-ones response, but perhaps glum could raise a more informative error message?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging a pull request may close this issue.

1 participant