Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1417 - implicitly modify to str dtype on check #1418

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions anndata/_io/specs/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import h5py
import numpy as np
import pandas as pd
from pandas.api.types import is_string_dtype
from scipy import sparse

import anndata as ad
Expand All @@ -19,7 +20,7 @@
from anndata._core.merge import intersect_keys
from anndata._core.sparse_dataset import CSCDataset, CSRDataset, sparse_dataset
from anndata._io.utils import H5PY_V3, check_key
from anndata._warnings import OldFormatWarning
from anndata._warnings import ImplicitModificationWarning, OldFormatWarning
from anndata.compat import (
AwkArray,
CupyArray,
Expand Down Expand Up @@ -678,14 +679,25 @@ def write_dataframe(f, key, df, _writer, dataset_kwargs=MappingProxyType({})):
group.attrs["column-order"] = col_names

if df.index.name is not None:
if df.index.name in col_names and not pd.Series(
df.index, index=df.index
).equals(df[df.index.name]):
raise ValueError(
f"DataFrame.index.name ({df.index.name!r}) is also used by a column "
"whose values are different. This is not supported. Please make sure "
"the values are the same, or use a different name."
)

if df.index.name in col_names:

index_values = pd.Series(df.index, index=df.index)

df_values = df[df.index.name]

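# Mirror anndata/_core/aligned_df.py:_gen_dataframe_df: a column that shares the
# index name but is not of string dtype is cast to str (with a warning) before
# it is compared against the index values.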
if not is_string_dtype(df_values):
warn("Transforming to str index.", ImplicitModificationWarning)
df_values = df_values.astype(str)

if not index_values.equals(df_values):
raise ValueError(
f"DataFrame.index.name ({df.index.name!r}) is also used by a column "
"whose values are different. This is not supported. Please make sure "
"the values are the same, or use a different name."
)

index_name = df.index.name
else:
index_name = "_index"
Expand Down
Loading