Source code for abracudabra.conversion.cframe

"""Convert to a pandas/cudf series or dataframe."""

from __future__ import annotations

import contextlib
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any

from ..device.base import Device
from ..device.conversion import to_device
from ..device.library import get_pd_or_cudf
from ..device.query import guess_device
from .carray import to_array

if TYPE_CHECKING:
    from collections.abc import Iterable

    from torch import Tensor

    from ..annotations import Array, DataFrame, Series


def _guess_dataframe_device(
    sequences: Iterable[Tensor | Array],
    /,
    device: str | Device | None = None,
) -> Device:
    """Resolve the device on which a series or dataframe should be built.

    Args:
        sequences: The arrays or tensors that will make up the data.
            They are only inspected when no explicit device is given.
        device: An explicit device (or its string form). When provided,
            it takes precedence over anything inferred from the data.

    Returns:
        ``Device.parse(device)`` when a device is given; otherwise the
        result of ``guess_device`` over the sequences, called with
        ``skip_unknown=True``.

    """
    if device is None:
        # No explicit request: infer the device from the data itself.
        return guess_device(*sequences, skip_unknown=True)

    return Device.parse(device)
def to_series(
    sequence: object,
    /,
    index: Array | Tensor | None = None,
    device: str | Device | None = None,
    *,
    strict: bool = False,
    **kwargs: Any,
) -> Series:
    """Build a pandas/cudf series from an array or tensor.

    Args:
        sequence: The array or tensor holding the values of the series.
        index: An optional index for the series.
        device: The device on which the series should live. When omitted,
            it is resolved by ``_guess_dataframe_device`` from ``sequence``.
        strict: Whether to raise an error for unknown data types
            (forwarded to ``to_array``).
        kwargs: Extra keyword arguments passed to the series constructor.

    Returns:
        The series created by the library that ``get_pd_or_cudf`` returns
        for the resolved device type.

    """
    target = _guess_dataframe_device([sequence], device=device)
    values = to_array(sequence, device=target, strict=strict)

    series_index = index
    if series_index is not None:
        # Best effort: bring the index onto the same device as the values.
        # A TypeError from the move is ignored on purpose, so the index is
        # handed over unchanged and pandas/cudf decides what to do with it.
        with contextlib.suppress(TypeError):
            series_index = to_device(series_index, device=target)

    library = get_pd_or_cudf(target.type)
    return library.Series(values, index=series_index, **kwargs)  # type: ignore[arg-type]
def to_dataframe(
    data: Mapping[str, Array | Tensor] | Tensor | Array,
    /,
    index: Array | Tensor | None = None,
    device: str | Device | None = None,
    *,
    strict: bool = False,
    **kwargs: Any,
) -> DataFrame:
    """Build a pandas/cudf dataframe from arrays or tensors.

    Args:
        data: The data to convert. For a mapping, every value is converted
            with ``to_array`` and the keys are used as column names;
            anything else is converted as a whole.
        index: An optional index for the dataframe.
        device: The device on which the dataframe should live. When
            omitted, it is resolved by ``_guess_dataframe_device`` from
            the data.
        strict: Whether to raise an error for unknown data types
            (forwarded to ``to_array``).
        kwargs: Extra keyword arguments passed to the dataframe
            constructor.

    Returns:
        The dataframe created by the library that ``get_pd_or_cudf``
        returns for the resolved device type.

    """
    if isinstance(data, Mapping):
        # Column-wise input: resolve the device from all the columns, then
        # convert each column while keeping its key.
        target = _guess_dataframe_device(data.values(), device=device)
        data = {
            name: to_array(column, device=target, strict=strict)
            for name, column in data.items()
        }
    else:
        target = _guess_dataframe_device([data], device=device)
        data = to_array(data, device=target, strict=strict)

    frame_index = index
    if frame_index is not None:
        # Best effort: bring the index onto the same device as the data.
        # A TypeError from the move is ignored on purpose, so the index is
        # handed over unchanged and pandas/cudf decides what to do with it.
        with contextlib.suppress(TypeError):
            frame_index = to_device(frame_index, device=target)

    library = get_pd_or_cudf(target.type)
    return library.DataFrame(data, index=frame_index, **kwargs)