Source code for abracudabra.conversion.cframe

"""Convert to a Pandas/cuDF series or dataframe."""

from __future__ import annotations

import contextlib
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any

from ..device.base import Device
from ..device.conversion import to_device
from ..device.library import get_pd_or_cudf
from ..device.query import guess_device
from .carray import to_array

if TYPE_CHECKING:
    from collections.abc import Iterable

    from torch import Tensor

    from .._annotations import Array, DataFrame, Series


def _guess_dataframe_device(
    sequences: Iterable[Tensor | Array],
    /,
    device: str | Device | None = None,
) -> Device:
    """Resolve the device on which a series/dataframe should be built.

    Args:
        sequences: The arrays or tensors that will back the series/dataframe.
            They are only inspected when no explicit ``device`` is given.
        device: An explicit device (or device string). When provided, it takes
            precedence over whatever could be inferred from ``sequences``.

    Returns:
        The parsed explicit device, or the device guessed from ``sequences``.

    """
    if device is None:
        # No explicit request: infer the device from the data itself,
        # asking ``guess_device`` to skip inputs whose device is unknown.
        return guess_device(*sequences, skip_unknown=True)

    return Device.parse(device)



[docs]
def to_series(
    sequence: object,
    /,
    index: Array | Tensor | None = None,
    device: str | Device | None = None,
    *,
    strict: bool = False,
    **kwargs: Any,
) -> Series:
    r"""Build a Pandas/cuDF series from an array, a tensor or a sequence.

    Args:
        sequence: The array or tensor to convert.
        index: The optional index for the series. It is moved to the target
            device when possible; otherwise it is passed through unchanged.
        device: The device to use for the series. If not provided, it is
            guessed from ``sequence``, so the data stays where it already is.
        strict: Whether to raise an error if the sequence is not
            a NumPy/CuPy array or Torch tensor.
        **kwargs: Additional keyword arguments forwarded to the series
            constructor.

    Returns:
        The converted series.

    Examples:
        Convert a list to a cuDF series

        >>> series = to_series([10, 20, 30], device="cuda")
        >>> print(type(series))
        <class 'cudf.core.series.Series'>

        Convert a CuPy array to a cuDF series

        >>> import cupy as cp
        >>> cupy_array = cp.array([40, 50, 60])
        >>> series = to_series(cupy_array)
        >>> print(type(series))
        <class 'cudf.core.series.Series'>

    """
    target = _guess_dataframe_device([sequence], device=device)
    values = to_array(sequence, device=target, strict=strict)

    if index is not None:
        # Best effort: co-locate the index with the values. If the index type
        # cannot be moved (TypeError), hand it over unchanged and let
        # Pandas/cuDF deal with it.
        with contextlib.suppress(TypeError):
            index = to_device(index, device=target)

    frame_library = get_pd_or_cudf(target.type)
    return frame_library.Series(values, index=index, **kwargs)  # type: ignore[arg-type]




[docs]
def to_dataframe(
    data: Mapping[str, Array | Tensor] | Tensor | Array,
    /,
    index: Array | Tensor | None = None,
    device: str | Device | None = None,
    *,
    strict: bool = False,
    **kwargs: Any,
) -> DataFrame:
    r"""Build a Pandas/cuDF dataframe from arrays and/or tensors.

    Args:
        data: The data to convert. If a mapping, the keys will be used as column
            names and every value is converted individually; otherwise the
            object is converted as a whole.
        index: The optional index for the dataframe. It is moved to the target
            device when possible; otherwise it is passed through unchanged.
        device: The device to use for the dataframe. If not provided,
            it is guessed from the data.
        strict: Whether to raise an error if the provided data does not consist of
            NumPy/CuPy arrays or Torch tensors.
        **kwargs: Additional keyword arguments forwarded to the dataframe
            constructor.

    Returns:
        The converted dataframe.

    Examples:
        Build a dataframe from mixed data types

        >>> import cupy as cp
        >>> import numpy as np
        >>> import torch

        >>> numpy_array = np.full((5,), 1, dtype=np.float32)
        >>> cupy_array = cp.full((5,), 2, dtype=cp.int8)
        >>> torch_tensor = torch.full((5,), 3, dtype=torch.float32, device="cuda:0")
        >>> dataframe = to_dataframe(
        ...     {"numpy": numpy_array, "cupy": cupy_array, "torch": torch_tensor},
        ...     device="cuda:0",
        ... )
        >>> print(dataframe)
           numpy  cupy  torch
        0    1.0     2    3.0
        1    1.0     2    3.0
        2    1.0     2    3.0
        3    1.0     2    3.0
        4    1.0     2    3.0
        >>> print(type(dataframe))
        <class 'cudf.core.dataframe.DataFrame'>

    """
    # Collect the objects whose device should be inspected: every column of a
    # mapping, or the single array/tensor itself.
    if isinstance(data, Mapping):
        sources = data.values()
    else:
        sources = [data]
    target = _guess_dataframe_device(sources, device=device)

    # Convert the payload so that everything lives on the target device.
    if isinstance(data, Mapping):
        payload = {
            name: to_array(column, device=target, strict=strict)
            for name, column in data.items()
        }
    else:
        payload = to_array(data, device=target, strict=strict)

    if index is not None:
        # Best effort: co-locate the index with the data. If the index type
        # cannot be moved (TypeError), hand it over unchanged and let
        # Pandas/cuDF deal with it.
        with contextlib.suppress(TypeError):
            index = to_device(index, device=target)

    frame_library = get_pd_or_cudf(target.type)
    return frame_library.DataFrame(payload, index=index, **kwargs)