"""Convert to a Pandas/cuDF series or dataframe."""
from __future__ import annotations
import contextlib
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any
from ..device.base import Device
from ..device.conversion import to_device
from ..device.library import get_pd_or_cudf
from ..device.query import guess_device
from .carray import to_array
if TYPE_CHECKING:
from collections.abc import Iterable
from torch import Tensor
from .._annotations import Array, DataFrame, Series
def _guess_dataframe_device(
    sequences: Iterable[Tensor | Array],
    /,
    device: str | Device | None = None,
) -> Device:
    """Resolve the device on which a series/dataframe should be built.

    Args:
        sequences: The inputs whose device is inspected when no explicit
            device is requested.
        device: An explicit device request. When given, it is parsed with
            ``Device.parse`` and ``sequences`` is not inspected at all.

    Returns:
        The parsed explicit device, or the device guessed from ``sequences``
        (inputs of unknown device are skipped during guessing).
    """
    if device is None:
        # No explicit request: fall back to inspecting the inputs themselves.
        return guess_device(*sequences, skip_unknown=True)
    return Device.parse(device)
[docs]
def to_series(
    sequence: object,
    /,
    index: Array | Tensor | None = None,
    device: str | Device | None = None,
    *,
    strict: bool = False,
    **kwargs: Any,
) -> Series:
    r"""Build a Pandas/cuDF series from an array, tensor or other sequence.

    Args:
        sequence: The array or tensor to convert.
        index: The optional index for the series.
        device: The device on which the series should live. If omitted, it is
            guessed from ``sequence``.
        strict: Forwarded to ``to_array``: whether to raise an error if the
            sequence is not a NumPy/CuPy array or Torch tensor.
        **kwargs: Additional keyword arguments passed to the ``Series``
            constructor.

    Returns:
        The converted series.

    Examples:
        Convert a list to a cuDF series

        >>> series = to_series([10, 20, 30], device="cuda")
        >>> print(type(series))
        <class 'cudf.core.series.Series'>

        Convert a CuPy array to a cuDF series

        >>> import cupy as cp
        >>> cupy_array = cp.array([40, 50, 60])
        >>> series = to_series(cupy_array)
        >>> print(type(series))
        <class 'cudf.core.series.Series'>
    """
    target = _guess_dataframe_device([sequence], device=device)
    values = to_array(sequence, device=target, strict=strict)

    if index is not None:
        # Best effort only: co-locate the index with the values. If the index
        # type cannot be moved (TypeError), hand it over unchanged and let
        # Pandas/cuDF decide what to do with it.
        with contextlib.suppress(TypeError):
            index = to_device(index, device=target)

    # The device type selects which dataframe library builds the series.
    return get_pd_or_cudf(target.type).Series(values, index=index, **kwargs)  # type: ignore[arg-type]
[docs]
def to_dataframe(
    data: Mapping[str, Array | Tensor] | Tensor | Array,
    /,
    index: Array | Tensor | None = None,
    device: str | Device | None = None,
    *,
    strict: bool = False,
    **kwargs: Any,
) -> DataFrame:
    r"""Build a Pandas/cuDF dataframe from arrays and/or tensors.

    Args:
        data: The data to convert. If a mapping, the keys will be used as
            column names and every value is converted individually.
        index: The optional index for the dataframe.
        device: The device on which the dataframe should live. If omitted,
            it is guessed from the data.
        strict: Forwarded to ``to_array``: whether to raise an error if the
            provided data does not consist of NumPy/CuPy arrays or Torch
            tensors.
        **kwargs: Additional keyword arguments passed to the ``DataFrame``
            constructor.

    Returns:
        The converted dataframe.

    Examples:
        Build a dataframe from mixed data types

        >>> import cupy as cp
        >>> import numpy as np
        >>> import torch
        >>> numpy_array = np.full((5,), 1, dtype=np.float32)
        >>> cupy_array = cp.full((5,), 2, dtype=cp.int8)
        >>> torch_tensor = torch.full((5,), 3, dtype=torch.float32, device="cuda:0")
        >>> dataframe = to_dataframe(
        ...     {"numpy": numpy_array, "cupy": cupy_array, "torch": torch_tensor},
        ...     device="cuda:0",
        ... )
        >>> print(dataframe)
           numpy  cupy  torch
        0    1.0     2    3.0
        1    1.0     2    3.0
        2    1.0     2    3.0
        3    1.0     2    3.0
        4    1.0     2    3.0
        >>> print(type(dataframe))
        <class 'cudf.core.dataframe.DataFrame'>
    """
    payload: Any
    if isinstance(data, Mapping):
        # Column-wise input: the device is guessed across all column values,
        # then each column is converted onto that single device.
        target = _guess_dataframe_device(data.values(), device=device)
        payload = {
            name: to_array(column, device=target, strict=strict)
            for name, column in data.items()
        }
    else:
        # Single array/tensor input: converted as one object.
        target = _guess_dataframe_device([data], device=device)
        payload = to_array(data, device=target, strict=strict)

    if index is not None:
        # Best effort only: co-locate the index with the data. If the index
        # type cannot be moved (TypeError), pass it through unchanged.
        with contextlib.suppress(TypeError):
            index = to_device(index, device=target)

    # The device type selects which dataframe library builds the result.
    return get_pd_or_cudf(target.type).DataFrame(payload, index=index, **kwargs)