Source code for pyathena.pandas.converter

# -*- coding: utf-8 -*-
from __future__ import annotations

import logging
from copy import deepcopy
from typing import Any, Callable, Dict, Optional, Type

from pyathena.converter import (
    Converter,
    _to_binary,
    _to_boolean,
    _to_decimal,
    _to_default,
    _to_json,
)

# Module-level logger, named after this module's import path.
_logger = logging.getLogger(__name__)  # type: ignore


# Converter callables keyed by column type name. Each callable takes the raw
# value as an optional string and returns the converted value (or None).
# DefaultPandasTypeConverter passes a deep copy of this dict as its `mappings`
# and `_to_default` as its `default`.
# NOTE(review): presumably the keys are Athena column type names and every
# type not listed here is handled by the default converter -- confirm against
# pyathena.converter.Converter.
_DEFAULT_PANDAS_CONVERTERS: Dict[str, Callable[[Optional[str]], Optional[Any]]] = {
    "boolean": _to_boolean,
    "decimal": _to_decimal,
    "varbinary": _to_binary,
    "json": _to_json,
}



[docs]
class DefaultPandasTypeConverter(Converter):
    """Optimized type converter for pandas DataFrame results.

    This converter is specifically designed for the PandasCursor and provides
    optimized type conversion that works well with pandas data types.
    It minimizes conversions for types that pandas handles efficiently
    and only converts complex types that need special handling.

    The converter focuses on:
        - Preserving numeric types for pandas optimization
        - Converting only complex types (json, binary, etc.)
        - Maintaining compatibility with pandas data type inference

    Example:
        >>> from pyathena.pandas.converter import DefaultPandasTypeConverter
        >>> converter = DefaultPandasTypeConverter()
        >>>
        >>> # Used automatically by PandasCursor
        >>> cursor = connection.cursor(PandasCursor)
        >>> # converter is applied automatically to results

    Note:
        This converter is used by default in PandasCursor.
        Most users don't need to instantiate it directly.
    """

    def __init__(self) -> None:
        """Initialise the base converter with the pandas-specific tables.

        Passes a deep copy of ``_DEFAULT_PANDAS_CONVERTERS`` as ``mappings``
        (so per-instance changes cannot leak into the module-level dict),
        ``_to_default`` as ``default`` and the ``_dtypes`` table as ``types``.
        """
        super().__init__(
            mappings=deepcopy(_DEFAULT_PANDAS_CONVERTERS),
            default=_to_default,
            types=self._dtypes,
        )

    @property
    def _dtypes(self) -> Dict[str, Type[Any]]:
        """Return the type-name to dtype table, building it on first access.

        pandas is imported lazily here, so importing this module does not
        import pandas by itself.

        Returns:
            A dict keyed by column type name. Integer types map to
            ``pd.Int64Dtype()``, floating-point types to ``float`` and
            string-like/complex types to ``str``. The same dict object is
            returned on every access after the first and is also the object
            handed to the base class as ``types``, so treat it as read-only.
        """
        # ``self.__dtypes`` is name-mangled to
        # ``self._DefaultPandasTypeConverter__dtypes``. The previous guard,
        # ``hasattr(self, "__dtypes")``, tested the *unmangled* name, which is
        # never set, so the table was rebuilt on every access. Reading the
        # attribute directly keeps the check and the assignment in sync.
        try:
            return self.__dtypes
        except AttributeError:
            pass

        import pandas as pd

        self.__dtypes = {
            "tinyint": pd.Int64Dtype(),
            "smallint": pd.Int64Dtype(),
            "integer": pd.Int64Dtype(),
            "bigint": pd.Int64Dtype(),
            "float": float,
            "real": float,
            "double": float,
            "char": str,
            "varchar": str,
            "string": str,
            "array": str,
            "map": str,
            "row": str,
        }
        return self.__dtypes

    def convert(self, type_: str, value: Optional[str]) -> Optional[Any]:
        """Do nothing and return ``None``.

        NOTE(review): presumably per-value conversion is not used for pandas
        results and the ``mappings``/``types`` tables are consumed instead --
        confirm against PandasCursor.

        Args:
            type_: Column type name (ignored).
            value: Raw value as a string, or ``None`` (ignored).

        Returns:
            Always ``None``.
        """
        return None





[docs]
class DefaultPandasUnloadTypeConverter(Converter):
    """Converter used for pandas results produced by UNLOAD queries.

    UNLOAD queries write their results straight to Parquet files in S3,
    bypassing the normal conversion process, so this converter carries
    minimal functionality: no per-type mappings, only the default converter.

    Note:
        Used automatically when PandasCursor is configured with unload=True.
        UNLOAD results are read directly as DataFrames from Parquet files.
    """

    def __init__(self) -> None:
        """Initialise the base converter with empty mappings and the default."""
        no_mappings = {}
        super().__init__(mappings=no_mappings, default=_to_default)

    def convert(self, type_: str, value: Optional[str]) -> Optional[Any]:
        """Do nothing and return ``None``.

        Args:
            type_: Column type name (ignored).
            value: Raw value as a string, or ``None`` (ignored).

        Returns:
            Always ``None``.
        """
        return None