# Source code for pyathena.pandas.converter
# -*- coding: utf-8 -*-
from __future__ import annotations
import logging
from copy import deepcopy
from typing import Any, Callable, Dict, Optional, Type
from pyathena.converter import (
Converter,
_to_binary,
_to_boolean,
_to_decimal,
_to_default,
_to_json,
)
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)  # type: ignore

# Column type name -> parser taken from pyathena.converter.
# Only these four types get a dedicated parser; DefaultPandasTypeConverter
# deep-copies this table, so per-instance changes never leak back into it.
_DEFAULT_PANDAS_CONVERTERS: Dict[
    str, Callable[[Optional[str]], Optional[Any]]
] = {
    "boolean": _to_boolean,
    "decimal": _to_decimal,
    "varbinary": _to_binary,
    "json": _to_json,
}
class DefaultPandasTypeConverter(Converter):
    """Optimized type converter for pandas DataFrame results.

    This converter is specifically designed for the PandasCursor and provides
    optimized type conversion that works well with pandas data types.
    It minimizes conversions for types that pandas handles efficiently
    and only converts complex types that need special handling.

    The converter focuses on:
        - Preserving numeric types for pandas optimization
        - Converting only complex types (json, binary, etc.)
        - Maintaining compatibility with pandas data type inference

    Example:
        >>> from pyathena.pandas.converter import DefaultPandasTypeConverter
        >>> converter = DefaultPandasTypeConverter()
        >>>
        >>> # Used automatically by PandasCursor
        >>> cursor = connection.cursor(PandasCursor)
        >>> # converter is applied automatically to results

    Note:
        This converter is used by default in PandasCursor.
        Most users don't need to instantiate it directly.
    """

    def __init__(self) -> None:
        """Set up the converter with the default pandas mappings and dtypes.

        The base ``Converter`` receives a deep copy of
        ``_DEFAULT_PANDAS_CONVERTERS`` as ``mappings``, ``_to_default`` as the
        fallback converter, and the dtype table from ``_dtypes`` as ``types``.
        """
        super().__init__(
            mappings=deepcopy(_DEFAULT_PANDAS_CONVERTERS),
            default=_to_default,
            types=self._dtypes,
        )

    @property
    def _dtypes(self) -> Dict[str, Type[Any]]:
        """Return the column-type -> dtype table, building it on first access.

        pandas is imported inside the property, the first time the table is
        needed, rather than at module import time.

        Returns:
            The same dict object on every access for a given instance.
        """
        # The cache uses a single leading underscore on purpose. A
        # double-underscore attribute is name-mangled when assigned inside the
        # class body, but a string passed to hasattr()/getattr() is not, so
        # such a check would never find the cached value and the table would
        # be rebuilt on every access.
        cached = getattr(self, "_dtypes_cache", None)
        if cached is None:
            import pandas as pd

            cached = {
                # Every integer width maps to pandas' Int64 extension dtype.
                "tinyint": pd.Int64Dtype(),
                "smallint": pd.Int64Dtype(),
                "integer": pd.Int64Dtype(),
                "bigint": pd.Int64Dtype(),
                "float": float,
                "real": float,
                "double": float,
                "char": str,
                "varchar": str,
                "string": str,
                # Complex types are kept as their string representation.
                "array": str,
                "map": str,
                "row": str,
            }
            self._dtypes_cache = cached
        return cached

    def convert(self, type_: str, value: Optional[str]) -> Optional[Any]:
        """Do nothing and return ``None`` for every input.

        Args:
            type_: Column type name (ignored).
            value: Raw string value (ignored).

        Returns:
            Always ``None``.
        """
        # NOTE(review): per-value conversion is intentionally absent here;
        # presumably the cursor hands ``mappings``/``types`` to pandas when
        # parsing results -- confirm against PandasCursor.
        return None
class DefaultPandasUnloadTypeConverter(Converter):
    """Minimal converter used for pandas UNLOAD queries.

    UNLOAD queries write their results straight to Parquet files in S3,
    bypassing the normal conversion process, so this converter carries
    no type-specific mappings at all.

    Note:
        Used automatically when PandasCursor is configured with unload=True.
        UNLOAD results are read directly as DataFrames from Parquet files.
    """

    def __init__(self) -> None:
        """Initialise the base converter with no mappings and the default fallback."""
        super().__init__(mappings={}, default=_to_default)

    def convert(self, type_: str, value: Optional[str]) -> Optional[Any]:
        """Ignore both arguments and return ``None``."""
        return None