Sindbad~EG File Manager

Current Path : /usr/local/lib/python3.12/site-packages/pandas/io/__pycache__/
Upload File :
Current File : //usr/local/lib/python3.12/site-packages/pandas/io/__pycache__/parquet.cpython-312.pyc

�

Mٜg]��f�dZddlmZddlZddlZddlZddlmZmZm	Z	ddl
Z
ddl
mZddlm
Z
ddlmZddlmZdd	lmZdd
lmZddlmZddlmZdd
lmZddlZddlmZmZddl m!Z!ddl"m#Z#ddl$m%Z%m&Z&m'Z'm(Z(m)Z)erddl*m+Z+m,Z,m-Z-m.Z.m/Z/d d�Z0			d!											d"d�Z1Gd�d�Z2Gd�de2�Z3Gd�de2�Z4ee!d��							d#																	d$d��Z5ee!d��dddejlejlddf																	d%d��Z7y)&z parquet compat �)�annotationsN)�
TYPE_CHECKING�Any�Literal)�catch_warnings)�using_pyarrow_string_dtype)�_get_option)�lib)�import_optional_dependency��AbstractMethodError)�doc)�find_stack_level)�check_dtype_backend)�	DataFrame�
get_option)�_shared_docs)�arrow_string_types_mapper)�	IOHandles�
get_handle�
is_fsspec_url�is_url�stringify_path)�DtypeBackend�FilePath�
ReadBuffer�StorageOptions�WriteBuffer�BaseImplc�$�|dk(rtd�}|dk(r,ttg}d}|D]}	|�cStd|����|dk(r
t�S|dk(r
t�Std	��#t$r}|dt	|�zz
}Yd}~�dd}~wwxYw)
zreturn our implementation�autozio.parquet.engine�z
 - Nz�Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:�pyarrow�fastparquetz.engine must be one of 'pyarrow', 'fastparquet')r�PyArrowImpl�FastParquetImpl�ImportError�str�
ValueError)�engine�engine_classes�
error_msgs�engine_class�errs     �</usr/local/lib/python3.12/site-packages/pandas/io/parquet.py�
get_enginer03s���
����/�0��
���%��7���
�*�L�
1�#�~�%�+��
C��l�

�
�	
�����}��	�=�	 �� � �
�E�
F�F��%�
1��g��C��0�0�
��
1�s�A+�+	B�4B
�
B�storage_optionsc�,�t|�}|��tdd��}tdd��}|�#t||j�r
|rOt	d��|�!t||j
j�rn!tdt|�j����t|�rk|�i|�5td�}td�}	|jj|�\}}|�Mtd�}|jj|fi|xsi��\}}n|rt!|�r|d	k7rtd
��d}	|sN|sLt|t"�r<t$j&j)|�st+||d|��}	d}|	j,}||	|fS#t|jf$rY��wxYw)
zFile handling for PyArrow.Nz
pyarrow.fs�ignore)�errors�fsspecz8storage_options not supported with a pyarrow FileSystem.z9filesystem must be a pyarrow or fsspec FileSystem, not a r#�rbz8storage_options passed with buffer, or non-supported URLF��is_textr1)rr�
isinstance�
FileSystem�NotImplementedError�spec�AbstractFileSystemr)�type�__name__r�from_uri�	TypeError�ArrowInvalid�core�	url_to_fsrr(�os�path�isdirr�handle)
rF�fsr1�mode�is_dir�path_or_handle�pa_fsr5�pa�handless
          r/�_get_path_or_handlerPUs���$�D�)�N�	�~�*�<��I��+�H�X�F�����B��0@�0@�!A��)�N����
�J�r�6�;�;�3Q�3Q�$R�����b��*�*�+�-��
��^�$����"�+�I�6�B�.�|�<�E�
�%*�%5�%5�%>�%>�t�%D�"��N��:�/��9�F�!6����!6�!6��"�#2�#8�b�"��B��
�&��"8�D�D�L��S�T�T��G����~�s�+����
�
�n�-�
��D�%��
���� �����7�B�&�&��7�r���/�
��
�s�7E;�;F�Fc�0�eZdZedd��Zdd�Zddd�Zy)	rc�:�t|t�std��y)Nz+to_parquet only supports IO with DataFrames)r9rr))�dfs r/�validate_dataframezBaseImpl.validate_dataframe�s���"�i�(��J�K�K�)�c��t|���Nr)�selfrSrF�compression�kwargss     r/�writezBaseImpl.write��
��!�$�'�'rUNc��t|��rWr)rXrF�columnsrZs    r/�readz
BaseImpl.read�r\rU)rSr�return�None)rSrrW)r`r)r?�
__module__�__qualname__�staticmethodrTr[r_�rUr/rr�s ���L��L�(�(rUc�z�eZdZdd�Z					d													dd�Zdddejddf							d	d�Zy)
r%c�<�tdd��ddl}ddl}||_y)Nr#z(pyarrow is required for parquet support.��extrar)r�pyarrow.parquet�(pandas.core.arrays.arrow.extension_types�api)rXr#�pandass   r/�__init__zPyArrowImpl.__init__�s!��"��G�	
�	�	8���rUNc��|j|�d|jdd�i}	|�||	d<|jjj|fi|	��}
|j
rNdt
j|j
�i}|
jj}i|�|�}
|
j|
�}
t|||d|du��\}}}t|tj�rmt|d�rat|j t"t$f�rAt|j t$�r|j j'�}n|j }	|�-|jj(j*|
|f|||d�|��n+|jj(j,|
|f||d�|��|�|j/�yy#|�|j/�wwxYw)	N�schema�preserve_index�PANDAS_ATTRS�wb)r1rJrK�name)rY�partition_cols�
filesystem)rYrv)rT�poprl�Table�from_pandas�attrs�json�dumpsrp�metadata�replace_schema_metadatarPr9�io�BufferedWriter�hasattrrtr(�bytes�decode�parquet�write_to_dataset�write_table�close)rXrSrFrY�indexr1rurvrZ�from_pandas_kwargs�table�df_metadata�existing_metadata�merged_metadatarLrOs                r/r[zPyArrowImpl.write�s���	
����#�.6��
�
�8�T�8R�-S����38��/�0�*������*�*�2�D�1C�D��
�8�8�)�4�:�:�b�h�h�+?�@�K� %��� 5� 5��B�!2�B�k�B�O��1�1�/�B�E�.A���+��!��-�/
�+����
�~�r�'8�'8�9����/��>�.�.��e��=��.�-�-�u�5�!/�!4�!4�!;�!;�!=��!/�!4�!4��	 ��)�1���� � �1�1��"��!,�#1�)���
�-���� � �,�,��"��!,�)�	�
���"��
�
��#��w�"��
�
��#�s
�AF:�:GFc���d|d<i}	|dk(rddlm}
|
�}|j|	d<n0|dk(rtj|	d<nt�r
t
�|	d<tdd�	�}|d
k(rd|	d<t|||d�
�\}
}}	|jjj|
f|||d�|��}|jdi|	��}|d
k(r|jd
d��}|jjrKd|jjvr3|jjd}t!j"|�|_||�|j'�SS#|�|j'�wwxYw)NT�use_pandas_metadata�numpy_nullabler)�_arrow_dtype_mapping�types_mapperr#zmode.data_manager)�silent�array�split_blocksr6)r1rJ)r^rv�filtersF)�copysPANDAS_ATTRSre)�pandas.io._utilr��get�pd�
ArrowDtyperrr	rPrlr��
read_table�	to_pandas�_as_managerrpr}r{�loadsrzr�)rXrFr^r��use_nullable_dtypes�
dtype_backendr1rvrZ�to_pandas_kwargsr��mapping�managerrLrO�pa_table�resultr�s                  r/r_zPyArrowImpl.read�s���)-��$�%����,�,�<�*�,�G�/6�{�{��^�,�
�i�
'�/1�}�}��^�,�
'�
)�/H�/J��^�,��1�$�?���g��/3��^�,�.A���+��	/
�+����	 �2�t�x�x�'�'�2�2����%��	�
��H�(�X�'�'�;�*:�;�F��'�!��+�+�G�%�+�@�����'�'�"�h�o�o�&>�&>�>�"*�/�/�":�":�?�"K�K�#'�:�:�k�#:�F�L���"��
�
��#��w�"��
�
��#�s
�B7E�E%�r`ra��snappyNNNN)rSrrFzFilePath | WriteBuffer[bytes]rY�
str | Noner��bool | Noner1�StorageOptions | Noneru�list[str] | Noner`ra)r��boolr��DtypeBackend | lib.NoDefaultr1r�r`r)r?rbrcrnr[r
�
no_defaultr_rerUr/r%r%�s���	�#+�!�15�+/��@ ��@ �,�@ � �	@ �
�@ �/�
@ �)�@ �
�@ �J��$)�69�n�n�15��6 �
"�6 �4�
6 �/�6 �
�6 rUr%c�N�eZdZdd�Z					d							dd�Z				d			d	d�Zy)
r&c�,�tdd��}||_y)Nr$z,fastparquet is required for parquet support.rh)rrl)rXr$s  r/rnzFastParquetImpl.__init__)s��1��!O�
����rUNc���	�|j|�d|vr
|�td��d|vr|jd�}|�d|d<|�td��t	|�}t|�rt
d��	�	�fd�|d<n
�rtd	��td
��5|jj||f|||d�|��ddd�y#1swYyxYw)
N�partition_onzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning data�hive�file_scheme�9filesystem is not implemented for the fastparquet engine.r5c�P���j|dfi�xsi��j�S)Nrs)�open)rF�_r5r1s  ��r/�<lambda>z'FastParquetImpl.write.<locals>.<lambda>Ts.���+�&�+�+��d�3�.�4�"�3��d�f�3rU�	open_withz?storage_options passed with file object or non-fsspec file pathT)�record)rY�write_indexr�)
rTr)rwr;rrrrrlr[)
rXrSrFrYr�rur1rvrZr5s
      `  @r/r[zFastParquetImpl.write1s����	
����#��V�#��(B��K��
��V�#�#�Z�Z��7�N��%�$*�F�=�!��!�%�K��
�
�d�#�����/��9�F�#�F�;����Q��
��4�
(��D�H�H�N�N���
�(�!�+�
��

�)�
(�
(�s�#B>�>Cc���i}|jdd�}|jdtj�}	d|d<|rtd��|	tjurtd��|�t	d��t|�}d}
t
|�r1td�}|j|d	fi|xsi��j|d
<nJt|t�r:tjj|�st|d	d|��}
|
j }	|j"j$|fi|��}|j&d
||d�|��|
�|
j)�SS#|
�|
j)�wwxYw)Nr�Fr��pandas_nullszNThe 'use_nullable_dtypes' argument is not supported for the fastparquet enginezHThe 'dtype_backend' argument is not supported for the fastparquet enginer�r5r6rIr7)r^r�re)rwr
r�r)r;rrrr�rIr9r(rErFrGrrHrl�ParquetFiler�r�)
rXrFr^r�r1rvrZ�parquet_kwargsr�r�rOr5�parquet_files
             r/r_zFastParquetImpl.readfsm��*,��$�j�j�)>��F���
�
�?�C�N�N�C�
�).��~�&���%��
�����.��%��
��!�%�K��
��d�#�������/��9�F�#.�6�;�;�t�T�#U�o�>S�QS�#U�#X�#X�N�4� �
��c�
"�2�7�7�=�=��+>�!��d�E�?��G��>�>�D�	 �/�4�8�8�/�/��G��G�L�)�<�)�)�U�'�7�U�f�U��"��
�
��#��w�"��
�
��#�s�1E
�
Er�r�)rSrrYz*Literal['snappy', 'gzip', 'brotli'] | Noner1r�r`ra)NNNN)r1r�r`r)r?rbrcrnr[r_rerUr/r&r&(sn���CK���15��3��3�@�	3�/�3�
�3�p��15��
0 �
/�0 �
�0 rUr&)r1r!c	��t|t�r|g}t|�}	|�tj�n|}
|	j
||
f|||||d�|��|�,t|
tj�sJ�|
j
�Sy)a�	
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : str, path object, file-like object, or None, default None
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``write()`` function. If None, the result is
        returned as bytes. If a string, it will be used as Root Directory path
        when writing a partitioned dataset. The engine fastparquet does not
        accept file-like objects.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.

        When using the ``'pyarrow'`` engine and no storage options are provided
        and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
        (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
        Use the filesystem keyword with an instantiated fsspec filesystem
        if you wish to use its implementation.
    compression : {{'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}},
        default 'snappy'. Name of the compression to use. Use ``None``
        for no compression.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file.
        If ``None``, similar to ``True`` the dataframe's index(es)
        will be saved. However, instead of being saved as values,
        the RangeIndex will be stored as a range in the metadata so it
        doesn't require much space and is faster. Other indexes will
        be included as columns in the file output.
    partition_cols : str or list, optional, default None
        Column names by which to partition the dataset.
        Columns are partitioned in the order they are given.
        Must be None if path is not a string.
    {storage_options}

    filesystem : fsspec or pyarrow filesystem, default None
        Filesystem object to use when reading the parquet file. Only implemented
        for ``engine="pyarrow"``.

        .. versionadded:: 2.1.0

    kwargs
        Additional keyword arguments passed to the engine

    Returns
    -------
    bytes if no path argument is provided else None
    N)rYr�rur1rv)r9r(r0r�BytesIOr[�getvalue)rSrFr*rYr�r1rurvrZ�impl�path_or_bufs           r/�
to_parquetr��s���B�.�#�&�(�)���f��D�AE������SW�K��D�J�J�
��	� ��%�'��	��	��|��+�r�z�z�2�2�2��#�#�%�%�rUc
���t|�}	|tjur0d}
|dur|
dz
}
tj|
t
t
���nd}t|�|	j|f||||||d�|��S)a�
    Load a parquet object from the file path, returning a DataFrame.

    Parameters
    ----------
    path : str, path object or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function.
        The string could be a URL. Valid URL schemes include http, ftp, s3,
        gs, and file. For file URLs, a host is expected. A local file could be:
        ``file://localhost/path/to/table.parquet``.
        A file URL can also be a path to a directory that contains multiple
        partitioned parquet files. Both pyarrow and fastparquet support
        paths to directories as well as file URLs. A directory path could be:
        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.

        When using the ``'pyarrow'`` engine and no storage options are provided
        and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
        (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
        Use the filesystem keyword with an instantiated fsspec filesystem
        if you wish to use its implementation.
    columns : list, default=None
        If not None, only these columns will be read from the file.
    {storage_options}

        .. versionadded:: 1.3.0

    use_nullable_dtypes : bool, default False
        If True, use dtypes that use ``pd.NA`` as missing value indicator
        for the resulting DataFrame. (only applicable for the ``pyarrow``
        engine)
        As new dtypes are added that support ``pd.NA`` in the future, the
        output with this option will change to use those dtypes.
        Note: this is an experimental option, and behaviour (e.g. additional
        support dtypes) may change without notice.

        .. deprecated:: 2.0

    dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    filesystem : fsspec or pyarrow filesystem, default None
        Filesystem object to use when reading the parquet file. Only implemented
        for ``engine="pyarrow"``.

        .. versionadded:: 2.1.0

    filters : List[Tuple] or List[List[Tuple]], default None
        To filter out data.
        Filter syntax: [[(column, op, val), ...],...]
        where op is [==, =, >, >=, <, <=, !=, in, not in]
        The innermost tuples are transposed into a set of filters applied
        through an `AND` operation.
        The outer list combines these sets of filters through an `OR`
        operation.
        A single list of tuples can also be used, meaning that no `OR`
        operation between set of filters is to be conducted.

        Using this argument will NOT result in row-wise filtering of the final
        partitions unless ``engine="pyarrow"`` is also specified.  For
        other engines, filtering is only performed at the partition level, that is,
        to prevent the loading of some row-groups and/or files.

        .. versionadded:: 2.1.0

    **kwargs
        Any additional kwargs are passed to the engine.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.to_parquet : Create a parquet object that serializes a DataFrame.

    Examples
    --------
    >>> original_df = pd.DataFrame(
    ...     {{"foo": range(5), "bar": range(5, 10)}}
    ...    )
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> df_parquet_bytes = original_df.to_parquet()
    >>> from io import BytesIO
    >>> restored_df = pd.read_parquet(BytesIO(df_parquet_bytes))
    >>> restored_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> restored_df.equals(original_df)
    True
    >>> restored_bar = pd.read_parquet(BytesIO(df_parquet_bytes), columns=["bar"])
    >>> restored_bar
        bar
    0    5
    1    6
    2    7
    3    8
    4    9
    >>> restored_bar.equals(original_df[['bar']])
    True

    The function uses `kwargs` that are passed directly to the engine.
    In the following example, we use the `filters` argument of the pyarrow
    engine to filter the rows of the DataFrame.

    Since `pyarrow` is the default engine, we can omit the `engine` argument.
    Note that the `filters` argument is implemented by the `pyarrow` engine,
    which can benefit from multithreading and also potentially be more
    economical in terms of memory.

    >>> sel = [("foo", ">", 2)]
    >>> restored_part = pd.read_parquet(BytesIO(df_parquet_bytes), filters=sel)
    >>> restored_part
        foo  bar
    0    3    8
    1    4    9
    zYThe argument 'use_nullable_dtypes' is deprecated and will be removed in a future version.TzFUse dtype_backend='numpy_nullable' instead of use_nullable_dtype=True.)�
stacklevelF)r^r�r1r�r�rv)	r0r
r��warnings�warn�
FutureWarningrrr_)rFr*r^r1r�r�rvr�rZr��msgs           r/�read_parquetr��s���r�f��D��#�.�.�0�
#�	��$�&��X�
�C�	�
�
�c�=�5E�5G�H�#���
�&��4�9�9��	���'�/�#��	��	�	rU)r*r(r`r)Nr6F)rFz1FilePath | ReadBuffer[bytes] | WriteBuffer[bytes]rIrr1r�rJr(rKr�r`zVtuple[FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], IOHandles[bytes] | None, Any])Nr!r�NNNN)rSrrFz$FilePath | WriteBuffer[bytes] | Noner*r(rYr�r�r�r1r�rur�rvrr`zbytes | None)rFzFilePath | ReadBuffer[bytes]r*r(r^r�r1r�r�zbool | lib.NoDefaultr�r�rvrr�z&list[tuple] | list[list[tuple]] | Noner`r)8�__doc__�
__future__rrr{rE�typingrrrr�r�pandas._configr�pandas._config.configr	�pandas._libsr
�pandas.compat._optionalr�
pandas.errorsr
�pandas.util._decoratorsr�pandas.util._exceptionsr�pandas.util._validatorsrrmr�rr�pandas.core.shared_docsrr�r�pandas.io.commonrrrrr�pandas._typingrrrrrr0rPrr%r&r�r�r�rerUr/�<module>r�s���"�	��	���
�#�5�-��>�-�'�4�7���1�5������G�J.2���<'�
;�<'��<'�+�<'��	<'�

�<'��
<'�~
(�
(�D �(�D �Nn �h�n �b�\�"3�4�5�26��&��-1�'+��U��U�
.�U�
�U��	U�
�U�+�
U�%�U��U��U�6�U�p�\�"3�4�5�� $�-1�03���25�.�.��6:�q�
&�q��q��q�+�	q�
.�q�0�
q��q�4�q��q�6�qrU

Sindbad File Manager Version 1.0, Coded By Sindbad EG ~ The Terrorists