a
    SicE                     @  sl  d Z ddlmZ ddlZddlZddlmZ ddlmZ ddl	m
Z
mZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZmZmZm Z m!Z! dddddZ"d3dddddddddZ#G dd dZ$G dd de$Z%G d d! d!e$Z&eed" d#d4d&d'dd(d)dd*d+d,d-d.Z'eed" d#d5d/dd*ddd&d0d1d2Z(dS )6z parquet compat     )annotationsN)Any)catch_warnings)FilePath
ReadBufferStorageOptionsWriteBuffer)import_optional_dependencyAbstractMethodError)doc)	DataFrame
MultiIndex
get_option)_shared_docs)Version)	IOHandles
get_handleis_fsspec_urlis_urlstringify_pathstrBaseImpl)enginereturnc                 C  s   | dkrt d} | dkr~ttg}d}|D ]F}z| W   S  tyl } z|dt| 7 }W Y d}~q(d}~0 0 q(td| | dkrt S | dkrt S td	dS )
zreturn our implementationautozio.parquet.engine z
 - NzUnable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:pyarrowfastparquetz.engine must be one of 'pyarrow', 'fastparquet')r   PyArrowImplFastParquetImplImportErrorr   
ValueError)r   Zengine_classes
error_msgsZengine_classerr r%   M/var/www/html/django/DPS/env/lib/python3.9/site-packages/pandas/io/parquet.py
get_engine$   s(    (
r'   rbFz1FilePath | ReadBuffer[bytes] | WriteBuffer[bytes]r   r   boolzVtuple[FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], IOHandles[bytes] | None, Any])pathfsstorage_optionsmodeis_dirr   c                 C  s   t | }t|r>|du r>td}|jj|fi |p2i \}}n|rZt|rR|dkrZtdd}|s|st|trt	j
|st||d|d}d}|j}|||fS )zFile handling for PyArrow.Nfsspecr(   z8storage_options passed with buffer, or non-supported URLFis_textr,   )r   r   r	   core	url_to_fsr   r"   
isinstancer   osr*   isdirr   handle)r*   r+   r,   r-   r.   path_or_handler/   handlesr%   r%   r&   _get_path_or_handleF   s2    

	r:   c                   @  s>   e Zd ZedddddZddddZddd
ddZd	S )r   r   None)dfr   c                 C  sx   t | tstdt | jtr>tdd | jjD sRtdn| jjdvrRtdtdd | jj	D }|sttdd S )	Nz+to_parquet only supports IO with DataFramesc                 s  s   | ]}|j d v V  qdS )>   emptystringN)inferred_type).0xr%   r%   r&   	<genexpr>w   s   z.BaseImpl.validate_dataframe.<locals>.<genexpr>z
                    parquet must have string column names for all values in
                     each level of the MultiIndex
                    >   r=   r>   z%parquet must have string column namesc                 s  s    | ]}|d urt |tV  qd S N)r4   r   )r@   namer%   r%   r&   rB      s   z!Index level names must be strings)
r4   r   r"   columnsr   alllevelsr?   indexnames)r<   Zvalid_namesr%   r%   r&   validate_dataframeo   s     
zBaseImpl.validate_dataframe)r<   c                 K  s   t | d S rC   r
   )selfr<   r*   compressionkwargsr%   r%   r&   write   s    zBaseImpl.writeNr   c                 K  s   t | d S rC   r
   )rK   r*   rE   rM   r%   r%   r&   read   s    zBaseImpl.read)N)__name__
__module____qualname__staticmethodrJ   rN   rP   r%   r%   r%   r&   r   n   s   c                	   @  sH   e Zd ZddddZdddd	d
ddddddZddddddZdS )r   r;   rO   c                 C  s&   t ddd dd l}dd l}|| _d S )Nr   z(pyarrow is required for parquet support.extrar   )r	   Zpyarrow.parquet(pandas.core.arrays.arrow.extension_typesapi)rK   r   pandasr%   r%   r&   __init__   s    zPyArrowImpl.__init__snappyNr   zFilePath | WriteBuffer[bytes]
str | Nonebool | Noner   list[str] | None)r<   r*   rL   rH   r,   partition_colsr   c                 K  s  |  | d|dd i}|d ur*||d< | jjj|fi |}	t||dd |d|d ud\}
}|d< t|
tjrt	|
drt|
j
ttfr|
j
}
t|
tr|
 }
zX|d ur| jjj|	|
f||d| n| jjj|	|
fd|i| W |d ur|  n|d ur|  0 d S )	Nschemapreserve_index
filesystemwb)r,   r-   r.   rD   )rL   r_   rL   )rJ   poprX   Tablefrom_pandasr:   r4   ioBufferedWriterhasattrrD   r   bytesdecodeparquetZwrite_to_datasetwrite_tableclose)rK   r<   r*   rL   rH   r,   r_   rM   Zfrom_pandas_kwargstabler8   r9   r%   r%   r&   rN      sV    




	

zPyArrowImpl.writeFr,   r   c                 K  sz  d|d< i }|rdd l }| j | | j | | j | | j |	 | j
 | | j | | j | | j | | j | | j | | j | | j | i}|j|d< td}	|	dkrd|d< t||dd |d	d
\}
}|d< zX| jjj|
fd|i|j f i |}|	dkrH|j!ddd}|W |d ur^|"  S n|d urt|"  0 d S )NTZuse_pandas_metadatar   Ztypes_mapperzmode.data_managerarrayZsplit_blocksrb   r(   )r,   r-   rE   F)copy)#rY   rX   int8	Int8Dtypeint16
Int16Dtypeint32
Int32Dtypeint64
Int64Dtypeuint8
UInt8Dtypeuint16UInt16Dtypeuint32UInt32Dtypeuint64UInt64Dtypebool_BooleanDtyper>   StringDtypefloat32Float32Dtypefloat64Float64Dtypegetr   r:   rd   rl   
read_table	to_pandas_as_managerrn   )rK   r*   rE   use_nullable_dtypesr,   rM   Zto_pandas_kwargspdmappingmanagerr8   r9   resultr%   r%   r&   rP      s^    




 
zPyArrowImpl.read)r[   NNN)NFNrQ   rR   rS   rZ   rN   rP   r%   r%   r%   r&   r      s       8   r   c                   @  s@   e Zd ZddddZddddd	d
dZddddddZdS )r    r;   rO   c                 C  s   t ddd}|| _d S )Nr   z,fastparquet is required for parquet support.rU   )r	   rX   )rK   r   r%   r%   r&   rZ     s    zFastParquetImpl.__init__r[   Nr   r   )r<   r,   r   c                   s   |  | d|v r$|d ur$tdnd|v r6|d}|d urFd|d< t|}t|rrtd  fdd|d< nr~td	td
d. | jj||f|||d| W d    n1 s0    Y  d S )Npartition_onzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning dataZhiveZfile_schemer/   c                   s    j | dfi pi   S )Nrc   openr*   _r/   r,   r%   r&   <lambda>/  s   z'FastParquetImpl.write.<locals>.<lambda>	open_withz?storage_options passed with file object or non-fsspec file pathT)record)rL   write_indexr   )	rJ   r"   rd   r   r   r	   r   rX   rN   )rK   r<   r*   rL   rH   r_   r,   rM   r%   r   r&   rN     s8    


zFastParquetImpl.writerp   c           	        s   i }| dd}t| jjtdkr,d|d< |r8tdt|}d }t|rtd t| jjtdkr j|dfi pzi j	|d	< qȇ fd
d|d< n,t
|trtj|st|ddd}|j}z>| jj|fi |}|jf d|i|W |d ur|  S n|d ur|  0 d S )Nr   Fz0.7.1Zpandas_nullszNThe 'use_nullable_dtypes' argument is not supported for the fastparquet enginer/   z0.6.1r(   r+   c                   s    j | dfi pi   S )Nr(   r   r   r   r%   r&   r   X  s   z&FastParquetImpl.read.<locals>.<lambda>r   r0   rE   )rd   r   rX   __version__r"   r   r   r	   r   r+   r4   r   r5   r*   r6   r   r7   ZParquetFiler   rn   )	rK   r*   rE   r,   rM   Zparquet_kwargsr   r9   Zparquet_filer%   r   r&   rP   A  sD    
 
zFastParquetImpl.read)r[   NNN)NNr   r%   r%   r%   r&   r      s       3 r    r,   )r,   r   r[   r   z$FilePath | WriteBuffer[bytes] | Noner\   r]   r^   zbytes | None)r<   r*   r   rL   rH   r,   r_   r   c           
      K  sr   t |tr|g}t|}|du r(t n|}	|j| |	f||||d| |du rjt |	tjsbJ |	 S dS dS )a}  
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : str, path object, file-like object, or None, default None
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``write()`` function. If None, the result is
        returned as bytes. If a string, it will be used as Root Directory path
        when writing a partitioned dataset. The engine fastparquet does not
        accept file-like objects.

        .. versionchanged:: 1.2.0

    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    compression : {{'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}},
        default 'snappy'. Name of the compression to use. Use ``None``
        for no compression. The supported compression methods actually
        depend on which engine is used. For 'pyarrow', 'snappy', 'gzip',
        'brotli', 'lz4', 'zstd' are all supported. For 'fastparquet',
        only 'gzip' and 'snappy' are supported.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file.
        If ``None``, similar to ``True`` the dataframe's index(es)
        will be saved. However, instead of being saved as values,
        the RangeIndex will be stored as a range in the metadata so it
        doesn't require much space and is faster. Other indexes will
        be included as columns in the file output.
    partition_cols : str or list, optional, default None
        Column names by which to partition the dataset.
        Columns are partitioned in the order they are given.
        Must be None if path is not a string.
    {storage_options}

        .. versionadded:: 1.2.0

    kwargs
        Additional keyword arguments passed to the engine

    Returns
    -------
    bytes if no path argument is provided else None
    N)rL   rH   r_   r,   )r4   r   r'   rg   BytesIOrN   getvalue)
r<   r*   r   rL   rH   r,   r_   rM   implpath_or_bufr%   r%   r&   
to_parquetl  s&    <

r   zFilePath | ReadBuffer[bytes])r*   r   rE   r,   r   r   c                 K  s"   t |}|j| f|||d|S )aY  
    Load a parquet object from the file path, returning a DataFrame.

    Parameters
    ----------
    path : str, path object or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function.
        The string could be a URL. Valid URL schemes include http, ftp, s3,
        gs, and file. For file URLs, a host is expected. A local file could be:
        ``file://localhost/path/to/table.parquet``.
        A file URL can also be a path to a directory that contains multiple
        partitioned parquet files. Both pyarrow and fastparquet support
        paths to directories as well as file URLs. A directory path could be:
        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    columns : list, default=None
        If not None, only these columns will be read from the file.

    {storage_options}

        .. versionadded:: 1.3.0

    use_nullable_dtypes : bool, default False
        If True, use dtypes that use ``pd.NA`` as missing value indicator
        for the resulting DataFrame. (only applicable for the ``pyarrow``
        engine)
        As new dtypes are added that support ``pd.NA`` in the future, the
        output with this option will change to use those dtypes.
        Note: this is an experimental option, and behaviour (e.g. additional
        support dtypes) may change without notice.

        .. versionadded:: 1.2.0

    **kwargs
        Any additional kwargs are passed to the engine.

    Returns
    -------
    DataFrame
    )rE   r,   r   )r'   rP   )r*   r   rE   r,   r   rM   r   r%   r%   r&   read_parquet  s    6r   )Nr(   F)Nr   r[   NNN)r   NNF))__doc__
__future__r   rg   r5   typingr   warningsr   pandas._typingr   r   r   r   pandas.compat._optionalr	   pandas.errorsr   pandas.util._decoratorsr   rY   r   r   r   pandas.core.shared_docsr   pandas.util.versionr   pandas.io.commonr   r   r   r   r   r'   r:   r   r   r    r   r   r%   r%   r%   r&   <module>   sF   	%   ($tf       R    