a
    j=ic                     @   s  d Z ddlZddlmZ ddlZddlZddlmZmZ ddl	Z
ddlZddlmZ ddlmZ ddlmZmZmZmZ ddlm  mZ ddlZddlmZ ddlmZ dd	l m!Z!m"Z"m#Z#m$Z$m%Z% zddl&Z&d
Z'W n e(y   dZ'Y n0 zTe 4 edde)d edde*d ddl+Z+W d   n1 s20    Y  d
Z,W n e(yZ   dZ,Y n0 ej-dZ.ej/ej0dej-j1e, peddkdddej0dej-j1e' dddgddd Z2ej/dd Z3ej/dd Z4ej/d d! Z5ej/d"d# Z6ej/d$d% Z7ej/ej8ej9j:ej8ej9j;ej8ej9j<ej=d&d'ej=d(d'ej=d)d'ej=d*d'gdd+d, Z>dKd.d/Z?d0d1 Z@d2d3 ZAd4d5 ZBd6d7 ZCd8d9 ZDd:d; ZEd<d= ZFd>d? ZGd@dA ZHG dBdC dCZIG dDdE dEeIZJej-dFG dGdH dHeIZKG dIdJ dJeIZLdS )Lz test parquet compat     N)BytesIO)catch_warningsfilterwarnings)
get_option)is_platform_windows)pa_version_under2p0pa_version_under5p0pa_version_under6p0pa_version_under8p0)Version)FastParquetImplPyArrowImpl
get_engineread_parquet
to_parquetTFignorez	`np.bool`)categoryz.*Int64Index.*z4ignore:RangeIndex.* is deprecated:DeprecationWarningfastparquetmode.data_managerarrayz4fastparquet is not installed or ArrayManager is usedreason)Zmarkspyarrowpyarrow is not installed)paramsc                 C   s   | j S Nparamrequest r    m/home/droni/.local/share/virtualenvs/DPS-5Je3_V2c/lib/python3.9/site-packages/pandas/tests/io/test_parquet.pyengineB   s    r"   c                   C   s   t std dS )Nr   r   )_HAVE_PYARROWpytestskipr    r    r    r!   paW   s    
r&   c                   C   s*   t std ntddkr&td dS )Nzfastparquet is not installedr   r   z.ArrayManager is not supported with fastparquetr   )_HAVE_FASTPARQUETr$   r%   r   r    r    r    r!   fp^   s
    
r(   c                   C   s   t g dddS )N         fooAB)pd	DataFramer    r    r    r!   	df_compatg   s    r3   c               
   C   sB   t tdttddtjddddg dt jd	d
dd} | S )Nabcr*            @      @float64dtypeTFT20130101r,   periods)abdef)r1   r2   listrangenparange
date_range)dfr    r    r!   df_cross_compatl   s    rJ   c                   C   s   t tddtjdgg dg dg dttddtd	d
dtjdddddtjdgg dt jdd	dt 	dt j
t 	dgdS )Nr4   r?   cr?   NrK   )   foo   bars   bazr-   barbazr*   r5   r,      u1r6   r7   r8   r9          @      @r;   r<   r=   Z20130103)stringZstring_with_nanZstring_with_nonebytesunicodeintZuintfloatZfloat_with_nanbooldatetimeZdatetime_with_nat)r1   r2   rD   rF   nanrE   rG   astyperH   	TimestampZNaTr    r    r    r!   df_full~   s$    

r`   z2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215c                 C   s   | j S r   r   r   r    r    r!   timezone_aware_date_list   s    ra   r+   c
                    s   p
ddipi du r |r4|d< |d<  fdd}
du rt  |
|	 W d   q1 s|0    Y  n|
|	 dS )a  Verify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    compressionNr"   c              	      sr   t | D ]d}jfi  tdd  tfi }W d    n1 sN0    Y  tj| d qd S )NTrecord)check_names
check_likecheck_dtype)rE   r   r   r   tmassert_frame_equal)repeat_actualrg   rf   re   rI   expectedpathread_kwargswrite_kwargsr    r!   compare   s    .z!check_round_trip.<locals>.compare)rh   ensure_clean)rI   r"   ro   rq   rp   rn   re   rf   rg   rj   rr   r    rm   r!   check_round_trip   s    "
(rt   c                 C   s|   t rLddlm} |j| dd}t|jjt|ks6J |jjt|ksxJ n,ddlm	} |j	| dd}|j
jj|ksxJ dS )zCheck partitions of a parquet file are as expected.

    Parameters
    ----------
    path: str
        Path of the dataset.
    expected: iterable of str
        Expected partition names.
    r   NF)Zvalidate_schemaZhive)partitioning)r   pyarrow.parquetparquetZParquetDatasetlenZ
partitionsZpartition_namessetZpyarrow.datasetdatasetru   schemanames)ro   rn   pqrz   Zdsr    r    r!   check_partition_names   s    
r~   c                 C   sB   d}t jt|d t| dd W d    n1 s40    Y  d S )Nz.engine must be one of 'pyarrow', 'fastparquet'matchr-   rP   )r$   raises
ValueErrorrt   )r3   msgr    r    r!   test_invalid_engine   s    r   c                 C   s8   t dd t|  W d    n1 s*0    Y  d S )Nio.parquet.enginer   r1   option_contextrt   )r3   r&   r    r    r!   test_options_py  s    r   c                 C   s8   t dd t|  W d    n1 s*0    Y  d S )Nr   r   r   )r3   r(   r    r    r!   test_options_fp
  s    r   c                 C   s8   t dd t|  W d    n1 s*0    Y  d S )Nr   autor   )r3   r(   r&   r    r    r!   test_options_auto  s    r   c                 C   sV  t tdtsJ t tdts$J tddF t tdtsDJ t tdtsVJ t tdtshJ W d    n1 s|0    Y  tddF t tdtsJ t tdtsJ t tdtsJ W d    n1 s0    Y  tddL t tdts
J t tdtsJ t tdts2J W d    n1 sH0    Y  d S )Nr   r   r   r   )
isinstancer   r   r   r1   r   )r(   r&   r    r    r!   test_options_get_engine  s    00r   c                  C   s  ddl m}  | d}| d}ts(dnttjt|k }tsBdnttjt|k }to\| }tof| }|s~|s~|rd| d}t	j
t|d td	 W d    q1 s0    Y  n:d
}t	j
t|d td	 W d    n1 s0    Y  |rBd| d}t	j
t|d td	 W d    n1 s60    Y  n<d}t	j
t|d td	 W d    n1 st0    Y  d S )Nr   )VERSIONSr   r   FzPandas requires version .z. or newer of .pyarrow.r   r   z%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)Zpandas.compat._optionalr   getr#   r   r   __version__r'   r   r$   r   ImportErrorr   )r   Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpr   r    r    r!   "test_get_engine_auto_error_message,  s8    



(&*r   c                 C   s~   | }t  ^}|j||d d t||d}t || t||ddgd}t ||ddg  W d    n1 sp0    Y  d S )Nr"   rb   r"   r?   rA   r"   columns)rh   rs   r   r   ri   rJ   r&   r(   rI   ro   resultr    r    r!   test_cross_engine_pa_fpW  s    
r   c              	   C   s   | }t  }|j||d d tddN t||d}t || t||ddgd}t ||ddg  W d    n1 s|0    Y  W d    n1 s0    Y  d S )Nr   Trc   r   r?   rA   r   )rh   rs   r   r   r   ri   r   r    r    r!   test_cross_engine_fp_pae  s    
r   c                   @   s:   e Zd Zdd Zdd Zejjejddddd	 Z	d
S )Basec              	   C   sj   t  N}tj||d  t|||d d W d    n1 s>0    Y  W d    n1 s\0    Y  d S )Nr   rb   )rh   rs   r$   r   r   )selfrI   r"   excerr_msgro   r    r    r!   check_error_on_writet  s    
zBase.check_error_on_writec              	   C   sf   t  J}t |  t|||d d W d    n1 s:0    Y  W d    n1 sX0    Y  d S )Nr   )rh   rs   external_error_raisedr   )r   rI   r"   r   ro   r    r    r!   check_external_error_on_writez  s    
z"Base.check_external_error_on_writedhttps://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/parquet/simple.parquetT)urlZcheck_before_testc                 C   s.   |dkrt | d}t|}t|| d S )Nr   r   )r$   importorskipr   rh   ri   )r   r3   r"   r   rI   r    r    r!   test_parquet_read_from_url  s    	
zBase.test_parquet_read_from_urlN)
__name__
__module____qualname__r   r   r$   marknetworkrh   r   r    r    r    r!   r   s  s   r   c                   @   s   e Zd Zdd Zdd Zdd Zejdg dd	d
 Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zejd!g d"d#d$ Zd%S )&	TestBasicc                 C   sF   t g dddt dtg dfD ]}d}| ||t| q(d S )Nr)   r*   r-   r<   z+to_parquet only supports IO with DataFrames)r1   Seriesr_   rF   r   r   r   )r   r"   objr   r    r    r!   
test_error  s    zTestBasic.test_errorc                 C   s6   t tdttddd}ddg|_t|| d S )Nr4   r*   r5   rV   rY   r-   rP   )r1   r2   rD   rE   r   rt   )r   r"   rI   r    r    r!   test_columns_dtypes  s    
zTestBasic.test_columns_dtypesc                 C   s   t tdttddd}d}ddg|_| ||t| ddg|_| ||t| td	ddddtd	ddddg|_| ||t| d S )
Nr4   r*   r5   r   %parquet must have string column namesr   rM   rN   i  )r1   r2   rD   rE   r   r   r   r\   )r   r"   rI   r   r    r    r!   test_columns_dtypes_invalid  s    

z%TestBasic.test_columns_dtypes_invalidrb   )Ngzipsnappybrotlic                 C   sN   |dkrt d n|dkr&t d tdg di}t||d|id d S )Nr   r   r/   r)   rb   rq   )r$   r   r1   r2   rt   )r   r"   rb   rI   r    r    r!   test_compression  s    
zTestBasic.test_compressionc                 C   sJ   t tdttddd}t dtdi}t|||ddgid d S )Nr4   r*   r5   r   rV   r   rn   rp   )r1   r2   rD   rE   rt   )r   r"   rI   rn   r    r    r!   test_read_columns  s
    zTestBasic.test_read_columnsc                 C   s   |dk}t dg di}t|| g dt jdddtdg d	g}|D ]2}||_t|t jrn|jd |_t|||d
 qJg d|_d|j_	t|| d S )Nr   r/   r)   )r+   r,   r5   r<   r,   r=   r4   )r*   r,   r5   )re   )r   r*   r+   r-   )
r1   r2   rt   rH   rD   indexr   ZDatetimeIndex
_with_freqname)r   r"   re   rI   indexesr   r    r    r!   test_write_index  s     

zTestBasic.test_write_indexc                 C   s:   |}t dg di}t jg d}||_t|| d S )Nr/   r)   )r?   r*   )r?   r+   )r@   r*   )r1   r2   
MultiIndexfrom_tuplesr   rt   )r   r&   r"   rI   r   r    r    r!   test_write_multiindex  s
    zTestBasic.test_write_multiindexc                 C   s   |}t jdddd}t jtjdt| dtdd}t jj	d	d
g|gddgd}|j
d d}||fD ]4}||_t|| t||dddgi|ddg d qhd S )Nz01-Jan-2018z01-Dec-2018ZMS)freqr+   r,   ABCr   Level1Level2leveldate)r|   r   r/   r0   rp   rn   )r1   rH   r2   rF   randomrandnrx   rD   r   Zfrom_productcopyr   rt   )r   r&   r"   datesrI   Zindex1index2r   r    r    r!   test_multiindex_with_columns  s    $
z&TestBasic.test_multiindex_with_columnsc                 C   s   t g dg dd}d dd}|jdd}t||||d t jg dg ddg d	d
}t||||d g dg dg}t jttddd tdD d|d
}|jdd}t||||d d S )Nr)   )qrs)r?   r@   F)rb   r   T)droprq   rn   )ZzyxZwvuZtsr)r   rP   rP   rQ   rQ   r-   r-   quxr   onetwor   r   r   r   r   r      c                 S   s   g | ]
}| qS r    r    ).0ir    r    r!   
<listcomp>      z7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r   r   )r1   r2   Zreset_indexrt   rD   rE   )r   r"   rI   rq   rn   arraysr    r    r!   test_write_ignoring_index  s     
 z#TestBasic.test_write_ignoring_indexc                 C   s@   t jg d}t jtjdd|d}d}| ||t| d S )Nr   r5   r,   r   Y\s*parquet must have string column names for all values in\s*each level of the MultiIndex)	r1   r   r   r2   rF   r   r   r   r   )r   r"   Z
mi_columnsrI   r   r    r    r!   test_write_column_multiindex  s
    z&TestBasic.test_write_column_multiindexc                 C   sP   |}g dg dg}t jtjdd|d}ddg|j_d}| ||t| d S )Nr   )r*   r+   r*   r+   r*   r+   r*   r+   r   r   r   r   r   )	r1   r2   rF   r   r   r   r|   r   r   r   r&   r"   r   rI   r   r    r    r!   &test_write_column_multiindex_nonstring)  s    z0TestBasic.test_write_column_multiindex_nonstringc                 C   sF   |}g dg dg}t jtjdd|d}ddg|j_t|| d S )Nr   r   r   r   Z	ColLevel1Z	ColLevel2)r1   r2   rF   r   r   r   r|   rt   r   r&   r"   r   rI   r    r    r!   #test_write_column_multiindex_string;  s    z-TestBasic.test_write_column_multiindex_stringc                 C   s:   |}g d}t jtjdd|d}d|j_t|| d S )N)rP   rQ   r-   r   r   r5   r   Z	StringCol)r1   r2   rF   r   r   r   r   rt   r   r    r    r!   test_write_column_index_stringJ  s
    z(TestBasic.test_write_column_index_stringc                 C   sD   |}g d}t jtjdd|d}d|j_d}| ||t| d S )Nr*   r+   r,   r5   r   r5   r   ZNonStringColr   )	r1   r2   rF   r   r   r   r   r   r   r   r    r    r!   !test_write_column_index_nonstringV  s    z+TestBasic.test_write_column_index_nonstringc           
      C   s  dd l m} |dkr.tjjdd}|j| tt	g ddt	g ddt	g dt	g d	t	g d
dt	g ddt	g ddd}t
 6}||| t||d}t||dd}W d    n1 s0    Y  |d jtdksJ ttj	g dddtj	g dddtj	g dddtj	g d	ddtj	g d
ddtj	g dddtj	g dddd}	|dkr|jddd}|	jddd}	t
||	 d S )Nr   r   z.Fastparquet nullable dtype support is disabledr   r*   r+   r,   Nint64Zuint8)r?   r@   rK   N)TFTNr   )      ?rT   rU   Nfloat32r8   )r?   r@   rK   rA   rB   rC   gr   Tr"   use_nullable_dtypesr?   Int64r9   UInt8rV   booleanZFloat32Float64rK   r*   )Zaxis)rv   rw   r$   r   xfailnode
add_markerr   tabler   rh   rs   Zwrite_tabler   r:   rF   r1   r2   r   ri   )
r   r"   r   r}   r   r   ro   Zresult1Zresult2rn   r    r    r!   test_use_nullable_dtypesb  sF    
,
z"TestBasic.test_use_nullable_dtypesr:   )	r   r   r   objectzdatetime64[ns, UTC]rZ   z	period[D]r   rV   c                 C   sT   t dt jg |di}d }|dkr<t dt jg ddi}t||ddi|d d S )Nvaluer9   rZ   r   r   Tr   )r1   r2   r   rt   )r   r&   r:   rI   rn   r    r    r!   test_read_empty_array  s    zTestBasic.test_read_empty_arrayN)r   r   r   r   r   r   r$   r   parametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r    r    r    r!   r     s*   
		 
1r   z8ignore:CategoricalBlock is deprecated:DeprecationWarningc                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jje dde
jdeejgdd Zdd Ze
jjdd Ze
jjdd Ze
jjede
jddgg gdd Zedd d! Zedd"d# Zd$d% Zd&d' Ze
jdeejgd(d) Zd*d+ Zd,d- Z edd.d/ Z!ejdd0d1d2d3 Z"ejdd4d1d5d6 Z#d7d8 Z$d9d: Z%ejdd0d1d;d< Z&d=d> Z'd?S )@TestParquetPyArrowc                 C   s@   |}t jdddd}|d }||d< g d|d< t|| d S )Nr<   r,   Europe/Brusselsr>   tzdatetime_tz)TNTZbool_with_none)r1   rH   r   rt   )r   r&   r`   rI   dtir    r    r!   
test_basic  s    
zTestParquetPyArrow.test_basicc                 C   s<   |}t jdddd|d< t|||ddg dddgid	 d S )
Nr<   r,   r  r  r  rV   rY   r   r   )r1   rH   rt   )r   r&   r`   rI   r    r    r!   test_basic_subset_columns  s    

z,TestParquetPyArrow.test_basic_subset_columnsc                 C   s:   |j |d}t|tsJ t|}t|}t|| d S )Nr   )r   r   rW   r   r   rh   ri   )r   r&   r`   Z	buf_bytesZ
buf_streamresr    r    r!   *test_to_bytes_without_path_or_buf_provided  s
    z=TestParquetPyArrow.test_to_bytes_without_path_or_buf_providedc                 C   s8   t jtdddtdd }| ||td d S )N   r5   r,   aaar   zDuplicate column names found	r1   r2   rF   rG   ZreshaperD   r   r   r   r   r&   rI   r    r    r!   test_duplicate_columns  s    $z)TestParquetPyArrow.test_duplicate_columnsc                 C   s:   t dt jdddi}tr,| ||t n
t|| d S )Nr?   1 dayr,   r=   )r1   r2   timedelta_ranger
   r   NotImplementedErrorrt   r  r    r    r!   test_timedelta  s    z!TestParquetPyArrow.test_timedeltac                 C   s&   t dg di}| ||tj d S )Nr?   r?   r*   rT   )r1   r2   r   r   ArrowExceptionr  r    r    r!   test_unsupported  s    z#TestParquetPyArrow.test_unsupportedc                 C   s6   t jddt jd}tj|dgd}| ||tj d S )Nr+   
   r9   fp16datar   )rF   rG   float16r1   r2   r   r   r  )r   r&   r  rI   r    r    r!   test_unsupported_float16  s    z+TestParquetPyArrow.test_unsupported_float16zqPyArrow does not cleanup of partial files dumps when unsupported dtypes are passed to_parquet function in windowsr   	path_typec              	   C   s   t jddt jd}tj|dgd}t b}||}ttj	 |j
||d W d    n1 sd0    Y  tj|r~J W d    n1 s0    Y  d S )Nr+   r  r9   r  r  )ro   r"   )rF   rG   r  r1   r2   rh   rs   r   r   r  r   osro   isfile)r   r&   r  r  rI   path_strro   r    r    r!    test_unsupported_float16_cleanup  s    
,z3TestParquetPyArrow.test_unsupported_float16_cleanupc                 C   sd   t  }t td|d< t jg dt g dd|d< t jg dg dd	d
|d< t|| d S )NZabcdefr?   )rP   r-   r-   rP   NrP   rO   r9   r@   )r?   r@   rK   r?   rK   r@   )r@   rK   rA   T)
categoriesZorderedrK   )r1   r2   CategoricalrD   ZCategoricalDtypert   r  r    r    r!   test_categorical  s    

z#TestParquetPyArrow.test_categoricalc                 C   s8   t d}|jf i |}d|i}t||d||d d S )Ns3fs
filesystemzpandas-test/pyarrow.parquetro   rp   rq   )r$   r   ZS3FileSystemrt   )r   r3   s3_resourcer&   s3sor$  Zs3kwr    r    r!   test_s3_roundtrip_explicit_fs  s    
z0TestParquetPyArrow.test_s3_roundtrip_explicit_fsc                 C   s   d|i}t ||d||d d S )Nstorage_optionsz s3://pandas-test/pyarrow.parquetr&  rt   )r   r3   r'  r&   r(  r    r    r!   test_s3_roundtrip+  s    z$TestParquetPyArrow.test_s3_roundtripr$  partition_colr/   c              
   C   sR   |  }|r*trdnd}|| |||< t|||dd|i|d |dddd d S )	Nint32r   zs3://pandas-test/parquet_dirr+  )partition_colsrb   r+  Tr*   )rn   ro   rp   rq   rf   rj   )r   r   r^   rt   )r   r3   r'  r&   r.  r(  Zexpected_dfZpartition_col_typer    r    r!   test_s3_roundtrip_for_dir7  s&    z,TestParquetPyArrow.test_s3_roundtrip_for_dirr   c                 C   s(   t  }|| t|}t|| d S r   )r   r   r   rh   ri   )r   r3   bufferZdf_from_bufr    r    r!   test_read_file_like_obj_supportb  s    
z2TestParquetPyArrow.test_read_file_like_obj_supportc                 C   s   | dd | dd tjtdd td W d    n1 sD0    Y  tjtdd |d W d    n1 s|0    Y  d S )NHOMEZTestingUserUSERPROFILEz.*TestingUser.*r   z~/file.parquet)Zsetenvr$   r   OSErrorr   r   )r   r3   Zmonkeypatchr    r    r!   test_expand_useri  s    &z#TestParquetPyArrow.test_expand_userc                 C   sf   ddg}|}t  >}|j||d d t|| t|j|jksDJ W d    n1 sX0    Y  d S )Nr[   rY   r0  rb   rh   ensure_clean_dirr   r~   r   shape)r   r&   r`   r0  rI   ro   r    r    r!   test_partition_cols_supportedr  s    

z0TestParquetPyArrow.test_partition_cols_supportedc                 C   sh   d}|g}|}t  >}|j||d d t|| t|j|jksFJ W d    n1 sZ0    Y  d S )Nr[   r8  r9  )r   r&   r`   r0  partition_cols_listrI   ro   r    r    r!   test_partition_cols_string{  s    

z-TestParquetPyArrow.test_partition_cols_stringc           	      C   sd   d}|g}|}t  :}||}|j||d t|j|jksBJ W d    n1 sV0    Y  d S )Nr0   )r0  )rh   r:  r   r   r;  )	r   r&   r3   r  r0  r=  rI   r  ro   r    r    r!   test_partition_cols_pathlib  s    
z.TestParquetPyArrow.test_partition_cols_pathlibc                 C   s   t  }t|| d S r   )r1   r2   rt   r  r    r    r!   test_empty_dataframe  s    z'TestParquetPyArrow.test_empty_dataframec                 C   sV   dd l }tdddgi}||jd| dg}|t}t||d|i|d d S )Nr   xr*   )typer{   r   )	r   r1   r2   r{   fieldZbool_r^   r[   rt   )r   r&   r   rI   r{   Zout_dfr    r    r!   test_write_with_schema  s
    
z)TestParquetPyArrow.test_write_with_schemac                 C   sp   t t jg dddt jg dddt jg dddd}t|| t dt jg d	ddi}t|| d S )
Nr)   r   r9   ZUInt32rL   rV   )r?   r@   rK   r?   r   )r1   r2   r   rt   r  r    r    r!    test_additional_extension_arrays  s    
z3TestParquetPyArrow.test_additional_extension_arraysz1.0.0)min_versionc              	   C   sh   t dt jg dddi}t d|, t|||d| dd W d    n1 sZ0    Y  d S )	Nr?   rL   zstring[pyarrow]r9   string_storagezstring[]rn   )r1   r2   r   r   rt   r^   )r   r&   rG  rI   r    r    r!    test_pyarrow_backed_string_array  s    z3TestParquetPyArrow.test_pyarrow_backed_string_arrayz2.0.0c                 C   sL   t t jg dt jddddt jt jddddd}t|| d S )N))r   r*   )r*   r+   )r,   r5   z
2012-01-01r,   D)r>   r   r5   )rK   rA   rB   )r1   r2   ZIntervalIndexr   period_rangeZfrom_breaksrH   rt   r  r    r    r!   test_additional_extension_types  s    
z2TestParquetPyArrow.test_additional_extension_typesc                 C   s>   t s
d}nd}tdtjddddi}t||d|id	 d S )
Nz2.6z2.0r?   z
2017-01-01Z1nr  r   r>   versionr   )r	   r1   r2   rH   rt   )r   r&   verrI   r    r    r!   test_timestamp_nanoseconds  s
    z-TestParquetPyArrow.test_timestamp_nanosecondsc                 C   sV   t s(|jtjjkr(|jtjj	dd d|g }t
j|d|id}t||dd d S )Nzitemporary skip this test until it is properly resolved: https://github.com/pandas-dev/pandas/issues/37286r      index_as_colr   r  F)rg   )r   tzinfor\   timezoneutcr   r   r$   r   r   r1   r2   rt   )r   r   r&   ra   idxrI   r    r    r!   test_timezone_aware_index  s    
z,TestParquetPyArrow.test_timezone_aware_indexc                 C   sr   t dttddi}t .}||| t||dgdd}W d    n1 sT0    Y  t|dksnJ d S )Nr?   r   r,   r?   z==r   F)filtersZuse_legacy_datasetr*   )	r1   r2   rD   rE   rh   rs   r   r   rx   )r   r&   rI   ro   r   r    r    r!   test_filter_row_groups  s    

$z)TestParquetPyArrow.test_filter_row_groupsc                 C   s   t jtjddg dd}t &}||| t||}W d    n1 sP0    Y  |rvt	|j
t jjjsJ nt	|j
t jjjsJ d S )Nr  r,   )r/   r0   Cr   )r1   r2   rF   r   r   rh   rs   r   r   r   Z_mgrcoreZ	internalsZArrayManagerZBlockManager)r   r&   Zusing_array_managerrI   ro   r   r    r    r!   test_read_parquet_manager  s    
(z,TestParquetPyArrow.test_read_parquet_managerN)(r   r   r   r  r  r	  r  r  r  r  r$   r   r   r   r   strpathlibPathr   r#  
single_cpur*  r-  tdZ
skip_if_nor1  r3  r7  r<  r>  r?  r@  rD  rE  rJ  rM  rQ  rY  r\  r_  r    r    r    r!   r     s`   

"

	






r   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Ze	j
jdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!S )"TestParquetFastParquetc                 C   sF   |}t jdddd}|d }||d< t jddd|d< t|| d S )	Nr<   r,   z
US/Easternr  r  r  r=   	timedelta)r1   rH   r   r  rt   )r   r(   r`   rI   r  r    r    r!   r    s    
z!TestParquetFastParquet.test_basicc                 C   s<   t jtdddtdd }d}| ||t| d S )Nr
  r5   r,   r  r   z9Cannot create parquet dataset with duplicate column namesr  r   r(   rI   r   r    r    r!   r    s    $z-TestParquetFastParquet.test_duplicate_columnsc                 C   s@   t dg di}t jddtjdgidd}t|||dd d S )	Nr?   )TNFr   g        r  r9   F)rn   rg   )r1   r2   rF   r]   rt   r   r(   rI   rn   r    r    r!   test_bool_with_none  s    z*TestParquetFastParquet.test_bool_with_nonec                 C   sT   t dt jddddi}| ||td  t dg di}d}| ||t| d S )Nr?   Z2013Mr,   rN  r  z"Can't infer object conversion type)r1   r2   rL  r   r   rg  r    r    r!   r    s
    z'TestParquetFastParquet.test_unsupportedc                 C   s&   t dt tdi}t|| d S )Nr?   r4   )r1   r2   r"  rD   rt   )r   r(   rI   r    r    r!   r#  &  s    z'TestParquetFastParquet.test_categoricalc                 C   sz   dt tddi}t|}t 2}|j||d dd t||dgd}W d    n1 s\0    Y  t|dksvJ d S )Nr?   r   r,   r*   )rb   Zrow_group_offsetsrZ  )r[  )	rD   rE   r1   r2   rh   rs   r   r   rx   )r   r(   rA   rI   ro   r   r    r    r!   r\  *  s    

.z-TestParquetFastParquet.test_filter_row_groupsc                 C   s    t ||dd|id |dd d S )Nz$s3://pandas-test/fastparquet.parquetr+  )rb   r+  r&  r,  )r   r3   r'  r(   r(  r    r    r!   r-  2  s    z(TestParquetFastParquet.test_s3_roundtripc                 C   s   ddg}|}t  X}|j|d|d d tj|s8J dd l}||dj}t	|dks^J W d    n1 sr0    Y  d S )Nr[   rY   r   r"   r0  rb   r   Fr+   
rh   r:  r   r  ro   existsr   ZParquetFileZcatsrx   r   r(   r`   r0  rI   ro   r   Zactual_partition_colsr    r    r!   r<  =  s    
z4TestParquetFastParquet.test_partition_cols_supportedc                 C   s|   d}|}t  X}|j|d|d d tj|s4J dd l}||dj}t	|dksZJ W d    n1 sn0    Y  d S )Nr[   r   rk  r   Fr*   rl  rn  r    r    r!   r>  N  s    
z1TestParquetFastParquet.test_partition_cols_stringc                 C   s   ddg}|}t  X}|j|dd |d tj|s8J dd l}||dj}t	|dks^J W d    n1 sr0    Y  d S )Nr[   rY   r   )r"   rb   partition_onr   Fr+   rl  rn  r    r    r!   test_partition_on_supported_  s    
z2TestParquetFastParquet.test_partition_on_supportedc              	   C   s~   ddg}|}d}t jt|dL t $}|j|dd ||d W d    n1 sR0    Y  W d    n1 sp0    Y  d S )Nr[   rY   zYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datar   r   )r"   rb   ro  r0  )r$   r   r   rh   r:  r   )r   r(   r`   r0  rI   r   ro   r    r    r!   3test_error_on_using_partition_cols_and_partition_onp  s    
zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onc                 C   s*   t  }| }d|j_t|||d d S )Nr   rI  r1   r2   r   r   r   rt   rh  r    r    r!   r@    s    z+TestParquetFastParquet.test_empty_dataframec                 C   s>   d|g }t j|d|id}| }d|j_t|||d d S )NrR  rS  rT  r   rI  rr  )r   r(   ra   rX  rI   rn   r    r    r!   rY    s
    
z0TestParquetFastParquet.test_timezone_aware_indexc              	   C   s   t dddgi}t V}|| tjtdd t|ddd W d    n1 sX0    Y  W d    n1 sv0    Y  d S )	Nr?   r*   r+   z!not supported for the fastparquetr   r   Tr   )	r1   r2   rh   rs   r   r$   r   r   r   )r   r(   rI   ro   r    r    r!   &test_use_nullable_dtypes_not_supported  s
    

z=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedc              	   C   s   t dl}t|d tjtdd t|dd W d    n1 sL0    Y  t|j	dd W d    n1 s|0    Y  d S )	Ntest.parquets   breakit r   r   r   F)
missing_ok)
rh   rs   ra  rb  write_bytesr$   r   	Exceptionr   unlink)r   ro   r    r    r!   $test_close_file_handle_on_read_error  s
    *z;TestParquetFastParquet.test_close_file_handle_on_read_errorc              	   C   s   t jddgddgdd}tdT}t| d}|| W d    n1 sT0    Y  t||d}W d    n1 s~0    Y  t|| d S )Nr   r*   r.   )r  rt  wbr   )	r1   r2   rh   rs   openencoder   r   ri   )r   r"   rI   ro   rC   r   r    r    r!   test_bytes_file_name  s    (*z+TestParquetFastParquet.test_bytes_file_nameN)r   r   r   r  r  ri  r  r#  r\  r$   r   rc  r-  r<  r>  rp  rq  r@  rY  rs  rz  r~  r    r    r    r!   re    s"   	

	re  )	NNNNNTFTr+   )M__doc__r\   ior   r  ra  warningsr   r   numpyrF   r$   Zpandas._configr   Zpandas.compatr   Zpandas.compat.pyarrowr   r   r	   r
   Zpandas.util._test_decoratorsutilZ_test_decoratorsrd  Zpandasr1   Zpandas._testingZ_testingrh   Zpandas.util.versionr   Zpandas.io.parquetr   r   r   r   r   r   r#   r   DeprecationWarningFutureWarningr   r'   r   Z
pytestmarkZfixturer   Zskipifr"   r&   r(   r3   rJ   r`   nowrV  rW  minmaxstrptimera   rt   r~   r   r   r   r   r   r   r   r   r   r   r   re  r    r    r    r!   <module>   s   
(







         
A+   $
  N