a
    1$eu                     @   s  d dl mZ d dlmZ d dlZd dlmZ d dlZd dlZd dl	Z	d dl
mZmZ d dlmZ d dlZd dlZd dlmZmZmZ d dlZd dlmZ d dlmZmZmZmZ d d	lmZ d d
l m!Z! d dl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 e2ej34e5d Z6e	7dZ8e	j9de	j:de	j;j<ee8j=edk dddgddd Z>dd Z?dd Z@dd ZAdd  ZBd!d" ZCd#d$ ZDd%d& ZEe	j;Fd'd(i d)fd*dd+d,fd*d-d.d/fg d-d0d/fd*g d-d1d2fd*d*i id-d1d3fd*d*ddd4id-d1d5fd*d*dd6d4id-d1d5fg	d7d8 ZGd9d: ZHd;d< ZIe1Jd=d>d? ZKd@dA ZLe	j;FdBg dCdDdE ZMe	j;FdBg dCdFdG ZNdHdI ZOe	j;FdJg dKdLdM ZPe	j;j<ee8j=edk dNde	j;FdJg dOdPdQ ZQdRdS ZRdTdU ZSdVdW ZTe	j;FdXdYdZid)fdYe&i id)fdYe&d*d[iid,fgd\d] ZUd^d_ ZVd`da ZWdbdc ZXddde ZYdfdg ZZe	j;j<ee8j=edkdNddhdi Z[djdk Z\dldm Z]e	j;j<ee8j=ednk doddpdq Z^drds Z_dtdu Z`e	j;Fdvg dggdwdx Zae	j;Fdyddgdzd{ Zbd|d} Zce	j;Fd~eddgdge# dd Zde	j;Fdeddgdge# dd Zee	j;Fdg ddd Zfdd Zgdd Zhdd ZidS )    )absolute_import)productN)Version)	DataFrameread_parquet)assert_frame_equal)boxPointMultiPolygon)GeoDataFrame	read_filer   read_feather)to_wkb)get_path)
SUPPORTED_VERSIONS_create_metadata_decode_metadata_encode_metadata_geopandas_to_arrow_get_filesystem_path#_remove_id_from_member_of_ensembles_validate_dataframe_validate_metadataMETADATA_VERSION)assert_geodataframe_equalassert_geoseries_equal)mockdatapyarrowparquetfeatherz0.17.0zneeds pyarrow >= 0.17)reason)Zmarks)paramsc                 C   s,   | j dkrttjfS | j dkr(ttjfS d S )Nr   r    )paramr   r   
to_parquetr   
to_feather)request r'   Y/var/www/html/django/DPS/env/lib/python3.9/site-packages/geopandas/io/tests/test_arrow.pyfile_format-   s    


r)   c                  C   s  d} t t| }t|}t|ts&J |d tks6J |d dksFJ d|d v sVJ |j }t| |d d d |ksJ |d d d dksJ |d d d	 d
dgksJ t	
|d d d |jjsJ |d d dksJ |d d tjksJ d S )Nnaturalearth_lowresversionprimary_columngeometrycolumnscrsencodingWKBgeometry_typesr
   PolygonbboxZcreatorZlibrary	geopandas)r   r   r   
isinstancedictr   r/   to_json_dictr   npZarray_equalr-   Ztotal_boundsr5   __version__)test_datasetdfmetadatacrs_expectedr'   r'   r(   test_create_metadata@   s(    

r?   c                  C   st   t d} |  }d}d|v r:d}d|d d d v s:J t| |r^d|d d d vs^J t || kspJ d S )N	EPSG:4326FZdatum_ensembleTidmembersr   )pyprojCRSr8   r   )r/   Zcrs_jsonZcheck_ensembler'   r'   r(    test_crs_metadata_datum_ensembleZ   s    
rE   c                  C   sX   t jtddddgdd} tjtdd t| dd W d    n1 sJ0    Y  d S )	Nr   
   r@   r-   r/   zschema_version must be one ofmatchinvalidschema_version)r5   r   r   pytestraises
ValueErrorr   )gdfr'   r'   r(   (test_write_metadata_invalid_spec_versionk   s    rQ   c                  C   s    ddi} d}t | |ksJ d S )Nab
   {"a": "b"})r   )r=   expectedr'   r'   r(   test_encode_metadataq   s    rV   c                  C   s0   d} ddi}t | |ksJ t d d u s,J d S )NrT   rR   rS   )r   )Zmetadata_strrU   r'   r'   r(   test_decode_metadatax   s    rW   c                  C   s   d} t t| }t| t|d d|d< tt t| W d    n1 sV0    Y  tt t|d W d    n1 s0    Y  tt td W d    n1 s0    Y  d S )Nr*   iso_a3   r   znot a dataframe)r   r   r   	set_indexrM   rN   rO   )r;   r<   r'   r'   r(   test_validate_dataframe   s    &,r[   c                   C   s\   t ddd ddidd t ddd ddidd t dddd	d
diddidd d S )Nr-   r1   r/   r0   0.1.0)r,   r.   rL   	<version>r,   r.   r+   rA   ZEPSGi  )	authoritycode0.4.0)r   r'   r'   r'   r(   test_validate_metadata_valid   s.    

rc   zmetadata,error)N9Missing or malformed geo metadata in Parquet/Feather filerd   foo)r,   r.   z>'geo' metadata in Parquet/Feather file is missing required keyr^   )r,   r+   z?'geo' metadata in Parquet/Feather file is missing required key:)r.   r+   r_   z*'columns' in 'geo' metadata must be a dictzZ'geo' metadata in Parquet/Feather file is missing required key 'encoding' for column 'foo'r\   z'Only WKB geometry encoding is supportedZBKWc                 C   s:   t jt|d t|  W d    n1 s,0    Y  d S )NrH   )rM   rN   rO   r   )r=   errorr'   r'   r(   test_validate_metadata_invalid   s    4rg   c                  C   sR   ddd dddidd} t jtdd t|  W d    n1 sD0    Y  d S )	Nr-   r1   Z	spherical)r/   r0   edges1.0.0-beta.1r_   zDThe geo metadata indicate that column 'geometry' has spherical edgesrH   )rM   warnsUserWarningr   )r=   r'   r'   r(   test_validate_metadata_edges   s    rl   c                 C   sd   t g dgg dtddgd}tjtdd" |j| d dd	 W d    n1 sV0    Y  d S )
NrY         rR   rS   rR   rY   r   r.   r-   zaGeoPandas only supports using pyarrow as the engine for to_parquet: 'fastparquet' passed instead.rH   test.parquetZfastparquetZengine)r   r	   rM   rN   rO   r$   )tmpdirr<   r'   r'   r(   'test_to_parquet_fails_on_invalid_engine   s     ru   zgeopandas.io.arrow._to_parquetc                 C   sF   t g dgg dtddgd}|jddd | j|ddd d d	 d S )
Nrm   rp   rY   rq    r   rs   snappy)compressionindexrL   )r   r	   r$   Zassert_called_with)Zmock_to_parquetr<   r'   r'   r(   *test_to_parquet_does_not_pass_engine_along  s
     
rz   c                 C   sH   t g dg dd}tjt| d}|| t|}t|| d S )Nrm   )rR   rS   c)rR   rS   test.pq)r   ospathjoinstrr$   pd_read_parquetr   )rt   r<   filenamepq_dfr'   r'   r(   test_pandas_parquet_roundtrip1  s
    
r   r;   )r*   Znaturalearth_citiesZnybbc                 C   sP   d} t tt| jdgd}tjt|d}|| t	|}t
|| d S )Nr*   r-   r.   r|   )r   r   r   dropr}   r~   r   r   r$   r   r   )r;   rt   r<   r   r   r'   r'   r(   test_pandas_parquet_roundtrip2%  s    
r   c           	      C   sv   |\}}t t|}| }tjt| d}||| tj|sHJ t|| ||}t	|t
shJ t|| dS )z_Writing to parquet should not raise errors, and should not alter original
    GeoDataFrame
    r|   N)r   r   copyr}   r~   r   r   existsr   r6   r   )	rt   r)   r;   readerwriterr<   origr   r   r'   r'   r(   test_roundtrip4  s    

r   c                 C   s   |\}}d}t t|d}tjt| d}|||dd ||}t|| tjt| d}|||dd ||}t|jdd| d	S )
zwSetting index=`True` should preserve index in output, and
    setting index=`False` should drop index from output.
    r*   rX   ztest_with_index.pqT)ry   zdrop_index.pqF)r   N)	r   r   rZ   r}   r~   r   r   r   Zreset_indexrt   r)   r   r   r;   r<   r   r   r'   r'   r(   
test_indexP  s    
r   rx   )rw   gzipbrotliNc                 C   sT   d}t t|}tjt|d}|j|| d t|}t|t	sFJ t
|| dS )eUsing compression options should not raise errors, and should
    return identical GeoDataFrame.
    r*   r|   rx   N)r   r   r}   r~   r   r   r$   r   r6   r   r   rx   rt   r;   r<   r   r   r'   r'   r(   test_parquet_compressiond  s    r   z*Feather only supported for pyarrow >= 0.17)ZuncompressedZlz4Zzstdc                 C   sT   d}t t|}tjt|d}|j|| d t|}t|t	sFJ t
|| dS )r   r*   test.featherr   N)r   r   r}   r~   r   r   r%   r   r6   r   r   r   r'   r'   r(   test_feather_compressionu  s    
r   c                 C   s   |\}}d}t t|}|j |d< tjt| d}||| tj|sRJ ||}t	|t
shJ t|| t|j|jdd dS )zIf multiple geometry columns are present when written to parquet,
    they should all be returned as such when read from parquet.
    r*   geom2r|   T)Zcheck_geom_typeN)r   r   r-   r   r}   r~   r   r   r   r6   r   r   r   r   r   r'   r'   r(   test_parquet_multiple_geom_cols  s    

r   c                 C   s   d}t t|}t|}t|d j|d< tjt| d}|	| t
jtdd t| W d   n1 sr0    Y  dS )ztMissing geo metadata, such as from a parquet file created
    from a pandas DataFrame, will raise a ValueError.
    r*   r-   r|   -Missing geo metadata in Parquet/Feather file.rH   N)r   r   r   r   valuesr}   r~   r   r   r$   rM   rN   rO   r   )rt   r;   r<   r   r'   r'   r(   test_parquet_missing_metadata  s    
r   c                 C   sv   ddl m} tdg di}tjt| d}||| t	j
tdd t| W d   n1 sh0    Y  dS )zMissing geo metadata, such as from a parquet file created
    from a pyarrow Table (which will also not contain pandas metadata),
    will raise a ValueError.
    r   NrR   rm   r|   r   rH   )pyarrow.parquetr   r   tabler}   r~   r   r   write_tablerM   rN   rO   r   )rt   Zpqr   r   r'   r'   r(   test_parquet_missing_metadata2  s    r   zgeo_meta,errorZgeo    barc           
      C   s   ddl m}m} d}tt|}t|}t|d j|d< ||}|j	j
}|| ||}tjt| d}	|||	 tjt|d t|	 W d   n1 s0    Y  dS )zHas geo metadata with missing required fields will raise a ValueError.

    This requires writing the parquet file directly below, so that we can
    control the metadata that is written for this test.
    r   )r   Tabler*   r-   r|   rH   N)r   r   r   r   r   r   r   r   Zfrom_pandasschemar=   updatereplace_schema_metadatar}   r~   r   r   r   rM   rN   rO   r   )
rt   Zgeo_metarf   r   r   r;   r<   r   r=   r   r'   r'   r(   test_parquet_invalid_metadata  s    


r   c                 C   s   |\}}d}t t|}tjt| d}||| ||ddgd}t|ddg | tjt	dd ||dgd W d   n1 s0    Y  dS )	zWReading a subset of columns should correctly decode selected geometry
    columns.
    r*   r|   namer-   r   z4No geometry columns are included in the columns readrH   N)
r   r   r}   r~   r   r   r   rM   rN   rO   r   r'   r'   r(   test_subset_columns  s    
r   c                 C   s   |\}}d}t t|}|j |d< tjt| d}||| ||ddgd}t|	dddg | |j |d< ||| t
jtdd  ||g d	d}W d
   n1 s0    Y  t|	dg d	 | d
S )zReading a subset of columns that does not include the primary geometry
    column should promote the first geometry column present.
    r*   r   r|   r   r   geom3zEMultiple non-primary geometry columns read from Parquet/Feather file.rH   )r   r   r   N)r   r   r-   r   r}   r~   r   r   r   Zset_geometryrM   rj   rk   r   r'   r'   r(   test_promote_secondary_geometry  s$    

.r   c                 C   sp   |\}}d}t t|}tjt| d}||| tt ||dgd W d   n1 sb0    Y  dS )z`Reading a parquet file that is missing all of the geometry columns
    should raise a ValueErrorr*   r|   r   r   N)	r   r   r}   r~   r   r   rM   rN   rO   )rt   r)   r   r   r;   r<   r   r'   r'   r(   test_columns_no_geometry,  s    
r   c                 C   sb   |\}}d}t t|}d|_tjt| d}||| ||}|jdu sPJ t||dd dS )zcIf CRS is `None`, it should be properly handled
    and remain `None` when read from parquet`.
    r*   Nr|   TZ	check_crs)r   r   r/   r}   r~   r   r   r   r   r'   r'   r(   test_missing_crs;  s    
r   c                 C   s8   t dddgi}|| d  t| d }t|| d S )NrR   rY   rn   r|   )r   r$   r   r   )Ztmp_pathr<   r   r'   r'   r(   test_default_geo_col_writesO  s    r   c                 C   sZ   t td}tjt| d}tjtdd |	| W d    n1 sL0    Y  d S )Nr*   r   z,pyarrow >= 0.17 required for Feather supportrH   )
r   r   r}   r~   r   r   rM   rN   ImportErrorr%   )rt   r<   r   r'   r'   r(   test_feather_arrow_versionX  s    r   c                  C   s   t d} dd l} G dd d| jjj}| jd|dd |dd}d	}tt|}|	d
d}|
| W d    n1 s~0    Y  tdddid}t|| td|d}t|| | jd| jjjdd d S )Nfsspecr   c                       s   e Zd Z fddZ  ZS )z+test_fsspec_url.<locals>.MyMemoryFileSystemc                    s   || _ t j|i | d S )N)is_setsuper__init__)selfr   argskwargs	__class__r'   r(   r   m  s    z4test_fsspec_url.<locals>.MyMemoryFileSystem.__init__)__name__
__module____qualname__r   __classcell__r'   r'   r   r(   MyMemoryFileSystemj  s   r   memoryT)clobber)r   r*   zdata.parquetwbzmemory://data.parquetr   storage_options)
filesystem)rM   importorskipZfsspec.implementations.memoryZimplementationsr   ZMemoryFileSystemZregister_implementationr   r   openr$   r   r   )r   r   memfsr;   r<   fresultr'   r'   r(   test_fsspec_urlf  s     

(

r   c                  C   sJ   t jtdd( d} tt| ddid W d    n1 s<0    Y  d S )Nr   rH   r*   re   r   r   )rM   rN   rO   r   r   )r;   r'   r'   r(   /test_non_fsspec_url_with_storage_options_raises  s    r   z5.0.0z"pyarrow.fs requires pyarrow>=5.0.0c                  C   s"   t d\} }t| tjjsJ d S )Nzfile:///data.parquet)r   r6   r   fsZLocalFileSystem)r   _r'   r'   r(   test_prefers_pyarrow_fs  s    r   c                  C   sV   t jtddddgdd} d}| | t |}t| |dd ttj	| d S )Nr   rF   	epsg:4326rG   z~/test_file.parquetTr   )
r5   r   r   r$   r   r   r}   remover~   
expanduser)rP   	test_filer   r'   r'   r(   #test_write_read_parquet_expand_user  s    

r   c                  C   sV   t jtddddgdd} d}| | t |}t| |dd ttj	| d S )Nr   rF   r   rG   z~/test_file.featherTr   )
r5   r   r   r%   r   r   r}   r   r~   r   )rP   r   Zf_dfr'   r'   r(   #test_write_read_feather_expand_user  s    

r   r-   c                 C   s   t jddgt| i|d}|| d  ddlm} || d }t|jj	d }d|d	 d
 v shJ d|d	 d
 vs|J d S )NcolrY   r-   rr   r   
read_table   geor0   r.   r-   r4   )
r5   r   lenr$   r   r   jsonloadsr   r=   )rt   r-   rP   r   r   r=   r'   r'   r(   test_write_empty_bbox  s    r   formatc           
      C   s   |dkrddl m} nddlm} tjt| d| }tj	t
ddddgd}t|}|jj}t|d }|d	 d
 d= |dt|i ||}||| ttd| }||}	|	jtdsJ d S )Nr    r   )write_feather)r   test.rF   r   r   r.   r-   r/   read_z	OGC:CRS84)pyarrow.featherr   r   r   r}   r~   r   r   r5   r   r   r   r   r=   r   r   r   r   getattrr/   equalsrC   rD   )
rt   r   writer   rP   r   r=   Zgeo_metadatareadr<   r'   r'   r(   test_write_read_default_crs  s    

r   c                 C   s   t jt jdgd}tjr,|| d  n<tjt	dd || d  W d    n1 s^0    Y  ddl
m} || d }|d d   }tjr|d	ksJ n|d
ksJ d S )NzPOINT Z (1 2 3)r   rr   z'The GeoDataFrame contains 3D geometriesrH   r   r   r-   Z:01e9030000000000000000f03f00000000000000400000000000000840Z:0101000080000000000000f03f00000000000000400000000000000840)r5   r   Z	GeoSeriesZfrom_wktcompatZUSE_SHAPELY_20r$   rM   rj   rk   r   r   Zas_pyhex)rt   rP   r   r   Zwkbr'   r'   r(   test_write_iso_wkb  s    ,r   zformat,schema_versionc                 C   s  |dkrddl m} nddlm} tjt| d| }tjt	ddddgdd}t
|d| }|||d	 t
td
| }||}t|| |pt}||}	t|	jjd }
|
d |ksJ |dkr|
d d d |j ksJ n,|j }t| |
d d d |ksJ t|tdkr\d|
d d v s@J |
d d d dksJ n2d|
d d v srJ |
d d d dgksJ d S )Nr    r   r   r   rF   r@   rG   to_rK   r   r   r+   r]   r.   r-   r/   rb   Zgeometry_typer3   r2   )r   r   r   r}   r~   r   r   r5   r   r   r   r   r   r   r   r   r=   r/   Zto_wktr8   r   r   )rt   r   rL   r   r   rP   r   r   r<   r   r=   r>   r'   r'   r(   test_write_spec_version  s0    
 
r   zformat,versionc           	      C   s  |dkrddl m} |pd}nddlm} |p0d}tjt| d| }tjt	ddddgdd	}t
|d
| }|tv rtjtdd |||d W d    q1 s0    Y  n|||d ||}t|jjd }|tv r|d |ksJ n|d tks
J d S )Nr    r   r   rn   z2.6r   rF   r@   rG   r   z?the `version` parameter has been replaced with `schema_version`rH   )r+   r   r+   )r   r   r   r}   r~   r   r   r5   r   r   r   r   rM   rj   FutureWarningr   r   r   r=   r   )	rt   r   r+   r   r   rP   r   r   r=   r'   r'   r(   'test_write_deprecated_version_parameter  s(    
,r   r+   )r]   rb   ri   c              	   C   s   t jddgddgddgdttddddtddd	d	gtd
d
ddgdd}t td d|  d }t||dd t td d|  d }t||dd dS )a  
    Verify that files for different metadata spec versions can be read
    created for each supported version:

    # small dummy test dataset (not naturalearth_lowres, as this can change over time)
    from shapely.geometry import box, MultiPolygon
    df = geopandas.GeoDataFrame(
        {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]},
        geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5,5)],
        crs="EPSG:4326",
    )
    df.to_feather(DATA_PATH / 'arrow' / f'test_data_v{METADATA_VERSION}.feather')
    df.to_parquet(DATA_PATH / 'arrow' / f'test_data_v{METADATA_VERSION}.parquet')
    rR   rS   rY   rn   皙?皙?Zcol_strZcol_intZ	col_floatr   ro         r@   rG   arrowZtest_data_vz.featherTr   z.parquetN)r5   r   r
   r   r   	DATA_PATHr   r   )r+   rU   r<   r'   r'   r(   test_read_versioned_file8  s    ,r   c               	   C   s   t jddgddgddgdttddddtddd	d	gtd
d
ddgdd} t td d }t|| dd t td d }t|| dd dS )a  
    Verify that files written by GDAL can be read by geopandas.
    Since it is currently not yet straightforward to install GDAL with
    Parquet/Arrow enabled in our conda setup, we are testing with some
    generated files included in the repo (using GDAL 3.5.0):

    # small dummy test dataset (not naturalearth_lowres, as this can change over time)
    from shapely.geometry import box, MultiPolygon
    df = geopandas.GeoDataFrame(
        {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]},
        geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5,5)],
        crs="EPSG:4326",
    )
    df.to_file("test_data.gpkg", GEOMETRY_NAME="geometry")
    and then the gpkg file is converted to Parquet/Arrow with:
    $ ogr2ogr -f Parquet -lco FID= test_data_gdal350.parquet test_data.gpkg
    $ ogr2ogr -f Arrow -lco FID= -lco GEOMETRY_ENCODING=WKB test_data_gdal350.arrow test_data.gpkg
    rR   rS   rY   rn   r   r   r   r   ro   r   r   r@   rG   r   ztest_data_gdal350.parquetTr   ztest_data_gdal350.arrowN)r5   r   r
   r   r   r   r   r   )rU   r<   r'   r'   r(   test_read_gdal_filesU  s    ,r   c                 C   s^   t td}| d }|  |d d |d  |dd  |d  t|}t|| d S )Nr*   partitioned_datasetd   zdata1.parquetzdata2.parquet)r   r   mkdirr$   r   r   )rt   r<   Zbasedirr   r'   r'   r(   %test_parquet_read_partitioned_datasetu  s    r   c                 C   s   t d}ttd}|d}|d |dd"}|d d | W d    n1 s^0    Y  |dd"}|dd  | W d    n1 s0    Y  td	}t	|| d S )
Nr   r*   r   r   z!partitioned_dataset/data1.parquetr   r   z!partitioned_dataset/data2.parquetzmemory://partitioned_dataset)
rM   r   r   r   r   r   r   r$   r   r   )rt   r   r<   r   r   r   r'   r'   r(   ,test_parquet_read_partitioned_dataset_fsspec  s    


00r   )j
__future__r   	itertoolsr   r   Zpackaging.versionr   r}   pathlibrM   Zpandasr   r   r   Zpandas.testingr   numpyr9   rC   Zshapely.geometryr   r	   r
   r5   Zgeopandas._compat_compatr   r   r   r   Zgeopandas.arrayr   Zgeopandas.datasetsr   Zgeopandas.io.arrowr   r   r   r   r   r   r   r   r   r   Zgeopandas.testingr   r   Zgeopandas.tests.utilr   Pathr~   dirname__file__r   r   r   Zfixturer#   markZskipifr:   r)   r?   rE   rQ   rV   rW   r[   rc   Zparametrizerg   rl   ru   patchrz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r'   r'   r'   r(   <module>   s*  0

	#


3







	

 
		


(
%
 