a
    j=icC                     @   s  d dl mZ d dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
Z
d dlZd dlmZ d dlZd dlZd dlmZ d dlm  mZ d dlmZmZmZmZmZmZmZ d dlm Z! d d	l"m#Z# d dl$Zd d
l$m%Z% ej&g dddd Z'dd Z(e)de)ddd Z*dd Z+e)de)de)ddd Z,ej-j.dej/de)de)dgdej/de)ddgddG dd dZ0dS )     )partial)reload)BytesIOStringION)Path)URLError)is_platform_windows)	DataFrame
MultiIndexSeries	Timestamp
date_rangeread_csvto_datetime)file_path_to_url	read_html)zchinese_utf-16.htmlzchinese_utf-32.htmlzchinese_utf-8.htmlzletz_latin1.html)paramsc                 C   s   |ddd| j S )z6Parametrized fixture for HTML encoding test filenames.iodataZhtml_encoding)param)requestdatapath r   j/home/droni/.local/share/virtualenvs/DPS-5Je3_V2c/lib/python3.9/site-packages/pandas/tests/io/test_html.pyhtml_encoding_file#   s    
r   c                 O   s   t | t |ks,J dt |  dt | d}ttdd | |}|sPJ |t| |D ]0\}}tj||g|R i | |jrZJ dqZd S )Nz*lists are not of equal size len(list1) == z, len(list2) == z$not all list elements are DataFramesc                 S   s   t | tot |tS N)
isinstancer	   )xyr   r   r   <lambda>9       z(assert_framelist_equal.<locals>.<lambda>zframes are both empty)lenallmapziptmassert_frame_equalempty)Zlist1Zlist2argskwargsmsgZboth_framesZframe_iZframe_jr   r   r   assert_framelist_equal0   s&    r,   bs4html5libc                 C   s^   dd l }| |dd tjtdd& t|dddd	d
d W d    n1 sP0    Y  d S )Nr   __version__z4.2zPandas requires versionmatchr   r   html	spam.htmlr-   flavor)r-   setattrpytestraisesImportErrorr   )Zmonkeypatchr   r-   r   r   r   test_bs4_version_failsD   s    r:   c                  C   sT   d} d}d| d }t jt|d t| d|d W d    n1 sF0    Y  d S )Nz
google.comzinvalid flavorz\{z \} is not a valid set of flavorsr0   Zgoogler1   r5   r7   r8   
ValueErrorr   )urlr5   r+   r   r   r   test_invalid_flavorN   s
    r?   lxmlc                 C   s<   | dddd}t |ddgd}t |ddgd}t|| d S )	Nr   r   r2   valid_markup.htmlr   r@   )	index_colr5   r-   r   r,   )r   filenameZdfs_lxmlZdfs_bs4r   r   r   test_same_orderingW   s    rE   r5   )Zmarksclass)scopec                   @   s0  e Zd Zejdd Zejdd Zejddddd	 Zd
d Zej	j
ej
ddddd Zej	j
ej
ddddd Zej	j
ej
ddddd Zej	jdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Z d7d8 Z!d9d: Z"d;d< Z#d=d> Z$ej	j
ej
d?d@ Z%ej	jej	j
ej
dAdB Z&ej	jdCdD Z'ej	jdEdF Z(dGdH Z)ej	jdIdJ Z*ej	jdKdL Z+ej	jdMdN Z,ej	jdOdP Z-ej	jdQdR Z.ej	jdSdT Z/ej	jdUdV Z0dWdX Z1ej	j
ej
dYdddZd[ Z2ej	j
ej
dYddd\d] Z3d^d_ Z4d`da Z5dbdc Z6ddde Z7dfdg Z8dhdi Z9ej	jdjdk Z:ej	jdldm Z;dndo Z<dpdq Z=drds Z>dtdu Z?dvdw Z@dxdy ZAdzd{ ZBd|d} ZCd~d ZDdd ZEdd ZFdd ZGdd ZHej	Idddgdd ZJdd ZKdd ZLdd ZMdd ZNdd ZOdd ZPdd ZQej	jdd ZRdd ZSdd ZTej	IddeUdgdfdeUdgeUdgfgdd ZVej	Wddd ZXdd ZYdd ZZej	jdd Z[dd Z\dd Z]ej	Idg ddd Z^dd Z_dd Z`dS )TestReadHtmlc                 C   s   |ddddS )Nr   r   r2   r3   r   selfr   r   r   r   	spam_dataj   s    zTestReadHtml.spam_datac                 C   s   |ddddS )Nr   r   r2   banklist.htmlr   rI   r   r   r   banklist_datan   s    zTestReadHtml.banklist_dataTfunction)ZautouserG   c                 c   s   t t|d| _d V  d S )Nr4   )r   r   )rJ   r5   r   r   r   set_defaultsr   s    zTestReadHtml.set_defaultsc                 C   sV   t jdddd ddddjt}| }| j|dd	id
dd
 }t || d S )N      c                  W   s
   t j S r   )nprandomZrand)r)   r   r   r   r    |   r!   z2TestReadHtml.test_to_html_compat.<locals>.<lambda>F)Z
data_gen_fZc_idx_namesZr_idx_namesz{:.3f}rF   Z	dataframer   )attrsrB   )	r&   ZmakeCustomDataframeapplymapformatZastypefloatto_htmlr   r'   )rJ   dfoutresr   r   r   test_to_html_compatw   s    z TestReadHtml.test_to_html_compatThttps://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html)r>   Zcheck_before_testc                 C   s~   d}t t | |d}W d    n1 s00    Y  t t | |d}W d    n1 sf0    Y  t|| d S )Nr]   First Federal Bank of FloridaMetcalf Bank)r&   Zassert_produces_warningFutureWarningr   r,   rJ   r>   df1df2r   r   r   "test_banklist_url_positional_match   s    	""z/TestReadHtml.test_banklist_url_positional_matchc                 C   s.   d}| j |dd}| j |dd}t|| d S )Nr]   r^   r0   r_   rC   ra   r   r   r   test_banklist_url   s    	zTestReadHtml.test_banklist_url\https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/html/spam.htmlc                 C   s.   d}| j |dd}| j |dd}t|| d S )Nrf   	.*Water.*r0   UnitrC   ra   r   r   r   test_spam_url   s
    
zTestReadHtml.test_spam_urlc                 C   s6   | j |dddid}| j |dddid}t|| d S )Nz.*Florida.*idtabler1   rT   r_   rC   )rJ   rM   rb   rc   r   r   r   test_banklist   s    zTestReadHtml.test_banklistc                 C   sV   | j |dd}| j |dd}t|| |d jd dks<J |d jd dksRJ d S )Nrg   r0   rh   r   r   r   
ProximatesZNutrient)r   r,   iloccolumnsrJ   rK   rb   rc   r   r   r   	test_spam   s
    
zTestReadHtml.test_spamc                 C   s&   |  |}|D ]}t|tsJ qd S r   r   r   r	   )rJ   rK   dfsrY   r   r   r   test_spam_no_match   s    
zTestReadHtml.test_spam_no_matchc                 C   s.   | j |ddid}|D ]}t|tsJ qd S )Nrj   rk   )rT   rt   )rJ   rM   ru   rY   r   r   r   test_banklist_no_match   s    z#TestReadHtml.test_banklist_no_matchc                 C   s4   | j |dddd }|jd dks&J |jr0J d S )Nrg      r1   headerr   ro   )r   rq   r(   )rJ   rK   rY   r   r   r   test_spam_header   s    zTestReadHtml.test_spam_headerc                 C   s.   | j |ddd}| j |ddd}t|| d S Nrg      r1   skiprowsrh   rC   rr   r   r   r   test_skiprows_int   s    zTestReadHtml.test_skiprows_intc                 C   s6   | j |dtdd}| j |dtdd}t|| d S Nrg   rx   r~   rh   )r   ranger,   rr   r   r   r   test_skiprows_range   s    z TestReadHtml.test_skiprows_rangec                 C   s6   | j |dddgd}| j |dddgd}t|| d S Nrg   r}   rx   r~   rh   rC   rr   r   r   r   test_skiprows_list   s    zTestReadHtml.test_skiprows_listc                 C   s6   | j |dddhd}| j |dddhd}t|| d S r   rC   rr   r   r   r   test_skiprows_set   s    zTestReadHtml.test_skiprows_setc                 C   s.   | j |ddd}| j |ddd}t|| d S r|   rC   rr   r   r   r   test_skiprows_slice   s    z TestReadHtml.test_skiprows_slicec                 C   s6   | j |dtdd}| j |dtdd}t|| d S r   r   slicer,   rr   r   r   r   test_skiprows_slice_short  s    z&TestReadHtml.test_skiprows_slice_shortc                 C   s<   | j |dtddd}| j |dtdddd}t|| d S )	Nrg   rx      r~   rh   rP   r}   r   rr   r   r   r   test_skiprows_slice_long  s    z%TestReadHtml.test_skiprows_slice_longc                 C   s:   | j |dtdd}| j |dtdd}t|| d S r   )r   rR   Zaranger,   rr   r   r   r   test_skiprows_ndarray  s    z"TestReadHtml.test_skiprows_ndarrayc                 C   sB   t jtdd  | j|ddd W d    n1 s40    Y  d S )Nz%is not a valid type for skipping rowsr0   rg   Zasdfr~   )r7   r8   	TypeErrorr   )rJ   rK   r   r   r   test_skiprows_invalid  s    z"TestReadHtml.test_skiprows_invalidc                 C   s.   | j |ddd}| j |ddd}t|| d S Nrg   r   r1   rB   rh   rC   rr   r   r   r   
test_index  s    zTestReadHtml.test_indexc                 C   s2   | j |dddd}| j |dddd}t|| d S Nrg   r}   r   )r1   rz   rB   rh   rC   rr   r   r   r   test_header_and_index_no_types!  s    z+TestReadHtml.test_header_and_index_no_typesc                 C   s2   | j |dddd}| j |dddd}t|| d S r   rC   rr   r   r   r    test_header_and_index_with_types&  s    z-TestReadHtml.test_header_and_index_with_typesc                 C   s.   | j |ddd}| j |ddd}t|| d S r   rC   rr   r   r   r   test_infer_types+  s    zTestReadHtml.test_infer_typesc                 C   s   t |dd}t| }W d    n1 s.0    Y  t |dd}t| }W d    n1 sf0    Y  | j|dd}| j|dd}t|| d S NzUTF-8)encodingrg   r0   rh   )openr   readr   r,   )rJ   rK   fdata1data2rb   rc   r   r   r   test_string_io2  s    **zTestReadHtml.test_string_ioc                 C   s^   t |dd}| }W d    n1 s*0    Y  | j|dd}| j|dd}t|| d S r   )r   r   r   r,   )rJ   rK   r   r   rb   rc   r   r   r   test_string=  s
    &zTestReadHtml.test_stringc                 C   s   t |dd}| j|dd}W d    n1 s00    Y  t |dd}| j|dd}W d    n1 sj0    Y  t|| d S r   )r   r   r,   )rJ   rK   r   rb   rc   r   r   r   test_file_likeF  s
    ,,zTestReadHtml.test_file_likec                 C   s@   t jtdd | jddd W d    n1 s20    Y  d S )Nz#urlopen error unknown url type: gitr0   zgit://github.comrg   )r7   r8   r   r   rJ   r   r   r   test_bad_url_protocolO  s    z"TestReadHtml.test_bad_url_protocolc                 C   sH   d}t jttf|d | jddd W d    n1 s:0    Y  d S )NzNName or service not known|Temporary failure in name resolution|No tables foundr0   zhttp://www.a23950sdfa908sd.comrg   )r7   r8   r   r=   r   )rJ   r+   r   r   r   test_invalid_urlU  s    zTestReadHtml.test_invalid_urlc                 C   sN   |}| j ttj|dddid}t|ts2J |D ]}t|ts6J q6d S )NZFirstrj   rk   rl   )r   r   ospathabspathr   listr	   rJ   rM   r>   ru   rY   r   r   r   test_file_url`  s    zTestReadHtml.test_file_urlc                 C   sJ   |}t jtdd$ | j|dddid W d    n1 s<0    Y  d S )NzNo tables foundr0   r^   rj   Z	tasdfablerl   r<   )rJ   rM   r>   r   r   r   test_invalid_table_attrsj  s
    
z%TestReadHtml.test_invalid_table_attrsc                 O   s"   | j |g|R dddid|S )NMetcalfrj   rk   rl   r   )rJ   r   r)   r*   r   r   r   
_bank_datar  s    zTestReadHtml._bank_datac                 C   s*   | j |ddgdd }t|jts&J d S )Nr   r}   rz   r   r   rq   r
   rJ   rM   rY   r   r   r   test_multiindex_headerw  s    z#TestReadHtml.test_multiindex_headerc                 C   s*   | j |ddgdd }t|jts&J d S )Nr   r}   rB   )r   r   indexr
   r   r   r   r   test_multiindex_index|  s    z"TestReadHtml.test_multiindex_indexc                 C   s@   | j |ddgddgdd }t|jts,J t|jts<J d S )Nr   r}   )rz   rB   )r   r   rq   r
   r   r   r   r   r   test_multiindex_header_index  s    z)TestReadHtml.test_multiindex_header_indexc                 C   s,   | j |ddgddd }t|jts(J d S Nr   r}   )rz   r   r   r   r   r   r   &test_multiindex_header_skiprows_tuples  s    z3TestReadHtml.test_multiindex_header_skiprows_tuplesc                 C   s,   | j |ddgddd }t|jts(J d S r   r   r   r   r   r   test_multiindex_header_skiprows  s    z,TestReadHtml.test_multiindex_header_skiprowsc                 C   sB   | j |ddgddgddd }t|jts.J t|jts>J d S )Nr   r}   )rz   rB   r   )r   r   r   r
   rq   r   r   r   r   %test_multiindex_header_index_skiprows  s    z2TestReadHtml.test_multiindex_header_index_skiprowsc                 C   sZ   |}| j ttj|ttdddid}t|ts>J |D ]}t|t	sBJ qBd S )NZFloridarj   rk   rl   )
r   r   r   r   r   recompiler   r   r	   r   r   r   r   test_regex_idempotency  s    z#TestReadHtml.test_regex_idempotencyc                 C   sF   d}t jt|d  | j|ddd W d    n1 s80    Y  d S )Nz\(you passed a negative value\)r0   ZWaterr   r~   r<   rJ   rK   r+   r   r   r   test_negative_skiprows  s    z#TestReadHtml.test_negative_skiprowshttps://docs.python.org/2/c                 C   s&   d}| j |dd}t|dks"J d S )Nr   Pythonr0   r}   r   r"   )rJ   r>   ru   r   r   r   test_multiple_matches  s    z"TestReadHtml.test_multiple_matchesc                 C   s<   d}| j |dd}dd |D }t|tddgks8J d S )Nr   r   r0   c                 S   s   g | ]}|j d  dd qS )rn   r   rP   )rp   ).0rY   r   r   r   
<listcomp>  r!   z7TestReadHtml.test_python_docs_table.<locals>.<listcomp>ZRepoZWhat)r   sorted)rJ   r>   ru   zzr   r   r   test_python_docs_table  s    z#TestReadHtml.test_python_docs_tablec                 C   s"   d}|  |}t|dksJ dS )z@
        Make sure that read_html ignores empty tables.
        a  
            <table>
                <thead>
                    <tr>
                        <th>A</th>
                        <th>B</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1</td>
                        <td>2</td>
                    </tr>
                </tbody>
            </table>
            <table>
                <tbody>
                </tbody>
            </table>
        r}   Nr   )rJ   r2   resultr   r   r   test_empty_tables  s    
zTestReadHtml.test_empty_tablesc                 C   s:   |  dd }tddgddggddgd	}t|| d S )
Na  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </tbody>
            <tbody>
                <tr>
                    <td>3</td>
                    <td>4</td>
                </tr>
            </tbody>
        </table>r   r}   rx   rQ   rP   ABr   rq   r   r	   r&   r'   rJ   r   expectedr   r   r   test_multiple_tbody  s    z TestReadHtml.test_multiple_tbodyc                 C   s0   |  dd }tddidgd}t|| dS )zt
        Don't fail with bs4 when there is a header and only one column
        as described in issue #9178
        a3  <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>first</td>
                    </tr>
                </tbody>
            </table>r   Headerfirstr   r   Nr   r   r   r   r   test_header_and_one_column  s    z'TestReadHtml.test_header_and_one_columnc                 C   s4   |  dd }tg dgg dd}t|| dS )zK
        Ensure parser adds <tr> within <thead> on malformed HTML.
        a  <table>
            <thead>
                <tr>
                    <th>Country</th>
                    <th>Municipality</th>
                    <th>Year</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Ukraine</td>
                    <th>Odessa</th>
                    <td>1944</td>
                </tr>
            </tbody>
        </table>r   )ZUkraineZOdessa  )ZCountryZMunicipalityZYearr   Nr   r   r   r   r   test_thead_without_tr
  s    z"TestReadHtml.test_thead_without_trc                 C   s   d}t ddggddgd}t ddgddggddgd}|jd	d
}|jdd
}| |d }| |d }t|| t|| dS )zh
        Make sure that read_html reads tfoot, containing td or th.
        Ignores empty tfoot
        a  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>bodyA</td>
                    <td>bodyB</td>
                </tr>
            </tbody>
            <tfoot>
                {footer}
            </tfoot>
        </table>ZbodyAZbodyBr   r   r   ZfootAZfootB )footerz%<tr><td>footA</td><th>footB</th></tr>r   N)r	   rV   r   r&   r'   )rJ   Zdata_templateZ	expected1Z	expected2r   r   Zresult1Zresult2r   r   r   test_tfoot_read(  s    zTestReadHtml.test_tfoot_readc                 C   s4   | j dddd }tddggdd}t|| d S )Na
  
            <table>
                <tr>
                    <td>S</td>
                    <td>I</td>
                </tr>
                <tr>
                    <td>text</td>
                    <td>1944</td>
                </tr>
            </table>
        r   r   textr   )SIrq   r   r   r   r   r   &test_parse_header_of_non_string_columnN  s    z3TestReadHtml.test_parse_header_of_non_string_columnc                    s   ddl m   fdd}| j|dddidd }t|d	d
ddttdd}|j|jks\J g d}g d}||||}||}	|jddd}
ddg}|
| 	t
|
|< t|
|	 d S )Nr   _remove_whitespacec                    s&   z
 | W S  t y    |  Y S 0 d S r   )AttributeErrorr   r   r   r   try_remove_wsi  s    
z8TestReadHtml.test_banklist_header.<locals>.try_remove_wsr   rj   rk   rl   r   r   csvzbanklist.csv)Updated DateClosing Date
converters)
z,First Vietnamese American Bank In Vietnamesez"Westernbank Puerto Rico En Espanolz*R-G Premier Bank of Puerto Rico En EspanolzEurobank En EspanolzSanderson State Bank En EspanolzLWashington Mutual Bank (Including its subsidiary Washington Mutual Bank FSB)zSilver State Bank En Espanolz%AmTrade International Bank En EspanolzHamilton Bank, NA En Espanolz6The Citizens Savings Bank Pioneer Community Bank, Inc.)
zFirst Vietnamese American BankzWesternbank Puerto RicozR-G Premier Bank of Puerto RicoZEurobankzSanderson State BankzWashington Mutual BankzSilver State BankzAmTrade International BankzHamilton Bank, NAzThe Citizens Savings BankT)datetimenumericr   r   )pandas.io.htmlr   r   r   r   shaperU   replace_convertapplyr   r&   r'   )rJ   rM   r   r   rY   Zground_trutholdnewZdfnewZgtnew	convertedZ	date_colsr   r   r   test_banklist_headere  s     
z!TestReadHtml.test_banklist_headerc                 C   sl   d}t |}| }W d    n1 s*0    Y  ||v s@J | j|dddidd }|| v shJ d S )NzGold Canyonrj   rk   rl   r   )r   r   r   	to_string)rJ   rM   gcr   Zraw_textrY   r   r   r   test_gold_canyon  s    
&zTestReadHtml.test_gold_canyonc                 C   s4   | j dddd }| j dddd }t|| d S )Na  <table>
                        <thead>
                            <tr style="text-align: right;">
                            <th></th>
                            <th>C_l0_g0</th>
                            <th>C_l0_g1</th>
                            <th>C_l0_g2</th>
                            <th>C_l0_g3</th>
                            <th>C_l0_g4</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <th>R_l0_g0</th>
                            <td> 0.763</td>
                            <td> 0.233</td>
                            <td> nan</td>
                            <td> nan</td>
                            <td> nan</td>
                            </tr>
                            <tr>
                            <th>R_l0_g1</th>
                            <td> 0.244</td>
                            <td> 0.285</td>
                            <td> 0.392</td>
                            <td> 0.137</td>
                            <td> 0.222</td>
                            </tr>
                        </tbody>
                    </table>r   r   a  <table>
                    <thead>
                        <tr style="text-align: right;">
                        <th></th>
                        <th>C_l0_g0</th>
                        <th>C_l0_g1</th>
                        <th>C_l0_g2</th>
                        <th>C_l0_g3</th>
                        <th>C_l0_g4</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                        <th>R_l0_g0</th>
                        <td> 0.763</td>
                        <td> 0.233</td>
                        </tr>
                        <tr>
                        <th>R_l0_g1</th>
                        <td> 0.244</td>
                        <td> 0.285</td>
                        <td> 0.392</td>
                        <td> 0.137</td>
                        <td> 0.222</td>
                        </tr>
                    </tbody>
                 </table>)r   r&   r'   )rJ   r   r   r   r   r   test_different_number_of_cols  s     "z*TestReadHtml.test_different_number_of_colsc                 C   s4   |  dd }tg dgg dd}t|| d S )NaZ  
            <table>
                <tr>
                    <th>A</th>
                    <th colspan="1">B</th>
                    <th rowspan="1">C</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                    <td>c</td>
                </tr>
            </table>
        r   )abc)r   r   Cr   r   r   r   r   r   test_colspan_rowspan_1  s    z#TestReadHtml.test_colspan_rowspan_1c                 C   s8   | j dddd }tg dgg dd}t|| d S )Na  
            <table>
                <tr>
                    <td colspan="2">X</td>
                    <td>Y</td>
                    <td rowspan="2">Z</td>
                    <td>W</td>
                </tr>
                <tr>
                    <td>A</td>
                    <td colspan="2">B</td>
                    <td>C</td>
                </tr>
            </table>
        r   r   )r   r   r   Zr   )XzX.1Yr   Wr   r   r   r   r   r    test_colspan_rowspan_copy_values  s    z-TestReadHtml.test_colspan_rowspan_copy_valuesc                 C   s8   | j dddd }tg dgg dd}t|| d S )Na(  
            <table>
                <tr>
                    <td rowspan="2">A</td>
                    <td rowspan="2" colspan="3">B</td>
                    <td>C</td>
                </tr>
                <tr>
                    <td>D</td>
                </tr>
            </table>
        r   r   )r   r   r   r   D)r   r   zB.1zB.2r   r   r   r   r   r   r   test_colspan_rowspan_both_not_1  s    z,TestReadHtml.test_colspan_rowspan_both_not_1c                 C   s8   | j dddd }tddggddgd}t|| d S )Nz
            <table>
                <tr>
                    <td>A</td>
                    <td rowspan="2">B</td>
                </tr>
                <tr>
                    <td>C</td>
                </tr>
            </table>
        r   r   r   r   r   r   r   r   r   r   r   test_rowspan_at_end_of_row;  s    z'TestReadHtml.test_rowspan_at_end_of_rowc                 C   s>   | j dddd }tddgddggddgd}t|| d S )Nz
            <table>
                <tr>
                    <td rowspan="3">A</td>
                    <td rowspan="3">B</td>
                </tr>
            </table>
        r   r   r   r   r   r   r   r   r   r   test_rowspan_only_rowsV  s    
z#TestReadHtml.test_rowspan_only_rowsc                 C   sT   |  dd }tddgddggddgddggd}tdd	gg|d
}t|| d S )Nam  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <th>a</th>
                    <th>b</th>
                </tr>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </table>
        r   r   r   r   r   r}   levelscodesrx   r   r   r
   r	   r&   r'   rJ   r   rq   r   r   r   r   +test_header_inferred_from_rows_with_only_thi  s    $z8TestReadHtml.test_header_inferred_from_rows_with_only_thc                 C   sd   t dtdddi}| }| j|dgdd}t||d  | j|dgdd}t||d  d S )Ndate1/1/2001
   Zperiodsr}   r   Zparse_datesrB   )r	   r   rX   r   r&   r'   )rJ   rY   r   r[   r   r   r   test_parse_dates_list  s    z"TestReadHtml.test_parse_dates_listc                 C   sn   t tddd}t|dd |dd d}| j| dd	d
gid	d}td|i}t||d  d S )Nr  r  r  c                 S   s   t |  S r   )strr  r   r   r   r   r      r!   z7TestReadHtml.test_parse_dates_combine.<locals>.<lambda>c                 S   s   t |  S r   )r  timer   r   r   r   r      r!   )r  r  r   r}   rx   r  r   )r   r   r	   r$   r   rX   r&   r'   )rJ   Z	raw_datesrY   r[   Znewdfr   r   r   test_parse_dates_combine  s    z%TestReadHtml.test_parse_dates_combinec                 C   s   |dddd}t j|s,J t| dt j|sJJ t| d| j|ddd	d
 }|jdkslJ d|jd v s~J |d jt	dksJ t	
|jd dsJ d S )Nr   r   r2   wikipedia_states.htmlz is not a filez is an empty fileArizonar}   ry   r   )<      Unnamedr   sq mifloat64)r   r  HzPN$A)r   r   isfilereprgetsizer   r   rq   dtyperR   allcloselocrJ   r   r   r   r   r   r   test_wikipedia_states_table  s    z(TestReadHtml.test_wikipedia_states_tablec                 C   sp   |dddd}| j |dddd }|jdks0J d	|jd
 d v sFJ |jjdksVJ t|jd dslJ d S )Nr   r   r2   r  r  r   r   )r     r  r   r}   rx   )ZAlaska)zTotal area[2]r  r  )r   r   rq   ZnlevelsrR   r!  r"  r#  r   r   r    test_wikipedia_states_multiindex  s    z-TestReadHtml.test_wikipedia_states_multiindexc                 C   sB   | j dddgd}tddggtddgd	}t|d | d S )
NaK  
                <table>
                    <thead>
                        <tr><th></th><th></tr>
                        <tr><th>A</th><th>B</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>a</td><td>b</td></tr>
                    </tbody>
                </table>
            r   r}   r   r   r   )Unnamed: 0_level_0r   )zUnnamed: 1_level_0r   r   )r   r	   r
   from_tuplesr&   r'   r   r   r   r   %test_parser_error_on_empty_header_row  s    z2TestReadHtml.test_parser_error_on_empty_header_rowc                 C   sL   | j dddd }tddidgd}|d jtdks<J t|| d S )	Na  <html>
            <body>
             <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1100#101</td>
                    </tr>
                </tbody>
            </table>
            </body>
        </html>#)decimalr   r   gClg0@r   r  )r   r	   r   rR   r&   r'   r   r   r   r   test_decimal_rows  s    zTestReadHtml.test_decimal_rowsargFc                 C   sJ   t d}tjt|d | j||d W d    n1 s<0    Y  d S )NzPassing a bool to header is invalid. Use header=None for no header or header=int or list-like of ints to specify the row(s) making up the column namesr0   r   )r   escaper7   r8   r   r   )rJ   rK   r-  r+   r   r   r   test_bool_header_arg  s
    z!TestReadHtml.test_bool_header_argc                 C   s6   | j ddtidd }tdddgi}t|| d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                    </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>r   r   r   z0.763z0.244)r   r  r	   r&   r'   r   r   r   r   test_converters  s    zTestReadHtml.test_convertersc                 C   s6   | j ddgdd }tddtjgi}t|| d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                   </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>gZd;?)Z	na_valuesr   r   g"~j?r   r	   rR   nanr&   r'   r   r   r   r   test_na_values  s    zTestReadHtml.test_na_valuesc                 C   sh   d}t dddgi}| j|ddd }t|| t dtjtjgi}| j|ddd }t|| d S )	Na  <table>
                        <thead>
                            <tr>
                            <th>a</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <td> N/A</td>
                            </tr>
                            <tr>
                            <td> NA</td>
                            </tr>
                        </tbody>
                    </table>r   zN/AZNAF)Zkeep_default_nar   T)r	   r   r&   r'   rR   r2  )rJ   Z	html_dataexpected_dfhtml_dfr   r   r   test_keep_default_na  s    z!TestReadHtml.test_keep_default_nac                 C   s>   |  dd }tddgtjtjggddgd}t|| d S )Nak  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                </tr>
                <tr>
                    <td></td>
                    <td></td>
                </tr>
            </table>
        r   r   r   r   r   r   r1  r   r   r   r   test_preserve_empty_rows3  s     z%TestReadHtml.test_preserve_empty_rowsc                 C   sT   |  dd }tddgddggddgddggd}tdd	gg|d
}t|| d S )NaU  
            <table>
                <thead>
                    <tr><th></th><th></tr>
                    <tr><th>A</th><th>B</th></tr>
                    <tr><th>a</th><th>b</th></tr>
                </thead>
                <tbody>
                    <tr><td>1</td><td>2</td></tr>
                </tbody>
            </table>
        r   r   r   r   r   r}   r  rx   r   r	  r
  r   r   r   ,test_ignore_empty_rows_when_inferring_headerK  s    $z9TestReadHtml.test_ignore_empty_rows_when_inferring_headerc                 C   sJ   t g dd}g dg dg|_|jdd}| |d }t|| d S )N))ZHillaryD   r  )ZBernieJ   r  )ZDonaldE   R)r   )r'  ZAgeZParty)NamezUnnamed: 1_level_1zUnnamed: 2_level_1Fr   r   )r	   rq   rX   r   r&   r'   )rJ   r4  r2   r5  r   r   r   test_multiple_header_rows`  s    z&TestReadHtml.test_multiple_header_rowsc                 C   s@   |dddd}| j |dd}t|ts*J t|d ts<J d S )Nr   r   r2   rA   r   r   )r   r   r   r	   )rJ   r   rD   ru   r   r   r   test_works_on_valid_markupm  s    z'TestReadHtml.test_works_on_valid_markupc                 C   s&   |dddd}| j |dddgd d S )	Nr   r   r2   rL   rg   r@   r.   r;   r   )rJ   r   rM   r   r   r   test_fallback_successs  s    z"TestReadHtml.test_fallback_successc                 C   s:   t ddd}ttjdd|d}| }d|v s6J d S )Nz
2000-01-01r  r  rP   r>  )r   r	   rR   rS   ZrandnrX   )rJ   rngrY   r   r   r   r   test_to_html_timestampx  s    z#TestReadHtml.test_to_html_timestampc                 C   s   t dddg}| }|jdd}|jdd}|jdd}|jdd}|jdd}d|v s`J ||kslJ ||ksxJ ||ksJ d	|v sJ d
|vsJ d|vsJ ||ksJ d S )Nr}   rx   )r   r   T)borderr   Fz border="1"z border="2"z border="0"z border)r	   rX   )rJ   rY   Zout_border_defaultZout_border_trueZout_border_explicit_defaultZout_border_nondefaultZout_border_zeroZout_border_falser   r   r   test_to_html_borderless  s    z$TestReadHtml.test_to_html_borderlesszdisplayed_only,exp0,exp1ZfooNzfoo  bar  baz  quxc                 C   sT   t d}| j||d}t|d | |d ur@t|d | nt|dksPJ d S )Na  <html>
          <body>
            <table>
              <tr>
                <td>
                  foo
                  <span style="display:none;text-align:center">bar</span>
                  <span style="display:none">baz</span>
                  <span style="display: none">qux</span>
                </td>
              </tr>
            </table>
            <table style="display: none">
              <tr>
                <td>foo</td>
              </tr>
            </table>
          </body>
        </html>)displayed_onlyr   r}   )r   r   r&   r'   r"   )rJ   rF  Zexp0Zexp1r   ru   r   r   r   test_displayed_only  s    	z TestReadHtml.test_displayed_onlyz\ignore:You provided Unicode markup but also provided a value for from_encoding.*:UserWarningc           
      C   s"  t j|}t j|d }|d\}}zt|d(}| j| |dd }W d    n1 sd0    Y  t|d,}| jt	| |dd }W d    n1 s0    Y  | j||dd }	t
|| t
||	 W n: ty   t rd|v sd|v rt   Y n0 d S )Nr   _rb)r   rB   Z16Z32)r   r   basenamesplitextsplitr   r   r   popr   r&   r'   	Exceptionr   r7   skip)
rJ   r   	base_pathrootrH  r   Zfobjfrom_stringZfrom_file_likefrom_filenamer   r   r   test_encode  s,    
((
zTestReadHtml.test_encodec                 C   s~   | j jddkrtd G dd dt}|d}|  |sBJ tjtdd |  | W d    n1 sp0    Y  d S )	Nr5   r@   zNot applicable for lxmlc                   @   s   e Zd Zdd ZdS )zFTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIOc                 S   s   dS NFr   r   r   r   r   seekable  s    zOTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIO.seekableN)__name__
__module____qualname__rV  r   r   r   r   UnseekableStringIO  s   rZ  z?
            <table><tr><td>spam<foobr />eggs</td></tr></table>z#passed a non-rewindable file objectr0   )r   keywordsgetr7   rO  r   r8   r=   )rJ   rZ  badr   r   r   test_parse_failure_unseekable  s    
z*TestReadHtml.test_parse_failure_unseekablec                 C   s>   G dd d}|d}|d}|  |s,J |  |s:J d S )Nc                   @   s<   e Zd ZddddZdddZdd Zd	d
 Zdd ZdS )z9TestReadHtml.test_parse_failure_rewinds.<locals>.MockFileN)returnc                 S   s   || _ d| _d S rU  )r   at_end)rJ   r   r   r   r   __init__  s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__init__c                 S   s   | j r
dn| j}d| _ |S )Nr   T)r`  r   )rJ   sizer   r   r   r   r     s    z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.readc                 S   s
   d| _ d S rU  )r`  )rJ   offsetr   r   r   seek  s    z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekc                 S   s   dS )NTr   r   r   r   r   rV    s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekablec                 S   s   dsJ d S rU  r   r   r   r   r   __iter__   s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__iter__)N)rW  rX  rY  ra  r   rd  rV  re  r   r   r   r   MockFile  s
   
rf  z/<table><tr><td>spam<br />eggs</td></tr></table>z2<table><tr><td>spam<foobr />eggs</td></tr></table>r   )rJ   rf  Zgoodr]  r   r   r   test_parse_failure_rewinds  s
    z'TestReadHtml.test_parse_failure_rewindsc                 C   s   G dd dt j}ttjj |dddd}|| j|fd}|| j|fd}|  |  | s\| rnq\d |j	  u r|j	u sn J d S )Nc                       s   e Zd Z fddZ  ZS )z@TestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThreadc              
      sD   zt    W n* ty8 } z|| _W Y d }~nd }~0 0 d | _d S r   )superrunrN  err)rJ   rj  	__class__r   r   ri    s
    zDTestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThread.run)rW  rX  rY  ri  __classcell__r   r   rk  r   ErrorThread  s   rn  r   r   r2   rA   )targetr)   )
	threadingThreadr   pandasr   r2   r   startis_aliverj  )rJ   r   rn  rD   Zhelper_thread1Zhelper_thread2r   r   r   test_importcheck_thread_safety
  s    
z+TestReadHtml.test_importcheck_thread_safetyc                 C   sB   |dddd}t |}| |d }| |d }t|| d S )Nr   r   r2   r3   r   )r   r   r&   r'   )rJ   r   Zfile_path_string	file_pathrb   rc   r   r   r   test_parse_path_object%  s
    z#TestReadHtml.test_parse_path_objectc                 C   s0   |  dd }tdggdgd}t|| d S )Nz
            <table>
                <tr>
                    <th>A</th>
                </tr>
                <tr>
                    <td>word1<br>word2</td>
                </tr>
            </table>
        r   zword1 word2r   r   r   r   r   r   r   test_parse_br_as_space-  s    z#TestReadHtml.test_parse_br_as_space)r#   bodyrz   r   c           	      C   s   d}g dg dg dg dg dg dd}|d	 }|d
 }|d }|dkrh|d }|d }|d }n4|dkrz|d }n"|dkr|d }n|dkr|d }| j ||dd }t||g|d}t|| d S )Na  
          <table>
            <tr>
              <th>HTTP</th>
              <th>FTP</th>
              <th><a href="https://en.wiktionary.org/wiki/linkless">Linkless</a></th>
            </tr>
            <tr>
              <td><a href="https://en.wikipedia.org/">Wikipedia</a></td>
              <td>SURROUNDING <a href="ftp://ftp.us.debian.org/">Debian</a> TEXT</td>
              <td>Linkless</td>
            </tr>
            <tfoot>
              <tr>
                <td><a href="https://en.wikipedia.org/wiki/Page_footer">Footer</a></td>
                <td>
                  Multiple <a href="1">links:</a> <a href="2">Only first captured.</a>
                </td>
              </tr>
            </tfoot>
          </table>
          )HTTPFTPLinkless))rz  N)r{  N)r|  z'https://en.wiktionary.org/wiki/linkless)	WikipediaSURROUNDING Debian TEXTr|  ))r}  zhttps://en.wikipedia.org/)r~  zftp://ftp.us.debian.org/)r|  N)Footer$Multiple links: Only first captured.N))r  z)https://en.wikipedia.org/wiki/Page_footer)r  1N)head_ignorehead_extractbody_ignorebody_extractfooter_ignorefooter_extractr  r  r  r#   r  r  r  ry  r   rz   Zextract_linksr   r   r   )	rJ   r-  Zgh_13141_dataZgh_13141_expectedZdata_expZfoot_expZhead_expr   r   r   r   r   test_extract_links@  s0    


zTestReadHtml.test_extract_linksc                 C   sB   d}t jt|d t|dd W d    n1 s40    Y  d S )NzY`extract_links` must be one of {None, "header", "footer", "body", "all"}, got "incorrect"r0   Z	incorrectr  r<   r   r   r   r   test_extract_links_bad  s    z#TestReadHtml.test_extract_links_badc                 C   s2   d}| j |ddd }tdgg}t|| d S )Nz
        <table>
          <tr>
            <td>
              <a href='https://google.com'>Google.com</a>
            </td>
          </tr>
        </table>
        r#   r  r   )z
Google.comzhttps://google.comr   )rJ   r   r   r   r   r   r    test_extract_links_all_no_header  s    	z-TestReadHtml.test_extract_links_all_no_header)arW  rX  rY  r7   fixturerK   rM   rO   r\   marknetworkr&   rd   re   ri   Zslowrm   rs   rv   rw   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r$  r&  r)  r,  parametrizer/  r0  r3  r6  r7  r8  r?  r@  rA  rC  rE  r	   rG  filterwarningsrT  r^  rg  ru  rw  rx  r  r  r  r   r   r   r   rH   a   s  	




		
	







&
/
D!




 


CrH   )1	functoolsr   	importlibr   r   r   r   r   pathlibr   r   rp  urllib.errorr   numpyrR   r7   Zpandas.compatr   Zpandas.util._test_decoratorsutilZ_test_decoratorstdrr  r	   r
   r   r   r   r   r   Zpandas._testingZ_testingr&   Zpandas.io.commonr   r   r   r  r   r,   Z
skip_if_nor:   r?   rE   r  r  r   rH   r   r   r   r   <module>   sJ   $	
	