a
    j=ice                     @   s  d dl m Z  d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z dd Zdd Zdd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zejdddgd d! Zd"d# Zd$d% Zd&d' Zd(d) Zejd*d+d,d-gfd.d d-gfgd/d0 Z d1d2 Z!d3d4 Z"ejd5g d6fg d7fg d8fg d9fg d:d6fg d:d9fg d:d8fg d:d9fgd;d< Z#d=d> Z$d?d@ Z%dAdB Z&dCdD Z'dEdF Z(dS )G    )datetimeN)	DataFrameIndex
MultiIndexSeries_testingc                 C   sV   t dtjdg| d}tjtdd  |jjdd d W d    n1 sH0    Y  d S )NfooBAD__barBADfoodtypezexpand must be True or Falsematch.*(BAD[_]+).*(BAD)expand)r   npnanpytestraises
ValueErrorstrextract)any_string_dtypevalues r   r/home/droni/.local/share/virtualenvs/DPS-5Je3_V2c/lib/python3.9/site-packages/pandas/tests/strings/test_extract.py+test_extract_expand_kwarg_wrong_type_raises   s    r   c                 C   s   t dtjdg| d}tdtjtjg| d}|jd}t|| |jjddd}t|| tddgtjtjgtjtjgg| d}|jjd	d
d}t|| d S )Nr   r	   r
   BAD__z.*(BAD[_]+).*Tr   BADr   F)r   r   r   r   r   r   tmassert_frame_equal)r   sexpectedresultr   r   r   test_extract_expand_kwarg   s    r$   c               
   C   s   t dtjddt dd ddg	} | jjddd	}tjtjg}td
dg|d
dg||||||g	}t	|| | jjddd	}t d
tjd
tjtjtjtjtjtjg	}t
|| d S )NaBAD_BAD	BAD_b_BADTr	             @r   Fr   BAD_r   z.*(BAD[_]+).*BAD)r   r   r   r   todayr   r   r   r   r    assert_series_equal)Zserr#   err"   r   r   r   &test_extract_expand_False_mixed_object(   s    ""r-   c                  C   sR   t g d} d}tjt|d  | jjddd W d    n1 sD0    Y  d S )N)A1A2A3ZA4ZB5z,only one regex group is supported with Indexr   ([AB])([123])Fr   )r   r   r   r   r   r   )idxmsgr   r   r    test_extract_expand_index_raises;   s    r4   c                 C   s   | g d|d}d}t jt|d  |jjddd W d    n1 sH0    Y  t jt|d  |jjddd W d    n1 s0    Y  d S )	Nr.   B2ZC3r
   "pattern contains no capture groupsr   
[ABC][123]Fr   
(?:[AB]).*r   r   r   r   r   index_or_seriesr   s_or_idxr3   r   r   r   ,test_extract_expand_no_capture_groups_raisesE   s    .r>   c                 C   sX   | ddg|d}|j jddd}| ddgd|d	}| tkrHt|| nt|| d S )
Nr.   r/   r
   (?P<uno>A)\dFr   Aunonamer   )r   r   r   r   r+   Zassert_index_equalr<   r   r=   r#   r"   r   r   r   (test_extract_expand_single_capture_groupR   s    rE   c                 C   s  t g d| d}|jjddd}t tjtjtjg| d}t|| |jjddd}ttjtjgtjtjgtjtjgg| d}t|| |jjddd}t dd	tjg| d}t|| |jjd
dd}tddgd	dgtjtjgg| d}t|| |jjddd}t dd	tjgd| d}t|| |jjddd}tddgd	dgtjtjggddg| d}t|| |jjddd}tddgd	dgtjtjggddg| d}t|| |jjddd}t dd	tjg| d}t|| t g d| d}|jjddd}tddgd	dgtjtjgg| d}t|| t g d| d}|jjddd}tddgd	dgtjdggddg| d}t|| t g d| d}|jjddd}tddgd	dgdtjggddg| d}t|| d S )Nr5   r
   (_)Fr   (_)(_)([AB])[123]r@   Br1   12(?P<letter>[AB])letterrB   !(?P<letter>[AB])(?P<number>[123])numbercolumnsr   ([AB])(?P<number>[123])r   ([AB])(?:[123])ZA11ZB22ZC33([AB])([123])(?:[123])r.   r6   3"(?P<letter>[AB])?(?P<number>[123])rW   r.   r6   C#(?P<letter>[ABC])(?P<number>[123])?rZ   )	r   r   r   r   r   r   r+   r   r    r   r!   r#   r"   r   r   r   "test_extract_expand_capture_groups^   sv    "r]   c                 C   s   g d}t |t |k r.| jtjjdd |d t | }t|||d}|jjddd}tdd	t	j
g||d}t|| |jjd
dd}tddgdd	gdt	j
ggddg||d}t|| d S )NrY   zIndex too short.)reasonindexr   (\d)Fr   rJ   rK   (?P<letter>\D)(?P<number>\d)?r@   rI   rZ   rM   rO   rQ   r`   r   )lennodeZ
add_markerr   markZxfailr   r   r   r   r   r   r+   r   r    )requestr`   r   datar!   r#   r"   r   r   r   (test_extract_expand_capture_groups_index   s     ri   c                 C   sD   t g dd| d}|jjddd}t g dd| d}t|| d S )	Na3b3c2ZbobrB   z(?P<sue>[a-z])Fr   abcZsue)r   r   r   r   r+   r\   r   r   r   ,test_extract_single_series_name_is_preserved   s    rr   c                 C   sZ   t dtjdg| d}|jjddd}tddgtjtjgtjtjgg| d}t|| d S )	Nr   r	   r
   r   Tr   r   r   )r   r   r   r   r   r   r   r    r\   r   r   r   test_extract_expand_True   s    rs   c               
   C   sn   t jt jg} tdt jddt dd ddg	}|jjddd}td	d
g| d	d
g| | | | | | g	}t	|| d S )Nr%   r&   Tr	   r'   r(   r   r   r)   r   )
r   r   r   r   r*   r   r   r   r   r    )r,   mixedr#   r"   r   r   r   %test_extract_expand_True_mixed_object   s     "ru   c                 C   s   | g d|d}d}t jt|d  |jjddd W d    n1 sH0    Y  t jt|d  |jjddd W d    n1 s0    Y  d S )	Nr5   r
   r7   r   r8   Tr   r9   r:   r;   r   r   r   4test_extract_expand_True_single_capture_group_raises   s    .rv   c                 C   sD   | ddg|d}|j jddd}tdddgi|d}t|| d S )	Nr.   r/   r
   r?   Tr   rA   r@   )r   r   r   r   r    rD   r   r   r   -test_extract_expand_True_single_capture_group   s    rw   rC   series_namec                 C   s  t g d| |d}|jjddd}ttjtjtjg|d}t|| |jjddd}ttjtjgtjtjgtjtjgg|d}t|| |jjddd}td	d
tjg|d}t|| |jjddd}td	dgd
dgtjtjgg|d}t|| |jjddd}tdd	d
tjgi|d}t|| |jjddd}td	dgd
dgtjtjggddg|d}t|| |jjddd}td	dgd
dgtjtjggddg|d}t|| |jjddd}td	d
tjg|d}t|| d S )Nr5   rB   rF   Tr   r
   rG   rH   r@   rI   r1   rJ   rK   rL   rM   rN   rO   rP   rR   r   rS   r   r   r   r   r   r   r   r    )rC   r   r!   r#   r"   r   r   r   test_extract_series  sJ    "rz   c                 C   s   t g d| d}|jjddd}tddgdd	gtjtjgg| d}t|| t g d
| d}|jjddd}tddgdd	gtjdggddg| d}t|| t g d| d}|jjddd}tddgdd	gdtjggddg| d}t|| d S )NrT   r
   rU   Tr   r@   rJ   rI   rK   rV   rX   rW   rM   rO   rP   rY   r[   rZ   ry   r\   r   r   r   test_extract_optional_groupsB  s,    r{   c                 C   s   g d}t | t |k r"td | d t | } t|| |d}|jjddd}tddtjg| |d}t	
|| |jjd	dd}td
dgddgdtjggddg| |d}t	
|| d S )NrY   zIndex too shortr_   ra   Tr   rJ   rK   rb   r@   rI   rZ   rM   rO   rc   )rd   r   skipr   r   r   r   r   r   r   r    )r`   r   rh   r!   r#   r"   r   r   r   +test_extract_dataframe_capture_groups_indexa  s     
r}   c                 C   sF   t g dd| d}|jjddd}tdg di| d	}t|| d S )
Nrj   rx   rB   (?P<letter>[a-z])Tr   rM   rn   r
   )r   r   r   r   r   r    r\   r   r   r   'test_extract_single_group_returns_frame{  s    r   c           
      C   s  dddddt jdg}g d}d}g d	}t|| d
}tjg ddd}t|||| d
}|jj|tj	d}t
|| tg d}	t||	| d}tjg ddd}t|||| d
}|jj|tj	d}t
|| t||	| d}d|j_d|_t|||| d
}|jj|tj	d}t
|| d S )Nzdave@google.comztdhock5@gmail.comzmaudelaperriere@gmail.comz'rob@gmail.com some text steve@gmail.comz%a@b.com some text c@d.com and e@f.com ))ZdaveZgooglecom)Ztdhock5gmailr   )Zmaudelaperrierer   r   )Zrobr   r   )Zstever   r   )ro   rp   r   )rq   dr   )efr   zY
    (?P<user>[a-z0-9]+)
    @
    (?P<domain>[a-z]+)
    \.
    (?P<tld>[a-z]{2,4})
    )userdomaintldr
   )r   r   r'   r      r   )   r   )r   r'   )   r   )r   r'   )r   r   Nr   names)flags))singleDave)r   Toby)r   Maude)multiplerobAndSteve)r   abcdef)nonemissing)r   emptyr_   ))r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r'   )r   r   r   )r   r   r'   )r   r   r   )NNr   )matchesdescription)r   r   r   )r   r   r   r   from_tuplesr   r   
extractallreVERBOSEr   r    r`   r   )
r   rh   Zexpected_tuplespatZexpected_columnsr!   expected_indexr"   r#   mir   r   r   test_extractall  sT    	

r   zpat,expected_namesrX   rM   rO   z([AB])?(?P<number>[123])c                 C   s\   t g d|d}|j| }tdtjdftjdfgtjg ddd||d	}t	|| d S )
N)r   r.   Z32r
   )r@   rJ   rW   rK   )r   r   r   r'   r   r   )r`   rQ   r   )
r   r   r   r   r   r   r   r   r   r    )r   Zexpected_namesr   r!   r#   r"   r   r   r   test_extractall_column_names  s    
r   c                 C   s   t g dd| d}tjg ddd}|jd}tdg d	i|| d
}t|| |jd}tg d	|| d
}t|| d S )Nrk   rl   Zd4c2rx   rB   r   r   r   r   r   r   r~   rM   )ro   rp   r   rq   r_   ([a-z]))r   r   r   r   r   r   r   r    )r   r!   r   r#   r"   r   r   r   test_extractall_single_group  s    
r   c                 C   sN   t g dd| d}|jd}tg dtjg ddd| d	}t|| d S )
N)Zab3Zabc3Zd4cd2rx   rB   z([a-z]+))ababcr   cdr   r   r   r_   )r   r   r   r   r   r   r   r    r\   r   r   r   ,test_extractall_single_group_with_quantifier  s    r   zdata, names)N)i1)Ni2)r   r   r   c           	         sB  t |  t |dkr*tt |d d}n$ fddt D }tj||d}t| d||d}tjg |d	 d}|jd
}tdg||d}t	
|| |jd}tddg||d}t	
|| |jd}tdg||d}t	
|| |jd}tddg||d}t	
|| |jd}tddg||d}t	
|| d S )Nr'   r   rC   c                 3   s    | ]}t |g d   V  qdS )r'   N)tuple).0inr   r   	<genexpr>(      z-test_extractall_no_matches.<locals>.<genexpr>r   rx   rC   r`   r   r   z(z)rc   z(z)(z)z(?P<first>z)firstz(?P<first>z)(?P<second>z)secondz(z)(?P<second>z))rd   r   ranger   r   r   r   r   r   r   r    )	rh   r   r   r`   Ztuplesr!   r   r#   r"   r   r   r   test_extractall_no_matches  s8    

r   c                 C   s   t g dd| d}|jd}tdg ditjg dd dgd	| d
}t|| | dkrtg dtg dddfD ]}|jd}t|| qvt g ddtg ddd| d}|jd}tdg ditjg dddgd	| d
}t|| d S )N)Za1a2b1c1ZxxxrB   z[ab](?P<digit>\d)digit)rJ   rK   rJ   )r   )r   r'   r   r   r   r_   objectr   Zs_name)XXyyzzZidx_namer   ))r   r   )r   r'   )r   r   )	r   r   r   r   r   r   r   r    r   )r   r!   r#   r"   r2   r   r   r   test_extractall_stringindexM  s:    


r   c                 C   sP   t g dd| d}tjtdd |jd W d    n1 sB0    Y  d S )Nr   rx   rB   zno capture groupsr   z[a-z])r   r   r   r   r   r   )r   r!   r   r   r   (test_extractall_no_capture_groups_raisess  s    r   c                  C   sz   t g dg ddd} | jjjddd}tg d}t|| | jjjd	dd}g d
}t|ddgd}t|| d S )Nr   )r0   ZB3ZD4rx   )r`   rC   z([A-Z])Tr   )r@   rI   Dz!(?P<letter>[A-Z])(?P<digit>[0-9])))r@   rW   )rI   rW   )r   4rM   r   )rQ   )r   r`   r   r   r   r   r    )r!   rr   Ze_listr   r   r   !test_extract_index_one_two_groups{  s    r   c                 C   s   t g dd| d}d}|jj|dd}|j|}|jddd	}t|| d
}|jj|dd}|j|}|jddd	}t|| d}|jj|dd}	|j|}|jddd	}t|	| d}
|jj|
dd}|j|
}|jddd	}t|| d S )Nrj   rx   rB   ([a-z])([0-9])Tr   r   r   level!(?P<letter>[a-z])(?P<digit>[0-9])(?P<group_name>[a-z])r   )r   r   r   r   xsr   r    )r   r!   pattern_two_nonameextract_two_nonameZhas_multi_indexZno_multi_indexpattern_two_namedextract_two_namedpattern_one_namedextract_one_namedpattern_one_nonameextract_one_nonamer   r   r   test_extractall_same_as_extract  s*    r   c                 C   s  t jg ddd}tg d|d| d}d}|jj|dd	}|j|}|jd
dd}t|| d}|jj|dd	}|j|}|jd
dd}t|| d}	|jj|	dd	}
|j|	}|jd
dd}t|
| d}|jj|dd	}|j|}|jd
dd}t|| d S )N))r@   r   )rI   r   )rZ   third)ZcapitalZordinalr   rj   rx   )r`   rC   r   r   Tr   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r    )r   r   r!   r   r   Zhas_match_indexZno_match_indexr   r   r   r   r   r   r   r   r   -test_extractall_same_as_extract_subject_index  s2    r   ))r   r   numpyr   r   Zpandasr   r   r   r   r   r   r   r$   r-   r4   r>   rE   r]   ri   rr   rs   ru   rv   rw   rf   Zparametrizerz   r{   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sd   	
S
9
U

	





+&