a
    Sic-                     @   s  zd dl ZW n ey&   d dlZY n0 d dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZ d dlmZ dd	lmZmZmZmZmZmZ e
ed
eedddZe
ed
eedddZ e
ed
eee dddZ!e
ed
eedddZ"e
ed
eedddZ#e
ed
eedddZ$e
ed
eedddZ%e
ed
eedddZ&e
ed
eedddZ'e
ed
eedddZ(eedd d!Z)e
ed
eedd"d#Z*e
ed
eedd$d%Z+e
ed
eedd&d'Z,e
ed
eedd(d)Z-e
ed
eedd*d+Z.e
e/ed
eed,d-d.Z0e
ed
eedd/d0Z1dSe2e3ee d2d3d4Z4e
d5d
eed6d7d8Z5e2eee e2f d9d:d;Z6eed<d=d>Z7dTeeed@dAdBZ8eee dCdDdEZ9eee:dFdGdHZ;eeedFdIdJZ<dKej=dLfee3eddMdNdOZ>dUe2ee?e3eee2eee eeddf dP
dQdRZ@dS )V    N)IncrementalDecoder)aliases)	lru_cache)findall)	GeneratorListOptionalSetTupleUnion)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize)	characterreturnc                 C   sT   zt | }W n ty"   Y dS 0 d|v pRd|v pRd|v pRd|v pRd|v pRd|v S )NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDEunicodedataname
ValueErrorr   description r   T/var/www/html/django/DPS/env/lib/python3.9/site-packages/charset_normalizer/utils.pyis_accentuated   s    r   c                 C   s.   t | }|s| S |d}tt|d dS )N r      )r   decompositionsplitchrint)r   Z
decomposedcodesr   r   r   remove_accent,   s
    

r'   c                 C   s.   t | }t D ]\}}||v r|  S qdS )zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   Zcharacter_ord
range_nameZ	ord_ranger   r   r   unicode_range7   s
    
r+   c                 C   s,   zt | }W n ty"   Y dS 0 d|v S )NFZLATINr   r   r   r   r   is_latinE   s
    r,   c                 C   s(   z|  d W n ty"   Y dS 0 dS )NasciiFT)encodeUnicodeEncodeErrorr   r   r   r   is_asciiN   s
    r1   c                 C   s2   t | }d|v rdS t| }|d u r*dS d|v S )NPTFPunctuationr   categoryr+   r   character_categorycharacter_ranger   r   r   is_punctuationW   s    
r9   c                 C   s:   t | }d|v sd|v rdS t| }|d u r2dS d|v S )NSNTFFormsr4   r6   r   r   r   	is_symbolf   s    
r=   c                 C   s   t | }|d u rdS d|v S )NF	Emoticons)r+   )r   r8   r   r   r   is_emoticonu   s    r?   c                 C   s&   |   s| dv rdS t| }d|v S )N>   u   ｜+;<>,TZ)isspacer   r5   r   r7   r   r   r   is_separator   s    
rH   c                 C   s   |   |  kS N)islowerisupperr0   r   r   r   is_case_variable   s    rL   c                 C   s   t | }|dkS )NZCo)r   r5   rG   r   r   r   is_private_use_only   s    
rM   c                 C   s,   zt | }W n ty"   Y dS 0 d|v S )NFCJKr   r   Zcharacter_namer   r   r   is_cjk   s
    rP   c                 C   s,   zt | }W n ty"   Y dS 0 d|v S )NFZHIRAGANAr   rO   r   r   r   is_hiragana   s
    rQ   c                 C   s,   zt | }W n ty"   Y dS 0 d|v S )NFZKATAKANAr   rO   r   r   r   is_katakana   s
    rR   c                 C   s,   zt | }W n ty"   Y dS 0 d|v S )NFZHANGULr   rO   r   r   r   	is_hangul   s
    rS   c                 C   s,   zt | }W n ty"   Y dS 0 d|v S )NFZTHAIr   rO   r   r   r   is_thai   s
    rT   )r*   r   c                    s   t  fddtD S )Nc                 3   s   | ]}| v V  qd S rI   r   ).0keywordr*   r   r   	<genexpr>       z-is_unicode_range_secondary.<locals>.<genexpr>)anyr   rW   r   rW   r   is_unicode_range_secondary   s    r[   c                 C   s(   |   du o&|  du o&| dko&| dkS )NFu   ﻿)rF   isprintabler0   r   r   r   is_unprintable   s    
r^      )sequencesearch_zoner   c                 C   s   t | tstt| }tt| dt|| jddd}t|dkrHdS |D ]N}| 	dd}t
 D ]0\}}||kr|    S ||krh|    S qhqLdS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nr-   ignoreerrorsr   -_)
isinstancebytes	TypeErrorlenr   r   mindecodelowerreplacer   r)   )r`   ra   Zseq_lenresultsspecified_encodingencoding_aliasencoding_ianar   r   r   any_specified_encoding   s"    
rs      )r   r   c                 C   s    | dv pt td| jtS )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   	utf_8_sigutf_7	utf_16_be	utf_32_beutf_16	utf_16_le	utf_32_leutf_32utf_8encodings.{})
issubclass	importlibimport_moduleformatr   r   )r   r   r   r   is_multi_byte_encoding   s    
r   )r`   r   c                 C   sJ   t D ]@}t | }t|tr |g}|D ]}| |r$||f    S q$qdS )z9
    Identify and extract SIG/BOM in given sequence.
    )NrY   )r   rg   rh   
startswith)r`   iana_encodingmarksmarkr   r   r   identify_sig_or_bom  s    

r   )r   r   c                 C   s   | dvS )N>   r|   ry   r   )r   r   r   r   should_strip_sig_or_bom  s    r   T)cp_namestrictr   c                 C   sL   |   dd} t D ]\}}| ||fv r|  S q|rHtd| | S )Nre   rf   z Unable to retrieve IANA for '{}')rm   rn   r   r)   r   r   )r   r   rq   rr   r   r   r   	iana_name!  s    
r   )decoded_sequencer   c                 C   s4   t  }| D ] }t|}|d u r q
|| q
t|S rI   )setr+   addlist)r   Zrangesr   r8   r   r   r   
range_scan1  s    r   )iana_name_aiana_name_br   c           	      C   s   t | st |rdS td| j}td|j}|dd}|dd}d}tdD ]*}t|g}||||krX|d7 }qX|d S )	Ng        r~   rb   rc   r      r      )r   r   r   r   r   rangerh   rl   )	r   r   Z	decoder_aZ	decoder_bZid_aZid_bcharacter_match_countiZto_be_decodedr   r   r   cp_similarity?  s     



r   c                 C   s   | t v o|t |  v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   r   r   r   is_cp_similarX  s    
r   charset_normalizerz)%(asctime)s | %(levelname)s | %(message)s)r   levelformat_stringr   c                 C   s:   t | }|| t  }|t | || d S rI   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r   r   r   loggerhandlerr   r   r   set_logging_handlerc  s
    

r   )
	sequencesrr   offsets
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadis_multi_byte_decoderdecoded_payloadr   c	                 c   s>  |r8|du r8|D ]"}	||	|	|  }
|
s, q4|
V  qn|D ]}	|	| }|t | d krZq<| |	|	|  }|r~|du r~|| }|j||rdndd}
|r2|	dkr2| |	 dkr2t|d}|r2|
d | |vr2t|	|	d	 d
D ]L}| || }|r|du r|| }|j|dd}
|
d | |v r q2q|
V  q<d S )NF   rb   r   rc   r   rt   r!      )rj   rl   rk   r   )r   rr   r   r   r   r   r   r   r   r   chunkZ	chunk_endZcut_sequenceZchunk_partial_size_chkjr   r   r   cut_sequence_chunksq  s>    

r   )r_   )T)N)AZunicodedata2r   ImportErrorr   r   codecsr   Zencodings.aliasesr   	functoolsr   rer   typingr   r   r   r	   r
   r   Z_multibytecodecr   constantr   r   r   r   r   r   strboolr   r'   r+   r,   r1   r9   r=   r?   rH   rL   rM   rP   rQ   rR   rS   rT   rj   r[   r^   rh   r%   rs   r   r   r   r   r   floatr   r   INFOr   r   r   r   r   r   r   <module>   s     

							
  