HEX
Server: Apache
System: Linux zacp120.webway.host 4.18.0-553.50.1.lve.el8.x86_64 #1 SMP Thu Apr 17 19:10:24 UTC 2025 x86_64
User: govancoz (1003)
PHP: 8.3.26
Disabled: exec,system,passthru,shell_exec,proc_close,proc_open,dl,popen,show_source,posix_kill,posix_mkfifo,posix_getpwuid,posix_setpgid,posix_setsid,posix_setuid,posix_setgid,posix_seteuid,posix_setegid,posix_uname
Upload Files
File: //usr/local/lib/python3.7/site-packages/charset_normalizer/__pycache__/md.cpython-37.pyc
B

���gDN�@sbddlmZddlmZddlmZddlmZmZm	Z	ddl
mZmZm
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZGdd�d�ZGd	d
�d
e�ZGdd�de�ZGd
d�de�ZGdd�de�Z Gdd�de�Z!Gdd�de�Z"Gdd�de�Z#Gdd�de�Z$Gdd�de�Z%edd�dddd�d d!��Z&ed"d�d+d%d&dd&d'�d(d)��Z'd*S),�)�annotations)�	lru_cache)�	getLogger�)�COMMON_SAFE_ASCII_CHARACTERS�TRACE�UNICODE_SECONDARY_RANGE_KEYWORD)�is_accentuated�	is_arabic�is_arabic_isolated_form�is_case_variable�is_cjk�is_emoticon�	is_hangul�is_hiragana�is_katakana�is_latin�is_punctuation�is_separator�	is_symbol�is_thai�is_unprintable�
remove_accent�
unicode_rangec@sPeZdZdZddd�dd�Zddd�dd	�Zdd
�dd�Zed
d
�dd��ZdS)�MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    �str�bool)�	character�returncCst�dS)z@
        Determine if given character should be fed in.
        N)�NotImplementedError)�selfr�r!�?/usr/local/lib/python3.7/site-packages/charset_normalizer/md.py�eligible&szMessDetectorPlugin.eligible�NonecCst�dS)z�
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        N)r)r rr!r!r"�feed,szMessDetectorPlugin.feed)rcCst�dS)zB
        Permit to reset the plugin to the initial state.
        N)r)r r!r!r"�reset3szMessDetectorPlugin.reset�floatcCst�dS)z�
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        N)r)r r!r!r"�ratio9szMessDetectorPlugin.ratioN)	�__name__�
__module__�__qualname__�__doc__r#r%r&�propertyr(r!r!r!r"r s
rc@sZeZdZdd�dd�Zddd�dd	�Zddd�d
d�Zdd�dd
�Zedd�dd��ZdS)� TooManySymbolOrPunctuationPluginr$)rcCs"d|_d|_d|_d|_d|_dS)NrF)�_punctuation_count�
_symbol_count�_character_count�_last_printable_charZ_frenzy_symbol_in_word)r r!r!r"�__init__Cs
z)TooManySymbolOrPunctuationPlugin.__init__rr)rrcCs|��S)N)�isprintable)r rr!r!r"r#Ksz)TooManySymbolOrPunctuationPlugin.eligiblecCsp|jd7_||jkrf|tkrft|�r8|jd7_n.|��dkrft|�rft|�dkrf|jd7_||_dS)NrF�)	r1r2rrr/�isdigitrrr0)r rr!r!r"r%Ns
z%TooManySymbolOrPunctuationPlugin.feedcCsd|_d|_d|_dS)Nr)r/r1r0)r r!r!r"r&`sz&TooManySymbolOrPunctuationPlugin.resetr'cCs0|jdkrdS|j|j|j}|dkr,|SdS)Nrgg333333�?)r1r/r0)r Zratio_of_punctuationr!r!r"r(es


z&TooManySymbolOrPunctuationPlugin.ratioN)	r)r*r+r3r#r%r&r-r(r!r!r!r"r.Bs
r.c@sZeZdZdd�dd�Zddd�dd	�Zddd�d
d�Zdd�dd
�Zedd�dd��ZdS)�TooManyAccentuatedPluginr$)rcCsd|_d|_dS)Nr)r1�_accentuated_count)r r!r!r"r3rsz!TooManyAccentuatedPlugin.__init__rr)rrcCs|��S)N)�isalpha)r rr!r!r"r#vsz!TooManyAccentuatedPlugin.eligiblecCs(|jd7_t|�r$|jd7_dS)Nr)r1r	r8)r rr!r!r"r%yszTooManyAccentuatedPlugin.feedcCsd|_d|_dS)Nr)r1r8)r r!r!r"r&szTooManyAccentuatedPlugin.resetr'cCs*|jdkrdS|j|j}|dkr&|SdS)N�ggffffff�?)r1r8)r Zratio_of_accentuationr!r!r"r(�s
zTooManyAccentuatedPlugin.ratioN)	r)r*r+r3r#r%r&r-r(r!r!r!r"r7qs
r7c@sZeZdZdd�dd�Zddd�dd	�Zddd�d
d�Zdd�dd
�Zedd�dd��ZdS)�UnprintablePluginr$)rcCsd|_d|_dS)Nr)�_unprintable_countr1)r r!r!r"r3�szUnprintablePlugin.__init__rr)rrcCsdS)NTr!)r rr!r!r"r#�szUnprintablePlugin.eligiblecCs(t|�r|jd7_|jd7_dS)Nr)rr<r1)r rr!r!r"r%�szUnprintablePlugin.feedcCs
d|_dS)Nr)r<)r r!r!r"r&�szUnprintablePlugin.resetr'cCs|jdkrdS|jd|jS)Nrgr:)r1r<)r r!r!r"r(�s
zUnprintablePlugin.ratioN)	r)r*r+r3r#r%r&r-r(r!r!r!r"r;�s
r;c@sZeZdZdd�dd�Zddd�dd	�Zddd�d
d�Zdd�dd
�Zedd�dd��ZdS)�SuspiciousDuplicateAccentPluginr$)rcCsd|_d|_d|_dS)Nr)�_successive_countr1�_last_latin_character)r r!r!r"r3�sz(SuspiciousDuplicateAccentPlugin.__init__rr)rrcCs|��ot|�S)N)r9r)r rr!r!r"r#�sz(SuspiciousDuplicateAccentPlugin.eligiblecCst|jd7_|jdk	rjt|�rjt|j�rj|��rJ|j��rJ|jd7_t|�t|j�krj|jd7_||_dS)Nr)r1r?r	�isupperr>r)r rr!r!r"r%�s

z$SuspiciousDuplicateAccentPlugin.feedcCsd|_d|_d|_dS)Nr)r>r1r?)r r!r!r"r&�sz%SuspiciousDuplicateAccentPlugin.resetr'cCs|jdkrdS|jd|jS)Nrgr5)r1r>)r r!r!r"r(�s
z%SuspiciousDuplicateAccentPlugin.ratioN)	r)r*r+r3r#r%r&r-r(r!r!r!r"r=�s
r=c@sZeZdZdd�dd�Zddd�dd	�Zddd�d
d�Zdd�dd
�Zedd�dd��ZdS)�SuspiciousRanger$)rcCsd|_d|_d|_dS)Nr)�"_suspicious_successive_range_countr1�_last_printable_seen)r r!r!r"r3�szSuspiciousRange.__init__rr)rrcCs|��S)N)r4)r rr!r!r"r#�szSuspiciousRange.eligiblecCsx|jd7_|��s&t|�s&|tkr0d|_dS|jdkrD||_dSt|j�}t|�}t||�rn|jd7_||_dS)Nr)r1�isspacerrrCr� is_suspiciously_successive_rangerB)r r�unicode_range_a�unicode_range_br!r!r"r%�s


zSuspiciousRange.feedcCsd|_d|_d|_dS)Nr)r1rBrC)r r!r!r"r&�szSuspiciousRange.resetr'cCs"|jdkrdS|jd|j}|S)N�
gr5)r1rB)r Zratio_of_suspicious_range_usager!r!r"r(�s

zSuspiciousRange.ratioN)	r)r*r+r3r#r%r&r-r(r!r!r!r"rA�s
rAc@sZeZdZdd�dd�Zddd�dd	�Zddd�d
d�Zdd�dd
�Zedd�dd��ZdS)�SuperWeirdWordPluginr$)rcCs@d|_d|_d|_d|_d|_d|_d|_d|_d|_d|_	dS)NrF�)
�_word_count�_bad_word_count�_foreign_long_count�_is_current_word_bad�_foreign_long_watchr1�_bad_character_count�_buffer�_buffer_accent_count�_buffer_glyph_count)r r!r!r"r3�szSuperWeirdWordPlugin.__init__rr)rrcCsdS)NTr!)r rr!r!r"r#
szSuperWeirdWordPlugin.eligiblecCs�|��r�|j|7_t|�r,|jd7_|jdkr�t|�dksJt|�r�t|�dkr�t|�dkr�t|�dkr�t	|�dkr�t
|�dkr�d|_t|�s�t|�s�t|�s�t	|�s�t
|�r�|jd7_dS|js�dS|��s�t
|�s�t|��r\|j�r\|jd7_t|j�}|j|7_|dk�r�|j|dk�r:d|_npt|jd��r�|jd���r�tdd�|jD��dk�r�|jd7_d|_n |jdk�r�d|_|jd7_|d	k�r|j�rd
d�t|jtd|��D�}d}|�r�t|�|d
k�r�d}|�s|jd7_d|_|j�rB|jd7_|jt|j�7_d|_d|_d|_d|_d|_n6|dk�r�|��dk�r�t|��r�d|_|j|7_dS)NrFT�g�?���css|]}|��VqdS)N)r@)�.0�_r!r!r"�	<genexpr>7sz,SuperWeirdWordPlugin.feed.<locals>.<genexpr>�cSsg|]\}}|��r|�qSr!)r@)rV�c�ir!r!r"�
<listcomp>@sz-SuperWeirdWordPlugin.feed.<locals>.<listcomp>rg333333�?rJ>�-�>�<�=�|�~rW)r9rQr	rRrOrr
rrrrrSrDrrrK�lenr1rNr@�allrM�zip�rangerLrPr6r)r rZ
buffer_lengthZcamel_case_dstZprobable_camel_casedr!r!r"r%
sv




zSuperWeirdWordPlugin.feedcCs4d|_d|_d|_d|_d|_d|_d|_d|_dS)NrJFr)rQrNrOrLrKr1rPrM)r r!r!r"r&^szSuperWeirdWordPlugin.resetr'cCs$|jdkr|jdkrdS|j|jS)N�
rg)rKrMrPr1)r r!r!r"r(hszSuperWeirdWordPlugin.ratioN)	r)r*r+r3r#r%r&r-r(r!r!r!r"rI�s
Q
rIc@s^eZdZdZdd�dd�Zddd�d	d
�Zddd�dd�Zdd�d
d�Zedd�dd��Z	dS)�CjkInvalidStopPluginu�
    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
    can be easily detected. Searching for the overuse of '丅' and '丄'.
    r$)rcCsd|_d|_dS)Nr)�_wrong_stop_count�_cjk_character_count)r r!r!r"r3vszCjkInvalidStopPlugin.__init__rr)rrcCsdS)NTr!)r rr!r!r"r#zszCjkInvalidStopPlugin.eligiblecCs4|dkr|jd7_dSt|�r0|jd7_dS)N>�丄�丅r)rir
rj)r rr!r!r"r%}s
zCjkInvalidStopPlugin.feedcCsd|_d|_dS)Nr)rirj)r r!r!r"r&�szCjkInvalidStopPlugin.resetr'cCs|jdkrdS|j|jS)N�g)rjri)r r!r!r"r(�s
zCjkInvalidStopPlugin.ratioN)
r)r*r+r,r3r#r%r&r-r(r!r!r!r"rhpsrhc@sZeZdZdd�dd�Zddd�dd	�Zddd�d
d�Zdd�dd
�Zedd�dd��ZdS)�ArchaicUpperLowerPluginr$)rcCs.d|_d|_d|_d|_d|_d|_d|_dS)NFrT)�_buf�_character_count_since_last_sep�_successive_upper_lower_count�#_successive_upper_lower_count_finalr1�_last_alpha_seen�_current_ascii_only)r r!r!r"r3�sz ArchaicUpperLowerPlugin.__init__rr)rrcCsdS)NTr!)r rr!r!r"r#�sz ArchaicUpperLowerPlugin.eligiblecCs$|��ot|�}|dk}|r�|jdkr�|jdkrV|��dkrV|jdkrV|j|j7_d|_d|_d|_d|_|j	d7_	d|_dS|jdkr�|�
�dkr�d|_|jdk	r�|��r�|j��s�|��r�|j��r�|jdkr�|jd7_d|_q�d|_nd|_|j	d7_	|jd7_||_dS)NFr�@rTr5)
r9rrpr6rtrrrqrsror1�isasciir@�islower)r rZis_concernedZ	chunk_sepr!r!r"r%�s8




zArchaicUpperLowerPlugin.feedcCs.d|_d|_d|_d|_d|_d|_d|_dS)NrFT)r1rprqrrrsrort)r r!r!r"r&�szArchaicUpperLowerPlugin.resetr'cCs|jdkrdS|j|jS)Nrg)r1rr)r r!r!r"r(�s
zArchaicUpperLowerPlugin.ratioN)	r)r*r+r3r#r%r&r-r(r!r!r!r"rn�s

*	rnc@sZeZdZdd�dd�Zdd�dd�Zddd	�d
d�Zddd	�dd
�Zedd�dd��ZdS)�ArabicIsolatedFormPluginr$)rcCsd|_d|_dS)Nr)r1�_isolated_form_count)r r!r!r"r3�sz!ArabicIsolatedFormPlugin.__init__cCsd|_d|_dS)Nr)r1ry)r r!r!r"r&�szArabicIsolatedFormPlugin.resetrr)rrcCst|�S)N)r
)r rr!r!r"r#�sz!ArabicIsolatedFormPlugin.eligiblecCs(|jd7_t|�r$|jd7_dS)Nr)r1rry)r rr!r!r"r%�szArabicIsolatedFormPlugin.feedr'cCs|jdkrdS|j|j}|S)Nr:g)r1ry)r Zisolated_form_usager!r!r"r(�s
zArabicIsolatedFormPlugin.ratioN)	r)r*r+r3r&r#r%r-r(r!r!r!r"rx�s
rxi)�maxsizez
str | Noner)rFrGrcCs�|dks|dkrdS||kr dSd|kr4d|kr4dSd|ksDd|krHdSd|ksXd|krld|kshd|krldS|�d�|�d�}}x"|D]}|tkr�q�||kr�dSq�W|dk|dk}}|s�|r�d	|ks�d	|kr�dS|r�|r�dSd
|ks�d
|k�r"d	|k�sd	|k�r
dS|dk�s|dk�r"dSd	|k�sJd	|k�sJ|dk�r�|dk�r�d
|k�s^d
|k�rbdSd|k�svd|k�rzdS|dk�s�|dk�r�dSdS)za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    NTFZLatinZ	EmoticonsZ	Combining� )�Hiragana�Katakana�CJK�HangulzBasic Latin)r}r|�PunctuationZForms)�splitr)rFrGZkeywords_range_aZkeywords_range_b�elZrange_a_jp_charsZrange_b_jp_charsr!r!r"rE�sR


rEi皙�����?Frr')�decoded_sequence�maximum_threshold�debugrc	Csbdd�t��D�}t|�d}d}|dkr0d}n|dkr>d}nd	}x|t|d
t|��D]f\}}x |D]}	|	�|�rd|	�|�qdW|dkr�||dks�||dkrVtdd
�|D��}||krVPqVW|�rXtd�}
|
�	t
d|�d|�d|���t|�dk�r.|
�	t
d|dd����|
�	t
d|dd����x(|D] }|
�	t
|j�d|j����q4Wt
|d�S)zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    cSsg|]
}|��qSr!r!)rVZmd_classr!r!r"r\Jszmess_ratio.<locals>.<listcomp>rgi� iru��
rcss|]}|jVqdS)N)r()rV�dtr!r!r"rX`szmess_ratio.<locals>.<genexpr>Zcharset_normalizerzIMess-detector extended-analysis start. intermediary_mean_mess_ratio_calc=z mean_mess_ratio=z maximum_threshold=rmzStarting with: Nz
Ending with: i�z: �)r�__subclasses__rcrerfr#r%�sumr�logr�	__class__r(�round)r�r�r�Z	detectors�lengthZmean_mess_ratioZ!intermediary_mean_mess_ratio_calcr�index�detector�loggerr�r!r!r"�
mess_ratioAs8	


 r�N)r�F)(�
__future__r�	functoolsr�loggingrZconstantrrr�utilsr	r
rrr
rrrrrrrrrrrrrr.r7r;r=rArIrhrnrxrEr�r!r!r!r"�<module>s"L"/%1vLJ