HEX
Server: Apache
System: Linux zacp120.webway.host 4.18.0-553.50.1.lve.el8.x86_64 #1 SMP Thu Apr 17 19:10:24 UTC 2025 x86_64
User: govancoz (1003)
PHP: 8.3.26
Disabled: exec,system,passthru,shell_exec,proc_close,proc_open,dl,popen,show_source,posix_kill,posix_mkfifo,posix_getpwuid,posix_setpgid,posix_setsid,posix_setuid,posix_setgid,posix_seteuid,posix_setegid,posix_uname
Upload Files
File: //opt/alt/python37/lib/python3.7/site-packages/charset_normalizer/__pycache__/md.cpython-37.pyc
B

�bG�@s<ddlmZddlmZmZddlmZmZddlm	Z	m
Z
mZmZm
Z
mZmZmZmZmZmZmZmZmZmZGdd�d�ZGdd	�d	e�ZGd
d�de�ZGdd
�d
e�ZGdd�de�ZGdd�de�ZGdd�de�ZGdd�de�ZGdd�de�Z ee!ee!e"d�dd�Z#edd�d#e!e$e"e$d�d d!��Z%d"S)$�)�	lru_cache)�List�Optional�)�COMMON_SAFE_ASCII_CHARACTERS�UNICODE_SECONDARY_RANGE_KEYWORD)�is_accentuated�is_ascii�is_case_variable�is_cjk�is_emoticon�	is_hangul�is_hiragana�is_katakana�is_latin�is_punctuation�is_separator�	is_symbol�is_thai�
remove_accent�
unicode_rangec@sPeZdZdZeed�dd�Zedd�dd�Zdd�d	d
�Ze	e
d�dd��ZdS)
�MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    )�	character�returncCst�dS)z@
        Determine if given character should be fed in.
        N)�NotImplementedError)�selfr�r�F/opt/alt/python37/lib/python3.7/site-packages/charset_normalizer/md.py�eligibleszMessDetectorPlugin.eligibleNcCst�dS)z�
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        N)r)rrrrr�feed$szMessDetectorPlugin.feed)rcCst�dS)zB
        Permit to reset the plugin to the initial state.
        N)r)rrrr�reset+szMessDetectorPlugin.resetcCst�dS)z�
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        N)r)rrrr�ratio1szMessDetectorPlugin.ratio)�__name__�
__module__�__qualname__�__doc__�str�boolrrr �property�floatr!rrrrrsrc@sZeZdZdd�dd�Zeed�dd�Zedd�dd	�Zdd�d
d�Ze	e
d�dd
��ZdS)� TooManySymbolOrPunctuationPluginN)rcCs"d|_d|_d|_d|_d|_dS)NrF)�_punctuation_count�
_symbol_count�_character_count�_last_printable_charZ_frenzy_symbol_in_word)rrrr�__init__;s
z)TooManySymbolOrPunctuationPlugin.__init__)rrcCs|��S)N)�isprintable)rrrrrrCsz)TooManySymbolOrPunctuationPlugin.eligiblecCsp|jd7_||jkrf|tkrft|�r8|jd7_n.|��dkrft|�rft|�dkrf|jd7_||_dS)NrF�)	r-r.rrr+�isdigitrrr,)rrrrrrFs
z%TooManySymbolOrPunctuationPlugin.feedcCsd|_d|_d|_dS)Nr)r+r-r,)rrrrr Xsz&TooManySymbolOrPunctuationPlugin.resetcCs0|jdkrdS|j|j|j}|dkr,|SdS)Nrgg333333�?)r-r+r,)rZratio_of_punctuationrrrr!]s


z&TooManySymbolOrPunctuationPlugin.ratio)r"r#r$r/r&r'rrr r(r)r!rrrrr*:sr*c@sZeZdZdd�dd�Zeed�dd�Zedd�dd	�Zdd�d
d�Ze	e
d�dd
��ZdS)�TooManyAccentuatedPluginN)rcCsd|_d|_dS)Nr)r-�_accentuated_count)rrrrr/jsz!TooManyAccentuatedPlugin.__init__)rrcCs|��S)N)�isalpha)rrrrrrnsz!TooManyAccentuatedPlugin.eligiblecCs(|jd7_t|�r$|jd7_dS)Nr)r-rr4)rrrrrrqszTooManyAccentuatedPlugin.feedcCsd|_d|_dS)Nr)r-r4)rrrrr wszTooManyAccentuatedPlugin.resetcCs*|jdkrdS|j|j}|dkr&|SdS)Nrggffffff�?)r-r4)rZratio_of_accentuationrrrr!{s
zTooManyAccentuatedPlugin.ratio)r"r#r$r/r&r'rrr r(r)r!rrrrr3isr3c@sZeZdZdd�dd�Zeed�dd�Zedd�dd	�Zdd�d
d�Ze	e
d�dd
��ZdS)�UnprintablePluginN)rcCsd|_d|_dS)Nr)�_unprintable_countr-)rrrrr/�szUnprintablePlugin.__init__)rrcCsdS)NTr)rrrrrr�szUnprintablePlugin.eligiblecCs@|��dkr.|��dkr.|dkr.|jd7_|jd7_dS)NF�r)�isspacer0r7r-)rrrrrr�s
zUnprintablePlugin.feedcCs
d|_dS)Nr)r7)rrrrr �szUnprintablePlugin.resetcCs|jdkrdS|jd|jS)Nrg�)r-r7)rrrrr!�s
zUnprintablePlugin.ratio)r"r#r$r/r&r'rrr r(r)r!rrrrr6�s	r6c@sZeZdZdd�dd�Zeed�dd�Zedd�dd	�Zdd�d
d�Ze	e
d�dd
��ZdS)�SuspiciousDuplicateAccentPluginN)rcCsd|_d|_d|_dS)Nr)�_successive_countr-�_last_latin_character)rrrrr/�sz(SuspiciousDuplicateAccentPlugin.__init__)rrcCs|��ot|�S)N)r5r)rrrrrr�sz(SuspiciousDuplicateAccentPlugin.eligiblecCst|jd7_|jdk	rjt|�rjt|j�rj|��rJ|j��rJ|jd7_t|�t|j�krj|jd7_||_dS)Nr)r-r=r�isupperr<r)rrrrrr�s

z$SuspiciousDuplicateAccentPlugin.feedcCsd|_d|_d|_dS)Nr)r<r-r=)rrrrr �sz%SuspiciousDuplicateAccentPlugin.resetcCs|jdkrdS|jd|jS)Nrgr1)r-r<)rrrrr!�s
z%SuspiciousDuplicateAccentPlugin.ratio)r"r#r$r/r&r'rrr r(r)r!rrrrr;�sr;c@sZeZdZdd�dd�Zeed�dd�Zedd�dd	�Zdd�d
d�Ze	e
d�dd
��ZdS)�SuspiciousRangeN)rcCsd|_d|_d|_dS)Nr)�"_suspicious_successive_range_countr-�_last_printable_seen)rrrrr/�szSuspiciousRange.__init__)rrcCs|��S)N)r0)rrrrrr�szSuspiciousRange.eligiblecCsx|jd7_|��s&t|�s&|tkr0d|_dS|jdkrD||_dSt|j�}t|�}t||�rn|jd7_||_dS)Nr)r-r9rrrAr� is_suspiciously_successive_ranger@)rr�unicode_range_a�unicode_range_brrrr�s

zSuspiciousRange.feedcCsd|_d|_d|_dS)Nr)r-r@rA)rrrrr �szSuspiciousRange.resetcCs.|jdkrdS|jd|j}|dkr*dS|S)Nrgr1g�������?)r-r@)rZratio_of_suspicious_range_usagerrrr!�s
zSuspiciousRange.ratio)r"r#r$r/r&r'rrr r(r)r!rrrrr?�sr?c@sZeZdZdd�dd�Zeed�dd�Zedd�dd	�Zdd�d
d�Ze	e
d�dd
��ZdS)�SuperWeirdWordPluginN)rcCs:d|_d|_d|_d|_d|_d|_d|_d|_d|_dS)NrF�)	�_word_count�_bad_word_count�_foreign_long_count�_is_current_word_bad�_foreign_long_watchr-�_bad_character_count�_buffer�_buffer_accent_count)rrrrr/�szSuperWeirdWordPlugin.__init__)rrcCsdS)NTr)rrrrrrszSuperWeirdWordPlugin.eligiblecCs�|��r�d�|j|g�|_t|�r0|jd7_|jdkr�t|�dksNt|�r�t|�dkr�t|�dkr�t	|�dkr�t
|�dkr�t|�dkr�d|_dS|js�dS|��s�t
|�s�t|��r�|j�r�|jd7_t|j�}|j|7_|dk�r:|j|dk�rd|_t|jd��r:|jd���r:|jd7_d|_|dk�r`|j�r`|jd7_d|_|j�r�|jd7_|jt|j�7_d|_d|_d|_d	|_n6|d
k�r�|��dk�r�t|��r�d|_|j|7_dS)NrFrFT�g�(\���?����r>�<�>�|�=�-�_�~)r5�joinrMrrNrKrrr
rrrr9rrrG�lenr-rJr>rIrHrLr2r)rrZ
buffer_lengthrrrrsR


 

zSuperWeirdWordPlugin.feedcCs4d|_d|_d|_d|_d|_d|_d|_d|_dS)NrFFr)rMrJrKrHrGr-rLrI)rrrrr DszSuperWeirdWordPlugin.resetcCs$|jdkr|jdkrdS|j|jS)N�
rg)rGrIrLr-)rrrrr!NszSuperWeirdWordPlugin.ratio)r"r#r$r/r&r'rrr r(r)r!rrrrrE�s6
rEc@s^eZdZdZdd�dd�Zeed�dd�Zedd�d	d
�Zdd�dd�Z	e
ed�d
d��ZdS)�CjkInvalidStopPluginu�
    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
    can be easily detected. Searching for the overuse of '丅' and '丄'.
    N)rcCsd|_d|_dS)Nr)�_wrong_stop_count�_cjk_character_count)rrrrr/\szCjkInvalidStopPlugin.__init__)rrcCsdS)NTr)rrrrrr`szCjkInvalidStopPlugin.eligiblecCs4|dkr|jd7_dSt|�r0|jd7_dS)N>�丅�丄r)r]rr^)rrrrrrcs
zCjkInvalidStopPlugin.feedcCsd|_d|_dS)Nr)r]r^)rrrrr jszCjkInvalidStopPlugin.resetcCs|jdkrdS|j|jS)N�g)r^r])rrrrr!ns
zCjkInvalidStopPlugin.ratio)
r"r#r$r%r/r&r'rrr r(r)r!rrrrr\Vsr\c@sZeZdZdd�dd�Zeed�dd�Zedd�dd	�Zdd�d
d�Ze	e
d�dd
��ZdS)�ArchaicUpperLowerPluginN)rcCs.d|_d|_d|_d|_d|_d|_d|_dS)NFrT)�_buf�_character_count_since_last_sep�_successive_upper_lower_count�#_successive_upper_lower_count_finalr-�_last_alpha_seen�_current_ascii_only)rrrrr/vsz ArchaicUpperLowerPlugin.__init__)rrcCsdS)NTr)rrrrrr�sz ArchaicUpperLowerPlugin.eligiblecCs$|��ot|�}|dk}|r�|jdkr�|jdkrV|��dkrV|jdkrV|j|j7_d|_d|_d|_d|_|j	d7_	d|_dS|jdkr�t
|�dkr�d|_|jdk	r�|��r�|j��s�|��r�|j��r�|jdkr�|jd7_d|_q�d|_nd|_|j	d7_	|jd7_||_dS)NFr�@rTr1)
r5r
rdr2rhrfrergrcr-r	r>�islower)rrZis_concernedZ	chunk_seprrrr�s8




zArchaicUpperLowerPlugin.feedcCs.d|_d|_d|_d|_d|_d|_d|_dS)NrFT)r-rdrerfrgrcrh)rrrrr �szArchaicUpperLowerPlugin.resetcCs|jdkrdS|j|jS)Nrg)r-rf)rrrrr!�s
zArchaicUpperLowerPlugin.ratio)r"r#r$r/r&r'rrr r(r)r!rrrrrbus
*	rb)rCrDrcCs~|dks|dkrdS||kr dSd|kr4d|kr4dSd|ksDd|krHdSd|ksXd|krld|kshd|krldS|�d�|�d�}}x"|D]}|tkr�q�||kr�dSq�W|dk|dk}}|s�|r�d	|ks�d	|kr�dS|r�|r�dSd
|ks�d
|k�r"d	|k�sd	|k�r
dS|dk�s|dk�r"dSd	|k�sJd	|k�sJ|dk�rz|dk�rzd
|k�s^d
|k�rbdSd|k�svd|k�rzdSdS)za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    NTFZLatinZ	EmoticonsZ	Combining� )�Hiragana�KatakanaZCJKZHangulzBasic Latin)rmrlZPunctuationZForms)�splitr)rCrDZkeywords_range_aZkeywords_range_b�elZrange_a_jp_charsZrange_b_jp_charsrrrrB�sP


rBi)�maxsize皙�����?F)�decoded_sequence�maximum_threshold�debugrcCs�dd�t��D�}t|�d}d}|dkr0d}n|dkr>d}nd	}x|t|d
t|��D]f\}}x |D]}	|	�|�rd|	�|�qdW|dkr�||dks�||dkrVtdd
�|D��}||krVPqVW|r�x|D]}
t|
j	|
j
�q�Wt|d�S)zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    cSsg|]
}|��qSrr)�.0Zmd_classrrr�
<listcomp>szmess_ratio.<locals>.<listcomp>rgi� iri��
rcss|]}|jVqdS)N)r!)ru�dtrrr�	<genexpr>&szmess_ratio.<locals>.<genexpr>�)r�__subclasses__rZ�zip�rangerr�sum�print�	__class__r!�round)rrrsrtZ	detectors�lengthZmean_mess_ratioZ!intermediary_mean_mess_ratio_calcr�indexZdetectorrzrrr�
mess_ratios*	


r�N)rqF)&�	functoolsr�typingrrZconstantrr�utilsrr	r
rrr
rrrrrrrrrrr*r3r6r;r?rEr\rbr&r'rBr)r�rrrr�<module>s"D"/%6ZMD