3
g0                 @   s   d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ G d
d deZdS )a  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
    N   )CharSetGroupProber)
InputStateLanguageFilterProbingState)EscCharSetProber)Latin1Prober)MBCSGroupProber)SBCSGroupProberc            	   @   sn   e Zd ZdZdZejdZejdZejdZ	dddd	d
ddddZ
ejfddZdd Zdd Zdd ZdS )UniversalDetectoraq  
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    g?s   [-]s   (|~{)s   [-]zWindows-1252zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254zWindows-1257)z
iso-8859-1z
iso-8859-2z
iso-8859-5z
iso-8859-6z
iso-8859-7z
iso-8859-8z
iso-8859-9ziso-8859-13c             C   sN   d | _ g | _d | _d | _d | _d | _d | _|| _tj	t
| _d | _| j  d S )N)_esc_charset_prober_charset_probersresultdone	_got_data_input_state
_last_charlang_filterlogging	getLogger__name__logger_has_win_bytesreset)selfr    r   c/var/www/tester-filtro-web/env/lib/python3.6/site-packages/pip/_vendor/chardet/universaldetector.py__init__Q   s    zUniversalDetector.__init__c             C   sZ   dddd| _ d| _d| _d| _tj| _d| _| jr>| jj	  x| j
D ]}|j	  qFW dS )z
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        Ng        )encoding
confidencelanguageF    )r   r   r   r   r   
PURE_ASCIIr   r   r   r   r   )r   proberr   r   r   r   ^   s    
zUniversalDetector.resetc             C   s>  | j r
dS t|sdS t|ts(t|}| js|jtjrJdddd| _nv|jtj	tj
frldddd| _nT|jdrdddd| _n:|jd	rd
ddd| _n |jtjtjfrdddd| _d| _| jd dk	rd| _ dS | jtjkr.| jj|rtj| _n*| jtjkr.| jj| j| r.tj| _|dd | _| jtjkr| js^t| j| _| jj|tjkr:| jj| jj | jjd| _d| _ n| jtjkr:| jst | jg| _| jt!j"@ r| jj#t$  | jj#t%  x@| jD ]6}|j|tjkr|j|j |jd| _d| _ P qW | j&j|r:d| _'dS )a  
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        Nz	UTF-8-SIGg      ? )r   r   r    zUTF-32s     zX-ISO-10646-UCS-4-3412s     zX-ISO-10646-UCS-4-2143zUTF-16Tr   r   )(r   len
isinstance	bytearrayr   
startswithcodecsBOM_UTF8r   BOM_UTF32_LEBOM_UTF32_BEBOM_LEBOM_BEr   r   r"   HIGH_BYTE_DETECTORsearch	HIGH_BYTEESC_DETECTORr   Z	ESC_ASCIIr   r   r   feedr   ZFOUND_ITcharset_nameget_confidencer    r   r	   r   ZNON_CJKappendr
   r   WIN_BYTE_DETECTORr   )r   byte_strr#   r   r   r   r4   o   s|    





zUniversalDetector.feedc       	      C   s  | j r| jS d| _ | js&| jjd n| jtjkrBdddd| _n| jtjkrd}d}d}x,| j	D ]"}|slqb|j
 }||krb|}|}qbW |r|| jkr|j}|jj }|j
 }|jd	r| jr| jj||}|||jd| _| jj tjkrz| jd
 dkrz| jjd xn| j	D ]d}|s qt|trZxF|jD ] }| jjd|j|j|j
  q4W n| jjd|j|j|j
  qW | jS )z
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        Tzno data received!asciig      ?r$   )r   r   r    Ng        ziso-8859r   z no probers hit minimum thresholdz%s %s confidence = %s)r   r   r   r   debugr   r   r"   r2   r   r6   MINIMUM_THRESHOLDr5   lowerr)   r   ISO_WIN_MAPgetr    getEffectiveLevelr   DEBUGr'   r   Zprobers)	r   Zprober_confidenceZmax_prober_confidenceZ
max_proberr#   r5   lower_charset_namer   Zgroup_proberr   r   r   close   s`    	

zUniversalDetector.closeN)r   
__module____qualname____doc__r<   recompiler0   r3   r8   r>   r   ZALLr   r   r4   rC   r   r   r   r   r   3   s"   


mr   )rF   r*   r   rG   Zcharsetgroupproberr   enumsr   r   r   Z	escproberr   Zlatin1proberr   Zmbcsgroupproberr	   Zsbcsgroupproberr
   objectr   r   r   r   r   <module>$   s   