3
ga~                  @   s  d dl mZmZmZ d dlmZ d dlmZmZ d dl	Z	d dl
Z
d dlmZmZ d dlmZ ddlmZmZmZmZ dd	lmZ dd
lmZ edd eD Zedd eD Zedd eD ZeeddgB ZdZejredE dkrejddkst e
j!eddF e"d d Z#n
e
j!eZ#ddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3h Z$e
j!d4Z%i Z&G d5d6 d6e'Z(d7d8 Z)G d9d: d:e'Z*G d;d< d<e*Z+G d=d> d>e,Z-G d?d@ d@e'Z.G dAdB dBe'Z/dCdD Z0dS )G    )absolute_importdivisionunicode_literals)	text_type)http_clienturllibN)BytesIOStringIO)webencodings   )EOFspaceCharactersasciiLettersasciiUppercase)_ReparseException)_utilsc             C   s   g | ]}|j d qS )ascii)encode).0item r   _/var/www/tester-filtro-web/env/lib/python3.6/site-packages/pip/_vendor/html5lib/_inputstream.py
<listcomp>   s    r   c             C   s   g | ]}|j d qS )r   )r   )r   r   r   r   r   r      s    c             C   s   g | ]}|j d qS )r   )r   )r   r   r   r   r   r      s       >   <u   [---﷐-﷯￾￿🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿]]z"\uD800-\uDFFF"i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i i z[	- -/:-@\[-`{-~]c               @   sH   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dS )BufferedStreamzBuffering for streams that do not have buffering of their own

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    c             C   s   || _ g | _ddg| _d S )Nr   r   )streambufferposition)selfr   r   r   r   __init__:   s    zBufferedStream.__init__c             C   s@   d}x(| j d | jd  D ]}|t|7 }qW || jd 7 }|S )Nr   r   )r   r    len)r!   poschunkr   r   r   tell?   s
    zBufferedStream.tellc             C   sX   || j  kst|}d}x0t| j| |k rH|t| j| 8 }|d7 }qW ||g| _d S )Nr   r   )_bufferedBytesAssertionErrorr#   r   r    )r!   r$   offsetir   r   r   seekF   s    zBufferedStream.seekc             C   sT   | j s| j|S | jd t| j krF| jd t| j d krF| j|S | j|S d S )Nr   r   r   )r   _readStreamr    r#   _readFromBuffer)r!   bytesr   r   r   readO   s    

zBufferedStream.readc             C   s   t dd | jD S )Nc             S   s   g | ]}t |qS r   )r#   )r   r   r   r   r   r   Y   s    z1BufferedStream._bufferedBytes.<locals>.<listcomp>)sumr   )r!   r   r   r   r'   X   s    zBufferedStream._bufferedBytesc             C   s<   | j j|}| jj| | jd  d7  < t|| jd< |S )Nr   r   )r   r/   r   appendr    r#   )r!   r.   datar   r   r   r,   [   s
    zBufferedStream._readStreamc             C   s   |}g }| j d }| j d }x|t| jk r|dkr|dks@t| j| }|t|| krn|}||| g| _ n"t|| }|t|g| _ |d7 }|j||||   ||8 }d}qW |r|j| j| dj|S )Nr   r       )r    r#   r   r(   r1   r,   join)r!   r.   ZremainingBytesrvZbufferIndexZbufferOffsetZbufferedDataZbytesToReadr   r   r   r-   b   s&    


zBufferedStream._readFromBufferN)__name__
__module____qualname____doc__r"   r&   r+   r/   r'   r,   r-   r   r   r   r   r   3   s   		r   c             K   s   t | tjs(t | tjjr.t | jtjr.d}n&t| drJt | jdt	}n
t | t	}|rdd |D }|rvt
d| t| f|S t| f|S d S )NFr/   r   c             S   s   g | ]}|j d r|qS )Z	_encoding)endswith)r   xr   r   r   r      s    z#HTMLInputStream.<locals>.<listcomp>z3Cannot set an encoding with a unicode input, set %r)
isinstancer   HTTPResponser   responseaddbasefphasattrr/   r   	TypeErrorHTMLUnicodeInputStreamHTMLBinaryInputStream)sourcekwargsZ	isUnicode	encodingsr   r   r   HTMLInputStream}   s    

rH   c               @   sp   e Zd ZdZdZdd Zdd Zdd Zd	d
 Zdd Z	dd Z
dddZdd Zdd ZdddZdd ZdS )rC   zProvides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    i (  c             C   sZ   t jsd| _ntddkr$| j| _n| j| _dg| _tddf| _| j	|| _
| j  dS )a  Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        Nu   􏿿r   r   zutf-8certain)r   supports_lone_surrogatesreportCharacterErrorsr#   characterErrorsUCS4characterErrorsUCS2ZnewLineslookupEncodingcharEncoding
openStream
dataStreamreset)r!   rE   r   r   r   r"      s    
zHTMLUnicodeInputStream.__init__c             C   s.   d| _ d| _d| _g | _d| _d| _d | _d S )N r   )r%   	chunkSizechunkOffseterrorsprevNumLinesprevNumCols_bufferedCharacter)r!   r   r   r   rR      s    zHTMLUnicodeInputStream.resetc             C   s   t |dr|}nt|}|S )zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r/   )rA   r	   )r!   rE   r   r   r   r   rP      s    
z!HTMLUnicodeInputStream.openStreamc             C   sT   | j }|jdd|}| j| }|jdd|}|dkr@| j| }n||d  }||fS )N
r   r   r   )r%   countrW   rfindrX   )r!   r)   r%   ZnLinesZpositionLineZlastLinePosZpositionColumnr   r   r   	_position   s    
z HTMLUnicodeInputStream._positionc             C   s   | j | j\}}|d |fS )z:Returns (line, col) of the current position in the stream.r   )r]   rU   )r!   linecolr   r   r   r       s    zHTMLUnicodeInputStream.positionc             C   s6   | j | jkr| j stS | j }| j| }|d | _ |S )zo Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        r   )rU   rT   	readChunkr   r%   )r!   rU   charr   r   r   ra      s    

zHTMLUnicodeInputStream.charNc             C   s   |d kr| j }| j| j\| _| _d| _d| _d| _| jj|}| j	rX| j	| }d | _	n|s`dS t
|dkrt|d }|dksd|  kodkn  r|d | _	|d d }| jr| j| |jdd	}|jd
d	}|| _t
|| _dS )NrS   r   Fr      i   i  z
rZ   Tr   r   r   )_defaultChunkSizer]   rT   rW   rX   r%   rU   rQ   r/   rY   r#   ordrK   replace)r!   rT   r2   Zlastvr   r   r   r`      s0    
 


z HTMLUnicodeInputStream.readChunkc             C   s,   x&t ttj|D ]}| jjd qW d S )Nzinvalid-codepoint)ranger#   invalid_unicode_refindallrV   r1   )r!   r2   _r   r   r   rL     s    z*HTMLUnicodeInputStream.characterErrorsUCS4c             C   s   d}xt j|D ]}|rqt|j }|j }tj|||d  rttj|||d  }|tkrn| j	j
d d}q|dkr|dkr|t|d kr| j	j
d qd}| j	j
d qW d S )NF   zinvalid-codepointTi   i  r   )rh   finditerre   groupstartr   ZisSurrogatePairZsurrogatePairToCodepointnon_bmp_invalid_codepointsrV   r1   r#   )r!   r2   skipmatchZ	codepointr$   Zchar_valr   r   r   rM   #  s     z*HTMLUnicodeInputStream.characterErrorsUCS2Fc       
      C   s  yt ||f }W nl tk
r|   x|D ]}t|dk s&tq&W djdd |D }|s^d| }tjd|  }t ||f< Y nX g }x||j| j| j	}|dkr| j	| j
krP n0|j }|| j
kr|j| j| j	|  || _	P |j| j| j	d  | j sP qW dj|}	|	S )z Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.
           rS   c             S   s   g | ]}d t | qS )z\x%02x)re   )r   cr   r   r   r   H  s    z5HTMLUnicodeInputStream.charsUntil.<locals>.<listcomp>z^%sz[%s]+N)charsUntilRegExKeyErrorre   r(   r4   recompilerq   r%   rU   rT   endr1   r`   )
r!   
charactersZoppositecharsrs   Zregexr5   mrx   rr   r   r   
charsUntil:  s2    
 

z!HTMLUnicodeInputStream.charsUntilc             C   sT   |t k	rP| jdkr.|| j | _|  jd7  _n"|  jd8  _| j| j |ksPtd S )Nr   r   )r   rU   r%   rT   r(   )r!   ra   r   r   r   ungeti  s    
zHTMLUnicodeInputStream.unget)N)F)r6   r7   r8   r9   rd   r"   rR   rP   r]   r    ra   r`   rL   rM   r}   r~   r   r   r   r   rC      s    
&
/rC   c               @   sL   e Zd ZdZdddZdd Zd	d
 ZdddZdd Zdd Z	dd Z
dS )rD   zProvides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    Nwindows-1252Tc             C   sn   | j || _tj| | j d| _d| _|| _|| _|| _|| _	|| _
| j|| _| jd dk	sbt| j  dS )a  Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        i   d   r   N)rP   	rawStreamrC   r"   numBytesMetanumBytesChardetoverride_encodingtransport_encodingsame_origin_parent_encodinglikely_encodingdefault_encodingdetermineEncodingrO   r(   rR   )r!   rE   r   r   r   r   r   Z
useChardetr   r   r   r"     s    zHTMLBinaryInputStream.__init__c             C   s&   | j d jj| jd| _tj|  d S )Nr   rf   )rO   Z
codec_infostreamreaderr   rQ   rC   rR   )r!   r   r   r   rR     s    zHTMLBinaryInputStream.resetc             C   sL   t |dr|}nt|}y|j|j  W n tk
rF   t|}Y nX |S )zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r/   )rA   r   r+   r&   	Exceptionr   )r!   rE   r   r   r   r   rP     s    
z HTMLBinaryInputStream.openStreamc             C   s  | j  df}|d d k	r|S t| jdf}|d d k	r:|S t| jdf}|d d k	rX|S | j df}|d d k	rt|S t| jdf}|d d k	r|d jjd r|S t| jdf}|d d k	r|S |rtyddl	m
} W n tk
r   Y nX g }| }xF|js>| jj| j}t|ts t|s(P |j| |j| qW |j  t|jd }| jjd |d k	rt|dfS t| jdf}|d d k	r|S tddfS )NrI   r   	tentativezutf-16)UniversalDetectorencodingzwindows-1252)	detectBOMrN   r   r   detectEncodingMetar   name
startswithr   Z%pip._vendor.chardet.universaldetectorr   ImportErrordoner   r/   r   r<   r.   r(   r1   feedcloseresultr+   r   )r!   chardetrO   r   Zbuffersdetectorr   r   r   r   r   r     sR    


z'HTMLBinaryInputStream.determineEncodingc             C   s   | j d dkstt|}|d kr&d S |jdkrFtd}|d k	stnT|| j d krf| j d df| _ n4| jjd |df| _ | j  td| j d |f d S )	Nr   rI   utf-16beutf-16lezutf-8r   zEncoding changed from %s to %s)r   r   )rO   r(   rN   r   r   r+   rR   r   )r!   ZnewEncodingr   r   r   changeEncoding  s    

z$HTMLBinaryInputStream.changeEncodingc          
   C   s   t jdt jdt jdt jdt jdi}| jjd}t|t	s<t
|j|dd }d}|s~|j|}d}|s~|j|dd	 }d	}|r| jj| t|S | jjd
 dS dS )zAttempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return Nonezutf-8zutf-16lezutf-16bezutf-32lezutf-32be   N   rk   r   )codecsBOM_UTF8BOM_UTF16_LEBOM_UTF16_BEBOM_UTF32_LEBOM_UTF32_BEr   r/   r<   r.   r(   getr+   rN   )r!   ZbomDictstringr   r+   r   r   r   r     s$    
zHTMLBinaryInputStream.detectBOMc             C   sV   | j j| j}t|tstt|}| j jd |j }|dk	rR|j	dkrRt
d}|S )z9Report the encoding declared by the meta element
        r   Nutf-16beutf-16lezutf-8)r   r   )r   r/   r   r<   r.   r(   EncodingParserr+   getEncodingr   rN   )r!   r   parserr   r   r   r   r   3  s    z(HTMLBinaryInputStream.detectEncodingMeta)NNNNr   T)T)r6   r7   r8   r9   r"   rR   rP   r   r   r   r   r   r   r   r   rD   z  s     
(
>"rD   c               @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zeee
Zdd ZeeZefddZdd Zdd Zdd ZdS )EncodingByteszString-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raisedc             C   s   t |tsttj| |j S )N)r<   r.   r(   __new__lower)r!   valuer   r   r   r   F  s    zEncodingBytes.__new__c             C   s
   d| _ d S )Nr   r   )r]   )r!   r   r   r   r   r"   J  s    zEncodingBytes.__init__c             C   s   | S )Nr   )r!   r   r   r   __iter__N  s    zEncodingBytes.__iter__c             C   s>   | j d  }| _ |t| kr"tn|dk r.t| ||d  S )Nr   r   )r]   r#   StopIterationrB   )r!   pr   r   r   __next__Q  s    zEncodingBytes.__next__c             C   s   | j  S )N)r   )r!   r   r   r   nextY  s    zEncodingBytes.nextc             C   sB   | j }|t| krtn|dk r$t|d  | _ }| ||d  S )Nr   r   )r]   r#   r   rB   )r!   r   r   r   r   previous]  s    zEncodingBytes.previousc             C   s   | j t| krt|| _ d S )N)r]   r#   r   )r!   r    r   r   r   setPositionf  s    zEncodingBytes.setPositionc             C   s*   | j t| krt| j dkr"| j S d S d S )Nr   )r]   r#   r   )r!   r   r   r   getPositionk  s
    
zEncodingBytes.getPositionc             C   s   | | j | j d  S )Nr   )r    )r!   r   r   r   getCurrentByteu  s    zEncodingBytes.getCurrentBytec             C   sL   | j }x:|t| k r@| ||d  }||kr6|| _|S |d7 }qW || _dS )zSkip past a list of charactersr   N)r    r#   r]   )r!   rz   r   rs   r   r   r   rp   z  s    zEncodingBytes.skipc             C   sL   | j }x:|t| k r@| ||d  }||kr6|| _|S |d7 }qW || _d S )Nr   )r    r#   r]   )r!   rz   r   rs   r   r   r   	skipUntil  s    zEncodingBytes.skipUntilc             C   s(   | j || j}|r$|  jt|7  _|S )zLook for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone)r   r    r#   )r!   r.   r5   r   r   r   
matchBytes  s    zEncodingBytes.matchBytesc             C   s>   y | j || jt| d | _W n tk
r8   tY nX dS )zLook for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the matchr   T)indexr    r#   r]   
ValueErrorr   )r!   r.   r   r   r   jumpTo  s
     
zEncodingBytes.jumpToN)r6   r7   r8   r9   r   r"   r   r   r   r   r   r   propertyr    r   currentBytespaceCharactersBytesrp   r   r   r   r   r   r   r   r   B  s    	
	r   c               @   sX   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd ZdS )r   z?Mini parser for detecting character encoding from meta elementsc             C   s   t || _d| _dS )z3string - the data to work on for encoding detectionN)r   r2   r   )r!   r2   r   r   r   r"     s    
zEncodingParser.__init__c             C   s   d| j krd S d| jfd| jfd| jfd| jfd| jfd| jff}x| j D ]|}d}y| j jd W n tk
rx   P Y nX xD|D ]<\}}| j j|ry| }P W q tk
r   d}P Y qX qW |sJP qJW | j	S )	Ns   <metas   <!--s   </s   <!s   <?r   TF)
r2   handleComment
handleMetahandlePossibleEndTaghandleOtherhandlePossibleStartTagr   r   r   r   )r!   ZmethodDispatchrj   ZkeepParsingkeymethodr   r   r   r     s2    
zEncodingParser.getEncodingc             C   s   | j jdS )zSkip over commentss   -->)r2   r   )r!   r   r   r   r     s    zEncodingParser.handleCommentc             C   s   | j jtkrdS d}d }x| j }|d kr.dS |d dkr^|d dk}|r|d k	r|| _dS q|d dkr|d }t|}|d k	r|| _dS q|d dkrtt|d }|j }|d k	rt|}|d k	r|r|| _dS |}qW d S )	NTFr   s
   http-equivr   s   content-types   charsets   content)	r2   r   r   getAttributer   rN   ContentAttrParserr   parse)r!   Z	hasPragmaZpendingEncodingattrZtentativeEncodingcodecZcontentParserr   r   r   r     s:    zEncodingParser.handleMetac             C   s
   | j dS )NF)handlePossibleTag)r!   r   r   r   r     s    z%EncodingParser.handlePossibleStartTagc             C   s   t | j | jdS )NT)r   r2   r   )r!   r   r   r   r     s    
z#EncodingParser.handlePossibleEndTagc             C   sf   | j }|jtkr(|r$|j  | j  dS |jt}|dkrD|j  n| j }x|d k	r`| j }qNW dS )NTr   )r2   r   asciiLettersBytesr   r   r   spacesAngleBracketsr   )r!   ZendTagr2   rs   r   r   r   r   r     s    



z EncodingParser.handlePossibleTagc             C   s   | j jdS )Nr   )r2   r   )r!   r   r   r   r     s    zEncodingParser.handleOtherc             C   s  | j }|jttdgB }|dks2t|dks2t|d	kr>dS g }g }xt|dkrX|rXP nX|tkrl|j }P nD|d
krdj|dfS |tkr|j|j	  n|dkrdS |j| t
|}qHW |dkr|j  dj|dfS t
| |j }|dkrT|}xt
|}||kr(t
| dj|dj|fS |tkrB|j|j	  n
|j|  qW nJ|dkrldj|dfS |tkr|j|j	  n|dkrdS |j| x^t
|}|tkrdj|dj|fS |tkr|j|j	  n|dkrdS |j| qW dS )z_Return a name,value pair for the next attribute in the stream,
        if one is found, or None   /Nr   r      =r3      '   ")r   N)r   r   )r   r   )r2   rp   r   	frozensetr#   r(   r4   asciiUppercaseBytesr1   r   r   r   r   )r!   r2   rs   attrName	attrValue	quoteCharr   r   r   r     sh    










zEncodingParser.getAttributeN)r6   r7   r8   r9   r"   r   r   r   r   r   r   r   r   r   r   r   r   r     s   $r   c               @   s   e Zd Zdd Zdd ZdS )r   c             C   s   t |tst|| _d S )N)r<   r.   r(   r2   )r!   r2   r   r   r   r"   a  s    zContentAttrParser.__init__c             C   s  y| j jd | j  jd7  _| j j  | j jdks8d S | j  jd7  _| j j  | j jdkr| j j}| j  jd7  _| j j}| j j|r| j || j j S d S nF| j j}y| j jt | j || j j S  tk
r   | j |d  S X W n tk
r    d S X d S )Ns   charsetr   r   r   r   )r   r   )r2   r   r    rp   r   r   r   r   )r!   Z	quoteMarkZoldPositionr   r   r   r   e  s.    

zContentAttrParser.parseN)r6   r7   r8   r"   r   r   r   r   r   r   `  s   r   c             C   s`   t | tr.y| jd} W n tk
r,   dS X | dk	rXy
tj| S  tk
rT   dS X ndS dS )z{Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding.r   N)r<   r.   decodeUnicodeDecodeErrorr
   lookupAttributeError)r   r   r   r   rN     s    

rN   r   r   )1
__future__r   r   r   pip._vendor.sixr   pip._vendor.six.movesr   r   r   rv   ior   r	   pip._vendorr
   	constantsr   r   r   r   r   rS   r   r   r   r   r   r   Zinvalid_unicode_no_surrogaterJ   r[   r(   rw   evalrh   ro   Zascii_punctuation_rert   objectr   rH   rC   rD   r.   r   r   r   rN   r   r   r   r   <module>   sP   








J g Ib ='