
^V{              1   @   s  d  d l  m Z m Z m Z d  d l m Z d  d l m Z d  d l Z d  d l	 Z	 d d l
 m Z m Z m Z m Z d d l
 m Z m Z d d l m Z d  d	 l m Z y d  d
 l m Z Wn e k
 r e Z Yn Xy d  d l m Z Wn( e k
 rGd d   d e  Z Yn Xe d d   e D  Z e d d   e D  Z e d d   e D  Z e e d d g  BZ d Z e j re	 j  e e! d   Z" n e	 j  e  Z" e# d d d d d d d d d d d  d! d" d# d$ d% d& d' d( d) d* d+ d, d- d. d/ d0 d1 d2 d3 d4 d5 g   Z$ e	 j  d6  Z% i  Z& Gd7 d8   d8 e  Z' d d9 d9 d: d;  Z( Gd< d=   d= e  Z) Gd> d?   d? e)  Z* Gd@ dA   dA e+  Z, GdB dC   dC e  Z- GdD dE   dE e  Z. dF dG   Z/ d S)H    )absolute_importdivisionunicode_literals)	text_type)http_clientN   )EOFspaceCharactersasciiLettersasciiUppercase)	encodingsReparseException)utils)StringIO)BytesIO)BufferedIOBasec               @   s   e  Z d  Z d S)r   N)__name__
__module____qualname__ r   r   ?/tmp/pip-build-9m6vxulb/pip/pip/_vendor/html5lib/inputstream.pyr      s   r   c             C   s   g  |  ] } | j  d    q S)ascii)encode).0itemr   r   r   
<listcomp>   s   	 r   c             C   s   g  |  ] } | j  d    q S)r   )r   )r   r   r   r   r   r      s   	 c             C   s   g  |  ] } | j  d    q S)r   )r   )r   r   r   r   r   r      s   	    >   <u   [---﷐-﷯￾￿🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿]z"\uD800-\uDFFF"i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i i z[	- -/:-@[-`{-~]c               @   sj   e  Z d  Z d Z d d   Z d d   Z d d   Z d d	   Z d
 d   Z d d   Z	 d d   Z
 d S)BufferedStreamzBuffering for streams that do not have buffering of their own

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    c             C   s%   | |  _  g  |  _ d d g |  _ d  S)Nr   r   )streambufferposition)selfr    r   r   r   __init__A   s    		zBufferedStream.__init__c             C   sP   d } x2 |  j  d  |  j d  D] } | t |  7} q! W| |  j d 7} | S)Nr   r   )r!   r"   len)r#   poschunkr   r   r   tellF   s
    !zBufferedStream.tellc             C   sx   | |  j    k s t  | } d } x> t |  j |  | k  rd | t |  j |  8} | d 7} q' W| | g |  _ d  S)Nr   r   )_bufferedBytesAssertionErrorr%   r!   r"   )r#   r&   offsetir   r   r   seekM   s    zBufferedStream.seekc             C   sp   |  j  s |  j |  S|  j d t |  j   k r_ |  j d t |  j  d  k r_ |  j |  S|  j |  Sd  S)Nr   r   r   )r!   _readStreamr"   r%   _readFromBuffer)r#   bytesr   r   r   readV   s    	 zBufferedStream.readc             C   s   t  d d   |  j D  S)Nc             S   s   g  |  ] } t  |   q Sr   )r%   )r   r   r   r   r   r   `   s   	 z1BufferedStream._bufferedBytes.<locals>.<listcomp>)sumr!   )r#   r   r   r   r)   _   s    zBufferedStream._bufferedBytesc             C   sJ   |  j  j |  } |  j j |  |  j d d 7<t |  |  j d <| S)Nr   r   )r    r1   r!   appendr"   r%   )r#   r0   datar   r   r   r.   b   s
    zBufferedStream._readStreamc             C   s%  | } g  } |  j  d } |  j  d } x | t |  j  k  r | d k r | d k s\ t  |  j | } | t |  | k r | } | | | g |  _  n/ t |  | } | t |  g |  _  | d 7} | j | | | |   | | 8} d } q) W| r| j |  j |   n  d j |  S)Nr   r       )r"   r%   r!   r*   r3   r.   join)r#   r0   remainingBytesrvbufferIndexbufferOffsetbufferedDatabytesToReadr   r   r   r/   i   s&    $


zBufferedStream._readFromBufferN)r   r   r   __doc__r$   r(   r-   r1   r)   r.   r/   r   r   r   r   r   :   s   		r   Tc             C   s   t  |  t j  r d } n9 t |  d  rE t  |  j d  t  } n t  |  t  } | r | d  k	 ru t d   n  t |   St |  | | |  Sd  S)NFr1   r   z7Cannot explicitly set an encoding with a unicode string)	
isinstancer   HTTPResponsehasattrr1   r   	TypeErrorHTMLUnicodeInputStreamHTMLBinaryInputStream)sourceencoding	parseMetachardet	isUnicoder   r   r   HTMLInputStream   s    	
rI   c               @   s   e  Z d  Z d Z d Z d d   Z d d   Z d d   Z d	 d
   Z d d   Z	 d d   Z
 d d d  Z d d   Z d d   Z d d d  Z d d   Z d S)rB   zProvides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    i (  c             C   s   t  j s d |  _ d |  _ n] t d  d k rW |  j |  _ t j t d   |  _ n$ |  j	 |  _ t j t d   |  _ d g |  _
 d	 |  _ |  j |  |  _ |  j   d S)
a  Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        parseMeta - Look for a <meta> element containing encoding information

        Nu   􏿿r   z"[\uD800-\uDFFF]"zJ"([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])"r   utf-8certain)rJ   zcertain)r   supports_lone_surrogatesreportCharacterErrorsreplaceCharactersRegexpr%   characterErrorsUCS4recompileevalcharacterErrorsUCS2newLinescharEncoding
openStream
dataStreamreset)r#   rD   r   r   r   r$      s    			zHTMLUnicodeInputStream.__init__c             C   sC   d |  _  d |  _ d |  _ g  |  _ d |  _ d |  _ d  |  _ d  S)N r   )r'   	chunkSizechunkOffseterrorsprevNumLinesprevNumCols_bufferedCharacter)r#   r   r   r   rX      s    						zHTMLUnicodeInputStream.resetc             C   s(   t  | d  r | } n t |  } | S)zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r1   )r@   r   )r#   rD   r    r   r   r   rV      s    	z!HTMLUnicodeInputStream.openStreamc             C   st   |  j  } | j d d |  } |  j | } | j d d |  } | d k r\ |  j | } n | | d } | | f S)N
r   r   r   )r'   countr]   rfindr^   )r#   r+   r'   nLinespositionLinelastLinePospositionColumnr   r   r   	_position   s    	z HTMLUnicodeInputStream._positionc             C   s&   |  j  |  j  \ } } | d | f S)z:Returns (line, col) of the current position in the stream.r   )rg   r[   )r#   linecolr   r   r   r"      s    zHTMLUnicodeInputStream.positionc             C   sL   |  j  |  j k r% |  j   s% t Sn  |  j  } |  j | } | d |  _  | S)zo Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        r   )r[   rZ   	readChunkr   r'   )r#   r[   charr   r   r   rk      s    	zHTMLUnicodeInputStream.charNc             C   sj  | d  k r |  j  } n  |  j |  j  \ |  _ |  _ d |  _ d |  _ d |  _ |  j j |  } |  j	 r |  j	 | } d  |  _	 n
 | s d St
 |  d k r t | d  } | d k s d | k o d k n r | d |  _	 | d  d  } q n  |  j r*|  j |  |  j j d |  } n  | j d	 d
  } | j d d
  } | |  _ t
 |  |  _ d S)NrY   r   Fr      i   i  u   �z
r`   Tr   r   r   )_defaultChunkSizerg   rZ   r]   r^   r'   r[   rW   r1   r_   r%   ordrM   rN   subreplace)r#   rZ   r4   lastvr   r   r   rj     s2    				(		z HTMLUnicodeInputStream.readChunkc             C   s:   x3 t  t t j |    D] } |  j j d  q Wd  S)Nzinvalid-codepoint)ranger%   invalid_unicode_refindallr\   r3   )r#   r4   r,   r   r   r   rO   +  s    "z*HTMLUnicodeInputStream.characterErrorsUCS4c             C   s  d } x t  j |  D] } | r( q n  t | j    } | j   } t j | | | d   r t j | | | d   } | t k r |  j	 j
 d  n  d } q | d k r | d k r | t |  d k r |  j	 j
 d  q d } |  j	 j
 d  q Wd  S)NF   zinvalid-codepointTi   i  r   )rt   finditerro   groupstartr   isSurrogatePairsurrogatePairToCodepointnon_bmp_invalid_codepointsr\   r3   r%   )r#   r4   skipmatch	codepointr&   char_valr   r   r   rS   /  s     	z*HTMLUnicodeInputStream.characterErrorsUCS2Fc       
      C   sq  y t  | | f } Wn t k
 r x& | D] } t |  d k  s+ t  q+ Wd j d d   | D  } | s| d | } n  t j d |  } t  | | f <Yn Xg  } x | j |  j |  j	  } | d k r |  j	 |  j
 k r-Pq-nE | j   } | |  j
 k r-| j |  j |  j	 |   | |  _	 Pn  | j |  j |  j	 d   |  j   s Pq q Wd j |  }	 |	 S)z Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.
           rY   c             S   s    g  |  ] } d  t  |   q S)z\x%02x)ro   )r   cr   r   r   r   T  s   	 z5HTMLUnicodeInputStream.charsUntil.<locals>.<listcomp>z^%sz[%s]+N)charsUntilRegExKeyErrorro   r*   r6   rP   rQ   r~   r'   r[   rZ   endr3   rj   )
r#   
charactersoppositecharsr   regexr8   mr   rr   r   r   
charsUntilF  s2    &	z!HTMLUnicodeInputStream.charsUntilc             C   so   | d  k	 rk |  j  d k r= | |  j |  _ |  j d 7_ qk |  j  d 8_  |  j |  j  | k sk t  n  d  S)Nr   r   )r[   r'   rZ   r*   )r#   rk   r   r   r   ungetu  s    zHTMLUnicodeInputStream.unget)r   r   r   r=   rn   r$   rX   rV   rg   r"   rk   rj   rO   rS   r   r   r   r   r   r   rB      s   &)/rB   c               @   sy   e  Z d  Z d Z d d d d d  Z d d   Z d d	   Z d d d
 d  Z d d   Z d d   Z	 d d   Z
 d S)rC   zProvides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    NTc             C   s   |  j  |  |  _ t j |  |  j  t |  d f |  _ d |  _ d |  _ d |  _ |  j d d k r |  j	 | |  |  _ n  |  j
   d S)a  Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        parseMeta - Look for a <meta> element containing encoding information

        rK   i   d   zwindows-1252r   N)rV   	rawStreamrB   r$   	codecNamerU   numBytesMetanumBytesChardetdefaultEncodingdetectEncodingrX   )r#   rD   rE   rF   rG   r   r   r   r$     s    			zHTMLBinaryInputStream.__init__c             C   s6   t  j |  j d  |  j d  |  _ t j |   d  S)Nr   rq   )codecs	getreaderrU   r   rW   rB   rX   )r#   r   r   r   rX     s    zHTMLBinaryInputStream.resetc          	   C   sV   t  | d  r | } n t |  } y | j | j    Wn t |  } Yn X| S)zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r1   )r@   r   r-   r(   r   )r#   rD   r    r   r   r   rV     s    	z HTMLBinaryInputStream.openStreamc       
      C   s  |  j    } d } | d  k r9 | r9 |  j   } d } n  | d  k r:| r:d } y y d d l m } Wn" t k
 r d d l m } Yn Xg  } |   } x[ | j s |  j j |  j	  } t
 | t  s t  | s Pn  | j |  | j |  q W| j   | j d } |  j j d  Wq:t k
 r6Yq:Xn  | d  k rXd } |  j } n  i d d 6}	 | j   |	 k r|	 | j   } n  | | f S)NrK   	tentativer   )UniversalDetectorrE   zwindows-1252z
iso-8859-1)	detectBOMdetectEncodingMetacharade.universaldetectorr   ImportErrorchardet.universaldetectordoner   r1   r   r>   r0   r*   r3   feedcloseresultr-   r   lower)
r#   rF   rG   rE   
confidencer   buffersdetectorr!   encodingSubr   r   r   r     sB    		
z$HTMLBinaryInputStream.detectEncodingc             C   s   |  j  d d k s t  t |  } | d	 k r: d } n  | d  k rJ d  S| |  j  d k rv |  j  d d f |  _  nF |  j j d  |  j   | d f |  _  t d |  j  d | f   d  S)
Nr   rK   utf-16	utf-16-be	utf-16-lezutf-8r   zEncoding changed from %s to %s)r   r   r   )rU   r*   r   r   r-   rX   r   )r#   newEncodingr   r   r   changeEncoding  s    	
z$HTMLBinaryInputStream.changeEncodingc             C   s   i d t  j 6d t  j 6d t  j 6d t  j 6d t  j 6} |  j j d  } t | t	  s_ t
  | j | d d   } d } | s | j |  } d } | s | j | d d	   } d	 } q n  |  j j | r | p d
  | S)zAttempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return Nonezutf-8z	utf-16-lez	utf-16-bez	utf-32-lez	utf-32-be   N   rv   r   )r   BOM_UTF8BOM_UTF16_LEBOM_UTF16_BEBOM_UTF32_LEBOM_UTF32_BEr   r1   r>   r0   r*   getr-   )r#   bomDictstringrE   r-   r   r   r   r   
  s     
zHTMLBinaryInputStream.detectBOMc             C   sk   |  j  j |  j  } t | t  s* t  t |  } |  j  j d  | j   } | d k rg d } n  | S)z9Report the encoding declared by the meta element
        r   utf-16	utf-16-be	utf-16-lezutf-8)r   r   r   )	r   r1   r   r>   r0   r*   EncodingParserr-   getEncoding)r#   r!   parserrE   r   r   r   r   )  s    	z(HTMLBinaryInputStream.detectEncodingMeta)r   r   r   r=   r$   rX   rV   r   r   r   r   r   r   r   r   rC     s   (-rC   c               @   s   e  Z d  Z d Z d d   Z d d   Z d d   Z d d	   Z d
 d   Z d d   Z	 d d   Z
 d d   Z e e e
  Z d d   Z e e  Z e d d  Z d d   Z d d   Z d d   Z d S)EncodingByteszString-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raisedc             C   s+   t  | t  s t  t j |  | j    S)N)r>   r0   r*   __new__r   )r#   valuer   r   r   r   <  s    zEncodingBytes.__new__c             C   s   d |  _  d  S)Nr   r   )rg   )r#   r   r   r   r   r$   @  s    zEncodingBytes.__init__c             C   s   |  S)Nr   )r#   r   r   r   __iter__C  s    zEncodingBytes.__iter__c             C   sV   |  j  d } |  _  | t |   k r/ t  n | d k  rD t  n  |  | | d  S)Nr   r   )rg   r%   StopIterationrA   )r#   pr   r   r   __next__F  s    		zEncodingBytes.__next__c             C   s
   |  j    S)N)r   )r#   r   r   r   nextN  s    zEncodingBytes.nextc             C   s\   |  j  } | t |   k r$ t  n | d k  r9 t  n  | d |  _  } |  | | d  S)Nr   r   )rg   r%   r   rA   )r#   r   r   r   r   previousR  s    			zEncodingBytes.previousc             C   s+   |  j  t |   k r t  n  | |  _  d  S)N)rg   r%   r   )r#   r"   r   r   r   setPosition[  s    	zEncodingBytes.setPositionc             C   s<   |  j  t |   k r t  n  |  j  d k r4 |  j  Sd  Sd  S)Nr   )rg   r%   r   )r#   r   r   r   getPosition`  s
    	zEncodingBytes.getPositionc             C   s   |  |  j  |  j  d  S)Nr   )r"   )r#   r   r   r   getCurrentBytej  s    zEncodingBytes.getCurrentBytec             C   sf   |  j  } xM | t |   k  rX |  | | d  } | | k rK | |  _ | S| d 7} q W| |  _ d S)zSkip past a list of charactersr   N)r"   r%   rg   )r#   r   r   r   r   r   r   r}   o  s    			zEncodingBytes.skipc             C   sf   |  j  } xM | t |   k  rX |  | | d  } | | k rK | |  _ | S| d 7} q W| |  _ d  S)Nr   )r"   r%   rg   )r#   r   r   r   r   r   r   	skipUntil{  s    			zEncodingBytes.skipUntilc             C   sT   |  j  } |  | | t |   } | j |  } | rP |  j  t |  7_  n  | S)zLook for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone)r"   r%   
startswith)r#   r0   r   r4   r8   r   r   r   
matchBytes  s    	zEncodingBytes.matchBytesc             C   sn   |  |  j  d  j |  } | d k rd |  j d k rC d |  _ n  |  j | t |  d 7_ d St  d S)zLook for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the matchNr   r   Tr   r   )r"   findrg   r%   r   )r#   r0   newPositionr   r   r   jumpTo  s    zEncodingBytes.jumpToN)r   r   r   r=   r   r$   r   r   r   r   r   r   propertyr"   r   currentBytespaceCharactersBytesr}   r   r   r   r   r   r   r   r   8  s    	r   c               @   s   e  Z d  Z d Z d d   Z d d   Z d d   Z d d	   Z d
 d   Z d d   Z	 d d   Z
 d d   Z d d   Z d S)r   z?Mini parser for detecting character encoding from meta elementsc             C   s   t  |  |  _ d |  _ d S)z3string - the data to work on for encoding detectionN)r   r4   rE   )r#   r4   r   r   r   r$     s    zEncodingParser.__init__c             C   s   d |  j  f d |  j f d |  j f d |  j f d |  j f d |  j f f } xw |  j D]l } d } xS | D]K \ } } |  j j |  rk y |   } PWq t k
 r d } PYq Xqk qk W| sX PqX qX W|  j S)	Ns   <!--s   <metas   </s   <!s   <?r   TF)	handleComment
handleMetahandlePossibleEndTaghandleOtherhandlePossibleStartTagr4   r   r   rE   )r#   methodDispatchbytekeepParsingkeymethodr   r   r   r     s&    	zEncodingParser.getEncodingc             C   s   |  j  j d  S)zSkip over commentss   -->)r4   r   )r#   r   r   r   r     s    zEncodingParser.handleCommentc             C   sE  |  j  j t k r d Sd } d  } x|  j   } | d  k rA d S| d d k r | d d k } | r=| d  k	 r=| |  _ d Sq% | d d k r | d } t |  } | d  k	 r=| |  _ d Sq% | d d k r% t t | d   } | j   } | d  k	 r=t |  } | d  k	 r:| r.| |  _ d S| } q:q=q% q% Wd  S)	NTFr   s
   http-equivr   s   content-types   charsets   content)	r4   r   r   getAttributerE   r   ContentAttrParserr   parse)r#   	hasPragmapendingEncodingattrtentativeEncodingcodeccontentParserr   r   r   r     s:    	
		zEncodingParser.handleMetac             C   s   |  j  d  S)NF)handlePossibleTag)r#   r   r   r   r     s    z%EncodingParser.handlePossibleStartTagc             C   s   t  |  j  |  j d  S)NT)r   r4   r   )r#   r   r   r   r     s    z#EncodingParser.handlePossibleEndTagc             C   s   |  j  } | j t k r9 | r5 | j   |  j   n  d S| j t  } | d k ra | j   n+ |  j   } x | d  k	 r |  j   } qp Wd S)NTr   )r4   r   asciiLettersBytesr   r   r   spacesAngleBracketsr   )r#   endTagr4   r   r   r   r   r   r     s    	
z EncodingParser.handlePossibleTagc             C   s   |  j  j d  S)Nr   )r4   r   )r#   r   r   r   r     s    zEncodingParser.handleOtherc             C   s  |  j  } | j t t d g  B } | d k sI t |  d k sI t  | d	 k rY d Sg  } g  } x | d k r~ | r~ Pnz | t k r | j   } Pn^ | d
 k r d j |  d f S| t k r | j | j	    n | d k r d S| j |  t
 |  } qh W| d k r1| j   d j |  d f St
 |  | j   } | d k r| } x t
 |  } | | k rt
 |  d j |  d j |  f S| t k r| j | j	    q\| j |  q\Wn^ | d k rd j |  d f S| t k r| j | j	    n | d k r!d S| j |  xw t
 |  } | t k red j |  d j |  f S| t k r| j | j	    q1| d k rd S| j |  q1Wd S)z_Return a name,value pair for the next attribute in the stream,
        if one is found, or None   /Nr   r      =r5      '   ")r   N)r   r   )r   r   )r4   r}   r   	frozensetr%   r*   r6   asciiUppercaseBytesr3   r   r   r   r   )r#   r4   r   attrName	attrValue	quoteCharr   r   r   r     sh    	$


zEncodingParser.getAttributeN)r   r   r   r=   r$   r   r   r   r   r   r   r   r   r   r   r   r   r     s   $r   c               @   s(   e  Z d  Z d d   Z d d   Z d S)r   c             C   s"   t  | t  s t  | |  _ d  S)N)r>   r0   r*   r4   )r#   r4   r   r   r   r$   U  s    zContentAttrParser.__init__c             C   sN  y1|  j  j d  |  j  j d 7_ |  j  j   |  j  j d k sH d  S|  j  j d 7_ |  j  j   |  j  j d k r |  j  j } |  j  j d 7_ |  j  j } |  j  j |  r |  j  | |  j  j  Sd  Sn] |  j  j } y+ |  j  j t  |  j  | |  j  j  SWn# t k
 r/|  j  | d   SYn XWn t k
 rId  SYn Xd  S)Ns   charsetr   r   r   r   )r   r   )r4   r   r"   r}   r   r   r   r   )r#   	quoteMarkoldPositionr   r   r   r   Y  s.    zContentAttrParser.parseN)r   r   r   r$   r   r   r   r   r   r   T  s   r   c             C   st   t  |  t  r> y |  j d  }  Wq> t k
 r: d SYq> Xn  |  rl t j d |   j   } t j | d  Sd Sd S)z{Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding.r   NrY   )	r>   r0   decodeUnicodeDecodeErrorascii_punctuation_rerp   r   r   r   )rE   canonicalNamer   r   r   r   {  s    r   )0
__future__r   r   r   pip._vendor.sixr   pip._vendor.six.movesr   r   rP   	constantsr   r	   r
   r   r   r   rY   r   ior   r   r   r   objectr   r   r   r   r   invalid_unicode_no_surrogaterL   rQ   rR   rt   setr|   r   r   r   rI   rB   rC   r0   r   r   r   r   r   r   r   r   <module>   sT   "		Jg'