
^V,                @   s	  d  d l  m Z m Z m Z y
 e Z Wn e k
 r: Yn Xd  d l m Z d d l	 m
 Z
 d d l	 m Z d d l	 m Z m Z d d l	 m Z m Z m Z d d l	 m Z m Z d d	 l	 m Z d d
 l m Z d d l m Z e e  Z Gd d   d e  Z d S)    )absolute_importdivisionunicode_literals)deque   )spaceCharacters)entities)asciiLettersasciiUpper2Lower)digits	hexDigitsEOF)
tokenTypestagTokenTypes)replacementCharacters)HTMLInputStream)Triec            	       s  e  Z d  Z d Z d d d d d d   f d d  Z d d   Z d d	   Z d d
 d d  Z d d   Z d d   Z	 d d   Z
 d d   Z d d   Z d d   Z d d   Z d d   Z d d   Z d d    Z d! d"   Z d# d$   Z d% d&   Z d' d(   Z d) d*   Z d+ d,   Z d- d.   Z d/ d0   Z d1 d2   Z d3 d4   Z d5 d6   Z d7 d8   Z d9 d:   Z d; d<   Z d= d>   Z  d? d@   Z! dA dB   Z" dC dD   Z# dE dF   Z$ dG dH   Z% dI dJ   Z& dK dL   Z' dM dN   Z( dO dP   Z) dQ dR   Z* dS dT   Z+ dU dV   Z, dW dX   Z- dY dZ   Z. d[ d\   Z/ d] d^   Z0 d_ d`   Z1 da db   Z2 dc dd   Z3 de df   Z4 dg dh   Z5 di dj   Z6 dk dl   Z7 dm dn   Z8 do dp   Z9 dq dr   Z: ds dt   Z; du dv   Z< dw dx   Z= dy dz   Z> d{ d|   Z? d} d~   Z@ d d   ZA d d   ZB d d   ZC d d   ZD d d   ZE d d   ZF d d   ZG d d   ZH d d   ZI d d   ZJ d d   ZK d d   ZL   S)HTMLTokenizera	   This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    NTc                sz   t  | | | |  |  _ | |  _ | |  _ | |  _ d |  _ g  |  _ |  j |  _ d |  _	 d  |  _
 t t |   j   d  S)NF)r   streamparserlowercaseElementNamelowercaseAttrName
escapeFlaglastFourChars	dataStatestateescapecurrentTokensuperr   __init__)selfr   encoding	parseMeta
useChardetr   r   r   )	__class__ =/tmp/pip-build-9m6vxulb/pip/pip/_vendor/html5lib/tokenizer.pyr   %   s    							zHTMLTokenizer.__init__c             c   s}   t  g   |  _ xg |  j   rx x6 |  j j rV i t d d 6|  j j j d  d 6Vq! Wx |  j rt |  j j   VqZ Wq Wd S)z This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        
ParseErrortyper   dataN)r   
tokenQueuer   r   errorsr   poppopleft)r    r%   r%   r&   __iter__9   s    *zHTMLTokenizer.__iter__c       	   %   C   s  t  } d } | r! t } d } n  g  } |  j j   } x8 | | k rp | t k	 rp | j |  |  j j   } q9 Wt d j |  |  } | t k r t | } |  j	 j i t
 d d 6d d 6i | d 6d	 6 nd
 | k o d k n s | d k r3d } |  j	 j i t
 d d 6d d 6i | d 6d	 6 nsd | k oJd k n sd | k ofd k n sd | k od k n sd | k od k n s| t d d d d d d d d d d d  d! d" d# d$ d% d& d' d( d) d* d+ d, d- d. d/ d0 d1 d2 d3 d4 d5 d6 d7 d g#  k rQ|  j	 j i t
 d d 6d d 6i | d 6d	 6 n  y t |  } WnB t k
 r| d8 } t d
 | d ?B t d9 | d: @B } Yn X| d; k r|  j	 j i t
 d d 6d< d 6 |  j j |  n  | S)=zThis function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        
       r'   r(   z$illegal-codepoint-for-numeric-entityr)   	charAsIntdatavarsi   i  i u   �r                  i  i     i  i  i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i i   i   i  ;z numeric-entity-without-semicolon)r   r   r   charr   appendintjoinr   r*   r   	frozensetchr
ValueErrorunget)	r    isHexallowedradix	charStackcr2   r;   vr%   r%   r&   consumeNumberEntityI   s`    	

+z!HTMLTokenizer.consumeNumberEntityFc       	      C   s  d } |  j  j   g } | d t k s] | d t d d f k s] | d  k	 rt | | d k rt |  j  j | d  n| d d k rpd } | j |  j  j    | d d k r d	 } | j |  j  j    n  | r | d t k s | r"| d t k r"|  j  j | d  |  j |  } qD|  j	 j i t
 d
 d 6d d 6 |  j  j | j    d d j |  } nxF | d t k	 rt j d j |   sPn  | j |  j  j    qsWy2 t j d j | d  d    } t |  } Wn t k
 rd  } Yn X| d  k	 r| d d k rG|  j	 j i t
 d
 d 6d d 6 n  | d d k r| r| | t k s| | t k s| | d k r|  j  j | j    d d j |  } qDt | } |  j  j | j    | d j | | d    7} nK |  j	 j i t
 d
 d 6d d 6 |  j  j | j    d d j |  } | rf|  j d d d | 7<n= | t k r{d } n d } |  j	 j i t
 | d 6| d 6 d  S)N&r   <#Fr   xXTr'   r(   zexpected-numeric-entityr)   r1   r:   znamed-entity-without-semicolon=zexpected-named-entitySpaceCharacters
Characters)rM   rN   rR   rR   rR   rR   rR   rR   rR   rR   )r   r;   r   r   rB   r<   r   r   rI   r*   r   r,   r>   entitiesTriehas_keys_with_prefixlongest_prefixlenKeyErrorr	   r   r   )	r    allowedCharfromAttributeoutputrF   hex
entityNameentityLength	tokenTyper%   r%   r&   consumeEntity   sf    )"
 	zHTMLTokenizer.consumeEntityc             C   s   |  j  d | d d  d S)zIThis method replaces the need for "entityInAttributeValueState".
        rX   rY   TN)r_   )r    rX   r%   r%   r&   processEntityInAttribute   s    z&HTMLTokenizer.processEntityInAttributec             C   s   |  j  } | d t k r |  j r< | d j t  | d <n  | d t d k r | d r |  j j i t d d 6d d 6 n  | d r |  j j i t d d 6d d 6 q q n  |  j j |  |  j |  _	 d	 S)
zThis method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        r(   nameEndTagr)   r'   zattributes-in-end-tagselfClosingzself-closing-flag-on-end-tagN)
r   r   r   	translater
   r   r*   r<   r   r   )r    tokenr%   r%   r&   emitCurrentToken   s    		

zHTMLTokenizer.emitCurrentTokenc             C   s(  |  j  j   } | d k r* |  j |  _ n | d k rE |  j |  _ n | d k r |  j j i t d d 6d d 6 |  j j i t d d 6d d 6 n | t k r d	 S| t	 k r |  j j i t d
 d 6| |  j  j
 t	 d  d 6 n8 |  j  j
 d  } |  j j i t d d 6| | d 6 d S)NrJ   rK    r'   r(   zinvalid-codepointr)   rQ   FrP   T)rJ   rK   rg   )r   r;   entityDataStater   tagOpenStater*   r<   r   r   r   
charsUntil)r    r)   charsr%   r%   r&   r      s&    !zHTMLTokenizer.dataStatec             C   s   |  j    |  j |  _ d S)NT)r_   r   r   )r    r%   r%   r&   rh     s    
zHTMLTokenizer.entityDataStatec             C   s(  |  j  j   } | d k r* |  j |  _ n | d k rE |  j |  _ n | t k rU d S| d k r |  j j i t d d 6d d 6 |  j j i t d	 d 6d
 d 6 n| | t	 k r |  j j i t d d 6| |  j  j
 t	 d  d 6 n8 |  j  j
 d  } |  j j i t d	 d 6| | d 6 d S)NrJ   rK   Frg   r'   r(   zinvalid-codepointr)   rQ   u   �rP   T)rJ   rK   rg   )r   r;   characterReferenceInRcdatar   rcdataLessThanSignStater   r*   r<   r   r   rj   )r    r)   rk   r%   r%   r&   rcdataState  s&    !zHTMLTokenizer.rcdataStatec             C   s   |  j    |  j |  _ d S)NT)r_   rn   r   )r    r%   r%   r&   rl   :  s    
z(HTMLTokenizer.characterReferenceInRcdatac             C   s   |  j  j   } | d k r* |  j |  _ n | d k r} |  j j i t d d 6d d 6 |  j j i t d d 6d d 6 nH | t k r d	 S|  j  j d  } |  j j i t d d 6| | d 6 d
 S)NrK   rg   r'   r(   zinvalid-codepointr)   rQ   u   �FT)rK   rg   )	r   r;   rawtextLessThanSignStater   r*   r<   r   r   rj   )r    r)   rk   r%   r%   r&   rawtextState?  s    zHTMLTokenizer.rawtextStatec             C   s   |  j  j   } | d k r* |  j |  _ n | d k r} |  j j i t d d 6d d 6 |  j j i t d d 6d d 6 nH | t k r d	 S|  j  j d  } |  j j i t d d 6| | d 6 d
 S)NrK   rg   r'   r(   zinvalid-codepointr)   rQ   u   �FT)rK   rg   )	r   r;   scriptDataLessThanSignStater   r*   r<   r   r   rj   )r    r)   rk   r%   r%   r&   scriptDataStateQ  s    zHTMLTokenizer.scriptDataStatec             C   s   |  j  j   } | t k r d S| d k rr |  j j i t d d 6d d 6 |  j j i t d d 6d d 6 n2 |  j j i t d d 6| |  j  j d  d 6 d	 S)
NFrg   r'   r(   zinvalid-codepointr)   rQ   u   �T)r   r;   r   r*   r<   r   rj   )r    r)   r%   r%   r&   plaintextStatec  s    zHTMLTokenizer.plaintextStatec             C   s  |  j  j   } | d k r* |  j |  _ nr| d k rE |  j |  _ nW| t k r i t d d 6| d 6g  d 6d d 6d d	 6|  _ |  j |  _ n| d
 k r |  j	 j
 i t d d 6d d 6 |  j	 j
 i t d d 6d d 6 |  j |  _ n | d k r<|  j	 j
 i t d d 6d d 6 |  j  j |  |  j |  _ n` |  j	 j
 i t d d 6d d 6 |  j	 j
 i t d d 6d d 6 |  j  j |  |  j |  _ d S)N!/StartTagr(   ra   r)   Frc   selfClosingAcknowledged>r'   z'expected-tag-name-but-got-right-bracketrQ   z<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerK   T)r   r;   markupDeclarationOpenStater   closeTagOpenStater	   r   r   tagNameStater*   r<   r   rB   bogusCommentState)r    r)   r%   r%   r&   ri   r  s6    ""zHTMLTokenizer.tagOpenStatec             C   s?  |  j  j   } | t k rS i t d d 6| d 6g  d 6d d 6|  _ |  j |  _ n | d k r |  j j i t d d 6d	 d 6 |  j	 |  _ n | t
 k r |  j j i t d d 6d
 d 6 |  j j i t d d 6d d 6 |  j	 |  _ nL |  j j i t d d 6d d 6i | d 6d 6 |  j  j |  |  j |  _ d S)Nrb   r(   ra   r)   Frc   rx   r'   z*expected-closing-tag-but-got-right-bracketz expected-closing-tag-but-got-eofrQ   z</z!expected-closing-tag-but-got-charr3   T)r   r;   r	   r   r   r|   r   r*   r<   r   r   rB   r}   )r    r)   r%   r%   r&   r{     s(    "zHTMLTokenizer.closeTagOpenStatec             C   s   |  j  j   } | t k r* |  j |  _ n | d k rC |  j   n | t k r |  j j i t	 d d 6d d 6 |  j
 |  _ nn | d k r |  j |  _ nS | d k r |  j j i t	 d d 6d d 6 |  j d	 d
 7<n |  j d	 | 7<d S)Nrx   r'   r(   zeof-in-tag-namer)   ru   rg   zinvalid-codepointra   u   �T)r   r;   r   beforeAttributeNameStater   rf   r   r*   r<   r   r   selfClosingStartTagStater   )r    r)   r%   r%   r&   r|     s"    zHTMLTokenizer.tagNameStatec             C   su   |  j  j   } | d k r3 d |  _ |  j |  _ n> |  j j i t d d 6d d 6 |  j  j |  |  j	 |  _ d S)Nru   r1   rQ   r(   rK   r)   T)
r   r;   temporaryBufferrcdataEndTagOpenStater   r*   r<   r   rB   rn   )r    r)   r%   r%   r&   rm     s    	"z%HTMLTokenizer.rcdataLessThanSignStatec             C   s{   |  j  j   } | t k r9 |  j | 7_ |  j |  _ n> |  j j i t d d 6d d 6 |  j  j	 |  |  j
 |  _ d S)NrQ   r(   z</r)   T)r   r;   r	   r   rcdataEndTagNameStater   r*   r<   r   rB   rn   )r    r)   r%   r%   r&   r     s    "z#HTMLTokenizer.rcdataEndTagOpenStatec             C   s  |  j  o( |  j  d j   |  j j   k } |  j j   } | t k r | r i t d d 6|  j d 6g  d 6d d 6|  _  |  j |  _ n| d k r | r i t d d 6|  j d 6g  d 6d d 6|  _  |  j	 |  _ n | d k r+| r+i t d d 6|  j d 6g  d 6d d 6|  _  |  j
   |  j |  _ nc | t k rI|  j | 7_ nE |  j j i t d	 d 6d
 |  j d 6 |  j j |  |  j |  _ d S)Nra   rb   r(   r)   Frc   ru   rx   rQ   z</T)r   lowerr   r   r;   r   r   r~   r   r   rf   r   r	   r*   r<   rB   rn   )r    appropriater)   r%   r%   r&   r     s2    +



z#HTMLTokenizer.rcdataEndTagNameStatec             C   su   |  j  j   } | d k r3 d |  _ |  j |  _ n> |  j j i t d d 6d d 6 |  j  j |  |  j	 |  _ d S)Nru   r1   rQ   r(   rK   r)   T)
r   r;   r   rawtextEndTagOpenStater   r*   r<   r   rB   rp   )r    r)   r%   r%   r&   ro     s    	"z&HTMLTokenizer.rawtextLessThanSignStatec             C   s{   |  j  j   } | t k r9 |  j | 7_ |  j |  _ n> |  j j i t d d 6d d 6 |  j  j	 |  |  j
 |  _ d S)NrQ   r(   z</r)   T)r   r;   r	   r   rawtextEndTagNameStater   r*   r<   r   rB   rp   )r    r)   r%   r%   r&   r      s    "z$HTMLTokenizer.rawtextEndTagOpenStatec             C   s  |  j  o( |  j  d j   |  j j   k } |  j j   } | t k r | r i t d d 6|  j d 6g  d 6d d 6|  _  |  j |  _ n| d k r | r i t d d 6|  j d 6g  d 6d d 6|  _  |  j	 |  _ n | d k r+| r+i t d d 6|  j d 6g  d 6d d 6|  _  |  j
   |  j |  _ nc | t k rI|  j | 7_ nE |  j j i t d	 d 6d
 |  j d 6 |  j j |  |  j |  _ d S)Nra   rb   r(   r)   Frc   ru   rx   rQ   z</T)r   r   r   r   r;   r   r   r~   r   r   rf   r   r	   r*   r<   rB   rp   )r    r   r)   r%   r%   r&   r     s2    +



z$HTMLTokenizer.rawtextEndTagNameStatec             C   s   |  j  j   } | d k r3 d |  _ |  j |  _ n{ | d k rp |  j j i t d d 6d d 6 |  j |  _ n> |  j j i t d d 6d d 6 |  j  j	 |  |  j
 |  _ d	 S)
Nru   r1   rt   rQ   r(   z<!r)   rK   T)r   r;   r   scriptDataEndTagOpenStater   r*   r<   r   scriptDataEscapeStartStaterB   rr   )r    r)   r%   r%   r&   rq   '  s    	""z)HTMLTokenizer.scriptDataLessThanSignStatec             C   s{   |  j  j   } | t k r9 |  j | 7_ |  j |  _ n> |  j j i t d d 6d d 6 |  j  j	 |  |  j
 |  _ d S)NrQ   r(   z</r)   T)r   r;   r	   r   scriptDataEndTagNameStater   r*   r<   r   rB   rr   )r    r)   r%   r%   r&   r   5  s    "z'HTMLTokenizer.scriptDataEndTagOpenStatec             C   s  |  j  o( |  j  d j   |  j j   k } |  j j   } | t k r | r i t d d 6|  j d 6g  d 6d d 6|  _  |  j |  _ n| d k r | r i t d d 6|  j d 6g  d 6d d 6|  _  |  j	 |  _ n | d k r+| r+i t d d 6|  j d 6g  d 6d d 6|  _  |  j
   |  j |  _ nc | t k rI|  j | 7_ nE |  j j i t d	 d 6d
 |  j d 6 |  j j |  |  j |  _ d S)Nra   rb   r(   r)   Frc   ru   rx   rQ   z</T)r   r   r   r   r;   r   r   r~   r   r   rf   r   r	   r*   r<   rB   rr   )r    r   r)   r%   r%   r&   r   @  s2    +



z'HTMLTokenizer.scriptDataEndTagNameStatec             C   sl   |  j  j   } | d k rL |  j j i t d d 6d d 6 |  j |  _ n |  j  j |  |  j |  _ d S)N-rQ   r(   r)   T)	r   r;   r*   r<   r   scriptDataEscapeStartDashStater   rB   rr   )r    r)   r%   r%   r&   r   \  s    "z(HTMLTokenizer.scriptDataEscapeStartStatec             C   sl   |  j  j   } | d k rL |  j j i t d d 6d d 6 |  j |  _ n |  j  j |  |  j |  _ d S)Nr   rQ   r(   r)   T)	r   r;   r*   r<   r   scriptDataEscapedDashDashStater   rB   rr   )r    r)   r%   r%   r&   r   f  s    "z,HTMLTokenizer.scriptDataEscapeStartDashStatec             C   s  |  j  j   } | d k rL |  j j i t d d 6d d 6 |  j |  _ n | d k rg |  j |  _ n | d k r |  j j i t d d 6d d 6 |  j j i t d d 6d	 d 6 nS | t k r |  j	 |  _ n8 |  j  j
 d  } |  j j i t d d 6| | d 6 d
 S)Nr   rQ   r(   r)   rK   rg   r'   zinvalid-codepointu   �T)rK   r   rg   )r   r;   r*   r<   r   scriptDataEscapedDashStater   "scriptDataEscapedLessThanSignStater   r   rj   )r    r)   rk   r%   r%   r&   scriptDataEscapedStatep  s"    "z$HTMLTokenizer.scriptDataEscapedStatec             C   s  |  j  j   } | d k rL |  j j i t d d 6d d 6 |  j |  _ n | d k rg |  j |  _ n | d k r |  j j i t d d 6d d 6 |  j j i t d d 6d	 d 6 |  j |  _ nI | t	 k r |  j
 |  _ n. |  j j i t d d 6| d 6 |  j |  _ d
 S)Nr   rQ   r(   r)   rK   rg   r'   zinvalid-codepointu   �T)r   r;   r*   r<   r   r   r   r   r   r   r   )r    r)   r%   r%   r&   r     s"    ""z(HTMLTokenizer.scriptDataEscapedDashStatec             C   sD  |  j  j   } | d k r@ |  j j i t d d 6d d 6 n | d k r[ |  j |  _ n | d k r |  j j i t d d 6d d 6 |  j |  _ n | d k r |  j j i t d d 6d	 d 6 |  j j i t d d 6d
 d 6 |  j |  _ nI | t	 k r|  j
 |  _ n. |  j j i t d d 6| d 6 |  j |  _ d S)Nr   rQ   r(   r)   rK   rx   rg   r'   zinvalid-codepointu   �T)r   r;   r*   r<   r   r   r   rr   r   r   r   )r    r)   r%   r%   r&   r     s&    %""z,HTMLTokenizer.scriptDataEscapedDashDashStatec             C   s   |  j  j   } | d k r3 d |  _ |  j |  _ n | t k r} |  j j i t d d 6d | d 6 | |  _ |  j	 |  _ n> |  j j i t d d 6d d 6 |  j  j
 |  |  j |  _ d S)Nru   r1   rQ   r(   rK   r)   T)r   r;   r    scriptDataEscapedEndTagOpenStater   r	   r*   r<   r    scriptDataDoubleEscapeStartStaterB   r   )r    r)   r%   r%   r&   r     s    	&	"z0HTMLTokenizer.scriptDataEscapedLessThanSignStatec             C   su   |  j  j   } | t k r3 | |  _ |  j |  _ n> |  j j i t d d 6d d 6 |  j  j	 |  |  j
 |  _ d S)NrQ   r(   z</r)   T)r   r;   r	   r    scriptDataEscapedEndTagNameStater   r*   r<   r   rB   r   )r    r)   r%   r%   r&   r     s    	"z.HTMLTokenizer.scriptDataEscapedEndTagOpenStatec             C   s  |  j  o( |  j  d j   |  j j   k } |  j j   } | t k r | r i t d d 6|  j d 6g  d 6d d 6|  _  |  j |  _ n| d k r | r i t d d 6|  j d 6g  d 6d d 6|  _  |  j	 |  _ n | d k r+| r+i t d d 6|  j d 6g  d 6d d 6|  _  |  j
   |  j |  _ nc | t k rI|  j | 7_ nE |  j j i t d	 d 6d
 |  j d 6 |  j j |  |  j |  _ d S)Nra   rb   r(   r)   Frc   ru   rx   rQ   z</T)r   r   r   r   r;   r   r   r~   r   r   rf   r   r	   r*   r<   rB   r   )r    r   r)   r%   r%   r&   r     s2    +



z.HTMLTokenizer.scriptDataEscapedEndTagNameStatec             C   s   |  j  j   } | t t d  Bk rz |  j j i t d d 6| d 6 |  j j   d k rk |  j	 |  _
 q |  j |  _
 n\ | t k r |  j j i t d d 6| d 6 |  j | 7_ n |  j  j |  |  j |  _
 d S)	Nru   rx   rQ   r(   r)   scriptT)ru   rx   )r   r;   r   r?   r*   r<   r   r   r   scriptDataDoubleEscapedStater   r   r	   rB   )r    r)   r%   r%   r&   r     s    ""z.HTMLTokenizer.scriptDataDoubleEscapeStartStatec             C   s?  |  j  j   } | d k rL |  j j i t d d 6d d 6 |  j |  _ n | d k r |  j j i t d d 6d d 6 |  j |  _ n | d k r |  j j i t d d 6d d 6 |  j j i t d d 6d	 d 6 n_ | t k r|  j j i t d d 6d
 d 6 |  j	 |  _ n" |  j j i t d d 6| d 6 d S)Nr   rQ   r(   r)   rK   rg   r'   zinvalid-codepointu   �zeof-in-script-in-scriptT)
r   r;   r*   r<   r    scriptDataDoubleEscapedDashStater   (scriptDataDoubleEscapedLessThanSignStater   r   )r    r)   r%   r%   r&   r     s$    """z*HTMLTokenizer.scriptDataDoubleEscapedStatec             C   sW  |  j  j   } | d k rL |  j j i t d d 6d d 6 |  j |  _ n| d k r |  j j i t d d 6d d 6 |  j |  _ n | d k r |  j j i t d d 6d d 6 |  j j i t d d 6d	 d 6 |  j |  _ nk | t	 k r%|  j j i t d d 6d
 d 6 |  j
 |  _ n. |  j j i t d d 6| d 6 |  j |  _ d S)Nr   rQ   r(   r)   rK   rg   r'   zinvalid-codepointu   �zeof-in-script-in-scriptT)r   r;   r*   r<   r   $scriptDataDoubleEscapedDashDashStater   r   r   r   r   )r    r)   r%   r%   r&   r   	  s(    """z.HTMLTokenizer.scriptDataDoubleEscapedDashStatec             C   s  |  j  j   } | d k r@ |  j j i t d d 6d d 6 nD| d k r} |  j j i t d d 6d d 6 |  j |  _ n| d k r |  j j i t d d 6d d 6 |  j |  _ n | d k r|  j j i t d d 6d	 d 6 |  j j i t d d 6d
 d 6 |  j |  _ nk | t	 k rV|  j j i t d d 6d d 6 |  j
 |  _ n. |  j j i t d d 6| d 6 |  j |  _ d S)Nr   rQ   r(   r)   rK   rx   rg   r'   zinvalid-codepointu   �zeof-in-script-in-scriptT)r   r;   r*   r<   r   r   r   rr   r   r   r   )r    r)   r%   r%   r&   r      s,    %"""z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatec             C   su   |  j  j   } | d k rU |  j j i t d d 6d d 6 d |  _ |  j |  _ n |  j  j |  |  j	 |  _ d S)Nru   rQ   r(   r)   r1   T)
r   r;   r*   r<   r   r   scriptDataDoubleEscapeEndStater   rB   r   )r    r)   r%   r%   r&   r   9  s    "	z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatec             C   s   |  j  j   } | t t d  Bk rz |  j j i t d d 6| d 6 |  j j   d k rk |  j	 |  _
 q |  j |  _
 n\ | t k r |  j j i t d d 6| d 6 |  j | 7_ n |  j  j |  |  j |  _
 d S)	Nru   rx   rQ   r(   r)   r   T)ru   rx   )r   r;   r   r?   r*   r<   r   r   r   r   r   r   r	   rB   )r    r)   r%   r%   r&   r   D  s    ""z,HTMLTokenizer.scriptDataDoubleEscapeEndStatec             C   s  |  j  j   } | t k r1 |  j  j t d  nz| t k rf |  j d j | d g  |  j |  _ nE| d k r |  j	   n,| d k r |  j
 |  _ n| d k r |  j j i t d
 d 6d d 6 |  j d j | d g  |  j |  _ n | d k rH|  j j i t d
 d 6d d 6 |  j d j d d g  |  j |  _ nc | t k r|  j j i t d
 d 6d d 6 |  j |  _ n& |  j d j | d g  |  j |  _ d S)NTr)   r1   rx   ru   '"rO   rK   r'   r(   z#invalid-character-in-attribute-namerg   zinvalid-codepointu   �z#expected-attribute-name-but-got-eof)r   r   rO   rK   )r   r;   r   rj   r	   r   r<   attributeNameStater   rf   r   r*   r   r   r   )r    r)   r%   r%   r&   r~   T  s6    z&HTMLTokenizer.beforeAttributeNameStatec             C   s  |  j  j   } d } d } | d k r6 |  j |  _ n| t k rw |  j d d d | |  j  j t d  7<d } nG| d k r d } n2| t k r |  j |  _ n| d k r |  j	 |  _ n | d	 k r|  j
 j i t d
 d 6d d 6 |  j d d d d 7<d } n | d k rb|  j
 j i t d
 d 6d d 6 |  j d d d | 7<d } n\ | t k r|  j
 j i t d
 d 6d d 6 |  j |  _ n |  j d d d | 7<d } | r||  j r|  j d d d j t  |  j d d d <n  xf |  j d d  d  D]M \ } } |  j d d d | k r|  j
 j i t d
 d 6d d 6 PqqW| r||  j   q|n  d S)NTFrO   r)   r   r   rx   ru   rg   r'   r(   zinvalid-codepointu   �r   r   rK   z#invalid-character-in-attribute-namezeof-in-attribute-namezduplicate-attributerR   rR   )r   r   rK   rR   rR   rR   rR   rR   rR   )r   r;   beforeAttributeValueStater   r	   r   rj   r   afterAttributeNameStater   r*   r<   r   r   r   r   rd   r
   rf   )r    r)   leavingThisState	emitTokenra   valuer%   r%   r&   r   r  sT    					0$z HTMLTokenizer.attributeNameStatec             C   s  |  j  j   } | t k r1 |  j  j t d  n| d k rL |  j |  _ nz| d k re |  j   na| t k r |  j d j	 | d g  |  j
 |  _ n,| d k r |  j |  _ n| d k r|  j j	 i t d d	 6d
 d 6 |  j d j	 d d g  |  j
 |  _ n | d k rc|  j j	 i t d d	 6d d 6 |  j d j	 | d g  |  j
 |  _ nc | t k r|  j j	 i t d d	 6d d 6 |  j |  _ n& |  j d j	 | d g  |  j
 |  _ d S)NTrO   rx   r)   r1   ru   rg   r'   r(   zinvalid-codepointu   �r   r   rK   z&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)r   r   rK   )r   r;   r   rj   r   r   rf   r	   r   r<   r   r   r*   r   r   r   )r    r)   r%   r%   r&   r     s:    z%HTMLTokenizer.afterAttributeNameStatec             C   s  |  j  j   } | t k r1 |  j  j t d  n| d k rL |  j |  _ n| d k rw |  j |  _ |  j  j |  nd| d k r |  j |  _ nI| d k r |  j	 j
 i t d d 6d d	 6 |  j   n| d
 k r#|  j	 j
 i t d d 6d d	 6 |  j d	 d d d 7<|  j |  _ n | d k ry|  j	 j
 i t d d 6d d	 6 |  j d	 d d | 7<|  j |  _ nb | t k r|  j	 j
 i t d d 6d d	 6 |  j |  _ n% |  j d	 d d | 7<|  j |  _ d S)NTr   rJ   r   rx   r'   r(   z.expected-attribute-value-but-got-right-bracketr)   rg   zinvalid-codepointr   u   �rO   rK   `z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eofrR   )rO   rK   r   rR   rR   )r   r;   r   rj   attributeValueDoubleQuotedStater   attributeValueUnQuotedStaterB   attributeValueSingleQuotedStater*   r<   r   rf   r   r   r   )r    r)   r%   r%   r&   r     s>    z'HTMLTokenizer.beforeAttributeValueStatec             C   s   |  j  j   } | d k r* |  j |  _ n | d k rF |  j d  n | d k r |  j j i t d d 6d d 6 |  j d d d d	 7<nf | t	 k r |  j j i t d d 6d
 d 6 |  j
 |  _ n) |  j d d d | |  j  j d  7<d S)Nr   rJ   rg   r'   r(   zinvalid-codepointr)   r   u   �z#eof-in-attribute-value-double-quoteTrR   rR   )r   rJ   rg   )r   r;   afterAttributeValueStater   r`   r*   r<   r   r   r   r   rj   )r    r)   r%   r%   r&   r     s     z-HTMLTokenizer.attributeValueDoubleQuotedStatec             C   s   |  j  j   } | d k r* |  j |  _ n | d k rF |  j d  n | d k r |  j j i t d d 6d d 6 |  j d d d d	 7<nf | t	 k r |  j j i t d d 6d
 d 6 |  j
 |  _ n) |  j d d d | |  j  j d  7<d S)Nr   rJ   rg   r'   r(   zinvalid-codepointr)   r   u   �z#eof-in-attribute-value-single-quoteTrR   rR   )r   rJ   rg   )r   r;   r   r   r`   r*   r<   r   r   r   r   rj   )r    r)   r%   r%   r&   r     s     z-HTMLTokenizer.attributeValueSingleQuotedStatec             C   sg  |  j  j   } | t k r* |  j |  _ n9| d k rF |  j d  n| d k r_ |  j   n| d k r |  j j i t	 d d	 6d
 d 6 |  j
 d d d | 7<n | d k r |  j j i t	 d d	 6d d 6 |  j
 d d d d 7<np | t k r0|  j j i t	 d d	 6d d 6 |  j |  _ n3 |  j
 d d d | |  j  j t d  t B 7<d S)NrJ   rx   r   r   rO   rK   r   r'   r(   z0unexpected-character-in-unquoted-attribute-valuer)   r   rg   zinvalid-codepointu   �z eof-in-attribute-value-no-quotesT)r   r   rO   rK   r   rR   rR   rR   )rJ   rx   r   r   rO   rK   r   rg   )r   r;   r   r~   r   r`   rf   r*   r<   r   r   r   r   rj   r?   )r    r)   r%   r%   r&   r     s,    z)HTMLTokenizer.attributeValueUnQuotedStatec             C   s   |  j  j   } | t k r* |  j |  _ n | d k rC |  j   n | d k r^ |  j |  _ n | t k r |  j j	 i t
 d d 6d d 6 |  j  j |  |  j |  _ n> |  j j	 i t
 d d 6d d 6 |  j  j |  |  j |  _ d S)	Nrx   ru   r'   r(   z$unexpected-EOF-after-attribute-valuer)   z*unexpected-character-after-attribute-valueT)r   r;   r   r~   r   rf   r   r   r*   r<   r   rB   r   )r    r)   r%   r%   r&   r   *  s"    z&HTMLTokenizer.afterAttributeValueStatec             C   s   |  j  j   } | d k r5 d |  j d <|  j   n | t k r |  j j i t d d 6d d 6 |  j  j |  |  j	 |  _
 n> |  j j i t d d 6d d 6 |  j  j |  |  j |  _
 d S)	Nrx   Trc   r'   r(   z#unexpected-EOF-after-solidus-in-tagr)   z)unexpected-character-after-solidus-in-tag)r   r;   r   rf   r   r*   r<   r   rB   r   r   r~   )r    r)   r%   r%   r&   r   >  s    z&HTMLTokenizer.selfClosingStartTagStatec             C   sc   |  j  j d  } | j d d  } |  j j i t d d 6| d 6 |  j  j   |  j |  _ d S)Nrx   rg   u   �Commentr(   r)   T)	r   rj   replacer*   r<   r   r;   r   r   )r    r)   r%   r%   r&   r}   P  s    	zHTMLTokenizer.bogusCommentStatec             C   s0  |  j  j   g } | d  d k rv | j |  j  j    | d! d k ri t d d 6d d 6|  _ |  j |  _ d Sne| d" d# k rd } x> d* D]6 } | j |  j  j    | d+ | k r d } Pq q W| ri t d d 6d d 6d  d 6d  d 6d d 6|  _ |  j |  _ d Sn | d, d k r|  j d  k	 r|  j j	 j
 r|  j j	 j
 d- j |  j j	 j k rd } xP d d	 d d d d g D]6 } | j |  j  j    | d. | k rd } PqqW| r|  j |  _ d Sn  |  j j i t d d 6d d 6 x  | r|  j  j | j    q W|  j |  _ d S)/Nr   r   r   r(   r1   r)   TdDoOrG   CtTyYpPeEFDoctypera   publicIdsystemIdcorrect[Ar'   zexpected-dashes-or-doctyperR   rR   rR   )r   r   r   r   rG   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   rR   rR   rR   rR   )r   r;   r<   r   r   commentStartStater   doctypeStater   treeopenElements	namespacedefaultNamespacecdataSectionStater*   rB   r,   r}   )r    rF   matchedexpectedr%   r%   r&   rz   _  sR    
%	z(HTMLTokenizer.markupDeclarationOpenStatec             C   s-  |  j  j   } | d k r* |  j |  _ n | d k rl |  j j i t d d 6d d 6 |  j d d 7<n | d k r |  j j i t d d 6d	 d 6 |  j j |  j  |  j |  _ nm | t	 k r|  j j i t d d 6d
 d 6 |  j j |  j  |  j |  _ n |  j d | 7<|  j
 |  _ d S)Nr   rg   r'   r(   zinvalid-codepointr)   u   �rx   zincorrect-commentzeof-in-commentT)r   r;   commentStartDashStater   r*   r<   r   r   r   r   commentState)r    r)   r%   r%   r&   r     s(    zHTMLTokenizer.commentStartStatec             C   s1  |  j  j   } | d k r* |  j |  _ n| d k rl |  j j i t d d 6d d 6 |  j d d 7<n | d k r |  j j i t d d 6d	 d 6 |  j j |  j  |  j |  _ nq | t	 k r|  j j i t d d 6d
 d 6 |  j j |  j  |  j |  _ n! |  j d d | 7<|  j
 |  _ d S)Nr   rg   r'   r(   zinvalid-codepointr)   u   -�rx   zincorrect-commentzeof-in-commentT)r   r;   commentEndStater   r*   r<   r   r   r   r   r   )r    r)   r%   r%   r&   r     s(    z#HTMLTokenizer.commentStartDashStatec             C   s   |  j  j   } | d k r* |  j |  _ n | d k rl |  j j i t d d 6d d 6 |  j d d 7<nq | t k r |  j j i t d d 6d d 6 |  j j |  j  |  j	 |  _ n! |  j d | |  j  j
 d
  7<d	 S)Nr   rg   r'   r(   zinvalid-codepointr)   u   �zeof-in-commentT)r   rg   )r   r;   commentEndDashStater   r*   r<   r   r   r   r   rj   )r    r)   r%   r%   r&   r     s    zHTMLTokenizer.commentStatec             C   s   |  j  j   } | d k r* |  j |  _ n | d k rx |  j j i t d d 6d d 6 |  j d d 7<|  j |  _ nq | t	 k r |  j j i t d d 6d d 6 |  j j |  j  |  j
 |  _ n! |  j d d | 7<|  j |  _ d	 S)
Nr   rg   r'   r(   zinvalid-codepointr)   u   -�zeof-in-comment-end-dashT)r   r;   r   r   r*   r<   r   r   r   r   r   )r    r)   r%   r%   r&   r     s     z!HTMLTokenizer.commentEndDashStatec             C   s  |  j  j   } | d k r= |  j j |  j  |  j |  _ n`| d k r |  j j i t d d 6d d 6 |  j d d 7<|  j |  _ n| d k r |  j j i t d d 6d	 d 6 |  j	 |  _ n | d
 k r
|  j j i t d d 6d d 6 |  j d | 7<n | t
 k rZ|  j j i t d d 6d d 6 |  j j |  j  |  j |  _ nC |  j j i t d d 6d d 6 |  j d d | 7<|  j |  _ d S)Nrx   rg   r'   r(   zinvalid-codepointr)   u   --�rt   z,unexpected-bang-after-double-dash-in-commentr   z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)r   r;   r*   r<   r   r   r   r   r   commentEndBangStater   )r    r)   r%   r%   r&   r     s6    zHTMLTokenizer.commentEndStatec             C   s,  |  j  j   } | d k r= |  j j |  j  |  j |  _ n | d k ri |  j d d 7<|  j |  _ n | d k r |  j j i t d d 6d d 6 |  j d d	 7<|  j	 |  _ nq | t
 k r|  j j i t d d 6d
 d 6 |  j j |  j  |  j |  _ n! |  j d d | 7<|  j	 |  _ d S)Nrx   r   r)   z--!rg   r'   r(   zinvalid-codepointu   --!�zeof-in-comment-end-bang-stateT)r   r;   r*   r<   r   r   r   r   r   r   r   )r    r)   r%   r%   r&   r     s(    z!HTMLTokenizer.commentEndBangStatec             C   s   |  j  j   } | t k r* |  j |  _ n | t k r |  j j i t d d 6d d 6 d |  j	 d <|  j j |  j	  |  j
 |  _ n> |  j j i t d d 6d d 6 |  j  j |  |  j |  _ d S)	Nr'   r(   z!expected-doctype-name-but-got-eofr)   Fr   zneed-space-after-doctypeT)r   r;   r   beforeDoctypeNameStater   r   r*   r<   r   r   r   rB   )r    r)   r%   r%   r&   r     s    zHTMLTokenizer.doctypeStatec             C   s?  |  j  j   } | t k r n| d k r{ |  j j i t d d 6d d 6 d |  j d <|  j j |  j  |  j |  _ n | d k r |  j j i t d d 6d	 d 6 d
 |  j d <|  j	 |  _ nv | t
 k r"|  j j i t d d 6d d 6 d |  j d <|  j j |  j  |  j |  _ n | |  j d <|  j	 |  _ d S)Nrx   r'   r(   z+expected-doctype-name-but-got-right-bracketr)   Fr   rg   zinvalid-codepointu   �ra   z!expected-doctype-name-but-got-eofT)r   r;   r   r*   r<   r   r   r   r   doctypeNameStater   )r    r)   r%   r%   r&   r   &  s.    z$HTMLTokenizer.beforeDoctypeNameStatec             C   so  |  j  j   } | t k rG |  j d j t  |  j d <|  j |  _ n$| d k r |  j d j t  |  j d <|  j j	 |  j  |  j
 |  _ n | d k r |  j j	 i t d d 6d d 6 |  j d d 7<|  j |  _ n | t k rZ|  j j	 i t d d 6d	 d 6 d
 |  j d <|  j d j t  |  j d <|  j j	 |  j  |  j
 |  _ n |  j d | 7<d S)Nra   rx   rg   r'   r(   zinvalid-codepointr)   u   �zeof-in-doctype-nameFr   T)r   r;   r   r   rd   r
   afterDoctypeNameStater   r*   r<   r   r   r   r   )r    r)   r%   r%   r&   r   @  s,    zHTMLTokenizer.doctypeNameStatec             C   s  |  j  j   } | t k r n| d k rL |  j j |  j  |  j |  _ n| t k r d |  j d <|  j  j	 |  |  j j i t
 d d 6d d 6 |  j j |  j  |  j |  _ n| d! k rd
 } x3 d' D]+ } |  j  j   } | | k r d } Pq q W| r{|  j |  _ d
 Sna | d( k r{d
 } x3 d. D]+ } |  j  j   } | | k r3d } Pq3q3W| r{|  j |  _ d
 Sn  |  j  j	 |  |  j j i t
 d d 6d d 6i | d 6d  6 d |  j d <|  j |  _ d
 S)/Nrx   Fr   r'   r(   zeof-in-doctyper)   r   r   TuUbBlLiIrG   r   sSr   r   r   r   r   r   mMz*expected-space-or-right-bracket-in-doctyper3   )r   r   r   r   r   r   r   r   r   r   rG   r   )r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   )r   r;   r   r*   r<   r   r   r   r   rB   r   afterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)r    r)   r   r   r%   r%   r&   r   Y  sT    

z#HTMLTokenizer.afterDoctypeNameStatec             C   s   |  j  j   } | t k r* |  j |  _ n | d k rw |  j j i t d d 6d d 6 |  j  j |  |  j |  _ ny | t	 k r |  j j i t d d 6d d 6 d |  j
 d	 <|  j j |  j
  |  j |  _ n |  j  j |  |  j |  _ d
 S)Nr   r   r'   r(   zunexpected-char-in-doctyper)   zeof-in-doctypeFr   T)r   r   )r   r;   r   "beforeDoctypePublicIdentifierStater   r*   r<   r   rB   r   r   r   )r    r)   r%   r%   r&   r     s"    z,HTMLTokenizer.afterDoctypePublicKeywordStatec             C   sg  |  j  j   } | t k r nE| d k rF d |  j d <|  j |  _ n| d k rn d |  j d <|  j |  _ n | d k r |  j j i t	 d d 6d d	 6 d
 |  j d <|  j j |  j  |  j
 |  _ n | t k r(|  j j i t	 d d 6d d	 6 d
 |  j d <|  j j |  j  |  j
 |  _ n; |  j j i t	 d d 6d d	 6 d
 |  j d <|  j |  _ d S)Nr   r1   r   r   rx   r'   r(   zunexpected-end-of-doctyper)   Fr   zeof-in-doctypezunexpected-char-in-doctypeT)r   r;   r   r   (doctypePublicIdentifierDoubleQuotedStater   (doctypePublicIdentifierSingleQuotedStater*   r<   r   r   r   r   )r    r)   r%   r%   r&   r     s4    z0HTMLTokenizer.beforeDoctypePublicIdentifierStatec             C   s;  |  j  j   } | d k r* |  j |  _ n| d k rl |  j j i t d d 6d d 6 |  j d d 7<n | d	 k r |  j j i t d d 6d
 d 6 d |  j d <|  j j |  j  |  j |  _ nn | t	 k r&|  j j i t d d 6d d 6 d |  j d <|  j j |  j  |  j |  _ n |  j d | 7<d S)Nr   rg   r'   r(   zinvalid-codepointr)   r   u   �rx   zunexpected-end-of-doctypeFr   zeof-in-doctypeT)
r   r;   !afterDoctypePublicIdentifierStater   r*   r<   r   r   r   r   )r    r)   r%   r%   r&   r     s*    z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatec             C   s;  |  j  j   } | d k r* |  j |  _ n| d k rl |  j j i t d d 6d d 6 |  j d d 7<n | d	 k r |  j j i t d d 6d
 d 6 d |  j d <|  j j |  j  |  j |  _ nn | t	 k r&|  j j i t d d 6d d 6 d |  j d <|  j j |  j  |  j |  _ n |  j d | 7<d S)Nr   rg   r'   r(   zinvalid-codepointr)   r   u   �rx   zunexpected-end-of-doctypeFr   zeof-in-doctypeT)
r   r;   r   r   r*   r<   r   r   r   r   )r    r)   r%   r%   r&   r     s*    z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatec             C   s  |  j  j   } | t k r* |  j |  _ nZ| d k rX |  j j |  j  |  j |  _ n,| d k r |  j j i t	 d d 6d d 6 d |  j d <|  j
 |  _ n | d	 k r |  j j i t	 d d 6d d 6 d |  j d <|  j |  _ n | t k rI|  j j i t	 d d 6d
 d 6 d |  j d <|  j j |  j  |  j |  _ n; |  j j i t	 d d 6d d 6 d |  j d <|  j |  _ d S)Nrx   r   r'   r(   zunexpected-char-in-doctyper)   r1   r   r   zeof-in-doctypeFr   T)r   r;   r   -betweenDoctypePublicAndSystemIdentifiersStater   r*   r<   r   r   r   (doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStater   r   )r    r)   r%   r%   r&   r     s6    z/HTMLTokenizer.afterDoctypePublicIdentifierStatec             C   s8  |  j  j   } | t k r n| d k rL |  j j |  j  |  j |  _ n | d k rt d |  j d <|  j |  _ n | d k r d |  j d <|  j	 |  _ n | t
 k r |  j j i t d d 6d d	 6 d
 |  j d <|  j j |  j  |  j |  _ n; |  j j i t d d 6d d	 6 d
 |  j d <|  j |  _ d S)Nrx   r   r1   r   r   r'   r(   zeof-in-doctyper)   Fr   zunexpected-char-in-doctypeT)r   r;   r   r*   r<   r   r   r   r   r   r   r   r   )r    r)   r%   r%   r&   r     s.    z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatec             C   s   |  j  j   } | t k r* |  j |  _ n | d k rw |  j j i t d d 6d d 6 |  j  j |  |  j |  _ ny | t	 k r |  j j i t d d 6d d 6 d |  j
 d	 <|  j j |  j
  |  j |  _ n |  j  j |  |  j |  _ d
 S)Nr   r   r'   r(   zunexpected-char-in-doctyper)   zeof-in-doctypeFr   T)r   r   )r   r;   r   "beforeDoctypeSystemIdentifierStater   r*   r<   r   rB   r   r   r   )r    r)   r%   r%   r&   r   %  s"    z,HTMLTokenizer.afterDoctypeSystemKeywordStatec             C   sg  |  j  j   } | t k r nE| d k rF d |  j d <|  j |  _ n| d k rn d |  j d <|  j |  _ n | d k r |  j j i t	 d d 6d d	 6 d
 |  j d <|  j j |  j  |  j
 |  _ n | t k r(|  j j i t	 d d 6d d	 6 d
 |  j d <|  j j |  j  |  j
 |  _ n; |  j j i t	 d d 6d d	 6 d
 |  j d <|  j |  _ d S)Nr   r1   r   r   rx   r'   r(   zunexpected-char-in-doctyper)   Fr   zeof-in-doctypeT)r   r;   r   r   r   r   r   r*   r<   r   r   r   r   )r    r)   r%   r%   r&   r   9  s4    z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatec             C   s;  |  j  j   } | d k r* |  j |  _ n| d k rl |  j j i t d d 6d d 6 |  j d d 7<n | d	 k r |  j j i t d d 6d
 d 6 d |  j d <|  j j |  j  |  j |  _ nn | t	 k r&|  j j i t d d 6d d 6 d |  j d <|  j j |  j  |  j |  _ n |  j d | 7<d S)Nr   rg   r'   r(   zinvalid-codepointr)   r   u   �rx   zunexpected-end-of-doctypeFr   zeof-in-doctypeT)
r   r;   !afterDoctypeSystemIdentifierStater   r*   r<   r   r   r   r   )r    r)   r%   r%   r&   r   V  s*    z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatec             C   s;  |  j  j   } | d k r* |  j |  _ n| d k rl |  j j i t d d 6d d 6 |  j d d 7<n | d	 k r |  j j i t d d 6d
 d 6 d |  j d <|  j j |  j  |  j |  _ nn | t	 k r&|  j j i t d d 6d d 6 d |  j d <|  j j |  j  |  j |  _ n |  j d | 7<d S)Nr   rg   r'   r(   zinvalid-codepointr)   r   u   �rx   zunexpected-end-of-doctypeFr   zeof-in-doctypeT)
r   r;   r   r   r*   r<   r   r   r   r   )r    r)   r%   r%   r&   r   n  s*    z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatec             C   s   |  j  j   } | t k r n | d k rL |  j j |  j  |  j |  _ n | t k r |  j j i t	 d d 6d d 6 d |  j d <|  j j |  j  |  j |  _ n. |  j j i t	 d d 6d d 6 |  j
 |  _ d	 S)
Nrx   r'   r(   zeof-in-doctyper)   Fr   zunexpected-char-in-doctypeT)r   r;   r   r*   r<   r   r   r   r   r   r   )r    r)   r%   r%   r&   r     s     z/HTMLTokenizer.afterDoctypeSystemIdentifierStatec             C   s   |  j  j   } | d k r= |  j j |  j  |  j |  _ n> | t k r{ |  j  j |  |  j j |  j  |  j |  _ n  d S)Nrx   T)	r   r;   r*   r<   r   r   r   r   rB   )r    r)   r%   r%   r&   r     s    zHTMLTokenizer.bogusDoctypeStatec             C   sf  g  } x | j  |  j j d   | j  |  j j d   |  j j   } | t k rZ Pq	 | d k sl t  | d d d   d k r | d d  d  | d <Pq	 | j  |  q	 Wd j |  } | j d  } | d k r+x6 t |  D]( } |  j	 j  i t
 d	 d
 6d d 6 q W| j d d  } n  | rV|  j	 j  i t
 d d
 6| d 6 n  |  j |  _ d S)N]rx   r      z]]r1   rg   r   r'   r(   zinvalid-codepointr)   u   �rQ   TrR   rR   r   rR   )r<   r   rj   r;   r   AssertionErrorr>   countranger*   r   r   r   r   )r    r)   r;   	nullCountr   r%   r%   r&   r     s0    zHTMLTokenizer.cdataSectionState)M__name__
__module____qualname____doc__r   r.   rI   r_   r`   rf   r   rh   rn   rl   rp   rr   rs   ri   r{   r|   rm   r   r   ro   r   r   rq   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r~   r   r   r   r   r   r   r   r   r}   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r%   r%   )r$   r&   r      s   	HP#

7 "-3r   N)
__future__r   r   r   unichrr@   	NameErrorcollectionsr   	constantsr   r   r	   r
   r   r   r   r   r   r   inputstreamr   trier   rS   objectr   r%   r%   r%   r&   <module>   s   
