
^VE                 @   s   d  d l  m Z m Z m Z d  d l Z d  d l m Z m Z d  d l m	 Z
 d d l m Z d d l m Z e j d e j  Z Gd	 d
   d
 e  Z Gd d   d e e  Z d S)    )absolute_importdivisionunicode_literalsN)escapeunescape)urllib_parse   )HTMLTokenizer)
tokenTypesaB  
                               ^
                               # Match a content type <application>/<type>
                               (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
                               # Match any character set and encoding
                               (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
                                 |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
                               # Assume the rest is data
                               ,.*
                               $
                               c               @   sr  e  Z d  Z d Z d d d d d d d d	 d
 d d d d d d d d d d d d d d d d d d d d d d  d! d" d# d$ d% d& d' d( d) d* d+ d, d- d. d/ d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 d: d; d< d= d> d? d@ dA dB dC dD dE dF dG dH dI dJ dK dL dM dN dO dP dQ dR dS dT dU dV dW dX dY dZ d[ d\ d] d^ d_ d` da db dc dd gc Z de df dg dh di dj dk dl dm dn do dp dq dr ds dt du dv dw dx dy dz d{ d| d} d~ d g Z d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d g# Z d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d) d d d d d d d d d d d d d d d d d8 d d d d d d d d d d d d d d d d d d d d d d d d d d d d d ddddddddd	d
dddddddR dddddddddd ddddddd d!d"d#d$d%d&d'g Z d(d d)d)d)d*d+d,d-d.d/d0d1d2d3d4d d d5d6d7d8d9d9d:d;d<d=d=d=d>d?dd@dAdBdCdDd%d%dEdFdGdHdIg- Z dJdKdLdMdNdOdPdQdRdSdTdUdVdWd dXd dYdZd[d\d]d^d_d`d.dad dbdcdddedfdgdhdidjdkdldmdndodpdqdrd dsdtd dudvdwdxdyd dzd{d|d}d~ddd d d dddddddd ddddddddddddddddddddddddddddddddddddddddddddddd%ddddddddEddFddGdd'ddHdIddddg Z	 d dd d d d d d d d d d dEdg Z
 dXdddbdd d|d{dzddg Z dd d d d dddd d d ddd d g Z ddddddddd d ddd.ddd( dedfdhdidjd dddddddddddddddddddddd"dd%g. Z dddddddddd dddddddddddd d ddd ddddddd	d
dddddg' Z dbdcdddddddg Z dddddddddddddddddd d!d"d#d$d g Z d%d&d'd(d)d*g Z e e e Z e e e	 Z e Z e Z e Z e Z e Z d+d,  Z d-d.  Z d/d0  Z d1d2  Z d3S(4  HTMLSanitizerMixinzA sanitization of XHTML+MathML+SVG and of inline style attributes.aZabbrZacronymaddressareaarticleasideaudiobbig
blockquotebrbuttonZcanvascaptioncenterZcitecodecolcolgroupcommanddatagridZdatalistdddeldetailsdfndialogdirdivdldtemzevent-sourcefieldset
figcaptionfigurefooterfontformheaderh1h2h3h4h5h6hriimginputZinskeygenZkbdlabelZlegendlimmapmenuZmeterZmulticolnavZnextidoloutputoptgroupoptionppreprogressqsZsampsectionselectsmallZsoundsourceZspacerspanstrikestrongsubsuptabletbodytdtextareatimetfootththeadtrttuulvarvideoZmactionmathZmerrorZmfracmiZmmultiscriptsmnmoZmoverZmpaddedZmphantomZmprescriptsZmrootZmrowZmspaceZmsqrtZmstyleZmsubZmsubsupZmsupZmtableZmtdmtextZmtrZmunderZ
munderovernoneZanimateanimateColoranimateMotionanimateTransformclipPathZcircleZdefsdescZellipsez	font-facezfont-face-namezfont-face-srcgZglyphZhkernlinearGradientlinemarkermetadatazmissing-glyphZmpathpathZpolygonZpolylineradialGradientZrectsetstopsvgswitchtexttitleZtspanZuseacceptzaccept-charsetZ	accesskeyactionZalignZaltautocomplete	autofocusZaxis
backgroundZbalanceZbgcolorZbgpropertiesborderZbordercolorZbordercolordarkZbordercolorlightZbottompaddingZcellpaddingZcellspacingch	challengecharZcharoffZchoffcharsetcheckedclassclearcolorcolsZcolspancompactZcontenteditablecontrolsZcoordsdataZdatafldZdatapagesizeZdatasrcdatetimedefaultdelaydisabledZ	draggableZdynsrcZenctypeendfaceforframeZ
galleryimgZgutterheadersheightZ	hidefocushiddenhighhrefZhreflangZhspaceZiconidZ	inputmodeismapZkeytypeZleftspacinglanglistZlongdescZloopZ	loopcountZloopendZ	loopstartlowZlowsrcmaxZ	maxlengthZmediamethodminmultiplenameZnohrefnoshadeZnowrapopenZoptimumpatternZpingz
point-sizeZposterZpqgZpreloadpromptZ
radiogroupreadonlyrelz
repeat-maxz
repeat-minreplacerequiredrevZrightspacingrowsZrowspanrulesZscopeselectedshapesizesrcstartstepstylesummarysuppressZtabindextargettemplateZ
toppaddingtypeZunselectableZusemapurnZvalignvaluevariablevolumeZvspaceZvrmlwidthwrapzxml:langZ
actiontypeZcolumnalignZcolumnlinesZcolumnspacingZ
columnspandepthZdisplayZdisplaystyleZequalcolumnsZ	equalrowsZfenceZ	fontstyleZ
fontweightZlinethicknessZlspaceZmathbackgroundZ	mathcolorZmathvariantmaxsizeZminsizeotherZrowalignZrowlinesZ
rowspacingZrspaceZscriptlevelZ	selection	separatorZstretchyz
xlink:hrefz
xlink:showz
xlink:typexmlnszxmlns:xlinkzaccent-height
accumulateZadditiveZ
alphabeticzarabic-formZascentattributeNameattributeTypebaseProfileZbboxbeginZbycalcModez
cap-heightz	clip-pathzcolor-renderingcontentZcxcydZdxZdyZdescentZdurfillzfill-opacityz	fill-rulezfont-familyz	font-sizezfont-stretchz
font-stylezfont-variantzfont-weightfromZfxZfyZg1Zg2z
glyph-namegradientUnitsZhangingzhoriz-adv-xzhoriz-origin-xideographick	keyPoints
keySplineskeyTimesz
marker-endz
marker-midzmarker-startmarkerHeightmarkerUnitsmarkerWidthZmathematicaloffsetZopacityZorientoriginzoverline-positionzoverline-thicknesszpanose-1
pathLengthZpointspreserveAspectRatiorrefXrefYrepeatCount	repeatDurrequiredExtensionsrequiredFeaturesZrestartrotaterxZryZslopeZstemhZstemvz
stop-colorzstop-opacityzstrikethrough-positionzstrikethrough-thicknessZstrokezstroke-dasharrayzstroke-dashoffsetzstroke-linecapzstroke-linejoinzstroke-miterlimitzstroke-opacityzstroke-widthsystemLanguageztext-anchortoZ	transformu1u2zunderline-positionzunderline-thicknessunicodezunicode-rangezunits-per-emvaluesversionviewBoxZ
visibilityZwidthsxzx-heightZx1Zx2zxlink:actuatezxlink:arcrolez
xlink:rolezxlink:titlezxml:basez	xml:spaceyy1y2
zoomAndPanzcolor-profilecursorfiltermaskaltGlyphfeImagetextpathZtrefZazimuthzbackground-colorzborder-bottom-colorzborder-collapsezborder-colorzborder-left-colorzborder-right-colorzborder-top-colorZ	directionZ	elevationfloatzletter-spacingzline-heightZoverflowpausezpause-afterzpause-beforeZpitchzpitch-rangeZrichnessZspeakzspeak-headerzspeak-numeralzspeak-punctuationzspeech-rateZstressz
text-alignztext-decorationztext-indentzunicode-bidizvertical-alignzvoice-familyzwhite-spaceZautoZaquaZblackblockZblueZboldZbothZbottomZbrownZcollapseZdashedZdottedZfuchsiaZgrayZgreenz
!importantZitalicleftZlimeZmaroonZmediumZnavyZnormalZolivepointerZpurpleZredrightZsolidZsilverZtealtopZtransparentZ	underlineZwhiteZyellowZed2kftphttphttpsZircmailtonewsgophernntptelnetZwebcalZxmppZcalltofeedZaimrsynctagsshsftprtspZafsz	image/pngz
image/jpegz	image/gifz
image/webpz	image/bmpz
text/plainc             C   s   | d } | t  t j    k r/ t | } n  | t d t d t d f k r | d |  j k rs |  j | |  S|  j | |  Sn | t d k r n | Sd  S)Nr   StartTagEndTagEmptyTagr   Comment)r   r
   keysallowed_elementsallowed_tokendisallowed_token)selftoken
token_type r  =/tmp/pip-build-9m6vxulb/pip/pip/_vendor/html5lib/sanitizer.pysanitize_token   s    
z!HTMLSanitizerMixin.sanitize_tokenc                s'  d | k r#t    f d d   | d d  d  d  D  } x  j D]} | | k r^ qF n  t j d d t | |   j   } | j d d  } y t j |  } Wn t k
 r d  } | | =Yn X| rF | j	 rF | j	   j
 k r | | =n  | j	 d k rNt j | j  } | s&| | =qK| j d    j k rK| | =qKqNqF qF WxC   j D]8 } | | k r\t j d	 d
 t | |   | | <q\q\W| d   j k rd | k rt j d | d  r| d =n  d | k r  j | d  | d <n  d d   t | j    D | d <n  | S)Nr   c                s1   g  |  ]' \ } } |   j  k r | | f  q Sr  )allowed_attributes).0r   val)r  r  r  
<listcomp>   s   	 	z4HTMLSanitizerMixin.allowed_token.<locals>.<listcomp>r   u   [` - - \s]+ u   �content_typezurl\s*\(\s*[^#\s][^)]+?\) r   z
xlink:hrefz^\s*[^#\s].*r   c             S   s"   g  |  ] \ } } | | g  q Sr  r  )r  r   r  r  r  r  r     s   	 )dictattr_val_is_urirerP   r   lowerr   urlparse
ValueErrorschemeallowed_protocolscontent_type_rgxmatchrp   groupallowed_content_typessvg_attr_val_allows_refsvg_allow_local_hrefsearchsanitize_cssr   items)r  r  r  attrsattrZval_unescapedurir<   r  )r  r  r     sH    

	
&z HTMLSanitizerMixin.allowed_tokenc             C   s   | t  d k r% d | d | d <nW | d rj d j d d   | d D  } d | d | f | d <n d	 | d | d <| j d
  r | d d  d  d | d <n  | d t t  j    k r d | d <n t  d | d <| d =| S)Nr  z</%s>r   r   r  c             S   s,   g  |  ]" \ } } d  | t  |  f  q S)z %s="%s")r   )r  r   vr  r  r  r     s   	 z7HTMLSanitizerMixin.disallowed_token.<locals>.<listcomp>z<%s%s>z<%s>selfClosingr   z/>r   
Charactersr!  )r
   joingetr   r  )r  r  r  r3  r  r  r  r     s    
 z#HTMLSanitizerMixin.disallowed_tokenc             C   sc  t  j d  j d |  } t  j d |  s1 d St  j d |  sG d Sg  } xt  j d |  D] \ } } | sx q` n  | j   |  j k r | j | d | d  q` | j d	  d
 j   d k r!x | j   D], } | |  j	 k r t  j d |  r Pq q W| j | d | d  q` | j   |  j
 k r` | j | d | d  q` q` Wd j |  S)Nzurl\s*\(\s*[^\s)]+?\s*\)\s*r   z@^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$r  z ^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$z([-\w]+)\s*:\s*([^:;]*)z: ;-r   r|   r}   marginpaddingz\^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$)r|   r}   zmarginzpadding)r$  compilerP   r+  findallr%  allowed_css_propertiesappendsplitacceptable_css_keywordsallowed_svg_propertiesr9  )r  r   cleanpropr   keywordr  r  r  r1    s*    	 zHTMLSanitizerMixin.sanitize_cssN)__name__
__module____qualname____doc__Zacceptable_elementsZmathml_elementsZsvg_elementsZacceptable_attributesZmathml_attributesZsvg_attributesr#  r.  r/  Zacceptable_css_propertiesrD  Zacceptable_svg_propertiesZacceptable_protocolsZacceptable_content_typesr  r  rA  Zallowed_css_keywordsrE  r)  r-  r  r  r  r1  r  r  r  r  r      s  						)r   c               @   s:   e  Z d  Z d d d d d d d d  Z d d   Z d S)HTMLSanitizerNTFc          
   C   s)   t  j |  | | | | | | d | d  S)Nparser)r	   __init__)r  streamencoding	parseMeta
useChardetlowercaseElementNamelowercaseAttrNamerN  r  r  r  rO  !  s    zHTMLSanitizer.__init__c             c   s;   x4 t  j |   D]# } |  j |  } | r | Vq q Wd  S)N)r	   __iter__r  )r  r  r  r  r  rV  (  s    zHTMLSanitizer.__iter__)rI  rJ  rK  rO  rV  r  r  r  r  rM     s   	rM  )
__future__r   r   r   r$  xml.sax.saxutilsr   r   Z	six.movesr   r&  	tokenizerr	   	constantsr
   r?  VERBOSEr*  objectr   rM  r  r  r  r  <module>   s   
 