
    ]jEQ              
          d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ g dZ	ddgdgdgd	Z
g d
Zdj                   e ed d       edd       edd            D  cg c]
  }  e|        c}       Z ej                   dez   dz   ej"                        ZdZ G d d      Zd Z G d dej,                        Zyc c} w )    )chainN)unescape)html5lib_shim)
parse_shim)aabbracronymb
blockquotecodeemiliolstrongulhreftitle)r   r   r	   )httphttpsmailto 	                []?c                   ,    e Zd ZdZeeeddddfdZd Zy)Cleanera  Cleaner for cleaning HTML fragments of malicious content

    This cleaner is a security-focused function whose sole purpose is to remove
    malicious content from a string such that it can be displayed as content in
    a web page.

    To use::

        from bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    .. Note::

       This cleaner is not designed to use to transform content to be used in
       non-web-page contexts.

    .. Warning::

       This cleaner is not thread-safe--the html parser has internal state.
       Create a separate cleaner per thread!


    FTNc                 F   || _         || _        || _        || _        || _        |xs g | _        || _        t        j                  | j                   | j                  dd      | _	        t        j                  d      | _        t        j                  dddddd      | _        y)a<  Initializes a Cleaner

        :arg list tags: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not to strip disallowed elements

        :arg bool strip_comments: whether or not to strip HTML comments

        :arg list filters: list of html5lib Filter classes to pass streamed content through

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Using filters changes the output of ``bleach.Cleaner.clean``.
               Make sure the way the filters change the output are secure.

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
            sanitizing style attribute values and style text; defaults to None

        F)tagsstripconsume_entitiesnamespaceHTMLElementsetreealwaysT)quote_attr_valuesomit_optional_tagsescape_lt_in_attrsresolve_entitiessanitizealphabetical_attributesN)r$   
attributes	protocolsr%   strip_commentsfilterscss_sanitizerr   BleachHTMLParserparsergetTreeWalkerwalkerBleachHTMLSerializer
serializer)selfr$   r0   r1   r%   r2   r3   r4   s           :/root/env/lib/python3.12/site-packages/bleach/sanitizer.py__init__zCleaner.__init__P   s    L 	$"
,}"*#44**""'	
 $11':'<<&$# #$)
    c           	         t        |t              s1dj                  |j                  j                        }t        |      |sy| j                  j                  |      }t        | j                  |      | j                  | j                  | j                  | j                  | j                  | j                        }| j                   D ]  } ||      } | j"                  j%                  |      S )zCleans text and returns sanitized result as unicode

        :arg str text: text to be cleaned

        :returns: sanitized text as unicode

        :raises TypeError: if ``text`` is not a text type

        z9argument cannot be of '{name}' type, must be of text type)namer   )sourcer0   strip_disallowed_elementsstrip_html_commentsr4   allowed_elementsallowed_protocols)rA   )
isinstancestrformat	__class____name__	TypeErrorr6   parseFragmentBleachSanitizerFilterr8   r0   r%   r2   r4   r$   r1   r3   r:   render)r;   textmessagedomfilteredfilter_classs         r<   cleanzCleaner.clean   s     $$KRR00 S  
 G$$kk''-(;;s#&*jj $ 3 3,,!YY"nn

 !LL 	5L#84H	5 %%h//r>   )	rJ   
__module____qualname____doc__ALLOWED_TAGSALLOWED_ATTRIBUTESALLOWED_PROTOCOLSr=   rT    r>   r<   r"   r"   3   s*    < %#@
D&0r>   r"   c                      t               r S t         t              r fd}|S t         t              r fd}|S t	        d      )a0  Generates attribute filter function for the given attributes value

    The attributes value can take one of several shapes. This returns a filter
    function appropriate to the attributes value. One nice thing about this is
    that there's less if/then shenanigans in the ``allow_token`` method.

    c                     | v r|    }t        |      r
 || ||      S ||v rydv rd   }t        |      r
 || ||      S ||v S y)NT*F)callable)tagattrvalueattr_valr0   s       r<   _attr_filterz.attribute_filter_factory.<locals>._attr_filter   sl    j %c?H%#Cu558#j %c?H%#Cu55x''r>   c                     |v S Nr[   )r`   ra   rb   r0   s      r<   rd   z.attribute_filter_factory.<locals>._attr_filter   s    :%%r>   z3attributes needs to be a callable, a list or a dict)r_   rF   dictlist
ValueError)r0   rd   s   ` r<   attribute_filter_factoryrj      sM     
*d#	$ *d#	& 
J
KKr>   c                   `     e Zd ZdZeeedddf fd	Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Z xZS )rM   zmhtml5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    FTNc                     t        |      | _        || _        || _        || _        t        j                  ddt        d       t        	| $  |f||d|S )ag  Creates a BleachSanitizerFilter instance

        :arg source: html5lib TreeWalker stream as an html5lib TreeWalker

        :arg list allowed_elements: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list allowed_protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip_disallowed_elements: whether or not to strip disallowed
            elements

        :arg bool strip_html_comments: whether or not to strip HTML comments

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
            sanitizing style attribute values and style text; defaults to None

        ignorez"html5lib's sanitizer is deprecatedzbleach._vendor.html5lib)rP   categorymodule)rD   rE   )
rj   attr_filterrB   rC   r4   warningsfilterwarningsDeprecationWarningsuperr=   )
r;   rA   rD   r0   rE   rB   rC   r4   kwargsrI   s
            r<   r=   zBleachSanitizerFilter.__init__   ss    B 4J?)B&#6 * 	8',		
 w
-/
 	
 	
r>   c              #      K   |D ]5  }| j                  |      }|st        |t              r|E d {    2| 7 y 7 wrf   )sanitize_tokenrF   rh   )r;   token_iteratortokenrets       r<   sanitize_streamz%BleachSanitizerFilter.sanitize_stream!  sF     # 		E%%e,C#t$			 s   /A >A c              #   P  K   g }|D ]h  }|rF|d   dk(  r|j                  |       dj                  |D cg c]  }|d   	 c}      dd}g }| n|d   dk(  r|j                  |       e| j dj                  |D cg c]  }|d   	 c}      dd}| yc c}w c c}w w)z/Merge consecutive Characters tokens in a streamtype
Charactersr   data)r   r}   N)appendjoin)r;   rx   characters_bufferry   
char_token	new_tokens         r<   merge_charactersz&BleachSanitizerFilter.merge_characters-  s     # 	E =L0%,,U3
 !#BSTJZ/T! !-	!I )+%#Ov,.!((/K+	0 GGBSTJZ/TU 
	 # U Us   3B&B
A B&B!B&c                 |    | j                  | j                  t        j                  j	                  |                   S rf   )r   r{   r   Filter__iter__)r;   s    r<   r   zBleachSanitizerFilter.__iter__N  s4    $$  !5!5!>!>t!DE
 	
r>   c                 ,   |d   }|dv r@|d   | j                   v r| j                  |      S | j                  ry| j                  |      S |dk(  r/| j                  s"t        j                  |d   ddd	
      |d<   |S y|dk(  r| j                  |      S |S )a  Sanitize a token either by HTML-encoding or dropping.

        Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
        ['attribute', 'pairs'], 'tag': callable}.

        Here callable is a function with two arguments of attribute name and
        value. It should return true of false.

        Also gives the option to strip tags instead of encoding.

        :arg dict token: token to sanitize

        :returns: token or list of tokens

        r}   )StartTagEndTagEmptyTagr@   NCommentr   z&quot;z&#x27;)"')entitiesr~   )rD   allow_tokenrB   disallowed_tokenrC   r   escapesanitize_characters)r;   ry   
token_types      r<   rw   z$BleachSanitizerFilter.sanitize_tokenS  s      6]
;;V} 5 55''..// ,,U339$++ - 4 4&M(,J!f <'++E22 Lr>   c                    |j                  dd      }|s|S t        j                  t        |      }||d<   d|vr|S g }t	        j
                  |      D ]  }|s|j                  d      rmt	        j                  |      }|V|dk(  r|j                  ddd       n|j                  d|d	       |t        |      d
z   d }|r|j                  d|d       |j                  d|d        |S )a  Handles Characters tokens

        Our overridden tokenizer doesn't do anything with entities. However,
        that means that the serializer will convert all ``&`` in Characters
        tokens to ``&amp;``.

        Since we don't want that, we extract entities here and convert them to
        Entity tokens so the serializer will let them be.

        :arg token: the Characters token to work on

        :returns: a list of tokens

        r   r   &Nampr~   )r}   r   Entity)r}   r@      )
getINVISIBLE_CHARACTERS_REsubINVISIBLE_REPLACEMENT_CHARr   next_possible_entity
startswithmatch_entityr   len)r;   ry   r   
new_tokenspartentity	remainders          r<   r   z)BleachSanitizerFilter.sanitize_characters~  s    yy$L&**+EtLf d?L
 "66t< 	DDs#&33D9% #))<*MN"))8V*LM !%S[1_%6 7I "))<*ST|TBC5	D8 r>   c                    t        j                  |      }t        j                  dd|      }|j	                  dd      }|j                         }	 t        j                  |      }|j                  r|j                  |v r|S y|j                  d      r|S d|v r|j                  d      d   |v r|S d|v sd	|v r|S y# t        $ r Y yw xY w)
zChecks a uri value to see if it's allowed

        :arg value: the uri value to sanitize
        :arg allowed_protocols: list of allowed protocols

        :returns: allowed value or None

        z[`\000-\040\177-\240\s]+r   u   �N#:r   r   r   )r   convert_entitiesrer   replacelowerr   urlparseri   schemer   split)r;   rb   rE   normalized_uriparseds        r<   sanitize_uri_valuez(BleachSanitizerFilter.sanitize_uri_value  s     '77>  ;RP (//"= (--/	  ((8F
 ==}} 11&  ((- ~%"((-a04EE **g9J.J5  		s   B< <	CCc                 b   d|v r)i }|d   j                         D ]  \  }}|\  }}| j                  |d   ||      s#|| j                  v r!| j                  || j                        }|P|}|| j
                  v r5t        j                  ddt        |            }|j                         }|s|}d|d   f| j                  v r0|dt        j                  d   dffv rt        j                  d	|      r|d
k(  r*| j                  r| j                  j                  |      }nd}|||<    ||d<   |S )z-Handles the case where we're allowing the tagr   r@   Nzurl\s*\(\s*[^#\s][^)]+?\) )Nr   xlinkr   z
^\s*[^#\s])Nstyler   )itemsrp   attr_val_is_urir   rE   svg_attr_val_allows_refr   r   r   r%   svg_allow_local_hrefr   
namespacessearchr4   sanitize_css)	r;   ry   attrsnamespaced_nameval	namespacer@   	new_valuenew_vals	            r<   r   z!BleachSanitizerFilter.allow_token  sh   U? E(-f(;(;(= 5-$"1	4 ''ftSA #d&:&:: $ 7 7T=S=S TI ( #C #d&B&BB ff%A3QTVG%mmoG" 
 & %-(D,E,EE&&&11':FC+  99]C8$ #o5))"00==cB ! *-o&k5-n "E&Mr>   c                    |d   }|dk(  rd|d   z  |d<   n|d   r|dv sJ g }|d   j                         D ]b  \  \  }}}|r|s||}}||t        j                  vr|}n#dj                  t        j                  |   |      }|j	                  d|d	|d
       d dj                  |d   dj                  |            |d<   nd|d   z  |d<   |j                  d      r|d   d d dz   |d<   d|d<   |d= |S )Nr}   r   z</%s>r@   r   )r   r   z{}:{}r   z="r   z<{}{}>r   z<%s>selfClosingz/>r~   )r   r   prefixesrH   r   r   r   )r;   ry   r   r   nsr@   vr   s           r<   r   z&BleachSanitizerFilter.disallowed_token:  s4   6]
!#eFm3E&M6]!9999E!&v!4!4!6 
TA d#RB :=+A+A!A&*O&-nn]5K5KB5OQU&VO ( 	. %OOE&M2775>JE&M #U6]2E&M99]#!&M#2.5E&M$f&Mr>   )rJ   rU   rV   rW   rX   rY   rZ   r=   r{   r   r   rw   r   r   r   r   __classcell__)rI   s   @r<   rM   rM      sP     &%+"' 2
h
B

)V;z8tCJ*r>   rM   )	itertoolsr   r   rq   xml.sax.saxutilsr   bleachr   r   rX   rY   rZ   r   rangechrINVISIBLE_CHARACTERScompileUNICODEr   r   r"   rj   SanitizerFilterrM   )cs   0r<   <module>r      s     	  %   $ '	Iy  0  ww5A;b"uR}EFSVF 
 %"**S+?%?#%ErzzR  ! E0 E0P(LV~M99 ~} Gs   B=