
    \j'                     :   d Z 	 ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
mZ ddlmZ ddlmZmZ dZdadad ZddZ	 dd	Z ed
      d        ZddZ G d dej2                  j4                        Zd ZddZ G d de	j<                        Zg dZy)z+Centralized I/O security sentinel for NLTK.    N)	lru_cache)Path)unquoteurlparseFc                  (   g } dt         j                  v r't        t        t         j                  d   dg             } t        j
                  j                  dd      }| |f}t        t        |k(  rt        S t               }| |j                  t        j                        z   D ]Q  }|s	 t        |d      r|j                  n|}|j                  t        t!        |            j#                                S ddl}dd|j-                         fD ]K  }	 t        |      j/                         j#                         }|j1                         r|j                  |       M |a|a|S # t$        t&        t(        f$ r Y w xY w# t$        t&        t(        f$ r Y w xY w)	zDDynamically determines allowed directories based on NLTK data paths.z	nltk.datapath	NLTK_DATA Nr   z~/nltk_dataz/usr/share/nltk_data)sysmoduleslistgetattrosenvironget_ALLOWED_ROOTS_CACHE_LAST_DATA_PATHSsetsplitpathsephasattrr   addr   strresolveOSError
ValueErrorRuntimeErrortempfile
gettempdir
expanduserexists)current_paths	env_pathscurrent_staterootspraw_pr   locs           6/root/env/lib/python3.12/site-packages/nltk/pathsec.py_get_allowed_rootsr*      sg    Mckk!WS[[%=vrJK

{B/I"I.M',<,M##EEY__RZZ88 ")!V"4!		$s5z*2245 5x7J7J7LM 	S	$$&..0Axxz		!	 !$L Z6  \2 		s&   (AE AE: E76E7:FFc                 >   t        | t              s| rt        |       j                         sy	 t	        | d      r| j
                  n
t        |       }d|v r>t        |      }|j                  dv ry|j                  dk(  rt        |j
                        }	 t        |      j                         |rft	        |d      r|j
                  n
t        |      }t        |      j                         }|k(  s%j                  |      st        d| d	 d
|       t!               }	t#        fd|	D              ry	 t        t%        j&                               j                         k(  sj                        rPt#        fd|	D              ryd| d}
t(        rt+        |
      t-        j.                  d| d	 dt0        d       y	 d| d }
t(        rt+        |
      t-        j.                  |
t0        d       y# t        t        f$ rT |j                         }d|v r1|j                  d      dz   }t        |d|       j                         nt        |      Y w xY w# t        t        f$ r Y w xY w# t*        t        f$ r  t2        $ r
 t(        r Y yw xY w)aG  
    Ensures file access is restricted to allowed data directories.

    :param path_input: The path to validate.
    :param context: Diagnostic context for warnings/errors.
    :param required_root: If provided, enforces that the path is strictly
                          within this specific directory (scoped sandbox).
    Nr   z://)httphttpsftpfilez.zip   Security Violation [z]: Path z escapes root c              3   N   K   | ]  }|k(  xs j                  |        y wN)is_relative_to).0roottargets     r)   	<genexpr>z validate_path.<locals>.<genexpr>x   s(     Wv~<!6!6t!<<Ws   "%c              3   (   K   | ]	  }|k(    y wr3    )r5   r6   cwds     r)   r8   z validate_path.<locals>.<genexpr>   s     =tsd{=s   zS]: CWD access restricted in ENFORCE mode. Authorize via: nltk.data.path.append('.')zSecurity Warning [z allowed via CWD.   
stacklevelz]: Unauthorized path )
isinstanceintr   stripr   r   r   schemer   r   r   r   r   lowerfindr4   r*   anyr   getcwdENFORCEPermissionErrorwarningswarnRuntimeWarning	Exception)
path_inputcontextrequired_rootrawparsed	lower_rawzip_idxroot_rawscoped_rootallowed_rootsmsgr;   r7   s              @@r)   validate_pathrX   F   s    *c"*C
O<Q<Q<SI!(V!<joo#j/C<c]F}} 88}}&fkk*		##Y&&(F  =&1 ""' 
 x.002Kk)V-B-B;-O *7)8F8>R]Q^_ 
 +,WWW	ryy{#++-C} 5 5c :=}==*7) 4@ @  )#..MM,WIXfXEVW&#$
  !;& %WI-B6(K!#&&MM#~!<o $ 	#		I"#..014c(7m,446c	#` $ 		 Z(   si   A I: 1$I: G? /BI: 7AI% :I% 6I: ?AI"I: !I""I: %I74I: 6I77I: :JJc                 v   	 t        |      j                         fd}t        | t        j                        r	 ||        yt        j                  | d      5 } ||       ddd       y# 1 sw Y   yxY w# t
        t        f$ r  t        t        j                  f$ r t        rt        d      Y yw xY w)zEEnhanced Zip-Slip protection using Pathlib for cross-platform safety.c                 j   gn| j                         }|D ]  }t        |d      r|j                  n
t        |      }d|v rt	        d|       |z  j                         }|k(  rQ|j                        rcd d| d}t        rt        |      t        j                  |t        d        y )	Nfilename zNull byte in ZIP member: r1   z]: Traversal member 'z' detected.r<   r=   )namelistr   r[   r   r   r   r4   rG   rH   rI   rJ   rK   )	zfmembersnamename_strmember_pathrW   rN   specific_memberr7   s	         r)   _auditz$validate_zip_archive.<locals>._audit   s    %4%@!bkkm    I,3D*,E4==3t98#$'@
%KLL%099;#v-1K1KF1S0	9NxjXcdC-c22 c>aHI    rNzZip validation failed)
r   r   r?   zipfileZipFilerH   r   r   
BadZipFilerG   )zip_obj_or_pathtarget_rootrc   rN   rd   r^   r7   s     ``  @r)   validate_zip_archiverl      s    ;k"**,	I" ow7?##6 "r
  Z( W''( ;!"9:: ;s6   AA< A< 	A0'A< 0A95A< 9A< <9B87B8   )maxsizec                 ~    	 t        j                  | dt         j                        S # t        t        f$ r g cY S w xY w)z5Cached hostname resolution to mitigate DNS rebinding.N)proto)socketgetaddrinfoIPPROTO_TCPr   r   )hostnames    r)   _resolve_hostnameru      s;    !!(D8J8JKKZ  	s   %( <<c                    | rt        |       j                         sy	 t        t        |             }|j                  dk(  r$t	        t        |j                        | d       y|j                  dvrAd| d|j                   d}t        rt        |      t        j                  |t        d	
       yt        |j                  xs d      D ]  }t        j                  |d   d         }|j                   s%|j"                  s|j$                  s|j&                  sOd| d| }t        rt        |      t        j                  |t        d	
        y# t        t(        f$ r  t*        $ r
 t        r Y yw xY w)z-Hardened URL validation with SSRF protection.Nr/   z.file_schemerN   )r,   r-   r1   z]: Unsupported scheme 'z'.r<   r=   r
   r0   r   z!]: SSRF attempt to restricted IP )r   rA   r   rB   rX   r   r   rG   rH   rI   rJ   rK   ru   rt   	ipaddress
ip_addressis_loopbackis_link_localis_multicast
is_privater   rL   )	url_inputrN   rQ   rW   resultips         r)   validate_network_urlr      sQ   C	N002#i.)==F"'&++.7)<8PQ== 11&wi/Fv}}oUWX  %c**c>a@'(=2> 	EF%%fQil3B~~!1!1R__,WI5VWYVZ[)#..MM#~!D	E Z(   s&   AE %AE 4A'E 7E E65E6c                   "     e Zd ZdZ fdZ xZS )_ValidatingRedirectHandlerzIEnsures that every step of a redirect chain is re-validated against SSRF.c                 F    t        |d       t        | 	  ||||||      S )NNetworkRedirectrw   )r   superredirect_request)selfreqfpcoderW   headersnewurl	__class__s          r)   r   z+_ValidatingRedirectHandler.redirect_request   s(    V->?w'RsGVLLre   )__name__
__module____qualname____doc__r   __classcell__r   s   @r)   r   r      s    SM Mre   r   c                     t        | d      r| j                  n
t        |       }t        |d       t        j
                  j                  t                     } |j                  | g|i |S )zCSecure wrapper for urllib.request.urlopen with redirect validation.full_urlzpathsec.urlopenrw   )	r   r   r   r   urllibrequestbuild_openerr   open)urlargskwargsurl_stropeners        r)   urlopenr      sX    %c:6cllCHG*;<^^(()C)EFF6;;s,T,V,,re   c                 L    t        | d       t        j                  | fd|i|S )z!Secure wrapper for builtins.open.zpathsec.openrw   mode)rX   builtinsr   )r/   r   r   s      r)   r   r      s%    $/==3D3F33re   c                   :     e Zd ZdZ fdZd fd	Zd fd	Z xZS )rh   z#Secure wrapper for zipfile.ZipFile.c                 r    t        |t        t        f      rt        |d       t	        |   |g|i | y )Nzpathsec.ZipFilerw   )r?   r   r   rX   r   __init__)r   r/   r   r   r   s       r)   r   zZipFile.__init__  s3    dS$K($(9:///re   c                 n    t        | |xs t        j                         |       t        |   |||      S )N)rc   )rl   r   rF   r   extract)r   memberr   pwdr   s       r)   r   zZipFile.extract  s-    T4#6299;OwvtS11re   c                 l    t        | |xs t        j                                t        |   |||       y r3   )rl   r   rF   r   
extractall)r   r   r_   r   r   s       r)   r   zZipFile.extractall  s)    T4#6299;74#.re   )NN)NNN)r   r   r   r   r   r   r   r   r   s   @r)   rh   rh     s    -0
2/ /re   rh   )rX   r   rl   r   r   rh   rG   )NLTKN)NZipAudit)	NetworkIO)rf   ) r   r   rx   r   rq   r   urllib.requestr   rI   rg   	functoolsr   pathlibr   urllib.parser   r   rG   r   r   r*   rX   rl   ru   r   r   HTTPRedirectHandlerr   r   r   rh   __all__r:   re   r)   <module>r      s    2 1   	  
      *   %PTp AK!;H 3  FM!C!C M-4/goo /"re   