
    	]j7                     L   d Z ddlZddlZddlZddlZddlZ	 ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ ddlmZ d	d
lmZ  ej,                  e      Z ej2                  d       d Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#e	d        Z$d Z%ddZ&d Z'y#  ddlZY xY w)zThis file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license.
    N)timeit)extract_message)ssrf_safe_getsettings)SimpleUploadedFile)ValidationError   )
FileUploadi   c                 V    t        | t        j                  t        j                  f      S N)
isinstanceio	RawIOBaseBufferedIOBase)fs    K/root/env/lib/python3.12/site-packages/label_studio/data_import/uploader.py	is_binaryr      s    a",,(9(9:;;    c                 &   | j                  d       g }| j                         }t        |j                  t	        |t
              rdnd            }t        |      D ]"  }|j                  dt        |dz         z          $ | j                  d       |S )z+Generate column names for headless csv filer      ,,columnr
   )	seekreadlinelensplitr   bytesrangeappendstr)filenameslinenum_columnsis        r   csv_generate_headerr'      sx    IIaLE==?DdjjD%)@cJKK; ,XAE
*+,IIaLLr   c                     t        |       t        j                  kD  r(t        dt        j                   dt        |              y )NzMaximum task number is z, current task number is )r   r   TASKS_MAX_NUMBERr	   )taskss    r   check_max_task_numberr+   ,   sF    
5zH---%h&?&?%@@]^abg^h]ij
 	
 .r   c                 j    | t         j                  k\  r t        dt         j                   d|  d      y )Nz#Maximum total size of all files is z bytes, current size is z bytes)r   TASKS_MAX_FILE_SIZEr	   )values    r   check_tasks_max_file_sizer/   4   sB    ,,,1(2N2N1O P$gV-
 	
 -r   c                     | j                         D ]_  \  }}t        j                  j                  |j                        \  }}|j                         t        j                  vsSt        | d       y )N extension is not supported)	itemsospathsplitextnamelowerr   SUPPORTED_EXTENSIONSr	   )filesfilenamefile_obj_exts        r   check_extensionsr>   <   sa    #kkm G(!!(--0399;h;;;!SE)D"EFFGr   c                     t        | j                         D cg c]  \  }}|j                   c}}      }t        |       y c c}}w r   )sumr2   sizer/   )r9   r<   r"   totals       r   check_request_files_sizerC   C   s2    %++-8wq$89Ee$ 9s   A
c                    t        | ||      }t        j                  rt        j                  t        |j                  j                              \  }}|dv rt        |j                  j                         j                               }|j                  j                  d       |j                  j                  |j                                |j                  j                          |j                          |S )N)userprojectr"   )zimage/svg+xmlr   )r   r   SVG_SECURITY_CLEANUP	mimetypes
guess_typer!   r"   r6   allowlist_svgreaddecoder   writeencodetruncatesave)rE   rF   r"   instancecontent_typeencoding	clean_xmls          r   create_file_uploadrU   I   s    tW4@H$$!*!5!5c(--:L:L6M!Nh,,%hmm&8&8&:&A&A&CDIMMq!MM	 0 0 23MM""$MMOOr   c           	      l    ddl m} g d}|j                  |dddddd      }|j                  |       }|S )zUFilter out malicious/harmful content from SVG files
    by defining allowed tags
    r   )clean)	xmlsvgcircleellipser$   r4   polygonvectorrectTF)
allow_tagsstylelinksadd_nofollowpage_structuresafe_attrs_onlyremove_unknown_tags)	lxml.htmlrW   Cleaner
clean_html)	dirty_xmlrW   r_   cleanerrT   s        r   rJ   rJ   V   sP      
J mm!  G ""9-Ir   c                 p    	 | j                  dd      }t        j                  |      S # t        $ r Y y w xY w)N'")replacejsonloads
ValueError)datajson_acceptable_strings     r   str_to_jsonrt   v   s:    !%c3!7zz011 s   &) 	55c                    	 |j                  dd      d   }t        ||j                  j                         dddi      }t	        |d      r|j
                  n|}||k7  rsd	d
lm}m}	  |	|      }
 ||
j                        }|j                  dd      d   }d|v r|j                  d      d	   }t        j                  j                  |      \  }}|}t        j                  j                  |      \  }}|r.|j                         t        j                  vrt!        | d      |j"                  j%                  d      }|rt'        t)        |             |j*                  }t-        ||t/        ||            }|j0                  rd}| j3                  |j4                         t7        j8                  ||       \  }}}|||| |fS # t         $ r}|d}~wt:        $ r}t!        t=        |            d}~ww xY w)z.Download file using URL and read tasks from it/r
   TzAccept-EncodingN)verifystreamheadersurlr   )unquoteurlparse?r1   zcontent-length)rsplitr   organizationshould_verify_ssl_certshasattrr{   urllib.parser|   r}   r4   r   r3   r5   r7   r   r8   r	   rz   getr/   intcontentrU   r   format_could_be_tasks_listr    idr   load_tasks_from_uploaded_files	Exceptionr   )file_upload_idsrF   rE   r{   could_be_tasks_listr:   responseresolved_urlr|   r}   
parsed_urlr4   resolved_filenamer<   resolved_extr=   content_lengthfile_contentfile_uploadr*   found_formats	data_keyses                          r   tasks_from_urlr   ~   s   *2::c1%b) ,,DDFt^oqu]v

 (/x'?x||S36!,/J:??+D $C 3B 7''$5$;$;C$@$C! gg../@AOA|(H !!(+3399;h&C&CC!SE)D"EFF "))--.>?%c.&9:''(w8J8Ua8bc11"&{~~.*4*S*ST[]l*m'}i mUO=PPP	   2oa0112s$   F9G 	G2GG2G--G2c                    d}g }t        |       t        |       |j                         D ];  \  }}t        | ||      }|j                  rd}|j                  |j                         = t        j                  d| d|        ||fS )NFTzcreated file uploads: z could_be_tasks_list: )	rC   r>   r2   rU   r   r    r   loggerdebug)rE   rF   FILESr   r   r<   r"   r   s           r   create_file_uploadsr      s    OU#U;;= /4(w=11"&{~~.	/ LL)/)::PQdPefg///r   c           	         g g t               }}}| j                  r1| j                  }t        j                  | j                  |      \  }}}n| j
                  r| j
                  }t        |      }|rot        || j                  t        d|j                                     }|j                  |j                         t        j                  | j                  |      \  }}}nVd}	t        || j                  |||	      \  }}}}}	|	r3d| _        | j                  dg       n| j                  r| j                  }t!        t"              st%        d      |st%        d      t'        |       |||t#        |      fS )z]Load tasks from different types of request.data / request.files saved in project_import modelinplace.jsonFTr   update_fields"load_tasks: Data root must be listload_tasks: No tasks added)setr   r   r   rF   r{   rt   rU   r   rN   r    r   r   r   rP   r*   r   listr	   r+   )
project_importrE   r   r   r   r*   r{   	json_datar   r   s
             r   load_tasks_for_async_importr      ss   02BI]O%%(88*4*S*S""O+
'}i
 
		  $	,&&">3::<@K
 "";>>2.8.W.W&&/+E=) #( 0F0FcSfg#"592##3H2I#J			$$ eT"BCC :;;% /=$y/AAr   c           	   #     K   ddl m} |s|j                  }g }i }t               }| j                  r| j                  }|j                         }t        j                  | j                  ||      D ]b  \  }}	}
|j                  |	       |j                  |
       t        |t              st        d      |sGt        |       |||	t        |
      f d n| j                  rk| j                  }g g t               }}}t        |      }|rot!        || j                  t#        d|j%                                     }|j'                  |j(                         t        j*                  | j                  |      \  }}}n=d}t-        || j                  |||      \  }}}}}|rd| _        | j1                  dg	       t        |t              st        d      |st        d
      t        |       |j                         }|j                         }|j                         }t3        dt5        |      |      D ]  }||||z    }|||t        |      f  n| j6                  rk| j6                  }t        |t              st        d      |st        d
      t        |       t3        dt5        |      |      D ]  }||||z    }|g i g f  nt        d
      ||t        |      fS w)zLoad tasks from different types of request.data / request.files saved in project_import model,
    yielding tasks in batches to reduce memory usager   r   )
batch_sizer   r   FTr   r   r   )django.confr   IMPORT_BATCH_SIZEr   r   copyr   (load_tasks_from_uploaded_files_streamingrF   updater   r   r	   r+   r{   rt   rU   r   rN   r    r   r   r   r   rP   r   r   r*   )r   rE   r   r   all_file_upload_idsall_found_formatsall_data_keysr   batch_tasksbatch_formatsbatch_data_keysr{   r   r   r   r   r*   r   r&   s                      r   %load_tasks_for_async_import_streamingr      s     %//
EM%%(88-224;E;n;n""O
<
 	U7K $$]3  1 k40%&JKK!+.tO?TTT	U 
		   46CE	  $	,&&">3::<@K
 "";>>2.8.W.W&&/+E=) #( 0F0FcSfg#"592##3H2I#J%&!"FGG!">??e$-224)..0!(q#e*j1 	OAA
N3KtINN	O 
		$$%&!"FGG!">??e$q#e*j1 	*AA
N3Kr2r))	*
 :;; 143FFFs   KKc           	         g g t               }}}d}t        | j                        rt        | j                         t	        | j                         | j                  j                         D ]E  \  }}t        | j                  ||      }|j                  rd}|j                  |j                         G t        j                  ||      \  }	}}n>d| j                  v r| j                  j                  d      }
|
st!        d      t#        |
      }|ret        | j                  |t%        d|
j'                                     }|j                  |j                         t        j                  ||      \  }	}}nt)        ||| j                  |
|      \  }}}	}}nvd| j                  v r(t+        | j                  t,              r| j                  g}	n@d| j                  v r't+        | j                  t.              r| j                  }	nt!        d      t+        |	t.              st!        d	      |	st!        d
      t1        |	       |	|||t/        |      fS )z?Load tasks from different types of request.data / request.filesFTz!application/x-www-form-urlencodedr{   z""url" is not found in request datar   zapplication/jsonz-load_tasks: No data found in DATA or in FILESr   r   )r   r   r   rC   r>   r2   rU   rE   r   r    r   r   r   rR   rr   r   r	   rt   r   rN   r   r   dictr   r+   )requestrF   r   r   r   r   r:   r"   r   r*   r{   r   s               r   
load_tasksr   W  s   02BI]O 7== /'%mm113 	3NHd,W\\7DIK55&*#"";>>2		3
 +5*S*ST[]l*m'}i 
-0D0D	Dllu%!"FGG  $	,W\\7DVWegjgqgqgsDtuK"";>>2.8.W.WX_ap.q+E=) sL_`# 
w33	3
7<<QU8V 
w33	3
7<<QU8V MNN eT"BCC :;;% /#6tIVVr   )i  )(__doc__csvr   loggingrH   r3   ujsonro   core.utils.commonr   core.utils.exceptionsr   core.utils.ior   r   r   django.core.files.uploadedfiler   rest_framework.exceptionsr	   modelsr   	getLogger__name__r   field_size_limitr   r'   r+   r/   r>   rC   rU   rJ   rt   r   r   r   r   r    r   r   <module>r      s    
 	   	 % 1 '   = 5 			8	$   [ !<


G%
@.Qb 0 04Bn^GB=WY
s   B B#