
    	]jG                         d Z ddlZddlZddlZddlmZ ddlZddlZ	 ddl	Z
ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ  ej*                  e      Zd	 Z G d
 dej2                        Zd Zy#  ddl
Z
Y TxY w)zThis file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license.
    N)Counter)extract_message)settings)models)cached_property)ValidationErrorc                 X   t        | j                        }t        j                  j	                  t
        j                  t
        j                  |      }t        j                  |d       t
        j                  dz   |z   dz   t        t        j                               dd z   dz   |z   }|S )NT)exist_ok/r      -)str
project_idospathjoinr   
MEDIA_ROOT
UPLOAD_DIRmakedirsuuiduuid4)instancefilenameprojectproject_dirr   s        I/root/env/lib/python3.12/site-packages/label_studio/data_import/models.pyupload_name_generatorr      s    (%%&G'',,x22H4G4GQKKKd+$w.4s4::<7H17MMPSSV^^DK    c                      e Zd Z ej                  ddej
                        Z ej                  ddej
                        Z ej                  e	      Z
d Zed        Zed        Zed	        Zed
        Zed        Zd Zd Zd Zd Zd ZddZd Zd Zd Zed        ZddZddZe	 dd       Z e	 dd       Z!y) 
FileUploadz
users.Userfile_uploads)related_name	on_deletezprojects.Project)	upload_toc                 Z    | j                   |_         | j                   j                  |      S N)r   has_permission)selfusers     r   r'   zFileUpload.has_permission&   s"    ||||**400r   c                 .    | j                   j                  S r&   )filenamer(   s    r   filepathzFileUpload.filepath*   s    yy~~r   c                 h    t         j                  j                  | j                  j                        S r&   )r   r   basenamer+   r,   r-   s    r   	file_namezFileUpload.file_name.   s    ww		//r   c                     t         j                  rYt         j                  rt         j                  s9t         j                  dz   | j                  j
                  j                  d      z   S | j                  j
                  S )Nr   )r   FORCE_SCRIPT_NAMEHOSTNAMECLOUD_FILE_STORAGE_ENABLEDr+   urllstripr-   s    r   r6   zFileUpload.url2   sR    %%x/@/@XEhEh--3diimm6J6J36OOO99== r   c                     d }	 t         j                  j                  | j                        d   }t        j                  dt        |      z          |S #  Y 'xY w# t        j                  dt        |      z          w xY w)NzGet file format )r   r   splitextr.   loggerdebugr   )r(   file_formats     r   formatzFileUpload.format9   sk    	@''**4==9"=K LL+c+.>>?		LL+c+.>>?s   ,A AA #A=c                     t        | d      rt        | d      }|S | j                  j                         j	                  d      }t        | d|       |S )N
_file_bodyutf-8)hasattrgetattrr+   readdecodesetattr)r(   bodys     r   contentzFileUpload.contentD   sO     4&4.D  99>>#**73DD,-r   c                    	 | j                   j                         5 }|j                         }t        |t              r|j                  d      }|j                  d      }|j                  d      }||kD  r&t        j                  d| d| d       	 ddd       yt        j                  d| d	| d
       	 ddd       y# 1 sw Y   yxY w# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)aO  
        Detect the CSV separator by analyzing the first line of the file.

        This method implements a reliable heuristic:
        1. If semicolons are more frequent than commas in the first line, use semicolon
        2. Otherwise, default to comma

        Returns:
            str: The detected separator (',' or ';')
        rA   ,;z$Detected semicolon separator (found z semicolons vs z commas)Nz%Using default comma separator (found z commas vs z semicolons)z5Failed to detect CSV separator, defaulting to comma: )r+   openreadline
isinstancebytesrE   countr;   r<   	Exceptionwarning)r(   f
first_linecomma_countsemicolon_countes         r   _detect_csv_separatorz FileUpload._detect_csv_separatorN   s    	! QZZ\
j%0!+!2!27!;J )..s3","2"23"7 #[0LL>>O_j^kkst    LL?}KXgWhhtu '  (  	NNRSTRUVW	sA   C A6CC C8C C
C 
C 	C8C33C8c                 H   t         j                  dj                  | j                               | j	                         }t        j                  | j                  j                         |      j                  d      j                  d      }|D cg c]  }d|i }}|S c c}w )a  
        Read tasks from a CSV file with automatic separator detection.

        The separator is automatically detected by analyzing the first line:
        - If semicolons are clearly indicated (more frequent than commas), use semicolon
        - Otherwise, use the default comma separator

        Returns:
            list: List of tasks in the format [{'data': {...}}, ...]
        z Read tasks list from CSV file {}sep recordsdata)r;   r<   r>   r.   rX   pdread_csvr+   rL   fillnato_dict)r(   	separatortaskstasks       r   read_tasks_list_from_csvz#FileUpload.read_tasks_list_from_csvs   s     	7>>t}}MN..0	DIINN,)<CCBGOOPYZ,12D&$22 3s   Bc                 (   t         j                  dj                  | j                               t	        j
                  | j                  j                         d      j                  d      j                  d      }|D cg c]  }d|i }}|S c c}w )z
        Read tasks from a TSV (tab-separated values) file.

        Returns:
            list: List of tasks in the format [{'data': {...}}, ...]
        z Read tasks list from TSV file {}	rZ   r\   r]   r^   )
r;   r<   r>   r.   r_   r`   r+   rL   ra   rb   )r(   rd   re   s      r   read_tasks_list_from_tsvz#FileUpload.read_tasks_list_from_tsv   sr     	7>>t}}MNDIINN,$7>>rBJJ9U,12D&$22 3s    Bc                     t         j                  dj                  | j                               | j                  j                         }|D cg c]  }dt        j                  |ii }}|S c c}w )Nz!Read tasks list from text file {}r^   )r;   r<   r>   r.   rH   
splitlinesr   DATA_UNDEFINED_NAME)r(   lineslinerd   s       r   read_tasks_list_from_txtz#FileUpload.read_tasks_list_from_txt   s^    8??NO'')LQRD&877>?RR Ss   A,c                    t         j                  dj                  | j                               | j                  }	 t        j                  |      }t        |t              r|g}g }t        |      D ]I  \  }}|j                  d      sd|i}t        |d   t              st        d      |j                  |       K |S # t        $ r' t        j                  |j                  d            }Y w xY w)Nz!Read tasks list from JSON file {}utf8r^   Task item should be dict)r;   r<   r>   r.   rH   jsonloads	TypeErrorrE   rN   dict	enumerategetr   append)r(   raw_datard   tasks_formattedire   s         r   read_tasks_list_from_jsonz$FileUpload.read_tasks_list_from_json   s    8??NO<<	8JJx(E eT"GE ' 	)GAt88F#~d6lD1%&@AA""4(	)   	8JJxv67E	8s   B? ?-C/.C/c           	   #     K   t         j                  dj                  | j                               	 | j                  j                  d      5 }|j                  d      xs d}d }|D ]
  }|dvs|} n |j                  d       g }|t        d      k(  rUt        j                  |dd	
      D ]9  }| j                  |      }|j                  |       t        |      |k\  s4| g }; nb|t        d      k(  rI|j                         }		 t        j                  |	      }
| j                  |
      }|j                  |       nt%        d      |r| d d d        y # t         $ r' t        j                  |	j#                  d            }
Y lw xY w# 1 sw Y   y xY w# t&        $ r)}t%        d| j(                   dt+        |             d }~ww xY ww)Nz+Read tasks list from JSON file streaming {}rbi   r   )    	   
      r   [itemT)	use_float{rq   z%Unsupported or invalid JSON structurezFailed to parse JSON file : )r;   r<   r>   r.   r+   rL   rD   seekordijsonitems_format_task_for_json_streamingry   lenrs   rt   ru   rE   r   rQ   r1   r   )r(   
batch_sizefile_handlesniff
first_bytebbatchre   formatted_taskrz   	task_dataexcs               r   #read_tasks_list_from_json_streamingz.FileUpload.read_tasks_list_from_json_streaming   s    BII$--XY-	i% ) #((.5#!
 A 88%&
   #S) !&K4 P ')-)M)Md)S^4u:3"'K$&E'  3s8+*//1HH$(JJx$8	 &*%I%I)%TNLL0 **QRR KS)  ) < % H$(JJxv/F$G	H=)  ) V  	i!$>t~~>NbQ`adQePf"ghh	is}   /GF !F/A1F!'F	E4FF G-FF
FFFF GF 	G#$GGGc                     t        |t              r|j                  d      s	d|i}nd|i}t        |d   t              st        d      |S )zNFormat task data for JSON streaming consistency with read_tasks_list_from_jsonr^   rr   )rN   rv   rx   r   )r(   re   s     r   r   z*FileUpload._format_task_for_json_streaming   sM     dD!88F#~ D>D$v,-!"<==r   c                     t         j                  dj                  | j                               | j                  }dt
        j                  |iig}|S )Nz"Read 1 task from hypertext file {}r^   )r;   r<   r>   r.   rH   r   rl   )r(   rG   rd   s      r   read_task_from_hypertext_bodyz(FileUpload.read_task_from_hypertext_body   sD    9@@OP||877>?@r   c                    t         j                  dj                  | j                               t        j
                  r!dt        j                  | j                  iig}|S dt        j                  | j                  iig}|S )Nz!Read 1 task from uploaded file {}r^   )r;   r<   r>   r.   r   r5   rl   r6   )r(   rd   s     r   read_task_from_uploaded_filez'FileUpload.read_task_from_uploaded_file   sm    8??NO..x;;T]]KLME  x;;TXXFGHEr   c                     | j                   dv S )N).csv.tsv.txt)r>   r-   s    r   format_could_be_tasks_listz%FileUpload.format_could_be_tasks_list   s    {{666r   c                    | j                   }	 |dk(  r|r| j                         }|S |dk(  r|r| j                         }|S |dk(  r|r| j                         }|S |dk(  r| j	                         }|S | j
                  j                  st        d      |dv r| j                         }|S | j                         }	 |S # t        $ r,}t        d| j                  z   dz   t        |      z         d }~ww xY w)	Nr   r   r   .jsonYour label config has more than one data key and direct file upload supports only one data key. To import data with multiple data keys, use a JSON or CSV file.z.htmlz.htmz.xmlFailed to parse input file r   )r>   rf   ri   ro   r}   r   one_object_in_label_configr   r   r   rQ   r1   r   )r(   file_as_tasks_listr=   rd   r   s        r   
read_taskszFileUpload.read_tasks   s(   kk	pf$);557. - &+=557* ) &+=557& % '668"  \\<<%d   99::< 	 99;   	p!"?$.."PSW"WZijmZn"noo	ps4   B: B:  B: B: 05B: 'B: :	C/'C**C/c              #   @  K   | j                   }	 |dk(  r| j                  |      D ]  }|  y
|dk(  r|r| j                         }nv|dk(  r|r| j                         }n^|dk(  r|r| j	                         }nF| j
                  j                  st        d      |dv r| j                         }n| j                         }t        dt        |      |      D ]  }||||z    }|  y
# t        $ r,}t        d| j                  z   d	z   t        |      z         d
}~ww xY ww)zRStreaming version of read_tasks that yields tasks in batches for memory efficiencyr   r   r   r   r   r   r   r   r   N)r>   r   rf   ri   ro   r   r   r   r   r   ranger   rQ   r1   r   )r(   r   r   r=   r   rd   r|   r   s           r   read_tasks_streamingzFileUpload.read_tasks_streaming  s=    kk	pg%!EEjQ  EK  &(-? 99;E F*/A 99;E F*/A 99;E@@)h  !$== >>@E ==?E q#e*j9  A!!a*n5EK   	p!"?$.."PSW"WZijmZn"noo	ps3   DC& DB5C& %D&	D/'DDDNc                    g }g }t               }t        j                  j                  |      }	|r|	j                  |      }	|	D ]  }
|
j                  }|r||vr|
j                  |      }|D ]  }|
j                  |d<    t        |      dkD  r(t        t        |d   d   j                                     n	t               }|s|}n@|j                  |      s*t        t        |||
j                  j                              ||z  }||z  }|j                  |       |t        |      |kD  s n |t!        t#        |            |fS )Nr   id__infile_upload_idr   r^   )setr    objectsfilterr>   r   idr   iterkeysintersectionr   +_old_vs_new_data_keys_inconsistency_messager+   r,   ry   rv   r   )clsr   file_upload_idsformatsfiles_as_tasks_list	trim_sizerd   fileformatscommon_data_fieldsr!   file_uploadr=   	new_tasksre   new_data_fieldss                  r   load_tasks_from_uploaded_filesz)FileUpload.load_tasks_from_uploaded_files?  sh     U "))000A'..o.FL' 	K%,,K;g5#../BCI! 8)4%&8 ILIYZHZc$y|F';'@'@'B"CD`c`eO%%4"'44_E%?');[=M=M=R=R  #o5"YE{+$u:	)3	6 d7;/02DDDr   c           	   #   d  K   g }t               }g }d}	t        j                  j                  |      }
|r|
j                  |      }
|
D ]  }|j                  }|r||vr|j                  |       |j                  ||      D ]  }|D ]  }|j                  |d<    |rdt        |d   d   j                               }|s|}n@|j                  |      s*t        t        |||j                  j                              ||z  }|j                  |       t        |      |k\  s|d| }||d }|t!        t#        |            |f |	t        |      z  }	t        |      |k\  r@  |r'|t!        t#        |            |f |	t        |      z  }	|	dk(  rg t!        t#        |            |f yyw)zfStream tasks from uploaded files in batches to reduce memory usage using true streaming for JSON filesr   r   r   r   r^   N)r   r    r   r   r>   ry   r   r   r   r   r   r   r+   r,   extendr   rv   r   )r   r   r   r   r   r   r   r   accumulated_batchtotal_yieldedr!   r   r=   
task_batchre   r   batch_to_yields                    r   (load_tasks_from_uploaded_files_streamingz3FileUpload.load_tasks_from_uploaded_files_streamingh  s    
  U "))000A'..o.FL' $	9K%,,K;g5{+ *>>?RT^_ 9
& <D-8^^D)*< &)*Q-*?*D*D*F&GO--<*/<<_M-G /1C[EUEUEZEZ  +o=* "((4 +,
:%6{
%CN(9*+(F%($w{/C*DFXXX!S%88M +,
:/9$	9N #T'+*>%?ASSSS!233M Ad7;/02DDD s   DF0"?F0"AF0)d   )T)Tr   )NNTN)NNTi  )"__name__
__module____qualname__r   
ForeignKeyCASCADEr)   r   	FileFieldr   r+   r'   r   r.   r1   propertyr6   r>   rH   rX   rf   ri   ro   r}   r   r   r   r   r   r   r   classmethodr   r    r   r   r    r    !   s<   6\RXR`R`aDf 2[a[i[ijG6&;<D1   0 0 ! !    #J"
(0id 7 7<#pJ ^b&E &EP _c;E ;Er   r    c                 "   dj                  |       }dj                  |      }d}||k(  ry|t        j                  k(  r|dj                  ||      z   S |t        j                  k(  r|dj                  ||      z   S |dj                  |||      z   S )NrJ   z+You're trying to import inconsistent data:
r\   zRuploading a single file {0} clashes with data key(s) found from other files:
"{1}"zpuploading tabular data from {0} with data key(s) {1}, clashes with other raw binary files (images, audios, etc.)zpuploading tabular data from "{0}" with data key(s) "{1}", clashes with data key(s) found from other files:
"{2}")r   r   rl   r>   )new_data_keysold_data_keyscurrent_filenew_data_keys_listold_data_keys_listcommon_prefixs         r   r   r     s    -0-0BM//	x;;	; FFLf\[mFno	
 
x;;	; IIOP\^pIqr	
  FFLf02DG	
r   )__doc__loggingr   r   collectionsr   r   pandasr_   ujsonrs   core.utils.exceptionsr   django.confr   	django.dbr   django.utils.functionalr   rest_framework.exceptionsr   	getLoggerr   r;   r   Modelr    r   r   r   r   <module>r      sq     	     2    3 5			8	$CE CEL
us   A. .A6