
    	]j#                     d   U d Z ddlZddlmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ  ej<                  e      Z  e	       Z!d Z"d Z#d Z$d Z%ddZ&d Z'e"e!jP                  e!jR                  gdddddddgZ*e+e   e,d<   y)zThis file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license.
    N)defaultdict)-replace_task_data_undefined_with_config_field)AllPermissions)start_job_async_or_sync)DataManagerAction)delete_tasks)AzureBlobImportStorageLink)GCSImportStorageLink)LocalFilesImportStorageLink)RedisImportStorageLink)S3ImportStorageLink)ValidationError)Taskc                 F    t        t        | || j                  d       ddiS )zRemove duplicated tasks with the same data fields:
    Duplicated tasks will be deleted and all annotations will be moved to the first of the duplicated tasks.
    Storage links will be restored for the first task.
    high)organization_id
queue_nameresponse_code   )r   remove_duplicates_jobr   )projectquerysetkwargss      ]/root/env/lib/python3.12/site-packages/label_studio/data_manager/actions/remove_duplicates.pyremove_duplicatesr      s.    
 // S!!    c                     t        | |      }t        |       t        |       t        || |       | j	                  | j
                  j                         dddd       y)zJob for start_job_async_or_syncTF)maximum_annotations_changed!overlap_cohort_percentage_changedtasks_number_changedfrom_scratchN)find_duplicated_tasks_by_data*restore_storage_links_for_duplicated_tasksmove_annotationsremove_duplicated_tasks&_update_tasks_counters_and_task_statestasksall)r   r   r   
duplicatess       r   r   r   (   s[    .wAJ.z:Z J: 22$(*." 3 r   c                    g }| D ]c  }| |   }t        |      dk(  rd}g }|D ]$  }|d   |d   z   dkD  rd}|j                  |       & |D ]  }|sd}|j                  |d           e |j                  |d      }|j                  |d      }	|j	                         t        |      k7  r)t        d	|j	                          d
t        |       d      t        ||       t        j                  dt        |       d       |	S )a  Remove duplicated tasks from queryset with condition that they don't have annotations

    :param duplicates: dict with duplicated tasks
    :param project: Project instance
    :param queryset: queryset with input tasks
    :return: queryset with tasks which should be kept
       Ftotal_annotationscancelled_annotationsr   Tid)id__inannotations__isnullzDRemove duplicates failed, operation is not finished: queryset count z != removing zK. It means that some of duplicated tasks have been annotated twice or more.zRemoved  duplicated tasks)	lenappendfilterexcludecountr   r   loggerinfo)
r)   r   r   removingdatarootone_task_savednew_roottaskkepts
             r   r%   r%   9   s=    H ,$t9> 	&D'(40G+HH1L!%%	&  	,D!!% T
+	,,. hDIH8FD ~~3x=(&nn./}S]O LXX
 	
 (#
KK(3x=/):;<Kr   c                    d}| D ]  }| |   }t        |      dk(  rd|d   }}t        |      D ]  \  }}|}|d   |d   z   dkD  s n ||dz   d D ]  }|d   |d   z   dkD  st        j                  j	                  |d         j
                  j                  |d          ||d   |d   z   z  }t        j                  d	|d    d
|d    d|d           d|d<   d|d<     y)z8Move annotations to the first task from duplicated tasksr   r+   r,   r-   Nr.   r.   )task_idzMoved z annotations from task z	 to task )	r2   	enumerater   objectsgetannotationsupdater7   r8   )r)   total_moved_annotationsr:   r;   ifirstr>   s          r   r$   r$   j   sH    2$t9> d1g5  	GAtE'(40G+HH1L	 QM 	2D'(40G+HH1L  DJ /;;BB5QU;BW'40C+DtLcGd+dd'T"5677NtTXzlZcdijndocpq -.()01,-	22r   c           	      ^   t         t        t        t        t        dd}t        |       D ]  }| |   }fd}g }g }|D ]-  } ||      r|j                  |       |j                  |       / |sF ||d         \  }}	|j                  j                  |	      }
|D ]s  } ||d   |
j                  |
j                  |
j                  |
j                        }|j                          |dz  }t        j                  d|d    d	|d   d           u  t        j                  d
| d       y)zDBuild storage links for duplicated tasks and save them to Task in DB)io_storages_s3importstoragelink io_storages_gcsimportstoragelink&io_storages_azureblobimportstoragelink'io_storages_localfilesimportstoragelink"io_storages_redisimportstoragelinkr   c                 L    D ]  }| j                  |      x}s|   |fc S  y )N)rE   )r>   linklink_idclassess      r   _get_storagelinkzDrestore_storage_links_for_duplicated_tasks.<locals>._get_storagelink   s9     2"hhtn,7,"4='112 r   rA   r.   )rB   key	row_index	row_groupstorager+   zRestored storage link for task z from source task z	Restored z# storage links for duplicated tasksN)r   r
   r	   r   r   listr3   rD   rE   rV   rW   rX   rY   saver7   r8   )r)   total_restored_linksr:   r'   rU   tasks_without_storagelinkstasks_with_storagelinksr>   storage_link_classstorage_link_idlink_instancerR   rT   s               @r   r#   r#      s`   
 ,?,@2L3N.DG Z  &4 	 &(""$ 	8D%'..t4*11$7		8 #2BCZ[\C]2^/.66::o:NM2 ) J%))+55+55)11 		$)$5d4j\ASTklmTnosTtSuv3&P KK)011TUVr   c           
         g }t        t              D ]  }|j                  d      s||gz  } t        t              }t	         |j
                  ddddg|       }t        j                  dt        |       d       t	        |      D ]C  }t        |d   |        t        j                  |d         |d<   ||d      j                  |       E |D ci c]  }t        ||         dkD  s|||    }}|D ci c]  }|||   D cg c]  }|d   	 c} }	}}t        j                  d	t        |       d
       t        j                  d|	        |S c c}w c c}w c c}}w )z>Find duplicated tasks by `task.data` and return them as a dictio_storages_r:   r.   r,   r-   z
Retrieved z tasks from querysetr+   zFound r1   zDuplicated tasks: )dirr   
startswithr   rZ   valuesr7   r8   r2   r   jsondumpsr3   )
r   r   storagesfieldgroupsr'   r>   dr)   r8   s
             r   r"   r"      se    HT  N+H  F/BD[g^fghE
KK*SZL(<=>U *5d6lGLzz$v,/VtF|##D)* )/E1#fQi.12D!VAY,EJE?IJ!Az!}5tT
55JDJ
KK&Z)):;<
KK$TF+, F5Js$   E+E:E!EE!E!zRemove Duplicated Tasks_   FaR  Confirm that you want to remove duplicated tasks with the same data fields. Duplicated tasks will be deleted and all annotations will be moved to the first task from duplicated tasks. Also Source Storage Links will be restored if at least one duplicated task has a storage link. Warning: Task assignments (enterprise only) won't be saved.confirm)texttype)entry_point
permissiontitleorderexperimentaldialogactions)returnN)-__doc__loggingcollectionsr   ujsonrg   core.label_configr   core.permissionsr   
core.redisr   data_manager.actionsr   data_manager.actions.basicr   io_storages.azure_blob.modelsr	   io_storages.gcs.modelsr
   io_storages.localfiles.modelsr   io_storages.redis.modelsr   io_storages.s3.modelsr   rest_framework.exceptionsr   tasks.modelsr   	getLogger__name__r7   all_permissionsr   r   r%   r$   r#   r"   projects_changetasks_deleterw   rZ   __annotations__ r   r   <module>r      s     #  K + . 2 3 D 7 E ; 5 5 			8	$ """.b286Wr: )&668T8TU*N
 
$	  r   