a
    yf6                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlZd dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZmZ G d	d
 d
eZdS )    N)deepcopy)
ThreadPool)Path)Optional)Dataset)FORMATS_HELP_MSGHELP_URLIMG_FORMATS)DEFAULT_CFG
LOCAL_RANKLOGGERNUM_THREADSTQDMc                       s   e Zd ZdZdddeddddddd	d
f fdd	Zdd Zee dddZ	d(ddZ
dd Zdd Zd)ddZdd Zdd Zdd Zd d! Zd"d# Zd*d$d%Zd&d' Z  ZS )+BaseDataseta  
    Base dataset class for loading and processing image data.

    Args:
        img_path (str): Path to the folder containing images.
        imgsz (int, optional): Image size. Defaults to 640.
        cache (bool, optional): Cache images to RAM or disk during training. Defaults to False.
        augment (bool, optional): If True, data augmentation is applied. Defaults to True.
        hyp (dict, optional): Hyperparameters to apply data augmentation. Defaults to None.
        prefix (str, optional): Prefix to print in log messages. Defaults to ''.
        rect (bool, optional): If True, rectangular training is used. Defaults to False.
        batch_size (int, optional): Size of batches. Defaults to None.
        stride (int, optional): Stride. Defaults to 32.
        pad (float, optional): Padding. Defaults to 0.0.
        single_cls (bool, optional): If True, single class training is used. Defaults to False.
        classes (list): List of included classes. Default is None.
        fraction (float): Fraction of dataset to utilize. Default is 1.0 (use all data).

    Attributes:
        im_files (list): List of image file paths.
        labels (list): List of label data dictionaries.
        ni (int): Number of images in the dataset.
        ims (list): List of loaded images.
        npy_files (list): List of numpy file paths.
        transforms (callable): Image transformation function.
    i  FT              ?Ng      ?c                    s|  t    || _|| _|| _|| _|| _|| _| | j| _	| 
 | _| j|d t| j| _|| _|| _|	| _|
| _| jr| jdusJ |   g | _| jrt| j| jd dfnd| _dg| j dg| j dg| j   | _| _| _dd | j	D | _t|tr| n|du rd	nd| _| jd	kr8|  sD| jd
krj| jd	krb|j rbt!"d | #  | j$|d| _%dS )z<Initialize BaseDataset with given configuration and options.include_classN   i  r   c                 S   s   g | ]}t |d qS )z.npy)r   with_suffix).0f r   Q/var/www/html/django/DPS/env/lib/python3.9/site-packages/ultralytics/data/base.py
<listcomp>[       z(BaseDataset.__init__.<locals>.<listcomp>Tramdisku   WARNING ⚠️ cache='ram' may produce non-deterministic training results. Consider cache='disk' as a deterministic alternative if your disk space allows.)hyp)&super__init__img_pathimgszaugment
single_clsprefixfractionget_img_filesim_files
get_labelslabelsupdate_labelslennirect
batch_sizestridepadset_rectanglebufferminmax_buffer_lengthimsim_hw0im_hw	npy_files
isinstancestrlowercachecheck_cache_ramZdeterministicr   warningcache_imagesbuild_transforms
transforms)selfr#   r$   r?   r%   r    r'   r0   r1   r2   r3   r&   classesr(   	__class__r   r   r"   1   s<    

".("zBaseDataset.__init__c              
      sl  zg }t |tr|n|gD ]}t|}| rN|tjt|d d dd7 }q| rt|F}| 	 
 }t|jtj  | fdd|D 7 }W d   q1 s0    Y  qt| j | dqtd	d
 |D }|sJ | j d| dt W nB ty@ } z(t| j d| dt |W Y d}~n
d}~0 0 | jdk rh|dtt|| j  }|S )zRead image files.z**z*.*T)	recursivec                    s&   g | ]}| d r|d  n|qS )z./)
startswithreplacer   xparentr   r   r   u   r   z-BaseDataset.get_img_files.<locals>.<listcomp>Nz does not existc                 s   s2   | ]*}| d d  tv r|dtjV  qdS )./N)splitr>   r	   rK   osseprL   r   r   r   	<genexpr>y   r   z,BaseDataset.get_img_files.<locals>.<genexpr>zNo images found in z. zError loading data from 
   )r<   listr   is_dirglobr=   is_fileopenreadstrip
splitlinesrO   rT   rU   FileNotFoundErrorr'   sortedr   	Exceptionr   r(   roundr.   )rE   r#   r   ptr*   er   rN   r   r)   h   s&     
6"2zBaseDataset.get_img_filesr   c                    s   t |dd}tt| jD ]}|dur| j| d }| j| d }| j| d  | j| d }||kd}|| | j| d< || | j| d<  r fdd	t|D | j| d< |dur|| | j| d< | jr d
| j| d ddd
f< q dS )z7Update labels to include only these classes (optional).rX   rQ   Nclsbboxessegments	keypointsc                    s   g | ]\}}|r | qS r   r   )r   siidxrj   r   r   r      r   z-BaseDataset.update_labels.<locals>.<listcomp>r   )	nparrayZreshaperanger.   r,   any	enumerater&   )rE   r   Zinclude_class_arrayirh   ri   rk   jr   rn   r   r-      s      zBaseDataset.update_labelsc              
   C   s>  | j | | j| | j|   }}}|du r"| rzt|}W q ty } zBt| j	 d| d|  t
|jdd t|}W Y d}~qd}~0 0 n
t|}|du rtd| |jdd \}}|r8| jt|| }	|	dkrntt||	 | jtt||	 | j }
}tj||
|ftjd	}n6||  krR| jksnn tj|| j| jftjd	}| jr
|||f|jdd   | j |< | j|< | j|< | j| dt| j  k r| jkr
n n4| jd
}| jdkr
d\| j |< | j|< | j|< |||f|jdd fS | j | | j| | j| fS )z?Loads 1 image from dataset index 'i', returns (im, resized hw).Nu1   WARNING ⚠️ Removing corrupt *.npy image file z	 due to: T)
missing_okzImage Not Found    rX   )interpolationr   r   )NNN) r8   r*   r;   existsro   loadrc   r   rA   r'   r   unlinkcv2imreadra   shaper$   maxr6   mathceilresizeZINTER_LINEARr%   r9   r:   r5   appendr.   r7   popr?   )rE   rt   Z	rect_modeimr   fnrg   Zh0Zw0rwhru   r   r   r   
load_image   s:    "
"

.0$zBaseDataset.load_imagec           
      C   s  d\}}| j dkr| jdfn| jdf\}}tt}||t| j}tt	|| jt
dkd}|D ]t\}}	| j dkr|| j|  j7 }n,|	\| j|< | j|< | j|< || j| j7 }| j d|| dd	| d
|_qb|  W d   n1 s0    Y  dS )zCache images to memory or disk.r   i   @r   ZDiskZRAMr   )totaldisablezCaching images (.1fzGB )N)r?   cache_images_to_diskr   r   r   imaprq   r/   r   rs   r   r;   statst_sizer8   r9   r:   nbytesr'   descclose)
rE   bgbZfcnZstoragepoolresultsZpbarrt   rM   r   r   r   rB      s    "

"zBaseDataset.cache_imagesc                 C   s6   | j | }| s2tj| t| j| dd dS )z3Saves an image as an *.npy file for faster loading.F)Zallow_pickleN)r;   ry   ro   saveas_posixr|   r}   r*   )rE   rt   r   r   r   r   r      s    
z BaseDataset.cache_images_to_diskc                 C   s   d\}}t | jd}t|D ]D}tt| j}| jt	|j
d |j
d  }||j|d  7 }q|| j | d|  }t }	||	jk }
|
sd| _t| j || ddt|d	  d
|	j| dd|	j| dd	 |
S )z5Check image caching requirements vs available memory.r      r   rX   rw   Nr   z%GB RAM required to cache images with d   z% safety margin but only rR   u'   GB available, not caching images ⚠️)r6   r/   rq   r|   r}   randomchoicer*   r$   r   r~   r   psutilZvirtual_memory	availabler?   r   infor'   intr   )rE   Zsafety_marginr   r   n_r   ratioZmem_requiredZmemsuccessr   r   r   r@      s*    



zBaseDataset.check_cache_ramc                    s:  t t  j j t}|d d }t dd  jD }|dddf |dddf  }|	 } fdd|D  _
 fdd|D  _|| }ddgg| }t|D ]P}|||k }| |  }	}
|
dk r|
dg||< q|	dkrdd|	 g||< qt t | j  j  j t j  _| _dS )	zCSets the shape of bounding boxes for YOLO detections as rectangles.rQ   rX   c                 S   s   g | ]}| d qS )r~   )r   rL   r   r   r   r      r   z-BaseDataset.set_rectangle.<locals>.<listcomp>Nr   c                    s   g | ]} j | qS r   )r*   r   rt   rE   r   r   r      r   c                    s   g | ]} j | qS r   )r,   r   r   r   r   r      r   )ro   floorZaranger/   r1   Zastyper   rp   r,   Zargsortr*   rq   r6   r   r   r$   r2   r3   batch_shapesbatch)rE   ZbinbsarZirectZshapesrt   ZariZminiZmaxir   r   r   r4      s$     0zBaseDataset.set_rectanglec                 C   s   |  | |S )z6Returns transformed label information for given index.)rD   get_image_and_label)rE   indexr   r   r   __getitem__   s    zBaseDataset.__getitem__c                 C   s   t | j| }|dd | |\|d< |d< |d< |d d |d d  |d d |d d  f|d< | jr| j| j|  |d	< | |S )
z2Get and return label information from the dataset.r~   NZimgZ	ori_shapeZresized_shaper   rX   Z	ratio_padZ
rect_shape)r   r,   r   r   r0   r   r   update_labels_info)rE   r   labelr   r   r   r     s    zBaseDataset.get_image_and_labelc                 C   s
   t | jS )z6Returns the length of the labels list for the dataset.)r.   r,   r   r   r   r   __len__  s    zBaseDataset.__len__c                 C   s   |S )zCustom your label format here.r   )rE   r   r   r   r   r     s    zBaseDataset.update_labels_infoc                 C   s   t dS )a-  
        Users can customize augmentations here.

        Example:
            ```python
            if self.augment:
                # Training transforms
                return Compose([])
            else:
                # Val transforms
                return Compose([])
            ```
        NNotImplementedError)rE   r    r   r   r   rC     s    zBaseDataset.build_transformsc                 C   s   t dS )a#  
        Users can customize their own format here.

        Note:
            Ensure output is a dictionary with the following keys:
            ```python
            dict(
                im_file=im_file,
                shape=shape,  # format: (height, width)
                cls=cls,
                bboxes=bboxes,  # xywh
                segments=segments,  # xy
                keypoints=keypoints,  # xy
                normalized=True,  # or False
                bbox_format="xyxy",  # or xywh, ltwh
            )
            ```
        Nr   r   r   r   r   r+   '  s    zBaseDataset.get_labels)T)r   )N)__name__
__module____qualname____doc__r
   r"   r)   r   rY   r-   r   rB   r   r@   r4   r   r   r   r   rC   r+   __classcell__r   r   rG   r   r      s6   7
&

r   )r[   r   rT   r   copyr   Zmultiprocessing.poolr   pathlibr   typingr   r|   numpyro   r   Ztorch.utils.datar   Zultralytics.data.utilsr   r   r	   Zultralytics.utilsr
   r   r   r   r   r   r   r   r   r   <module>   s   