a
    ==icj                     @   sz  d dl Z d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
 d dlmZmZ d dlmZmZmZ d dlmZ d dlm  m  mZ d dlm  m  mZ d dlmZmZ d dlm  m  mZ d dlm  m  m Z! d dl"m#Z# e$  e%dZ&e&'ej( e)d	 e)d
 dddZ*dd Z+dd Z,dd Z-dd Z.dd Z/dd Z0e1dkrve2ddg e0  dS )    N)core	workspaceexperiment_utildata_parallel_model)dyndep	optimizer)timeout_guardmodel_helperbrew)
caffe2_pb2)InitializerPseudoFP16Initializer)predictor_constantsZImagenet_trainerz2@/caffe2/caffe2/distributed:file_store_handler_opsz3@/caffe2/caffe2/distributed:redis_store_handler_opsc           
      C   sP   t j| |ddg||t| jr"dndd||ddd|d|d\}}	| ||}d	S )
z
    The image input operator loads image and label data from the reader and
    applies transformations to the images (random cropping, mirroring, ...).
    datalabelTFg      `@      )
batch_sizeoutput_typeZuse_gpu_transformZuse_caffe_datummean_per_channelstd_per_channelmeanZstdscalecropZmirroris_testN)r
   Zimage_inputr   ZIsGPUDeviceTypeZ_device_typeZStopGradient)
modelreaderr   img_sizedtyper   r   r   r   r    r   x/home/droni/.local/share/virtualenvs/DPS-5Je3_V2c/lib/python3.9/site-packages/caffe2/python/examples/imagenet_trainer.pyAddImageInput/   s"    
r!   c                 C   sl   |dkrdnd}| j jg d| g|d||gd |dkrJ| j d| d | j jg dg|gdtjjd	 d
S )z
    The null input function uses a gaussian fill operator to emulate real image
    input. A label blob is hardcoded to a single value. This is useful if you
    want to test compute throughput or don't have a dataset available.
    float16Z_fp16 r      )shaper   r   )r%   valuer   N)param_init_netZGaussianFillZFloatToHalfZConstantFillr   DataTypeZINT32)r   r   r   r   r   suffixr   r   r    AddNullInputR   s    
r*   c                 C   s   d |j|jd }tj|j t||d g|d g|d d| j	f|d | j
| j| jfid}d| j| j|f }tjd|||d	 d S )
Nz[]_{}r   z/dataz/softmaxr   )Zpredict_net
parametersinputsoutputsZshapesz%s/%s_%d.mdlminidb)db_typeZdb_destinationpredictor_export_meta	use_ideep)format_device_prefix_devicespred_expZPredictorExportMetanetProtor   ZGetCheckpointParams
num_labelsnum_channels
image_sizefile_store_pathsave_model_nameZ
save_to_db)argstrain_modelepochr1   prefixr0   
model_pathr   r   r    	SaveModelj   s*    	rB   c                 C   s   t d|  t| d}tt|t	j
}tt|t	j}|rR|  n|  |rh|  n|  t|s~J t|sJ td}tjd|ttjdd dS )z)
    Load pretrained model from file
    zLoading path: {}r.   Zoptimizer_iterationr   )Zdevice_optionN)loginfor2   r5   Zload_from_dbr   ZNet
pred_utilsZGetNetr   ZGLOBAL_INIT_NET_TYPEZPREDICT_INIT_NET_TYPEZRunAllOnIDEEPZRunAllOnGPUr   
RunNetOnce	FetchBlobZFeedBlobZDeviceOptionr   ZCPU)pathr   r1   Zmeta_net_defZinit_netZpredict_init_netZitercntr   r   r    	LoadModel   s,    


rI   c                 C   sR  t d|| j t| j| | }t| j| | }	t|D ]}
|
dkrR| jn| j	}t
|: t }t|j j t }|| }W d   n1 s0    Y  d}t ||
d ||||  d|j|jd }t|d }t|d }d	}t ||| q@|| | }d|j|jd }t|d }t|d }tt|d }d}d}|dur
d}t|	D ]r}t|j j |jD ]R}|ttd|j|d 7 }|ttd|j|d
 7 }|d7 }qq|| }|| }nd}d}|j ||
||  ||||||dd |dk sJJ d|d S )zI
    Run one epoch of the trainer.
    TODO: add checkpointing here.
    zStarting epoch {}/{}r   Nz8Finished iteration {}/{} of epoch {} ({:.2f} images/sec)r   z{}_{}z	/accuracyz/losszTraining loss: {}, accuracy: {}z/accuracy_top5)accuracylosslearning_rater?   Ztop1_test_accuracyZtop5_test_accuracy)Zinput_countZbatch_countZadditional_values(   zExploded gradients :()rC   rD   r2   
num_epochsint
epoch_sizeZtest_epoch_sizerangeZfirst_iter_timeouttimeoutr   ZCompleteInTimeOrDietimer   ZRunNetr6   r7   namer3   r4   rG   r   ZGetLearningRateBlobNamesnpZasscalar)r=   r?   r>   
test_modeltotal_batch_size
num_shardsexpnameexplogepoch_itersZtest_epoch_itersirS   t1t2dtfmtr@   rK   rL   Z	train_fmtZ
num_imagesrM   Ztest_accuracyZtest_accuracy_top5Zntests_gr   r   r    RunEpoch   st    &





rd   c                    s   j dkrdt j }n j dkr(d} jd urRdd  jdD }t|}ntt j} j}t	
d|  j| | dksJ d jrt j jksJ d	 jrt j jksJ d
 j }t j| }|dksJ d||  _t	
d j  jr.dddd}nddd jd d d}tj||d} j j} jd}	tdd urttddttdd}dkrHtd |d j|	d dd d}
ndkrDd} jd urtt j!dg |g j j" j#d n tt j!dg |g j$ j#d t||d j|	d d d}
nd }
 fdd } fd!d"} fd#d$} j%d%kr fd&d'}n(|j&d( j% j'|d) fd*d'}d+d, }t(j)|| j dkr|n|||||
d j* j+ j j+ j+d- t(,|i t- d t.|j/ t0|j1 d } j2d urt	
d.  jrRddd/}ndddd0}tj|d1 |dd2}|j&d3 j2 j'd4 fd5d6}t(j)|| j dkr|n||d | j* j+d7 t.|j/ t0|j1 d} j3d urNt4 j3| j t(5|  j3d8d9 }|6d:rDt|d d; }t	
d<| n
t	7d= d>| j j8 j9f }t:;| }| j<k rt= |||||}t> || j d? j$ j?f }tj@A|t|d  d: rrtB|t|d  d:  qrd S )@Nresnext
shufflenetc                 S   s   g | ]}t |qS r   )rP   ).0xr   r   r    
<listcomp>      zTrain.<locals>.<listcomp>,zRunning on GPUs: {}r   z%Number of GPUs must divide batch sizez8The number of channels of image mean doesn't match inputz7The number of channels of image std doesn't match inputz;Epoch size must be larger than batch size times shard countzUsing epoch size: {}Fr   )	use_cudnncudnn_exhaustive_searchZtraining_modeZNCHWTi   )orderrl   rm   Zws_nbytes_limit)rU   	arg_scopeZOMPI_COMM_WORLD_SIZEZOMPI_COMM_WORLD_RANKZGLOO)
kv_handlerrY   shard_idengine	transport	interfaceZmpi_rendezvous	exit_netsstore_handlerZRedisStoreHandlerCreate)hostportr@   ZFileStoreHandlerCreate)rH   r@   )rp   rq   rY   rr   rs   rt   ru   c                    s    j dkrtnt}tjtjtjg|| j jd6 t	j
| d j j j j jddd	}W d    n1 sn0    Y   j dkr| j||d }| |dgdd	g\}}| j||d
}tj| |dgddd tj| |dgddd |gS )Nr"   ZWeightInitializerZBiasInitializerenable_tensor_corefloat16_computer   T)num_input_channelsr8   
num_layersZ
num_groupsZnum_width_per_groupZno_biasno_loss_fp32r   softmaxrL   r   rK   r   Ztop_kaccuracy_top5   )r   r   r   r
   ro   convfcrz   r{   resnetZcreate_resnextr9   r8   r}   Zresnext_num_groupsZresnext_width_per_groupr6   HalfToFloatSoftmaxWithLossScalerK   r   Z
loss_scaleZinitializerpredr   rL   r=   r   r    create_resnext_model_opsw  s:    $

z'Train.<locals>.create_resnext_model_opsc                    s    j dkrtnt}tjtjtjg|| j jd( t	j
| d j jdd}W d    n1 s`0    Y   j dkr| j||d }| |dgdd	g\}}| j||d
}tj| |dgddd tj| |dgddd |gS )Nr"   ry   r   T)r|   r8   r~   r   r   r   rL   r   rK   r   r   r   r   )r   r   r   r
   ro   r   r   rz   r{   rf   Zcreate_shufflenetr9   r8   r6   r   r   r   rK   r   r   r   r    create_shufflenet_model_ops  s2    $

z*Train.<locals>.create_shufflenet_model_opsc              
      sh   t d j   } jr<tj|  jdd jd|dd}n(t|  j tj|  jddd|dd}|S )N   g?r   step皙?)momentumnesterovweight_decaypolicystepsizegamma)r   r   r   r   r   )	rP   rQ   r{   r   Zbuild_fp16_sgdbase_learning_rater   Zadd_weight_decayZbuild_multi_precision_sgd)r   Zstepszopt)r=   rY   rX   r   r    add_optimizer  s.    	zTrain.<locals>.add_optimizernullc                    s   t | d  j jd d S )N)r   r   r   )r*   r:   r   r   )r=   batch_per_devicer   r    add_image_input  s    zTrain.<locals>.add_image_inputr   )dbr/   rY   rq   c              
      s$   t |  j jd j jd d S )NFr   r   r   r   r   r   r!   r:   r   image_mean_per_channelimage_std_per_channelr   )r=   r   r   r   r    r     s    c                 S   s<   |  |  D ](}|jdur| j|j|jtjj  qdS )z-Add ops applied after initial parameter sync.N)	ZGetOptimizationParamInfoZ	GetParamsZ	blob_copyr'   r   Zblobr   r(   FLOAT)r   Z
param_infor   r   r    add_post_sync_ops  s    
z Train.<locals>.add_post_sync_ops)input_builder_funforward_pass_builder_funZoptimizer_builder_funpost_sync_builder_fundevices
rendezvousZoptimize_gradient_memoryuse_nccl
cpu_deviceZideepZshared_modelZcombine_spatial_bnz----- Create test net ----)rl   rm   )rn   rl   rm   _test)rU   ro   Zinit_paramstest_reader)r   r/   c              
      s$   t |  j jd j jd d S )NTr   r   r   )r=   r   r   r   r    test_input_fn(  s    zTrain.<locals>.test_input_fn)r   r   r   Zparam_update_builder_funr   r   r   rb   rJ   z.mdlzReset epoch to {}z,The format of load_model_path doesn't match!z%s_gpu%d_b%d_L%d_lr%.2f_v2z%s/%s_)Cr   strr}   gpussplitlenlistrR   num_gpusrC   rD   r2   r   r   r9   r   rY   rP   rQ   r1   Zcudnn_workspace_limit_mbr	   ZModelHelperrq   Zdistributed_interfacesosgetenvdictZdistributed_transportZ
redis_hostr   ZRunOperatorOncer   ZCreateOperatorZ
redis_portZrun_idr;   Z
train_dataZCreateDBr/   r   ZParallelizer   Zuse_cpuZOptimizeGradientMemorysetrF   r'   Z	CreateNetr6   Z	test_dataZload_model_pathrI   ZFinalizeAfterCheckpointendswithwarningr8   r   r   ZModelTrainerLogrO   rd   rB   r<   rH   isfileremove)r=   Z
model_namer   r   Zglobal_batch_sizer\   Ztrain_arg_scoper>   rq   Z
interfacesr   rv   r   r   r   r   r   rW   Ztest_arg_scoper   r?   Zlast_strrZ   r[   rA   r   )r=   r   rY   r   r   rX   r    Train   s   











		


r   c                  C   s  t jdd} | jdtd ddd | jdtdd	d
 | jdtddd
 | jdtddd
 | jdtd dd
 | jdtddd | jdtddd | jdtddd
 | jdtddd
 | jdtdd  | jd!tdd"d
 | jd#td$d%d
 | jd&td'd(d
 | jd)td*d+d
 | jd,td-d.d
 | jd/td0d1d
 | jd2td*d3d
 | jd4td5d6d
 | jd7td8d9d
 | jd:tdd;d
 | jd<tdd=d
 | jd>td?d@d
 | jdAtdBd  | jdCtdDd  | jdEtdFdGd
 | jdHtdIdJd
 | jdKtdLdMd
 | jdNtd dOd
 | jdPdQdRdS | jdTdQdUdS | jdVtdWdXd
 | jdYdZdZd[gd\d] | jd^dQd_dS | jd`dQdadS | jdbtdcddd
 | jdetdfdgd
 | jdhtdidjd
 | jdktdldmd
 | jdndododpdqdogdrds |  }t| d S )tNzCaffe2: ImageNet Trainer)descriptionz--train_dataTz-Path to training data (or 'null' to simulate))typedefaultrequiredhelpz--num_layers2   z'The number of layers in ResNe(X)t model)r   r   r   z--resnext_num_groupsr   zThe cardinality of resnextz--resnext_width_per_group@   z--test_datazPath to test dataz--image_mean_per_channel+z#The per channel mean for the images)r   nargsr   z--image_std_per_channelz1The per channel standard deviation for the imagesz--test_epoch_sizeiP  zNumber of test imagesz	--db_typeZlmdbz'Database type (such as lmdb or leveldb)z--gpusz*Comma separated list of GPU devices to use)r   r   z
--num_gpusz)Number of GPU devices (instead of --gpus)z--num_channelsr$   zNumber of color channelsz--image_size   zInput image size (to crop to)z--num_labelsi  zNumber of labelsz--batch_size    zBatch size, total over all GPUsz--epoch_sizei` z/Number of images/epoch, total over all machinesz--num_epochszNum epochs.z--base_learning_rater   zInitial learning rate.z--weight_decayg-C6?z Weight decay (L2 regularization)z--cudnn_workspace_limit_mbzCuDNN workspace limit in MBsz--num_shardsz%Number of machines in distributed runz
--shard_idr   z	Shard id.z--run_idz!Unique run identifier (e.g. uuid)z--redis_hostz%Host of Redis server (for rendezvous)z--redis_porti  z%Port of Redis server (for rendezvous)z--file_store_pathz/tmpz'Path to directory to use for rendezvousz--save_model_nameZresnext_modelz&Save the trained model to a given namez--load_model_pathz0Load previously saved model to continue trainingz	--use_cpu
store_truezUse CPU instead of GPU)actionr   z
--use_ncclz"Use nccl for inter-GPU collectivesz--use_ideepFz	Use ideepz--dtypefloatr"   zData type used for training)r   choicesr   z--float16_computez"Use float 16 compute, if availablez--enable_tensor_corez+Enable Tensor Core math for Conv and FC opsz--distributed_transportZtcpz2Transport to use for distributed run [tcp|ibverbs]z--distributed_interfacesr#   z-Network interfaces to use for distributed runz--first_iter_timeouti  z<Timeout (secs) of the first iteration (default: %(default)s)z	--timeout<   zJTimeout (secs) of each (except the first) iteration (default: %(default)s)z--modelre   ?rf   zList of models which can be run)r   constr   r   r   )	argparseArgumentParseradd_argumentr   rP   r   bool
parse_argsr   )parserr=   r   r   r    mainv  s    




























r   __main__Zcaffe2z--caffe2_log_level=2)NN)3r   loggingnumpyrV   rT   r   Zcaffe2.pythonr   r   r   r   r   r   r   r	   r
   Zcaffe2.protor   Zcaffe2.python.models.resnetpythonmodelsr   Zcaffe2.python.models.shufflenetrf   Z#caffe2.python.modeling.initializersr   r   Z*caffe2.python.predictor.predictor_exporterZ	predictorZpredictor_exporterr5   Z*caffe2.python.predictor.predictor_py_utilsZpredictor_py_utilsrE   Z!caffe2.python.predictor_constantsr   basicConfig	getLoggerrC   setLevelDEBUGZInitOpsLibraryr!   r*   rB   rI   rd   r   r   __name__Z
GlobalInitr   r   r   r    <module>   sB   



  
# T  {^
