a
    ==icO                     @   s   d Z ddlZddlZddlmZmZmZ ddlm  m	Z	 dd Z
dd Zdd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zedkre  Zejrejrejrejse   eddg eeeee
dZeeej e dS )aQ  
Benchmark for common convnets.

(NOTE: Numbers below prior with missing parameter=update step, TODO to update)

Speed on Titan X, with 10 warmup steps and 10 main steps and with different
versions of cudnn, are as follows (time reported below is per-batch time,
forward / forward+backward):

                    CuDNN V3        CuDNN v4
                    AlexNet         32.5 / 108.0    27.4 /  90.1
                    OverFeat       113.0 / 342.3    91.7 / 276.5
                    Inception      134.5 / 485.8   125.7 / 450.6
                    VGG (batch 64) 200.8 / 650.0   164.1 / 551.7

Speed on Inception with varied batch sizes and CuDNN v4 is as follows:

Batch Size   Speed per batch     Speed per image
16             22.8 /  72.7         1.43 / 4.54
32             38.0 / 127.5         1.19 / 3.98
64             67.2 / 233.6         1.05 / 3.65
128            125.7 / 450.6         0.98 / 3.52

Speed on Tesla M40, which 10 warmup steps and 10 main steps and with cudnn
v4, is as follows:

AlexNet         68.4 / 218.1
OverFeat       210.5 / 630.3
Inception      300.2 / 1122.2
VGG (batch 64) 405.8 / 1327.7

(Note that these numbers involve a "full" backprop, i.e. the gradient
with respect to the input image is also computed.)

To get the numbers, simply run:

for MODEL in AlexNet OverFeat Inception; do
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py     --batch_size 128 --model $MODEL --forward_only True
done
for MODEL in AlexNet OverFeat Inception; do
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py     --batch_size 128 --model $MODEL
done
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py     --batch_size 64 --model VGGA --forward_only True
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py     --batch_size 64 --model VGGA

for BS in 16 32 64 128; do
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py     --batch_size $BS --model Inception --forward_only True
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py     --batch_size $BS --model Inception
done

Note that VGG needs to be run at batch 64 due to memory limit on the backward
pass.
    N)cnn	workspacecorec           	   
      s   t  }d}d d}t D ]}t|D ]}|dkr@d||nd}d|d |}|j|||||j|jd | fd	d
t|D dg |jdd|d|j|jd |ddgd}||d ||f    S qd S )N         r   fc_{}_{}data   )Zdim_inZdim_outZweight_initZ	bias_initc                    s   g | ]}d   |qS )r   )format).0jdepth }/home/droni/.local/share/virtualenvs/DPS-5Je3_V2c/lib/python3.9/site-packages/caffe2/experiments/python/convnet_benchmarks.py
<listcomp>h   s   zMLP.<locals>.<listcomp>sumlast  labelxentloss)	r   CNNModelHelperranger   FCZ
XavierInitZSumLabelCrossEntropyAveragedLoss)	ordermodeldwidthir   currentZnext_r   r   r   r   MLPZ   s6    r$   c                 C   s  t j| dddd}|jdddddd	i fd
i fddd	}||d}|j|dddd}|j|ddddd	i fd
i fdd}||d}|j|dddd}|j|ddddd	i fd
i fdd}||d}	|j|	ddddd	i fd
i fdd}
||
d}|j|ddddd	i fd
i fdd}||d}|j|dddd}||dddd	i fd
i f}||d}||dddd	i fd
i f}||d}||d dd!d	i fd
i f}||d"}||d#gd$}||d% |d&fS )'NZalexnetTnameZ	use_cudnnZcudnn_exhaustive_searchr	   conv1r   @      
XavierFillConstantFill      stridepadpool1kernelr/   conv2      r0   pool2conv3  r
   conv4r   conv5pool5fc6i $     fc7fc8r   predr   r   r      	r   r   ConvReluMaxPoolr   Softmaxr   r   r   r   r'   relu1r1   r4   relu2r8   r9   relu3r;   relu4r<   relu5r=   r>   relu6r@   relu7rA   rB   r   r   r   r   AlexNets   s    



rQ   c              
   C   s  t j| dddd}|jdddddd	i fd
i fdd}||d}|j|dddd}||ddddd	i fd
i f}||d}|j|dddd}|j|ddddd	i fd
i fdd}||d}	|j|	ddddd	i fd
i fdd}
||
d}|j|ddddd	i fd
i fdd}||d}|j|dddd}||dddd	i fd
i f}||d}||ddd d	i fd
i f}||d}||d!d d"d	i fd
i f}||d#}||d$gd%}||d& |d'fS )(NZoverfeatTr%   r	   r'   r   `   r)   r*   r+   r,   )r/   r1   r-   r2   r4   r   r6   r8   r9      r
   r7   r;      r<   r=   r>   i   i   r@   r?   rA   r   rB   r   r   r      rD   rI   r   r   r   OverFeat   s    



rV   c              
   C   sx  t j| dddd}|jddddddi fd	i fd
d}||d}|j|dddd}|j|dddddi fd	i fd
d}||d}|j|dddd}|j|dddddi fd	i fd
d}||d}	|j|	dddddi fd	i fd
d}
||
d}|j|dddd}|j|dddddi fd	i fd
d}||d}|j|dddddi fd	i fd
d}||d}|j|dddd}|j|dddddi fd	i fd
d}||d}|j|dddddi fd	i fd
d}||d}|j|dddd}||ddddi fd	i f}||d}||d dddi fd	i f}||d }||d!dd"di fd	i f}||d#}||d$gd%}||d& |d'fS )(Nzvgg-aTr%   r	   r'   r   r(   r*   r+   r
   r7   r1   r-   r2   r4      r8   r9   r   r;   pool4r<   rS   conv6pool6conv7conv8pool8fcixi b  r?   fcxfcxir   rB   r   r   r   rU   rD   )r   r   r'   rJ   r1   r4   rK   r8   r9   rL   r;   rM   rX   r<   rN   rY   rO   rZ   r[   rP   r\   Zrelu8r]   r^   Zreluixr_   Zreluxr`   rB   r   r   r   r   VGGA  s    







ra   c                 C   s&   t d t d t d t| }|S )Nz4====================================================z4                 Start Building DAG                 )printSparseTransformerZ
netbuilder)r   Znet_rootr   r   r   net_DAG_Builder  s
    
rd   c              
   C   sd  |  ||d ||ddi fdi f}| ||}|  ||d ||d ddi fdi f}	| |	|	}	| j |	|d |d |d ddi fdi fdd	}
| |
|
}
|  ||d
 ||d ddi fdi f}| ||}| j ||d |d |d ddi fdi fdd	}| ||}| j||d dddd}|  ||d ||ddi fdi f}| ||}| ||
||g|}|S )Nz:conv1r
   r*   r+   z:conv3_reducer   z:conv3r   r7   z:conv5_reducez:conv5r6   r-   z:poolr3   r/   r0   z
:pool_proj)rE   rF   rG   ZConcat)r   Z
input_blobZinput_depthZoutput_nameZconv1_depthZconv3_depthsZconv5_depthsZ
pool_depthr'   Zconv3_reducer9   Zconv5_reducer<   poolZ	pool_projoutputr   r   r   _InceptionModule  sr    

rh   c                 C   sV  t j| dddd}|jdddddd	i fd
i fddd	}||d}|j|ddddd}||ddddd	i fd
i f}|||}|j|ddddd	i fd
i fdd}||d}|j|ddddd}t||dddddgddgd}	t||	dddddgddgd}
|j|
ddddd}t||dddddgdd gd}t||d!d"d#d$d%gd&dgd}t||d!d'dddgd&dgd}t||d!d(d$d)d*gddgd}t||d+d,dd#d-gddgd}|j|d.dddd}t||d/d0dd#d-gddgd}t||d/d1d2dd2gd dgd}|j|d3ddd4}||d5d6d7d	i fd
i f}||d8}|	|d9gd:}|
|d; |d%fS )<NZ	inceptionTr%   r	   r'   r   r(      r*   r+   r-   r.   r1   r
   re   conv2ar4   r5   r7   r8   inc3rR   rW          r   inc4r=   i  inc5   0   rS   inc6   p   rC      inc7inc8   i   i  inc9i@  pool9i@  inc10inc11r:   pool11r2   fcrT   r   rB   r   r   r   )r   r   rE   rF   rG   rh   ZAveragePoolr   rH   r   r   )r   r   r'   rJ   r1   rj   r4   rK   r8   rk   rn   r=   ro   rr   rv   rw   ry   rz   r{   r|   r}   r~   rB   r   r   r   r   	Inception  s    
r   c                 C   sd   | j g ddg|||d\}}| j|dtjjd}| |d}| j||tdd}| ||}||fS )	zAdds the data input part.
data_uint8r   )
batch_sizedbdb_typeZ	data_nhwc)tor	   g      p?)scale)	ZTensorProtosDBInputZCastr   ZDataTypeFLOATZ	NHWC2NCHWZScalefloatZStopGradient)r   r   r   r   r   r   r	   r   r   r   AddInput  s    

r   c                 C   sd   |  d}| j|dddddd}| jjg dd	gd
d}| jD ]"}| j| }| ||||g| q<dS )zC Simple plain SGD update -- not tuned to actually train the models iterLRg:0yEstepi'  g+?)Zbase_lrpolicyZstepsizegammaONEr
         ?)shapevalueN)ZIterZLearningRateparam_init_netr+   paramsZparam_to_gradZWeightedSum)r   ZITERr   r   paramZ
param_gradr   r   r   AddParameterUpdate  s    


r   c           	   	   C   sP  | |j \}}|j| _|j| _|j dkr@|jd||g}n"|j||dg}|jdkrb|j|g}|jjg d|ddd |jj	g d|jgd	d
d |j
rtd|j n6td|j |dg t| |j dkrtd |js|j  |j  |jrtd|j|jdl}|t|j  td|jd$}|t|j  W d    n1 sl0    Y  W d    n1 s0    Y  t|j t|j t|jD ]}t|j j qtd}|t d|j|j! t"" }t#| tdt"" | |j!  |j$rLtd t%|j jd|j!d d S )NNCHWr   r$   r	   g        r   )r   meanZstdr   r   i  )r   minmaxz{}: running forward only.z{}: running forward-backward.r   ZNHWCzU==WARNING==
NHWC order with CuDNN may not be supported yet, so I might
exit suddenly.z{0}_init_batch_{1}.pbtxtwz	{0}.pbtxtplanrunz	Spent: {}zLayer-wise benchmark.r
   T)&r   Znet_typeZPrototypeZnum_workersr   r   r   ZGaussianFillZUniformIntFillZforward_onlyrb   r   ZAddGradientOperatorsr   cpuZRunAllOnGPUnetZ
dump_modelopenwritestrr   Z
RunNetOnceZ	CreateNetr   Zwarmup_iterationsZRunNetr&   r   ZPlanZAddStepZExecutionStepZ
iterationstimeZRunPlanZlayer_wise_benchmarkZBenchmarkNet)	Z	model_genargr   Z
input_sizeZinput_shapeZfidr"   r   startr   r   r   	Benchmark)  sl    





T

r   c                  C   s   t jdd} | jdtddd | jdtdd	 | jd
tddd | jdtddd | jdtddd | jdtddd | jdddd | jdddd | jdddd | jdddd | jdtd d! | jd"td#d! | S )$NzCaffe2 benchmark.)descriptionz--batch_sizerW   zThe batch size.)r   defaulthelpz--modelzThe model to benchmark.)r   r   z--orderr   zThe order to evaluate.z
--cudnn_wszThe cudnn workspace size.z--iterations
   z(Number of iterations to run the network.z--warmup_iterationsz1Number of warm-up iterations before benchmarking.z--forward_only
store_truez"If set, only run the forward pass.)actionr   z--layer_wise_benchmarkz.If True, run the layer-wise benchmark as well.z--cpuz+If True, run testing on CPU instead of GPU.z--dump_modelz*If True, dump the model prototxts to disk.z
--net_typeZdag)r   r   z--num_workersr-   )argparseArgumentParseradd_argumentintr   )parserr   r   r   GetArgumentParserq  sn    r   __main__Zcaffe2z--caffe2_log_level=0)rQ   rV   ra   r   r$   )__doc__r   r   Zcaffe2.pythonr   r   r   Zcaffe2.python.SparseTransformerpythonrc   r$   rQ   rV   ra   rd   rh   r   r   r   r   r   __name__
parse_argsargsr   r   r   Zcudnn_ws
print_helpZ
GlobalInitZ	model_mapr   r   r   r   <module>   sD   <QHs?KH:

