a
    ==ic>                     @   s   d dl Z d dlZd dlmZ d dlZd dlmZmZ d dl	m
Z
 eeZeej dd Zdd Zd	d
 Zdd Zdd Zdd Zd(ddZd)ddZdd Zd*ddZd+ddZdd Zdd Zd,d d!Zd-d"d#Zd.d$d%Zd&d' Z dS )/    N)defaultdict)coreutils)hardcode_scale_zpc                 C   s,   ddl m} || \}}t|d t||S )z$s -> (s0,s1), (s1,s2), (s2, s3), ...r   )teeN)	itertoolsr   nextzip)iterabler   ab r   q/home/droni/.local/share/virtualenvs/DPS-5Je3_V2c/lib/python3.9/site-packages/caffe2/quantization/server/utils.pypairwise   s    
r   c                 C   s:   g }t | jD ]&\}}||jv s*||jv r|| q|S N)	enumerateopinputZcontrol_inputappend)netblobuir   r   r   r   	blob_uses   s
    r   c                    s  t t |}tj|d  |D ]\ }|jdvr>q(t|jd }t|dkr\q(|d j }|jdkst|dkr|jd |jd kr|jdkr(t	d
| q(t fdd|jd |jd fD rt	d t d D ]}t	j|  qq(t |}|jd |jd< ||jd  }	t|jd	krX||jd	  }
n tt||jd	  tj}
||jd  }||jd	  }||jd
  }||jd  }d}|jD ]}|jdkr|j}q|d t||  }|||  }|jdkr |dtdg|	jd   n|dtdg|	jd	   }|
| | }|	| }||jd  j|jksnJ t|jd	kr||jd	  j|jksJ n|j|jksJ |||jd < t|jd	kr|||jd	 < n |||jd	 < |j|jd	  jd   |g j d   jd d   }jd d = ||jd  t|jd	krp||jd	  ||jd
  ||jd  ||jd = t|jd	kr||jd	 = ||jd
 = ||jd = j| || d f  S ||d fS )N)ConvConvTransposer   	SpatialBN   #Can't fuse if more than one user {}c                 3   sB   | ]:}t  d  D ]&}|j| jv p6|j| jv V  qqdS r   Nranger   r   output.0r   kr   jr   r   r   	<genexpr>?   s   z fuse_first_bn.<locals>.<genexpr>>Can't fuse because of the following interferring uses or defs:         gh㈵>epsilong      ?r   ))r   r.   )copydeepcopyr   r   typer   r"   lenloggerdebugformatanyr!   r   npZzerosastypefloat32argnamefsqrtreshapetuplendimshaper   extend)r   paramsremoved_tensorsbegin_op_indexconvusesbnr%   
fused_convZconv_weightZ	conv_biasbn_scalebn_biasZbn_running_meanZbn_running_varZepsr:   ABZA_CQnew_opsr   r&   r   fuse_first_bn!   s    



*


 

"8rQ   c                 C   sh   g }d}t | |||\}}}}|d u rRtdd |jD rH|sHtd||||fS |||  } }}qd S )Nr   c                 s   s   | ]}|j d kV  qdS )r   Nr1   r$   r   r   r   r   r(          zfuse_bn.<locals>.<genexpr>z,Model contains SpatialBN op after fusion: %s)rQ   r6   r   	Exception)r   rC   ignore_failurerD   rE   next_netnext_paramsr   r   r   fuse_bn   s    
rY   c                 C   s  t | } t |}tt| jD ]n\\}}\}}|jd |jd krJq"|jdks"|jdks"t| j|d ks"| j|d  jdkrq"|}|}| j|d  }	t |}
|	jd |
jd< ||jd  }||jd  }||jd  }||	jd  }|| ||jd < || | ||jd < | jd | |
g | j|d d   }| jd d = |	|jd  |	|	jd  ||jd = ||	jd = | j
|  qq"| ||fS )Nr   r   ZMulr   Addr*   )r/   r0   r   r   r   r   r"   r1   r2   r   rB   )r   rC   rD   r   currentr'   next_rH   muladdZfused_bnrJ   Z	mul_scalerK   Zadd_biasrP   r   r   r   fuse_first_scale   sD    

 
&r_   c                 C   sJ   g }t | ||\}}}t|jt| jkr4|||fS |||  } }}qd S r   )r_   r2   r   )r   rC   rV   rD   rW   rX   r   r   r   
fuse_scale   s    

r`   c           	         s  t tj|d  |D ]~\ }|jdvr4qt|jd }t|dks|rd|jd |v rdq|d j }|jdkst|dkr|jd |jd kr|jdkrt	d
| qt fdd|jd |jd fD rt	d t d D ]}t	j|  q qt |}|jd |_|jd |jd< jd   |g j d   jd d   }jd d = j|  d f  S d fS )	N)r   r   ZSumr   r   Relur   r   c                 3   sB   | ]:}t  d  D ]&}|j| jv p6|j| jv V  qqdS r   r    r#   r&   r   r   r(   	  s   z"fuse_first_relu.<locals>.<genexpr>r)   )r/   r0   r   r   r1   r   r"   r2   r3   r4   r5   r6   r!   rB   )	r   rE   ignore_op_with_outputrF   rG   relur%   rI   rP   r   r&   r   fuse_first_relu   sF    



*


8rd   c                 C   sL   d}t | ||\}}|d u rBtdd |jD r>|s>td||S |} qd S )Nr   c                 s   s   | ]}|j d kV  qdS )ra   NrR   rS   r   r   r   r(   )  rT   zfuse_relu.<locals>.<genexpr>z'Model contains Relu op after fusion: %s)rd   r6   r   rU   )r   rV   rb   rE   rW   r   r   r   	fuse_relu!  s    
re   c                 C   s>   t tt| D ]\}}|jd |kr|  S qtd|d S )Nr   z(Failed to find last producer of blob, %s)reversedlistr   r"   
ValueError)opsr   r   r   r   r   r   last_producer/  s    
rj   c                 C   st  t | } tt| jD ]T\\}}\}}|jd |jd kr@q|jdks|jdkrVq|rj|jd |v rjqt |}t |}t | jd | }t | j|d d  }	|jd |jd< |jD ]}
t||
}|| }|jd |
ksJ |
d |jd< t |}|jd |jd< |
|jd< |d |d  |g ||d d   }q||g |	 }| jd d = | j	|  qpq| S )Nr   ZConcatra   r   Z	_pre_relu)
r/   r0   r   r   r   r   r"   r1   rj   rB   )r   rb   r   r[   r'   r\   concatrc   Zpre_opsZpost_opsr   r%   ZproducerZnew_relurP   r   r   r   swap_first_concat_relu6  s6    
 





(rl   c                 C   s,   t | |}t|jt| jkr"|S |} q d S r   )rl   r2   r   )r   rb   rW   r   r   r   swap_concat_relu`  s    
rm   c           
      C   s2  t t}| jjD ]2}d|jv rt|jdkr||jd   d7  < qi }|jjD ]$}||jd  dkrP|||jd < qPt t}| jjD ]}d|jv rt|jdkr|jd }|| dkrq|| }||  d7  < |dkrq|d t| }t	
|| }	||	jd< |jj|	g ||jd< | jj| qdS )a  
    In architectures such as FPN (https://arxiv.org/abs/1612.03144), few Conv
    ops share the same weight and bias and are run at different scales of
    the input. Since 'bias_scale = input_scale * weight_scale', sharing the
    same bias blob among multiple Conv ops means that we need different bias
    scale for each of the ops. To achieve this, we just duplicate those bias
    blobs that are used by multiple Conv ops before performing int8 rewrite.
    r   r+   r*   r   r   _vN)r   intZ_netr   r1   r2   r   r"   strr/   r0   rB   Zexternal_inputr   )
r   Zinit_netZ
bias_countr   Zbias_fill_opZbias_versionZbiasversionZnew_biasZfill_opr   r   r   add_version_to_conv_biasi  s0    	


rr   c                 C   s(   | j td|jtd|jg d S )NZY_scaleZY_zero_point)r:   rB   r   MakeArgumentscale
zero_point)r   q_paramr   r   r   add_quantization_param_args_  s
    rw   Fc                 C   sx   | dk rF|dkrF|rFd}d}t t| | t|| }|| } || }t| |}| dk rt|dkrt|rtt|jd}|S )Nr   i      )maxabsr   choose_quantization_paramsQuantizationParamrt   )
tensor_min
tensor_maxpreserve_sparsityZsymmetric_qminZsymmetric_qmaxZ	max_scalerv   r   r   r   r|     s    r|   c                 C   sF   |j dkrdn| }|j dkr$dn| }t|||}t| | |S )Nr   )sizeminrz   r|   rw   )r   tensorr   r~   r   rv   r   r   r   add_quantization_param_args  s
    
r   c                 C   s   t dg |}t|| |}t| |j tj|j }t	dt
|d}|jtd|tj td|jg ||fS )zy
    Create Int8GivenTensorFill op that quantizes the given tensor and outputs
    an Int8Tensor with out_blob_name.
    ZInt8GivenTensorFillr      valuesrA   )r   CreateOperatorr   r7   aroundrt   r8   int32ru   maximumminimumr:   rB   r   rs   Zuint8tobytesrA   )r   out_blob_namer   r   rv   quantized_tensorr   r   r   create_int8_given_tensor_fill  s    r   c                 C   sv   |j |j  }t| | tj}|d tdg |}|j	t
d|t
d|jg t|d}t|| |S )za
    Similar to create_int8_given_tensor_fill, but for bias blobs to be stored
    as int32.
    r.   ZInt8GivenIntTensorFillr   rA   r   )rt   r7   r   r8   r   r>   r   r   r:   rB   r   rs   rA   r   r}   rw   )r   r   Z	x_q_paramZ	w_q_paramrt   r   r   rv   r   r   r   create_int8_bias_tensor_fill  s    


r   )N)N)N)N)F)F)F)!r/   loggingcollectionsr   numpyr7   Zcaffe2.pythonr   r   Zcaffe2.python.fbr   	getLogger__name__r3   setLevelDEBUGr   r   rQ   rY   r_   r`   rd   re   rj   rl   rm   rr   rw   r|   r   r   r   r   r   r   r   <module>   s2   
	 
*
0

*
	'	



