a
    ==ic@                     @   s  d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZmZmZ d dlZd dlmZ d dlm  mZ d dlmZ d dlZd dlmZ d dlm  mZ d dlZd dl Z d dl!Z!d dl"m#Z# e$ee$e$f dd	d
Z%ee ej&e	e$e
f dddZ'ej&e	e$e
f ejj(dddZ)d0ejj(ejj(dddZ*ej(ej(dddZ+ej(eej& eej& eej& dddZ,ej-ej.ej/ej0ej1ej2ej3ej4ej5ej6ej4ej7ej8gZ9ej:ej;gZ<ej-ej=ej.ej>ej/dd iZ?eej& e	e$ej(f dddZ@eej& e	e$ej(f e	ej(ej(f dd d!ZAG d"d# d#ZBd1d&d'ZCeBeDd(d)d*ZEG d+d, d,ZFdejGfejj(ee	e$e
f  eejG ejj(d-d.d/ZHdS )2    N)ArgumentTarget)fuse_conv_bn_eval)TypeDictAnyTupleIterableOptionalListcast)	ShapeProp)defaultdict)Enum)targetreturnc                 C   s&   |  dd^ }}|r|d nd|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentname r   s/home/droni/.local/share/virtualenvs/DPS-5Je3_V2c/lib/python3.9/site-packages/torch/fx/experimental/optimization.py_parent_name   s    r   )patternnodemodulesc                 C   s   t |jdkrdS |jd |f}t| |D ]d\}}t|tjsD dS |jdkrT dS t|jtsf dS |j|vrv dS t	||j |ur* dS q*dS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r   r   r   nodesexpected_typeZcurrent_noder   r   r   matches_module_pattern   s    

r*   )r   r   
new_modulec                 C   s<   t | jtsJ t| j\}}||| j< t|| || d S N)r"   r   r&   r   setattr)r   r   r+   parent_namer   r   r   r   replace_node_module,   s    
r/   F)modelr   c                 C   s   t jt jft jt jft jt jfg}|s0t| } t	
| }t| }t|j}|D ]}|jD ]~}t|||r`t|jd jdkrq`||jd j }||j }	|	jsq`t||	}
t|jd ||
 ||jd  || q`qVt	||S )z
    Fuses convolution/BN layers for inference purposes. Will deepcopy your
    model by default, but can modify the model inplace as well.
    r   r   )nnZConv1dZBatchNorm1dConv2dBatchNorm2dZConv3dZBatchNorm3dcopydeepcopyr#   symbolic_tracedictnamed_modulesgraphr(   r*   r   r    usersr   Ztrack_running_statsr   r/   replace_all_uses_with
erase_nodeGraphModule)r0   Zinplacepatternsfx_modelr   	new_graphr   r   convbnZ
fused_convr   r   r   fuse2   s.    







rC   c                 C   s*   t | }G dd dtj j}|| S )z5
    Removes all dropout layers from the module.
    c                       s8   e Zd Zeeedf eeef ed fddZ	  Z
S )z&remove_dropout.<locals>.DropoutRemover.)r   r    kwargsr   c                    s>   t | j| tjr*t|dks"J |d S t |||S d S )Nr   r   )r"   Z
submodulesr1   ZDropoutr   superr   )selfr   r    rD   	__class__r   r   r   V   s    z2remove_dropout.<locals>.DropoutRemover.call_module)__name__
__module____qualname__r   r   r   r   r&   r   r   __classcell__r   r   rG   r   DropoutRemoverU   s   rM   )r#   r6   torchZTransformerZ	transform)r0   r?   rM   r   r   r   remove_dropoutO   s    
rO   )orig_moduler(   inputsoutputsc                    s|   t  }i  |D ]}||j}| |< q|D ] }|| fdd}| |< q.| fdd|D  |  t | |S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                    s    |  S r,   r   )xenvr   r   <lambda>h       z"extract_subgraph.<locals>.<lambda>c                    s   g | ]} | qS r   r   ).0outputrT   r   r   
<listcomp>j   rW   z$extract_subgraph.<locals>.<listcomp>)r#   Graphplaceholderr   Z	node_copyrY   lintr=   )rP   r(   rQ   rR   r@   inputnew_noder   r   rT   r   extract_subgraph^   s    

r`   c                 C   s
   t | S r,   )	th_mkldnnZMkldnnBatchNorm)a_r   r   r   rV   {   rW   rV   )r(   r   c                 C   s   i }| D ]r}|j dkrt|jts&J ||j }t|tv rtt| |tj}t|tj	s`J t
|||< t||| q|S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r   )r%   r"   r   r&   r'   
mkldnn_maprN   floatr1   Moduler4   r5   r/   )r(   r   old_modulesr   
cur_moduler+   r   r   r   modules_to_mkldnn   s    

ri   )r(   r   rg   c                 C   sJ   | D ]@}|j dkrt|jts"J ||j }||v rt||||  qdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r   N)r%   r"   r   r&   r/   )r(   r   rg   r   rh   r   r   r   reset_modules   s    

rj   c                   @   s   e Zd ZejdddZdS )MklSubgraphfx_graphc                 C   s   || _ g | _g | _g | _d S r,   )rm   r(   start_nodes	end_nodes)rF   rm   r   r   r   __init__   s    zMklSubgraph.__init__N)rI   rJ   rK   r#   r[   rp   r   r   r   r   rk      s   rk   
   r   c                    s(   ddt td fdd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    Nr9   r   c                    s   | j }d u r,| jj| jjt dd |D  tttj	 dd | j
D }t| j||fdd}| fdd}tjjt  | fdd}||k S )	Nc                 S   s   g | ]}t |jqS r   )rN   ZrandnshaperX   r   r   r   r   rZ      rW   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>c                 S   s   g | ]}|j d  qS )r   )r    rt   r   r   r   rZ      rW   c                    s<   t D ]
}|   qt }t  D ]
}|  }q$t | S r,   )rangetime)frc   beginout)iterswarmupr   r   	benchmark   s    z?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmarkc                      s   dd dd  D  D S )Nc                 S   s   g | ]}|  qS r   to_denserX   ir   r   r   rZ      rW   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>c                 S   s   g | ]}|  qS r   )	to_mkldnnr   r   r   r   rZ      rW   r   r   Zsample_inputs	submoduler   r   rV      rW   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>c                      s     S r,   r   r   r   r   r   rV      rW   )rn   rm   Zowning_modulerg   r   	propagater   r   r#   r$   ro   r`   r(   rj   r9   r7   r8   )r9   Zinput_nodesZoutput_argsr|   Zmkl_timeZno_mkl_timeexample_inputsr?   rz   rg   r{   r   r   use_mkl_heuristic   s    z,gen_mkl_autotuner.<locals>.use_mkl_heuristic)rk   bool)r   rz   r{   r   r   r   r   gen_mkl_autotuner   s    	r   rr   c                 C   s   t | jdkS )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r   r(   )r9   r   r   r   use_mkl_length   s    r   c                   @   sB   e Zd Zdd ZedddZeedddZeed	d
dZdS )	UnionFindc                 C   s   d g| | _ dg| | _d S )Nr   r   size)rF   nr   r   r   rp      s    zUnionFind.__init__)vc                 C   s   || j |< d| j|< d S )Nr   r   )rF   r   r   r   r   make_set   s    
zUnionFind.make_set)r   r   c                 C   sB   | j | }||kr|S |d us"J | || j |< tt| j | S r,   )r   findr   int)rF   r   parr   r   r   r      s    
zUnionFind.find)rb   bc                 C   sf   |  ||  | }}||kr"|S | j| | j| k r@|| }}|| j|< | j|  | j| 7  < d S r,   )r   r   r   )rF   rb   r   r   r   r   join   s    

zUnionFind.joinN)rI   rJ   rK   rp   r   r   r   r   r   r   r   r   r      s   r   )r0   pass_configtracerr   c              	      sV  dddt id}|du ri }|| |d r6t| } |d rFt| } |d du rV| S t|d tsltd	d|d vrtd
|d d }| }|t	|  t
|j }t|  }G dd dt}t jD ]}	|j}
|	jdkrV||	j }t|tv r|j}
t| d}|dur|jtjks:J d|jtdksJ dn2|	jdkr|	jtv rv|j}
n|	jtv r|j}
|
|jkr|
|jkrtdd |	j D sqֈ !|	& t
"|	j  fdd}W d   n1 s0    Y  t#t$t
j%j& ||	_  '|	2  (dd|	f}|	)| |	f|_ W d   q1 sP0    Y  qt*t j|}| _+ jD ]}	|	jdkrx|	jdkrx|	j d }t|	j,}|D ]2}|jdkr|jdkr|)|  -| qt.|	j,dkrx -|	 qxt. j}t/|fddt0 jD ]\}}	|	jdkrX|	jdkrX||	_12| n|	jdkr|	jdkr|	j d dusJ |	j d |	_3ntfdd|	j4D }t.|dkrq&tdd |D rJ t5|}|d |	_6|dd D ]}7|d | qq&t8 fd d} jD ]r}	t9|	d!rP|:|	j6 j;|	 t9|	d"rt|:|	j1 j<;|	 t9|	d#r(|:|	j3 j=;|	 q(|> D ]P}||s|j<|j= D ]$}	|	j d }|	)|  -|	 qt?|j|| qd} jD ]&}	|	jdks|	jdkr |d7 }q t@AtBCd$|   D  t
|  }|S )%a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicity run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuserO   mkldnn_layout_optimizeNr   rO   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                   @   s   e Zd ZdZdZdZdS )z*optimize_for_inference.<locals>.MklSupportr   r      N)rI   rJ   rK   NOYESUNKNOWNr   r   r   r   
MklSupport  s   r   r   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc                 S   s   g | ]}|j d kqS r}   )r   )rX   argr   r   r   rZ   3  rW   z*optimize_for_inference.<locals>.<listcomp>c                    s     d| fS )Nr   )call_methodr   rl   r   r   rV   6  rW   z(optimize_for_inference.<locals>.<lambda>r   r~   r   r   c                    s0   t | dr | jS t | dr, | jS d S )Ncolorstart_color)hasattrr   r   r   r   )ufr   r   	get_colorS  s
    

z)optimize_for_inference.<locals>.get_colorc                    s,   g | ]$}t |tjr |d ur |qS r,   )r"   r#   r$   r   )r   r   r   rZ   n  rW   c                 s   s   | ]}|d u V  qd S r,   r   r   r   r   r   	<genexpr>r  rW   z)optimize_for_inference.<locals>.<genexpr>r   c                      s   t  S r,   )rk   r   rl   r   r   rV   y  rW   r   r   	end_colorzmkldnn conversions: )Er   updaterC   rO   r"   r7   RuntimeErrortracer4   r5   r#   r=   rootr8   r   listr(   r   r%   r   r'   mkldnn_supportedr   next
parametersZdtyperN   re   Zdevicemkldnn_supported_unknownr   anyr    Zinserting_beforeZmap_argr   r   r   r   Zinserting_afterZcreate_noder;   ri   rg   r:   r<   r   r   	enumerater   r   r   Zall_input_nodessortedr   r   r   r   r   appendrn   ro   valuesrj   logging	getLoggerrI   infor]   )r0   r   r   Zdefault_pass_configr   Z
cur_tracerr?   r   r   r   Zsupports_mkldnnrh   Zsample_parameterZmkldnn_argsZdense_xrg   Zprv_noder:   userZ	num_nodesZcur_idxZ
cur_colorsZother_colorZmkldnn_graphsr9   ZprvZmkldnn_conversionsresultr   )rm   r   r   r   optimize_for_inference   s    
	


6
*










r   )F)rq   r   )IZtorch.fxr#   Ztorch.fx.noder   r   Ztorch.nn.utils.fusionr   typingr   r   r   r   r	   r
   r   r   rN   Ztorch.nnr1   Ztorch.nn.functionalZ
functionalFZtorch.fx.passes.shape_propr   r4   collectionsr   Ztorch.utils.mkldnnutilsZmkldnnra   operatorrv   r   enumr   r&   r   r$   r*   rf   r/   rC   rO   r`   r2   ZLinearr3   ZReLUZ	MaxPool2dZ	AvgPool2dZAdaptiveAvgPool2dZreluZ	transposeZsigmoidZ
avg_pool2dZadaptive_avg_pool2dr   addmulr   ZMkldnnConv2dZMkldnnLinearrd   ri   rj   rk   r   r   r   r   ZTracerr   r   r   r   r   <module>   sV   (	  (	
 .
&