a
    OSic                     @   s(  d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
mZmZmZ d dlZd dlZG dd deZdd	 Zd
d Zdd Zdd ZG dd deZG dd deZedg dZG dd deZG dd deZG dd deZG dd dZdd Zd Zd!d" Z d*d$d%Z!d+d(d)Z"dS ),    N)
DeviceType)defaultdict
namedtuple)
attrgetter)DictListTupleOptionalc                       s   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	e
dd Zd"ddZdd Zdd ZeedddZd#ddZd d! Z  ZS )$	EventListz&A list of Events (for pretty printing)c                    sV   | dd}| dd}| dd}tt| j|i | || _|| _d| _|| _d S )Nuse_cudaTprofile_memoryF
with_flops)popsuperr
   __init__	_use_cuda_profile_memory_tree_built_with_flops)selfargskwargsr   r   r   	__class__ X/var/www/html/django/DPS/env/lib/python3.9/site-packages/torch/autograd/profiler_util.pyr      s    zEventList.__init__c                 C   s"   |    |   |   d| _d S )NT)_populate_cpu_children_remove_dup_nodes_set_backward_stacktracesr   r   r   r   r   _build_tree   s    zEventList._build_treec                 C   s   |   S N)tabler   r   r   r   __str__    s    zEventList.__str__c                    s   t   tt| D ]}| | jd ur| | jj| | jkrt| | jjdkr| | j| | j_| | j| | j_| | jD ]}| | j|_q| | qt dkrq҇ fddt| D }| 	  | 
| q d S )N   r   c                    s   g | ]\}}| vr|qS r   r   ).0indev	to_deleter   r   
<listcomp>1       z/EventList._remove_dup_nodes.<locals>.<listcomp>)setrangelen
cpu_parentnamecpu_childrenkernelsadd	enumerateclearextend)r   idxchZnew_evtsr   r(   r   r   #   s"    zEventList._remove_dup_nodesc                 C   s   dd | D }t |tdd}tj|dd d}|D ]\}}t |dd d}g }d}|D ]~}	t|dkr|d	 }
|	jj|
jjks|	jj|
jjkr|  q\|
	|	 |	j
d
u sJ d|	j|	|
 qq\||	 qXq4d
S )a4  Populates child events into each underlying FunctionEvent object.
        One event is a child of another if [s1, e1) is inside [s2, e2). Where
        s1 and e1 would be start and end of the child event's interval. And
        s2 and e2 start and end of the parent event's interval

        Example: In event list [[0, 10], [1, 3], [3, 4]] would have make [0, 10]
        be a parent of two other intervals.

        If for any reason two intervals intersect only partially, this function
        will not record a parent child relationship between then.
        c                 S   s"   g | ]}|j s|jtjkr|qS r   )is_asyncdevice_typer   CPUr%   evtr   r   r   r*   E   r+   z4EventList._populate_cpu_children.<locals>.<listcomp>thread)keyc                 S   s   | j | jfS r!   )r>   node_ideventr   r   r   <lambda>N   r+   z2EventList._populate_cpu_children.<locals>.<lambda>c                 S   s   | j j| j j gS r!   )
time_rangestartendrA   r   r   r   rC   `   r+   r   Nz*There is already a CPU parent event for {})sortedr   	itertoolsgroupbyr.   rD   rE   rF   r   append_cpu_childr/   formatr?   set_cpu_parentappend)r   Zsync_eventseventsthreads	thread_idZthread_eventsZthread_events_Zcurrent_eventsZcur_endrB   parentr   r   r   r   5   s@    


z EventList._populate_cpu_childrenc                    s    fdd i }| D ]8} |d u r|j d ur|j|jf}||vr|j ||< q| D ]H} |}|d urR|jd ustJ |j|jf}||v r|| |_ qRg |_ qRd S )Nc                    s(   | d u rd S | j dkr| S  | jS d S Nr$   )scoper/   r=   	bw_parentr   r   rW   x   s
    
z6EventList._set_backward_stacktraces.<locals>.bw_parent)stacksequence_nrr>   
fwd_thread)r   Z
fwd_stacksr=   tpr   rV   r   r   w   s    z#EventList._set_backward_stacktracesc                 C   s   t dd | D S )Nc                 S   s   g | ]
}|j qS r   self_cpu_time_totalr%   rB   r   r   r   r*      r+   z1EventList.self_cpu_time_total.<locals>.<listcomp>)sumr   r   r   r   r^      s    zEventList.self_cpu_time_totalNd   K   Fc              
   C   s   t | ||||| j| j|dS )a  Prints an EventList as a nicely formatted table.

        Args:
            sort_by (str, optional): Attribute used to sort entries. By default
                they are printed in the same order as they were registered.
                Valid keys include: ``cpu_time``, ``cuda_time``, ``cpu_time_total``,
                ``cuda_time_total``, ``cpu_memory_usage``, ``cuda_memory_usage``,
                ``self_cpu_memory_usage``, ``self_cuda_memory_usage``, ``count``.
            top_level_events_only(bool, optional): Boolean flag to determine the
                selection of events to display. If true, the profiler will only
                display events at top level like top-level invocation of python
                `lstm`, python `add` or other functions, nested events like low-level
                cpu/cuda ops events are omitted for profiler result readability.

        Returns:
            A string containing the table.
        )sort_by	row_limitmax_src_column_widthheaderr   r   top_level_events_only)_build_tabler   r   )r   rc   rd   re   rf   rg   r   r   r   r"      s    zEventList.tablec                 C   s  ddl }t|d}g }d}|d | D ]}|jdu r:q*|d|j|jj|j |js^|jnd|j	 d|j df  |j
D ]*}|d	|j|jj|j|f  |d
7 }qq*t| dkr|| d |j |  |d W d   n1 s0    Y  dS )zExports an EventList as a Chrome tracing tools file.

        The checkpoint can be later loaded and inspected under ``chrome://tracing`` URL.

        Args:
            path (str): Path where the trace will be written.
        r   Nw[z_{"name": "%s", "ph": "X", "ts": %s, "dur": %s, "tid": %s, "pid": "CPU functions", "args": {}}, z
" node_id:z, thread_id:z "zt{"name": "%s", "ph": "s", "ts": %s, "tid": %s, "pid": "CPU functions", "id": %s, "cat": "cpu_to_cuda", "args": {}}, r$      ])osopenwrite
trace_namerD   rE   
elapsed_us	is_remoter>   r@   r2   r.   seektellSEEK_SETtruncate)r   pathrm   fZchrome_eventsnext_idr=   kr   r   r   export_chrome_trace   s<    




zEventList.export_chrome_tracec                 C   s   ddgS )Nr^   self_cuda_time_totalr   r   r   r   r   supported_export_stacks_metrics   s    z)EventList.supported_export_stacks_metrics)rw   metricc           	      C   s   ||   vr tdt|    tdd}t|d}| D ]}|jr<t|jdkr<t||}t|dkr<d}t	|jD ]}||
|7 }|d7 }qx|d d d	 tt| }||d
  q<W d    n1 s0    Y  d S )Nzmetric should be one of: z ;	
Z____ri   r    ;rG    
)r}   
ValueErrorstr	maketransrn   rX   r.   getattrintreversed	translatero   )	r   rw   r~   Ztranslate_tablerx   r=   Zmetric_valueZ	stack_strentryr   r   r   export_stacks   s    

zEventList.export_stacksr   c                 C   s   | j s
J tt}ttdf ddd}| D ]}||||| | q,t| | j| j	| j
d}|D ]}|jd| |_|sfd|_qf|S )aH  Averages all function events over their keys.

        Args:
            group_by_input_shapes: group entries by
                (event name, input shapes) rather than just event name.
                This is useful to see which input shapes contribute to the runtime
                the most and may help with size-specific optimizations or
                choosing the best candidates for quantization (aka fitting a roof line)

            group_by_stack_n: group by top n stack trace entries

        Returns:
            An EventList containing FunctionEventAvg objects.
        .)returnc                 S   sZ   t | jt | jt | jt | jg}|r8|t | j |dkrR|| jd | 7 }t|S Nr   )	r   r?   r@   r:   	is_legacyrN   input_shapesrX   tuple)rB   group_by_input_shapesgroup_by_stack_nr?   r   r   r   get_key  s    $z'EventList.key_averages.<locals>.get_keyr   r   r   Nr   )r   r   FunctionEventAvgr   r   r3   r
   valuesr   r   r   rX   r   )r   r   r   statsr   r=   Zavg_listr   r   r   key_averages   s     
zEventList.key_averagesc                 C   s(   t  }| D ]}||7 }d|_q
d|_|S )zVAverages all events.

        Returns:
            A FunctionEventAvg object.
        NZTotal)r   r?   )r   Z
total_statr=   r   r   r   total_average"  s    zEventList.total_average)Nra   rb   NF)Fr   )__name__
__module____qualname____doc__r   r    r#   r   r   r   propertyr^   r"   r{   r}   r   r   r   r   __classcell__r   r   r   r   r
      s   
B

7
'r
   c                 C   s>   d}d}| |krd | | S | |kr4d | | S d | S )+Defines how to format time in FunctionEventg    .Ag     @@z{:.3f}sz{:.3f}msz{:.3f}usrL   )time_usUS_IN_SECONDUS_IN_MSr   r   r   _format_time0  s    r   c                 C   s4   |dkr"| dksJ d | dS d | d | S )r   r   z Expected time_us == 0 but got {}NaNz{:.2f}%g      Y@r   )r   Ztotal_time_usr   r   r   _format_time_share:  s    r   c                 C   s~   d}d| }d| }t | |kr2d| d | S t | |krPd| d | S t | |krnd| d | S t| d S dS )z&Returns a formatted memory size stringi   z	{:.2f} Gb      ?z	{:.2f} Mbz	{:.2f} Kbz bN)absrL   r   )nbytesKBMBGBr   r   r   _format_memoryA  s    r   c                    s   t  fddS )Nc                    s   t t|  S r!   )r   r   r   r0   r   r   rC   P  r+   z!_attr_formatter.<locals>.<lambda>)r   r   r   r   r   _attr_formatterO  s    r   c                   @   sX   e Zd ZdZedZedZedZedZedZ	edZ
edd	 Zed
d ZdS )FormattedTimesMixinz{Helpers for FunctionEvent and FunctionEventAvg.

    The subclass should define `*_time_total` and `count` attributes.
    cpu_time	cuda_timecpu_time_totalcuda_time_totalr^   r|   c                 C   s   | j dkrdS d| j | j  S Nr   g        r   )countr   r   r   r   r   r   _  s    zFormattedTimesMixin.cpu_timec                 C   s   | j dkrdS d| j | j  S r   )r   r   r   r   r   r   r   c  s    zFormattedTimesMixin.cuda_timeN)r   r   r   r   r   cpu_time_strcuda_time_strcpu_time_total_strcuda_time_total_strself_cpu_time_total_strself_cuda_time_total_strr   r   r   r   r   r   r   r   S  s   
r   c                   @   s   e Zd Zdd Zdd ZdS )Intervalc                 C   s   || _ || _d S r!   )rE   rF   )r   rE   rF   r   r   r   r   i  s    zInterval.__init__c                 C   s   | j | j S r!   )rF   rE   r   r   r   r   rq   m  s    zInterval.elapsed_usN)r   r   r   r   rq   r   r   r   r   r   h  s   r   Kernel)r0   devicedurationc                   @   s   e Zd ZdZddddddddddejddddfddZdd	 Zd
d Zdd Z	e
dd Ze
dd Ze
dd Ze
dd Ze
dd Ze
dd Ze
dd Zdd ZdS )FunctionEventz.Profiling information about a single function.Nr   FrG   c                 C   s   || _ || _|| _|| _t||| _|| _|| _g | _d| _	g | _
d | _|| _|| _|	| _|
| _|| _|| _|| _|| _|| _|| _|| _|| _d S rS   )idr@   r0   rp   r   rD   r>   rZ   r2   r   r1   r/   r   rX   rT   cpu_memory_usagecuda_memory_usager9   rr   rY   r:   device_indexr   flops)r   r   r0   r>   start_usend_usrZ   r   rX   rT   r   r   r9   rr   rY   r@   r:   r   r   r   rp   r   r   r   r   v  s.    zFunctionEvent.__init__c                 C   s(   | j tjksJ | jt||| d S r!   )r:   r   r;   r2   rN   r   )r   r0   r   r   r   r   r   append_kernel  s    zFunctionEvent.append_kernelc                 C   s>   | j tjksJ t|tsJ |j tjks.J | j| dS )zAppend a CPU child of type FunctionEvent.

        One is supposed to append only direct children to the event to have
        correct self cpu time being reported.
        N)r:   r   r;   
isinstancer   r1   rN   )r   childr   r   r   rK     s    zFunctionEvent.append_cpu_childc                 C   s8   | j tjksJ t|tsJ |j tjks.J || _dS )a#  Set the immediate CPU parent of type FunctionEvent

        One profiling FunctionEvent should have only one CPU parent such that
        the child's range interval is completely inside the parent's. We use
        this connection to determine the event is from top-level op or not.
        N)r:   r   r;   r   r   r/   )r   rR   r   r   r   rM     s    zFunctionEvent.set_cpu_parentc                 C   s0   | j s| jtjkrdS | jtdd | jD  S )Nr   c                 S   s   g | ]
}|j qS r   )r   r%   r   r   r   r   r*     r+   z7FunctionEvent.self_cpu_memory_usage.<locals>.<listcomp>)r9   r:   r   r;   r   r`   r1   r   r   r   r   self_cpu_memory_usage  s
    z#FunctionEvent.self_cpu_memory_usagec                 C   s0   | j s| jtjkrdS | jtdd | jD  S )Nr   c                 S   s   g | ]
}|j qS r   )r   r   r   r   r   r*     r+   z8FunctionEvent.self_cuda_memory_usage.<locals>.<listcomp>)r9   r:   r   r;   r   r`   r1   r   r   r   r   self_cuda_memory_usage  s
    z$FunctionEvent.self_cuda_memory_usagec                 C   s0   | j s| jtjkrdS | jtdd | jD  S )Nr   c                 S   s   g | ]
}|j qS r   )r   r   r   r   r   r*     r+   z5FunctionEvent.self_cpu_time_total.<locals>.<listcomp>)r9   r:   r   r;   r   r`   r1   r   r   r   r   r^     s
    z!FunctionEvent.self_cpu_time_totalc                 C   sx   | j r
dS | jtjkrZ| jsDtdd | jD tdd | jD  S tdd | jD S n| jtjksjJ | j	
 S d S )Nr   c                 s   s   | ]}|j V  qd S r!   r   r%   Zkinfor   r   r   	<genexpr>  r+   z0FunctionEvent.cuda_time_total.<locals>.<genexpr>c                 s   s   | ]}|j V  qd S r!   r   )r%   r8   r   r   r   r     r+   c                 s   s   | ]}|j V  qd S r!   r   r   r   r   r   r     r+   )r9   r:   r   r;   r   r`   r2   r1   CUDArD   rq   r   r   r   r   r     s    zFunctionEvent.cuda_time_totalc                 C   sJ   | j r
dS | jtjkr0| jtdd | jD  S | jtjks@J | jS d S )Nr   c                 S   s   g | ]
}|j qS r   r   r   r   r   r   r*     r+   z6FunctionEvent.self_cuda_time_total.<locals>.<listcomp>)r9   r:   r   r;   r   r`   r1   r   r   r   r   r   r|     s    z"FunctionEvent.self_cuda_time_totalc                 C   s   | j tjkr| j S dS d S r   )r:   r   r;   rD   rq   r   r   r   r   r     s    
zFunctionEvent.cpu_time_totalc                 C   s   | j S r!   r   r   r   r   r   r?     s    zFunctionEvent.keyc                 C   sf   d | j| j| j| j| j| jj| jjt	dd | j
D | j| j| jt	| j| j| j| j| j| j| jS )Nz<FunctionEvent id={} name={} device_type={} node_id={} cpu_time={} start_us={} end_us={} cpu_children={} cuda_time={} name={} thread={} input_shapes={} cpu_memory_usage={} cuda_memory_usage={} is_async={} is_remote={} seq_nr={} is_legacy={}>c                 S   s   g | ]
}|j qS r   )r   r   r   r   r   r*     r+   z*FunctionEvent.__repr__.<locals>.<listcomp>)rL   r   r0   r:   r@   r   rD   rE   rF   r   r1   r   r>   r   r   r   r9   rr   rY   r   r   r   r   r   __repr__  s*    zFunctionEvent.__repr__)r   r   r   r   r   r;   r   r   rK   rM   r   r   r   r^   r   r|   r   r?   r   r   r   r   r   r   t  s0   









r   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )r   z:Used to average stats over multiple FunctionEvent objects.c                 C   s   d | _ d| _d| _d| _d| _d| _d| _d| _d| _d | _	d | _
d | _d| _d| _d| _d| _d | _d | _tj| _d| _d| _d S )Nr   F)r?   r   r@   r9   rr   r   r   r^   r|   r   rX   rT   r   r   r   r   r1   r/   r   r;   r:   r   r   r   r   r   r   r     s*    zFunctionEventAvg.__init__c                 C   sJ  | j d u rb|j | _ |j| _|j| _|j| _|j| _|j| _|j| _|j| _|j| _|j	| _	|j
| _
t|ttfstJ |j | j ksJ |  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _| jd u r*|j| _n|jd urF|  j|j7  _| S r!   )r?   r@   r9   rr   r/   r1   r   rX   rT   r:   r   r   r   r   r   r   r^   r|   r   r   r   r   r   r   r   otherr   r   r   r3   "  s8    

zFunctionEventAvg.addc                 C   s
   |  |S r!   )r3   r   r   r   r   __iadd__D  s    zFunctionEventAvg.__iadd__c              
   C   s,   d | j| j| j| j| jt| j| j| j	S )Nz<FunctionEventAvg key={} self_cpu_time={} cpu_time={}  self_cuda_time={} cuda_time={} input_shapes={} cpu_memory_usage={} cuda_memory_usage={}>)
rL   r?   r   r   r   r   r   r   r   r   r   r   r   r   r   G  s    zFunctionEventAvg.__repr__N)r   r   r   r   r   r3   r   r   r   r   r   r   r   	  s
   "r   c                   @   s   e Zd Zdd ZdS )StringTablec                 C   s(   t |dkrtj|n|| |< | | S rS   )r.   torch_C	_demangle)r   r?   r   r   r   __missing__Y  s     zStringTable.__missing__N)r   r   r   r   r   r   r   r   r   X  s   r   c                   @   s    e Zd ZdZdd Zdd ZdS )MemRecordsAccz<Acceleration structure for accessing mem_records in intervalc                 C   sH   || _ g | _g | _t|dkrDtdd t|D }t| \| _| _d S )Nr   c                 S   s    g | ]\}}|d    |fqS r   )r   )r%   irr   r   r   r*   i  r+   z*MemRecordsAcc.__init__.<locals>.<listcomp>)_mem_records_start_uses_indicesr.   rH   r4   zip)r   mem_recordstmpr   r   r   r   d  s    zMemRecordsAcc.__init__c                 c   sB   t | j|}t | j|}t||D ]}| j| j|  V  q&d S r!   )bisectbisect_leftr   bisect_rightr-   r   r   )r   r   r   	start_idxend_idxr   r   r   r   in_intervall  s    zMemRecordsAcc.in_intervalN)r   r   r   r   r   r   r   r   r   r   r   a  s   r   c                    s   g d}t  fdd|D S )N))autograd/__init___make_grads)r   backward)ztorch/tensorr   )_internal/common_utilsZprof_callable)r   Zprof_func_call)r   Zprof_meth_callc                    s&   g | ]}|d   v o|d  v  qS )r   r$   r   )r%   rx   r   r   r   r*   |  r+   z'_filter_stack_entry.<locals>.<listcomp>)all)r   Zfiltered_entriesr   r   r   _filter_stack_entrys  s    r   z[memory]c                 C   s   t ddddddg}| |v S )Nz profiler::_record_function_enterz$profiler::_record_function_enter_newzprofiler::_record_function_exitzaten::is_leafzaten::output_nrzaten::_version)MEMORY_EVENT_NAME)r0   Zfiltered_out_namesr   r   r   _filter_name  s    	r   Fc                 C   s$   t  }||  } |r | dr d} | S )NzProfilerStep#zProfilerStep*)r   
startswith)r0   with_wildcardZstring_tabler   r   r   _rewrite_name  s    
r   ra   rb   c           +         s  t | dkrdS tdd | D }tdd | D }	tdd | D }
durptt| fdd	d
d|||d} d}tdd | D d }t||}d}tdd | D d }t||}d}|}d}g }| D ](}|jdurt |jdkr||j qt |dk}|r(tdd |D d }t||}g d}|rD|g d |rl|ddg |	rl|ddg |d tdd | D }|r|d d dgdg  gd}d8 fd d!	}d"d# }|| |d$d D ]}|| q|
r|d% || |r(|d& ||d'd( |rg }| D ]}|j	dkr6||j	 q6t |dkr|t|\}}|d)
| || nd*}d }d }d } d}g fd+d,}!td-d | D }"d}#| D ]@}|jtjkr|jr|#|j7 }#n|jtjkr|#|j7 }#q|dur8|!d.|   |!| |rR|!d.|   |!d/ |!| |!|j
|  |!| d0d1 }$d}%| D ]}|%|kr q|r|jdurqn|%d$7 }%|j}&t |&|d2 kr|&d|d2  d3 }&|&t|j|"|j|jst|j|"nd|j|jg}'|r:|'|jt|j|#|j|jg |rz|'t|jt|j g |	rz|'t|j!t|j"g |'|j# |r|'|j$ |
r|'t%|j&d|  |r|j	dkr|'d4 n|'d5
|j	|  |rd}(t |jdkr|$|jd |}(|'|( |!|j
|'  |rdgt |d$  })|jd$| D ] }*|!|j
|)|$|*|g   qR|)d |!|j
|)  q|!| |!d6
t'|" |r|!d7
t'|# d(S )9zVPrints a summary of events (which can be a list of FunctionEvent or FunctionEventAvg).r   r   c                 S   s   g | ]}|j d kqS r   )r|   r_   r   r   r   r*     r+   z _build_table.<locals>.<listcomp>c                 S   s   g | ]}|j d kqS r   )r   r_   r   r   r   r*     r+   c                 S   s$   g | ]}|j d uot|j dkqS r   )r   r.   r_   r   r   r   r*     r+   Nc                    s
   t |  S r!   )r   rU   )rc   r   r   rC     r+   z_build_table.<locals>.<lambda>T)r?   reverser   7   c                 S   s   g | ]}t |jqS r   )r.   r?   r<   r   r   r   r*     r+      P   c                 S   s   g | ]}t t|jqS r   )r.   r   r   r<   r   r   r   r*     r+      c                 S   s   g | ]}t d d |D qS )c                 S   s   g | ]}t |qS r   r.   )r%   r   r   r   r   r*     r+   z+_build_table.<locals>.<listcomp>.<listcomp>)max)r%   rX   r   r   r   r*     r+   )Namez
Self CPU %zSelf CPUzCPU total %z	CPU totalzCPU time avg)z	Self CUDAzSelf CUDA %z
CUDA totalzCUDA time avgzCPU MemzSelf CPU MemzCUDA MemzSelf CUDA Memz
# of Callsc                 S   s   g | ]}|j d kqS )rG   )r@   r<   r   r   r   r*     r+   zNode IDrk      >c                    s\   d  d| t |  d d   7  < d  d|  d   7  < d  |   7  < d S )Nr   z{: }r   -)r   )paddingtext_dir)SPACING_SIZEheader_sep_lstline_length_lstrow_format_lstr   r   
add_column  s    (z _build_table.<locals>.add_columnc                 S   sr   g d}| dksJ t dtt| d tt|d }|dkrN|t|k sRJ tdt|d |t| fS )N)ZFLOPsZKFLOPsZMFLOPsZGFLOPsZTFLOPsZPFLOPsr      r$   
   g      )	r   minmathlog10floatr.   powfloorr   )r   Zflop_headersZ	log_flopsr   r   r   auto_scale_flops  s
    &z&_build_table.<locals>.auto_scale_flopsr$   zInput ShapeszSource Location<)r  zTotal {}Fc                    s     |    d d S )Nr   )rN   )s)resultr   r   rN   !  s    
z_build_table.<locals>.appendc                 S   s   g | ]
}|j qS r   r]   r_   r   r   r   r*   %  r+   =z1This report only display top-level ops statisticsc                 S   sD   t | |kr@t | | }| |d  } t | dkr@d| dd   } | S )Nr  ...r   )rw   src_column_widthoffsetr   r   r   	trim_path<  s    z_build_table.<locals>.trim_pathr  r  z--z{0:8.3f}zSelf CPU time total: {}zSelf CUDA time total: {})r  ))r.   anyr
   rH   r   r  rX   rN   r6   r   rL   r`   r:   r   r;   r   r|   r   r/   r?   r   r^   r   r9   r   r   r   r   r   r   r   r   r   r   r   r   r@   r   r   r   join)+rO   rc   rf   rd   re   r   r   rg   Zhas_cuda_timeZhas_cuda_memZhas_input_shapesZMAX_NAME_COLUMN_WIDTHZname_column_widthZMAX_SHAPES_COLUMN_WIDTHZshapes_column_widthZDEFAULT_COLUMN_WIDTHZflops_column_widthr  Zstacksr=   Z	has_stackheadersZappend_node_idZMAX_STACK_ENTRYr  r  _Z	raw_flopsZflops_scaleZflops_headerZ
row_formatZ
header_sepZline_lengthrN   Zsum_self_cpu_time_totalZsum_self_cuda_time_totalr  Zevent_limitr0   Z
row_valuesZ	src_fieldZempty_headersr   r   )r  r	  r
  r  r  rc   r   rh     sB   















rh   )F)NNra   rb   FFF)#rI   r   torch.autogradr   collectionsr   r   operatorr   typingr   r   r   r	   r   r  listr
   r   r   r   r   objectr   r   r   r   r   r   r   r   r   r   r   rh   r   r   r   r   <module>   sD     $
	 O	

       