a
    .=icG                     @   s  d dl mZ d dlmZmZmZmZmZmZ d dl	m
Z
 d dlmZmZmZmZmZmZmZ d dlm  mZ d dlmZ d dlmZmZmZmZmZmZmZmZm Z m!Z! d dl"m#Z# ed	d
G dd dZ$ed	d
G dd dZ%ee&dddZ'eeeeeee$f f e(f dddZ)ed	d
G dd dZ*e*d dej+de*ddej,dgZ-eeeee$f ee e(dddZ.e#ee(dddZ/ed	d
G dd  d Z0e#ee(dd!d"Z1eeeee%f ee e(dd#d$Z2e#ee(dd%d&Z3dS )'    )	dataclass)UnionOptionalListTupleDictSequence)	translate)NativeFunctionsGroup
ScalarTypeUfuncKeyDispatchKeyBaseTypeBaseTyArgumentN)UfunctorBindings)
StructuredImplSignaturescalar_topmath_tBindingCType	BaseCTypeExpr
NamedCTypeScalarTypeToCppMappingVectorizedCType)with_native_functionT)frozenc                   @   s   e Zd ZU eed< ee ed< eed< edddZ	e
e dddZedd	d
ZedddZedddZedddZdS )UfunctorSignaturegscalar_tensor_idxnamereturnc                 C   s   t j| j| jtdS )N)r    r   )ufuncZufunctor_argumentsr   r    r   self r'   d/home/droni/.local/share/virtualenvs/DPS-5Je3_V2c/lib/python3.9/site-packages/torchgen/dest/ufunc.py	arguments@   s    
zUfunctorSignature.argumentsc                 C   s   dd |   jD S )Nc                 S   s   g | ]}| |j d qS )_)renamer!   .0br'   r'   r(   
<listcomp>G       z,UfunctorSignature.fields.<locals>.<listcomp>)r)   ctorr%   r'   r'   r(   fieldsE   s    zUfunctorSignature.fieldsc                 C   s   t tS N)r   r   r%   r'   r'   r(   returns_typeI   s    zUfunctorSignature.returns_typec                 C   s   d dd |  D S )N
c                 s   s"   | ]}|j  d |j dV  qdS ) ;N)typer!   )r-   fr'   r'   r(   	<genexpr>O   r0   z0UfunctorSignature.decl_fields.<locals>.<genexpr>)joinr2   r%   r'   r'   r(   decl_fieldsN   s    zUfunctorSignature.decl_fieldsc                 C   sL   d dd |  jD }d dd |  jD }| j d| d| dS )N, c                 s   s   | ]}|  V  qd S r3   declr-   ar'   r'   r(   r:   R   r0   z5UfunctorSignature.inline_defn_ctor.<locals>.<genexpr>c                 s   s"   | ]}|j  d |j  dV  qdS )z_()Nr!   r@   r'   r'   r(   r:   U   r0   (z) : z {})r;   r)   r1   r!   )r&   args_strZinit_strr'   r'   r(   inline_defn_ctorQ   s    z"UfunctorSignature.inline_defn_ctorc                 C   s2   d dd |  jD }|    d| dS )Nr=   c                 s   s   | ]}|  V  qd S r3   r>   r@   r'   r'   r(   r:   Y   r0   z/UfunctorSignature.decl_apply.<locals>.<genexpr>z operator()(z) const)r;   r)   applyr4   Zcpp_type)r&   rE   r'   r'   r(   
decl_applyX   s    zUfunctorSignature.decl_applyN)__name__
__module____qualname__r
   __annotations__r   intstrr   r)   r   r   r2   r   r4   r<   rF   rH   r'   r'   r'   r(   r   :   s   
r   c                   @   sT   e Zd ZU eed< eed< eed< ee dddZ	e
eeef  eddd	Zd
S )UfuncSignaturer   r!   	compute_tr"   c                 C   s   t j| j| jdS )N)rP   )r$   Zufunc_argumentsr   rP   r%   r'   r'   r(   r)   c   s    zUfuncSignature.argumentsctxr#   c              	   C   s,   | j  dddd t||  D  dS )NrD   r=   c                 s   s   | ]}|j V  qd S r3   exprr@   r'   r'   r(   r:   g   r0   z&UfuncSignature.call.<locals>.<genexpr>rB   r!   r;   r	   r)   r&   rR   r'   r'   r(   callf   s    zUfuncSignature.callN)rI   rJ   rK   r
   rL   rN   r   r   r   r)   r   r   r   rW   r'   r'   r'   r(   rO   ]   s
   
rO   )r   r#   c                 C   s"   t dd | jjjjD }|dkS )Nc                 s   s   | ]}|j  rd V  qdS )   N)r8   is_tensor_liker@   r'   r'   r(   r:   }   s   z<eligible_for_binary_scalar_specialization.<locals>.<genexpr>   )sum
functionalfuncr)   flat_non_out)r   Znum_tensorsr'   r'   r(   )eligible_for_binary_scalar_specialization|   s    
r_   c                 C   s  i }g }| j j}tjdtjdtjd i}t| r@tjtjtjg}n2tjg}tjtjfD ]}||vsTJ d| dqT|D ]X}||v rt| || || jd}|| j	D ]}||
|i |< qqvd }	t }
tjtjfD ]H}||vrq|	d u r|| j}	n|	|| jksJ d|
|| j	O }
q|	d us,J | d|	 }t| || |d}|
D ]}||
|i |< qPt| d|	 ttd	}| | j }|d
|j d|  d|  d|  d|| d qv|d|fS )NrX   r   zcannot use z on non-binary function)r    r!   z0ScalarOnly and Generic must have same ufunc namer*   ufunc::r!   rP   z%
template <typename scalar_t>
struct z3 {
  using opmath_t = at::opmath_type<scalar_t>;
  z
  z
  __device__ z {
    return z	;
  }
};
r5   )outufunc_inner_loopr   CUDAFunctorOnSelfCUDAFunctorOnOtherCUDAFunctorr_   r   r!   supported_dtypes
setdefaultset
ScalarOnlyGenericrO   r   r   r2   r)   rG   appendr<   rF   rH   rW   r;   )r   ufunctor_sigs	ufunctorsloopsZscalar_tensor_idx_lookupkeyskufunctor_sigdtypeZ
ufunc_namerg   lkr!   Z	ufunc_sigZ	apply_ctxr'   r'   r(   compute_ufunc_cuda_functors   sx    
	
ru   c                   @   s&   e Zd ZU eed< eed< eed< dS ) BinaryScalarSpecializationConfig
scalar_idxctor_tensor	ufunc_keyN)rI   rJ   rK   rM   rL   rN   r   r'   r'   r'   r(   rv      s   
rv   r&   )rw   rx   ry   rX   other)r   rs   inner_loops
parent_ctxr#   c           
      C   s   d}|d7 }t D ]}|j|vr q||j }|jd }t|}|td| dt|jtt	d d
dd	 t|| jD }	|d
| d|j d|	 d| d	7 }q|tj }d
dd	 t|| jD }	|d|j d|	 d7 }|S )Nz+using opmath_t = at::opmath_type<scalar_t>;zif (false) {}
rX   ziter.scalar_value<opmath_t>(rB   )rT   r8   r=   c                 s   s   | ]}|j V  qd S r3   rS   r@   r'   r'   r(   r:     s   z0compute_ufunc_cuda_dtype_body.<locals>.<genexpr>zelse if (iter.is_cpu_scalar(z)) {
  z<scalar_t> ufunctor(z);
  iter.remove_operand(z");
  gpu_kernel(iter, ufunctor);
}c                 s   s   | ]}|j V  qd S r3   rS   r@   r'   r'   r(   r:     s   z
else {
  gpu_kernel(iter, z<scalar_t>(z
));
}
    )!BinaryScalarSpecializationConfigsry   rw   listrl   r   r   rx   r   r   r;   r	   r)   r1   r!   r   rf   )
r   rs   r{   r|   bodyconfigrr   rw   rR   Zufunctor_ctor_exprs_strr'   r'   r(   compute_ufunc_cuda_dtype_body   sH    







r   c           	      C   s   t | \}}t| t| tj}g }| D ]@\}}|d|j d| dt	|  dt
| |||  d	 q,d|}t| }d| d|  d|  d	|  d
|j d| d|j d|j d|j d|  d||  dS )N
AT_PRIVATE_CASE_TYPE("", at::ScalarType::r=   ,
  [&]() {
    
  }
)
r5   z

;
;

N {
  at::ScalarType st = iter.common_dtype();
  RECORD_KERNEL_FUNCTION_DTYPE("", st);
  switch (st) {
    (
    default:
      TORCH_CHECK(false, "zI", " not implemented for '", toString(st), "'");
  }
}
REGISTER_DISPATCH(, &z);

 {
  ;
}
)ru   r   r$   kernel_namer   CUDAitemsrl   r!   r   r   r)   r;   StubSignature	type_defndispatch_declkernel_defndefndirect_call)	r   rm   rn   sigdtype_casesrs   Zinner_ufunctor_sigsdtype_cases_strstub_sigr'   r'   r(   compute_ufunc_cuda  sP    


r   c                   @   s   e Zd ZU eed< eedddZeedddZeedddZ	e
e dd	d
ZedddZedddZedddZedddZedddZee edddZee edddZdS )r   r   r"   c                 C   s   t | jjjjj dS )NZ_stubrN   r   r\   r]   r!   r%   r'   r'   r(   r!   S  s    zStubSignature.namec                 C   s   t | jjjjj dS )NZ_kernelr   r%   r'   r'   r(   r   W  s    zStubSignature.kernel_namec                 C   s   t | jjjjj dS )N_fnr   r%   r'   r'   r(   	type_name[  s    zStubSignature.type_namec                 C   s   t | jS r3   )r$   Zstub_argumentsr   r%   r'   r'   r(   r)   _  s    zStubSignature.argumentsc                 C   s$   |   }dddd |D  dS )Nzvoid(*)(TensorIteratorBase&, r=   c                 s   s   | ]}|j V  qd S r3   )r8   r@   r'   r'   r(   r:   d  r0   z%StubSignature.type.<locals>.<genexpr>rB   )r)   r;   )r&   Zcpp_argsr'   r'   r(   r8   b  s    zStubSignature.typec                 C   s   d| j  d| j dS )NzDECLARE_DISPATCH(r=   rB   )r   r!   r%   r'   r'   r(   r   f  s    zStubSignature.dispatch_declc                 C   s   d| j  dS )NzDEFINE_DISPATCH(rB   rC   r%   r'   r'   r(   dispatch_defni  s    zStubSignature.dispatch_defnc                 C   s(   d| j  dddd |  D  dS )Nzvoid z(TensorIteratorBase& iter, r=   c                 s   s   | ]}|  V  qd S r3   )r   r@   r'   r'   r(   r:   m  r0   z,StubSignature.kernel_defn.<locals>.<genexpr>rB   )r   r;   r)   r%   r'   r'   r(   r   l  s    zStubSignature.kernel_defnc                 C   s   d| j  d|   S )Nzusing  = )r   r8   r%   r'   r'   r(   r   o  s    zStubSignature.type_defnrQ   c              	   C   s,   | j  dddd t||  D  dS )Nz(device_type(), *this, r=   c                 s   s   | ]}|j V  qd S r3   rS   r@   r'   r'   r(   r:   t  r0   z%StubSignature.call.<locals>.<genexpr>rB   rU   rV   r'   r'   r(   rW   s  s    zStubSignature.callc              	   C   s,   | j  dddd t||  D  dS )Nz(*this, r=   c                 s   s   | ]}|j V  qd S r3   rS   r@   r'   r'   r(   r:   x  r0   z,StubSignature.direct_call.<locals>.<genexpr>rB   )r   r;   r	   r)   rV   r'   r'   r(   r   w  s    zStubSignature.direct_callN)rI   rJ   rK   r
   rL   propertyrN   r!   r   r   r   r   r)   r8   r   r   r   r   r   rW   r   r'   r'   r'   r(   r   O  s   
r   c                 C   sZ   t | }t| t| tj}d|  d|  d|  d|	  d|
|  dS )Nr5   r   r   r   r   )r   r   r$   r   r   ZCPUr   r   r   r   rW   r)   )r   r   r   r'   r'   r(   compute_ufunc_cpu{  s    r   c                    s  t j|v s J | d|  | t jt jhks8J |t j }d }t j|v rZ|t j }g }g  |D ]f}t|jtr|jjtt	j
krqf|d|j d|j d  td|j t|jjtt qf|d urL|D ]n}t|jtr|jjtt	j
krq|d|j d|j d  td	|j t|jjttt qg }g }	| jjjjD ]~}
|
j stq`|
jtt	jksJ |t|
jt|
jtt|
d
 |d ur`|	t|
jt|
jttt|
d
 q`tt ttttf  d fdd}d|}|d urpd| dddd |D  d||| dddd |	D  d|||	 dS d| dddd |D  d||| dS d S )Nr=   zauto _s_r   z.to<scalar_t>();Z_s_zauto _v_z$ = at::vec::Vectorized<scalar_t>(_s_z);Z_v_)r!   nctypeargument)r.   r#   c                    s   g }|   | |  |S r3   )extend)r.   rrR   r'   r(   with_ctx  s    

z.compute_ufunc_cpu_dtype_body.<locals>.with_ctxr5   z
cpu_kernel_vec(iter,
  [=](c                 s   s   | ]}|  V  qd S r3   r>   r,   r'   r'   r(   r:     r0   z/compute_ufunc_cpu_dtype_body.<locals>.<genexpr>z) { return z; },
  [=](c                 s   s   | ]}|  V  qd S r3   r>   r,   r'   r'   r(   r:     r0   z; }
);
z
cpu_kernel(iter,
  [=](c                 s   s   | ]}|  V  qd S r3   r>   r,   r'   r'   r(   r:     r0   )r   	CPUScalarrp   	CPUVector
isinstancer   r   r8   r   r   ZScalarrl   r!   r   r   r   r   r   r   r\   r]   r)   r^   rY   ZTensorr   r   r   r   r;   rW   )r   rs   r{   r|   Zscalar_loopZvec_loopr   r.   Zscalar_bindingsZvec_bindingsrA   r   Zbody_strr'   r   r(   compute_ufunc_cpu_dtype_body  s     


	(

	
$

r   c                 C   s  t | }| jj}i }tjtjfD ]}g }||v r:|| tj|v rZ|tju rZ|tj tj|v rp|tj |D ]x}|| j	D ]h}|tju rt
t}n|tju rtt
t}nt ||i }	||	vrt| d|| j |d|	|< qqtq g }
| D ]@\}}	|
d|j d| dt|  dt| ||	|  d	 qd|
}d	|  d
|j d| d|j d|  d|  d|j d|j dS )Nr`   ra   r   r   r=   r   r   r5   z
namespace {

r   r   r   zR", " not implemented for '", toString(st), "'");
  }
}

} // anonymous namespace

r   z;
REGISTER_DISPATCH(r   z);
)r   rb   rc   r   r   r   rl   rj   rk   rg   r   r   r   AssertionErrorrh   rO   r!   r   r   r   r)   r;   r   r   r   r   )r   r   ro   Z
ufunc_sigsrq   Zlksrt   rs   rP   Zinner_ufunc_sigsr   r   r'   r'   r(   compute_ufunc_cpu_kernel  sl    






r   )4dataclassesr   typingr   r   r   r   r   r   Ztorchgen.api.translater	   Ztorchgen.modelr
   r   r   r   r   r   r   Ztorchgen.api.ufuncapir$   r   Ztorchgen.api.typesr   r   r   r   r   r   r   r   r   r   Ztorchgen.contextr   r   rO   boolr_   rN   ru   rv   re   rd   r}   r   r   r   r   r   r   r'   r'   r'   r(   <module>   s`    $	0"S
/4+
]