a
    ==ic~k                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
m  mZ d dlmZmZmZ d dlm  m  m  mZ d dlmZ eeZeej eeej e dg dZ!dd Z"G dd	 d	e#Z$d
d Z%dddZ&dd Z'edkre'  dS )    N)core	workspacedata_parallel_model)Seq2SeqModelHelperBatchencoder_inputsencoder_lengthsdecoder_inputsdecoder_lengthstargetstarget_weightsc              
   C   sv  dd | D }t |}g }t dd | D }g }g }g }g }| D ]\}	}
tjg|t|	  }|tt|	|  tjg|t|
  }tjg|
 }|t| |||  |
tjg }|| }|| t|	t|
 dkrdgt| }ndd |D }|| q@t	t
j|t
jd t
j|t
jdt
j|t
jd t
j|t
jdt
j|t
jd t
j|t
jd dS )Nc                 S   s   g | ]}t |d  qS )r   len.0entry r   s/home/droni/.local/share/virtualenvs/DPS-5Je3_V2c/lib/python3.9/site-packages/caffe2/python/models/seq2seq/train.py
<listcomp>'       z!prepare_batch.<locals>.<listcomp>c                 S   s   g | ]}t |d  qS )   r   r   r   r   r   r   *   r   r   c                 S   s   g | ]}|t jkrd ndqS )r   r   )seq2seq_utilPAD_ID)r   targetr   r   r   r   G   s   )Zdtyper   )maxr   r   r   appendlistreversedZGO_IDZEOS_IDr   nparrayint32Z	transposefloat32)batchr	   Zmax_encoder_lengthr   Zmax_decoder_lengthZbatch_encoder_inputsZbatch_decoder_inputsZbatch_targetsZbatch_target_weightsZ
source_seqZ
target_seqZencoder_padsZdecoder_padsZtarget_seq_with_go_tokenZtarget_seq_with_eosr   r   r   r   r   prepare_batch&   s`    
r$   c                   @   s   e Zd Zdd Zdd Zdd Zd*d	d
Zd+ddZdd Zdd Z	dd Z
dd Zdd Zdd Zd,ddZdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) ZdS )-Seq2SeqModelCaffe2c                    s   t |d} |  | t |d} |  |  jdkrt |}||  j|dd  | nh j j dksJ t	j
|dd  jd tt jd  fdd	}t	j
|d
d  j|tt jd  j|dd | _|j _d S )Ninit_paramsr   norm_clipped_grad_updatescopec                 S   s   d S Nr   mr   r   r   <lambda>   r   z1Seq2SeqModelCaffe2._build_model.<locals>.<lambda>)Zinput_builder_funZforward_pass_builder_funZparam_update_builder_funZdevicesc                    s    j | dd d S )Nr(   r)   )r(   )modelselfr   r   clipped_grad_update_bound   s    zBSeq2SeqModelCaffe2._build_model.<locals>.clipped_grad_update_boundc                 S   s   d S r+   r   r,   r   r   r   r.      r   norm_clipped_sparse_grad_update)r   _build_shared_build_embeddingsnum_gpusmodel_build_funZAddGradientOperatorsr(   forward_model_build_fun
batch_sizer   ZParallelize_GPUr   ranger3   r/   netforward_net)r1   r'   r/   Zforward_modelZ
loss_blobsr2   r   r0   r   _build_modele   sH    








zSeq2SeqModelCaffe2._build_modelc                 C   s   | j d }tttjR |jdt|d dd| _|jdddd| _	|jdt

 dd| _W d    n1 st0    Y  d S )Noptimizer_paramslearning_rateF)nameZ
init_valueZ	trainableglobal_stepr   
start_time)model_paramsr   DeviceScopeDeviceOption
caffe2_pb2CPUZAddParamfloatr?   rA   timerB   )r1   r/   r>   r   r   r   r4      s"    

z Seq2SeqModelCaffe2._build_sharedc                 C   s   t t tj td}|jjg d| j	| j
d g| |d| _|j| j |jjg d| j| j
d g| |d| _|j| j W d    n1 s0    Y  d S )N   encoder_embeddingsencoder_embedding_size)shapeminr   decoder_embeddingsdecoder_embedding_size)r   rD   rE   rF   rG   mathsqrtparam_init_netZUniformFillsource_vocab_sizerC   rK   paramsr   target_vocab_sizerO   )r1   r/   Zsqrt3r   r   r   r5      s,    


z$Seq2SeqModelCaffe2._build_embeddingsFNc                 C   s  |j t d }|j t d }|j t d }|j t d }|j t d }|j t d }	| jd }
|
dv sJ tj|| jt| jd	 ||| j	| j
| jd
 |
dk| jd
\}}}}}tj|| jd	 ||||||||| j| j| jd |
d| jd\}}tj|||| j| jd d}|j j|gddgdgd\}}|j j|	gddgdgd\}	}|j j|||	gddgdd\}}|j |	gd}|j ||gd}|j j|gdd| j d}|gS )Nr   r	   r
   r   r   r   	attention)noneregulardotdecoder_layer_configsrL   rX   )
r/   encoder_paramsZnum_decoder_layersinputsinput_lengths
vocab_size
embeddingsembedding_sizeuse_attentionr6   rP   F)r[   r]   r^   r	   encoder_outputsweighted_encoder_outputsfinal_encoder_hidden_statesfinal_encoder_cell_statesencoder_units_per_layerr_   r`   ra   attention_typeforward_onlyr6   decoder_softmax_size)r/   decoder_outputsdecoder_output_sizerV   rj   Ztargets_old_shape)rM   Ztarget_weights_old_shapeZOutputProbs_INVALIDloss_per_wordT)Z	only_loss	num_wordstotal_loss_scalartotal_loss_scalar_weighted      ?)scale)r;   ZAddExternalInputr   ZGetNameScoperC   r   Zbuild_embedding_encoderr\   r   rT   rK   r6   Zbuild_embedding_decoderrV   rO   Zoutput_projectionZReshapeZSoftmaxWithLossSumElementsMulZScaler9   )r1   r/   ri   
loss_scaler   r	   r
   r   r   r   rh   rc   rd   re   rf   rg   rk   rl   Zoutput_logits_rn   ro   rp   rq   r   r   r   r7      s    









z"Seq2SeqModelCaffe2.model_build_func                 C   s   | j |d|dS )NT)r/   ri   rv   )r7   )r1   r/   rv   r   r   r   r8   +  s
    z*Seq2SeqModelCaffe2.forward_model_build_func                 C   s  t |  g }t|D ]n\}}t| t|j| t jsH|j| n
|j| j}|j	
|gd|}	|j	|	d|}
||
 q|j	|d}|j	j|ddd}|jjg dg t| jd d	}|j	||gd
}|j	||gd}|W  d    S 1 s0    Y  d S )Nzgrad_{}_squaredzgrad_{}_squared_sumgrad_squared_full_sumglobal_norm      ?)exponent	clip_normmax_gradient_normrM   valuemax_norm
norm_ratio)r   	NameScope	enumerateloggerinfo
isinstanceparam_to_gradGradientSlicevaluesr;   ZSqrformatrt   r   ZSumPowrS   ConstantFillrH   rC   ZMaxDiv)r1   r/   rU   r*   ONEZgrad_squared_sumsiparamZgradZgrad_squaredZgrad_squared_sumrx   ry   r|   r   r   r   r   r   _calc_norm_ratio2  sX    

z#Seq2SeqModelCaffe2._calc_norm_ratioc              	   C   s   |D ]}|j | }|j|gd}	t|& |jj|	|gddd}
W d    n1 sX0    Y  t|tjr|j}|j	|||j
||
g| q|j||||
g| qd S )NZnegative_learning_rateupdate_coeffr   )	broadcast)r   r;   ZNegativer   r   ru   r   r   r   ZScatterWeightedSumindicesZWeightedSum)r1   r   r/   rU   r?   r*   r   r   Z
param_gradZnlrr   Zparam_grad_valuesr   r   r   _apply_norm_ratioa  s>    
$z$Seq2SeqModelCaffe2._apply_norm_ratioc                 C   s   | j dkr| j}n|| jd}g }|jddD ]*}||jv r0t|j| tjs0|| q0|j	j
g ddgdd}td	 | ||||}| |||||| d S )
Nr   ZLRTZ	top_scoper   r   rr   r~   zDense trainable variables: )r6   r?   ZCopyCPUToGPU	GetParamsr   r   r   r   r   rS   r   r   r   r   r   r1   r/   r*   r?   rU   r   r   r   r   r   r   r(     s,    


z+Seq2SeqModelCaffe2.norm_clipped_grad_updatec                 C   s   | j }g }|jddD ]*}||jv rt|j| tjr|| q|jjg ddgdd}t	
d | ||||}| |||||| d S )NTr   r   r   rr   r~   zSparse trainable variables: )r?   r   r   r   r   r   r   rS   r   r   r   r   r   r   r   r   r   r3     s(    

z2Seq2SeqModelCaffe2.norm_clipped_sparse_grad_updatec                 C   sL   | j dkrtdS d}t| j D ] }d|}t|}||7 }q"|S d S )Nr   rp   zgpu_{}/total_loss_scalar)r6   r   	FetchBlobr:   r   )r1   
total_lossr   r@   Zgpu_lossr   r   r   rp     s    




z$Seq2SeqModelCaffe2.total_loss_scalarc                 C   s0   t | jj dd }|| jj || j d S )Nc                 S   s   t j| dd | jD d d S )Nc                 S   s   g | ]}t |qS r   )str)r   r   r   r   r   r     r   zFSeq2SeqModelCaffe2._init_model.<locals>.create_net.<locals>.<listcomp>)Zinput_blobs)r   Z	CreateNetZexternal_inputs)r;   r   r   r   
create_net  s    z2Seq2SeqModelCaffe2._init_model.<locals>.create_net)r   Z
RunNetOncer/   rS   r;   r<   )r1   r   r   r   r   _init_model  s    zSeq2SeqModelCaffe2._init_modelr   c              	   C   sZ   || _ d| _|d | _|| _|| _|| _|| _|d | _t	dddddt
| j g d S )	NZrnnencoder_typer9   Zcaffe2z--caffe2_log_level=0z--v=0z---caffe2_handle_executor_threads_exceptions=1z--caffe2_mkl_num_threads=)rC   r   r\   rT   rV   r6   num_cpusr9   r   Z
GlobalInitr   )r1   rC   rT   rV   r6   r   r   r   r   __init__  s    

zSeq2SeqModelCaffe2.__init__c                 C   s   | S r+   r   r0   r   r   r   	__enter__  s    zSeq2SeqModelCaffe2.__enter__c                 C   s   t   d S r+   )r   ZResetWorkspace)r1   exc_type	exc_value	tracebackr   r   r   __exit__  s    zSeq2SeqModelCaffe2.__exit__c                 C   s,   t d | jdd |   t d d S )Nz3Initializing Seq2SeqModelCaffe2 from scratch: StartTr&   z4Initializing Seq2SeqModelCaffe2 from scratch: Finish)r   r   r=   r   r0   r   r   r   initialize_from_scratch  s    
z*Seq2SeqModelCaffe2.initialize_from_scratchc                 C   s   t | jd S )Nr   )r   r   rA   r0   r   r   r   get_current_step  s    z#Seq2SeqModelCaffe2.get_current_stepc                 C   s"   t | jt|  d g d S )Nr   )r   FeedBlobrA   r   r    r   r0   r   r   r   inc_current_step  s    z#Seq2SeqModelCaffe2.inc_current_stepc           
      C   s   | j dk r6t|}ttj|D ]\}}t|| qn~t| j D ]r}||d | j  }t|}ttj|D ]H\}}d||}|dv rt	
tj}	nt	
tj|}	tj|||	d qhq@|rt| j nt| jj |   |  S )Nr   z	gpu_{}/{})r   r
   )Zdevice_option)r6   r$   zipr   _fieldsr   r   r:   r   r   rE   rF   rG   ZGpuDeviceTypeZRunNetr<   r/   r;   r   rp   )
r1   r#   ri   Z	batch_objZbatch_obj_nameZbatch_obj_valuer   Z	gpu_batchr@   devr   r   r   step  s0    
zSeq2SeqModelCaffe2.stepc              
   C   s   d ||}ttjd| j g d|dds2J tj	tj
|d}t|d8}|d| d	 | d
  td|  W d    n1 s0    Y  |S )Nz{0}-{1}ZSaveTZminidb)Zabsolute_pathdbZdb_type
checkpointwzmodel_checkpoint_path: "z"
all_model_checkpoint_paths: "z"
zSaved checkpoint file to )r   r   ZRunOperatorOncer   ZCreateOperatorr/   ZGetAllParamsospathjoindirnameopenwriter   r   )r1   Zcheckpoint_path_prefixZcurrent_stepZcheckpoint_pathZcheckpoint_config_pathZcheckpoint_config_filer   r   r   save'  s4    	

,zSeq2SeqModelCaffe2.save)FN)N)r   r   )__name__
__module____qualname__r=   r4   r5   r7   r8   r   r   r(   r3   rp   r   r   r   r   r   r   r   r   r   r   r   r   r   r%   c   s*   4
e
/'  
#r%   c              	   C   sf  t | }t |}g }t||D ]f\}	}
t|	|}t|
|}t|dkr"t|dkr"|d u szt||kr"t||kr"|||f q"W d    n1 s0    Y  W d    n1 s0    Y  |jdd d g g  }}|D ](}|| t||kr|| g }qt|dkrXt||k r<||d  qt||ksNJ || t| |S )Nr   c                 S   s   t | d t | d fS )Nr   r   r   )Zs_tr   r   r   r.   ^  r   zgen_batches.<locals>.<lambda>)keyrm   )	r   r   r   Zget_numberized_sentencer   r   sortrandomshuffle)source_corpustarget_corpussource_vocabtarget_vocabr9   
max_lengthsourcer   Zparallel_sentencesZsource_sentenceZtarget_sentenceZnumerized_source_sentenceZnumerized_target_sentencebatchesr#   Zsentence_pairr   r   r   gen_batchesC  sN    




D




r   c           
      C   s  t | j| j}t | j| j}tdt| tdt| t	| j| j|||d | j
}tdt| t	| j| j|||d | j
}tdt| t|t|t|| jdd}|  t| jD ]}td| d	}|D ]}	||j|	d
d7 }qtd| d	}|D ]}	||j|	dd7 }q$td| | jd ur|| j| qW d    n1 s~0    Y  d S )NzSource vocab size {}zTarget vocab size {}r9   zNumber of training batches {}zNumber of eval batches {}   )rC   rT   rV   r6   r   zEpoch {}r   F)r#   ri   z	training loss {}Tz	eval loss {})r   Z	gen_vocabr   Zunk_thresholdr   r   r   r   r   r   r   Zsource_corpus_evalZtarget_corpus_evalr%   r6   r   r:   Zepochsr   r   r   )
argsrC   r   r   r   Zbatches_evalZ	model_objr   r   r#   r   r   r   run_seq2seq_modelo  s^    



r   c                  C   sB  t d tjdd} | jdtd ddd | jdtd d	dd | jd
td dd | jdtddd | jdtddd | jdtddd | jdtddd | jdtddd | jdtddd | jdd d!d" | jd#d d$d" | jd%td d&dd | jd'td d(dd | jd)td*d+d | jd,td-d.d | jd/td*d0d | jd1td-d2d | jd3td4d5d | jd6td*d7d | jd8td d9d | jd:td d;d |  }t	|j
d<g|j }|jr|j
d- dksJ |d d=  d-  < t	|jd<g|j }t|t	|jrd>nd?|t	||jd@|jt	|jdA|j|j|j|jdB	dC d S )DNiz  zCaffe2: Seq2Seq Training)descriptionz--source-corpuszcPath to source corpus in a text file format. Each line in the file should contain a single sentenceT)typedefaulthelprequiredz--target-corpusz+Path to target corpus in a text file formatz--max-lengthz+Maximal lengths of train and eval sentences)r   r   r   z--unk-threshold2   zCThreshold frequency under which token becomes labeled unknown tokenz--batch-size    zTraining batch sizez--epochs
   z'Number of iterations over training dataz--learning-raterz   zLearning ratez--max-gradient-normrr   zbMax global norm of gradients at the end of each backward pass. We do clipping to match the number.z
--num-gpusr   z&Number of GPUs for data parallel modelz--use-bidirectional-encoder
store_truezJSet flag to use bidirectional recurrent network for first layer of encoder)actionr   z--use-attentionz,Set flag to use seq2seq with attention modelz--source-corpus-evalz:Path to source corpus for evaluation in a text file formatz--target-corpus-evalz:Path to target corpus for evaluation in a text file formatz--encoder-cell-num-unitsi   z&Number of cell units per encoder layerz--encoder-num-layers   zNumber encoder layersz--decoder-cell-num-unitsz)Number of cell units in the decoder layerz--decoder-num-layerszNumber decoder layersz--encoder-embedding-size   z&Size of embedding in the encoder layerz--decoder-embedding-sizez&Size of embedding in the decoder layerz--decoder-softmax-sizez$Size of softmax layer in the decoderz--checkpointzPath to checkpoint)	num_unitsr   rY   rX   )encoder_layer_configsuse_bidirectional_encoder)r?   )	rW   r[   r   r9   r>   rL   rP   rj   r}   )rC   )r   seedargparseArgumentParseradd_argumentr   intrH   
parse_argsdictZencoder_cell_num_unitsZencoder_num_layersr   Zdecoder_cell_num_unitsZdecoder_num_layersr   rb   r9   r?   rL   rP   rj   r}   )parserr   r   r[   r   r   r   main  s    



















r   __main__)N)(r   collectionsloggingrQ   numpyr   r   rI   sysr   Zcaffe2.proto.caffe2_pb2protorF   Zcaffe2.pythonr   r   r   Z)caffe2.python.models.seq2seq.seq2seq_utilpythonmodelsZseq2seqr   Z1caffe2.python.models.seq2seq.seq2seq_model_helperr   	getLoggerr   r   setLevelINFO
addHandlerStreamHandlerstderr
namedtupler   r$   objectr%   r   r   r   r   r   r   r   <module>   s4   

=   c,
2_
