U
    &c(                     @   s8   d Z ddlZddlmZ G dd dejZdddZdS )	zX
A general wrapper around models with LM heads to generate sequences
using beam search.
    N)nnc                       sN   e Zd Zd fdd	Zdd Zdd Zd	d
 Zdd Zdd Zdd Z	  Z
S )TransformerBeamSearchr   Tc	           	         s$  t t|   || _|| _|j| _|j| _|j| _|| _|| _	|| _
|| _|dkrVdnd| _|| _dd t|D | _tj|tjd| _tjd|| j | jtjd| _tj|| j df| jtjd| _tjd	gtd
g| jd   tjd|| _dd |D dd |D d| _d| _d| _dS )z]
        Attributes:
            mask_word_id: token id that corresponds to the mask
        r   FTc                 S   s   g | ]}g qS  r   .0_r   r   E/tmp/pip-unpacked-wheel-ymerj3tt/transformers/modeling_beam_search.py
<listcomp>>   s     z2TransformerBeamSearch.__init__.<locals>.<listcomp>dtype)stepr      g        z-infc                 S   s   g | ]}g qS r   r   r   r   r   r   r	   J   s     c                 S   s   g | ]}g qS r   r   r   r   r   r   r	   K   s     )Z
predictionscoresN)superr   __init__model	tokenizerstart_token_idend_token_idZpad_token_id	beam_size
min_length
max_lengthblock_repeating_trigramZapply_length_penaltyalpharange
hypothesestorchZarangelongbatch_offsetbeam_offsetfullgrowing_beamZtensorfloatrepeattopk_log_probabilitiesresults_stepis_done)	selfr   r   
batch_sizer   r   r   r   r   	__class__r   r   r      sH          zTransformerBeamSearch.__init__c              
   C   st  |  j d7  _ |d}|d| j }|| jdd7 }| | | jrV| || |j||| j| | jdd\}}|| 	  }|
|}||}|| jd| dd d}	t| jd|	|ddgd| _|| j}
|   |
dddf d}|
 rp| jd| j| jd}t|
dD ]}|| rV|
| d |
|  d}| j| }|D ]0}| j| |||f |||ddf f qv|| r:t| j| dd dd	}|d \}}| jd
 | | | jd | | q:|d d}t|dkr*d| _|d|}| jd|| _|d|d| jd| _|	d|}	|	S )z Grows the beam by one step. r   r   dimNc                 S   s   | d S )Nr   r   )xr   r   r   <lambda>       z,TransformerBeamSearch.step.<locals>.<lambda>T)keyreverser   predictions)r&   sizer   r$   viewenforce_min_lengthr   remove_repeating_trigramsZtopklength_penaltydivfmodr   r   catr!   index_selecteqr   enforce_max_lengthanyr   fill_Znonzeror   r   appendsortedr%   lenr'   )r(   log_probabilitiesZ
vocab_size_Br$   Ztopk_idsZtopk_scoresZtopk_beam_idsZtopk_token_idssurviving_beams_rowsis_finishedZis_top_beam_finishedr4   iZfinished_hypbjZbest_hypZ
best_scoreZbest_predictionZnon_finishedr   r   r   r   P   s~    






  


.
    
zTransformerBeamSearch.stepc                 K   s  dd |  D }dd |  D }dd |  D }t|f|}t|f|}| jj||}t|| jdd|d< tj| j	| j df| j
tjd	| _t| jD ]^}| jd d d
f }| j||}	tjj|	d }
| |
}| jr q|d d||d< q| jS )Nc                 S   s,   i | ]$\}}| d r|td d |qS )encoder_N
startswithrD   r   argumentvaluer   r   r   
<dictcomp>   s   
 z1TransformerBeamSearch.forward.<locals>.<dictcomp>c                 S   s,   i | ]$\}}| d r|td d |qS )decoder_NrM   rO   r   r   r   rR      s   
 c                 S   s*   i | ]"\}}| d s| ds||qS )rL   rS   )rN   rO   r   r   r   rR      s
   
 
 r   r-   Zencoder_hidden_statesr   r
   r,   )itemsdictr   encoderforwardtiler   r   r    r)   r   r   r!   r   r   decoderr   Z
functionalZlog_softmaxr   r'   r=   r%   )r(   Zencoder_input_idskwargsZkwargs_encoderZkwargs_decoderZkwargs_commonZencoder_outputsr   Zdecoder_inputoutputsrE   rG   r   r   r   rW      sH      
  
 
zTransformerBeamSearch.forwardc                    s~   | j d dkrzt|| j D ]\}dd | j| D   fddtdttd D }t|d }||d d krd||< qd S )Nr      c                 S   s   g | ]}|qS r   r   )r   tr   r   r   r	      s     zCTransformerBeamSearch.remove_repeating_trigrams.<locals>.<listcomp>c                    s*   g | ]"} |d    |  |d   fqS )r   r   )r   rI   tokensr   r   r	      s     r,   @x)r&   r   r   r!   rD   wordstuple)r(   rE   rF   rI   ZtrigramsZlast_trigramr   r^   r   r8      s     z/TransformerBeamSearch.remove_repeating_trigramsc                 C   s   | j | jk rd| j| j< d S )Nr`   )r&   r   rE   r   r(   r   r   r   r7      s    z(TransformerBeamSearch.enforce_min_lengthc                 C   s    | j d | jkr| jd d S )Nr   )r&   r   rH   rA   rc   r   r   r   r?      s    z(TransformerBeamSearch.enforce_max_lengthc                 C   s   d| j d  d | j S )Ng      @r   g      @)r&   r   rc   r   r   r   r9      s    z$TransformerBeamSearch.length_penalty)r   T)__name__
__module____qualname__r   r   rW   r8   r7   r?   r9   __classcell__r   r   r*   r   r      s   	  2a/	r   c                 C   s   t tt|  }|dkrD|| |d  |d< ||< | | } t |  }|d  |9  < | d}| |ddd|ddd j| } |dkr| | } | S )z
    Tiles `x` along dimension `dim` `count` times.

    Example:
        >> ex = torch.tensor([1,2],[3,4])
        >> tile(ex, 2, 0)
        torch.Tensor([[1,2],[1,2],[3,4],[3,4]])
    r   r,   r   )	listr   rD   r5   Zpermute
contiguousr6   Z	transposer#   )r/   countr.   permZout_sizebatchr   r   r   rX      s.    	
   rX   )r   )__doc__r   r   Moduler   rX   r   r   r   r   <module>   s
    Y