U
    "c                     @   s   d dl mZ edk redes(dekr6ddlmZ nd dlZzd dlZ	W n e
k
rf   d dl	Z	Y nX dd	 Zd
d Zdd Zdd ZG dd deZG dd deZee G dd deZee G dd deZee G dd deZee dd ZG dd deZee dd Zd d! Zd"d# Zd$d% Z d&d' Z!d dl"Z"d dl#Z#d dl Z d dl$Z$d d(l%m&Z& d d)l%m'Z' d*d+ Z(d,d- Z)ej*Z+e,ed.ej- ej.e_/ej0e_1d/D ]Z2e)ee2 qe(e e(e eZ3dd0l4m5Z5 G d1d2 d2eZ6dS )3    )version_info)      r   zPython 2.7 or later required.   )_sentencepieceNc                 C   sD   zd| j   }W n tjk
r,   d}Y nX d| jj| jj|f S )Nz	proxy of  z<%s.%s; %s >)this__repr____builtin__	Exception	__class__
__module____name__)selfZstrthis r   :/tmp/pip-unpacked-wheel-qq0nn8u8/sentencepiece/__init__.py
_swig_repr   s
    
r   c                    s    fdd}|S )Nc                    sh   |dkr| j | nN|dkr, | || n8t| |rXttt| |trX | || ntd|  d S )Nthisownr	   z(You cannot add instance attributes to %s)r	   ownhasattr
isinstancegetattrtypepropertyAttributeError)r   namevaluesetr   r   set_instance_attr   s    zE_swig_setattr_nondynamic_instance_variable.<locals>.set_instance_attrr   )r   r    r   r   r   *_swig_setattr_nondynamic_instance_variable   s    	r!   c                    s    fdd}|S )Nc                    s8   t | |r(tt| |ts( | || ntd|  d S )Nz%You cannot add class attributes to %s)r   r   r   r   r   )clsr   r   r   r   r   set_class_attr,   s    z?_swig_setattr_nondynamic_class_variable.<locals>.set_class_attrr   )r   r#   r   r   r   '_swig_setattr_nondynamic_class_variable+   s    r$   c                    s    fdd}|S )zlClass decorator for adding a metaclass to a SWIG wrapped class - a slimmed down version of six.add_metaclassc                    s    | j | j| j S N)r   	__bases____dict__copy)r"   	metaclassr   r   wrapper6   s    z$_swig_add_metaclass.<locals>.wrapperr   )r*   r+   r   r)   r   _swig_add_metaclass4   s    r,   c                   @   s   e Zd ZdZeejZdS )_SwigNonDynamicMetazKMeta class to enforce nondynamic attributes (no new attributes) for a classN)r   r   __qualname____doc__r$   r   __setattr__r   r   r   r   r-   ;   s   r-   c                   @   s   e Zd Zedd dd ddZeZdd Zej	Z
dd	 Zd
d Zdd Zdd Zdd ZeeZeeZeeZeeZeeZdd Zdd Zdd ZeZdS )1ImmutableSentencePieceText_ImmutableSentencePiecec                 C   s
   | j  S r%   r	   r   xr   r   r   <lambda>A       z:ImmutableSentencePieceText_ImmutableSentencePiece.<lambda>c                 C   s   | j |S r%   r2   r4   vr   r   r   r5   A   r6   The membership flagdocc                 C   s   t | t   d S r%   )r   Z:ImmutableSentencePieceText_ImmutableSentencePiece_swiginitZ5new_ImmutableSentencePieceText_ImmutableSentencePiecer   r   r   r   __init__D   s    z:ImmutableSentencePieceText_ImmutableSentencePiece.__init__c                 C   s
   t | S r%   )r   Z8ImmutableSentencePieceText_ImmutableSentencePiece__piecer<   r   r   r   _pieceH   s    z8ImmutableSentencePieceText_ImmutableSentencePiece._piecec                 C   s
   t | S r%   )r   Z:ImmutableSentencePieceText_ImmutableSentencePiece__surfacer<   r   r   r   _surfaceK   s    z:ImmutableSentencePieceText_ImmutableSentencePiece._surfacec                 C   s
   t | S r%   )r   Z5ImmutableSentencePieceText_ImmutableSentencePiece__idr<   r   r   r   _idN   s    z5ImmutableSentencePieceText_ImmutableSentencePiece._idc                 C   s
   t | S r%   )r   Z8ImmutableSentencePieceText_ImmutableSentencePiece__beginr<   r   r   r   _beginQ   s    z8ImmutableSentencePieceText_ImmutableSentencePiece._beginc                 C   s
   t | S r%   )r   Z6ImmutableSentencePieceText_ImmutableSentencePiece__endr<   r   r   r   _endT   s    z6ImmutableSentencePieceText_ImmutableSentencePiece._endc                 C   s   d | j| j| j| j| jS )Nz3piece: "{}"
id: {}
surface: "{}"
begin: {}
end: {}
)formatpieceidsurfacebeginendr<   r   r   r   __str__]   s       z9ImmutableSentencePieceText_ImmutableSentencePiece.__str__c                 C   s<   | j |j ko:| j|jko:| j|jko:| j|jko:| j|jkS r%   )rD   rE   rF   rG   rH   r   otherr   r   r   __eq__e   s    z8ImmutableSentencePieceText_ImmutableSentencePiece.__eq__c                 C   s   t t| S r%   )hashstrr<   r   r   r   __hash__h   s    z:ImmutableSentencePieceText_ImmutableSentencePiece.__hash__N)r   r   r.   r   r   r   r
   r=   r   Z8delete_ImmutableSentencePieceText_ImmutableSentencePiece__swig_destroy__r>   r?   r@   rA   rB   rD   rF   rE   rG   rH   rI   rL   rO   r   r   r   r   r1   @   s$   r1   c                   @   s   e Zd Zedd dd ddZeZdd Zej	Z
dd	 Zd
d Zdd Zdd Zdd ZeeZeeZG dd dZedd Zdd Zdd Zdd ZeZdS )ImmutableSentencePieceTextc                 C   s
   | j  S r%   r2   r3   r   r   r   r5   r   r6   z#ImmutableSentencePieceText.<lambda>c                 C   s   | j |S r%   r2   r7   r   r   r   r5   r   r6   r9   r:   c                 C   s   t | t   d S r%   )r   Z#ImmutableSentencePieceText_swiginitZnew_ImmutableSentencePieceTextr<   r   r   r   r=   u   s    z#ImmutableSentencePieceText.__init__c                 C   s
   t | S r%   )r   Z'ImmutableSentencePieceText__pieces_sizer<   r   r   r   _pieces_sizey   s    z'ImmutableSentencePieceText._pieces_sizec                 C   s   t | |S r%   )r   Z"ImmutableSentencePieceText__piecesr   indexr   r   r   _pieces|   s    z"ImmutableSentencePieceText._piecesc                 C   s
   t | S r%   )r   Z ImmutableSentencePieceText__textr<   r   r   r   _text   s    z ImmutableSentencePieceText._textc                 C   s
   t | S r%   )r   Z!ImmutableSentencePieceText__scorer<   r   r   r   _score   s    z!ImmutableSentencePieceText._scorec                 C   s
   t | S r%   )r   Z,ImmutableSentencePieceText_SerializeAsStringr<   r   r   r   SerializeAsString   s    z,ImmutableSentencePieceText.SerializeAsStringc                   @   s0   e Zd Zdd Zdd Zdd Zdd ZeZd	S )
z9ImmutableSentencePieceText.ImmutableSentencePieceIteratorc                 C   s   || _ | j  | _d S r%   )protorR   lenr   rY   r   r   r   r=      s    zBImmutableSentencePieceText.ImmutableSentencePieceIterator.__init__c                 C   s   | j S r%   rZ   r<   r   r   r   __len__   s    zAImmutableSentencePieceText.ImmutableSentencePieceIterator.__len__c                    sj   t |tr2 fddt jD |j|j|j S |dk rD| j }|dk sV| jkr^td j	|S )Nc                    s   g | ]} j |qS r   )rY   rU   .0ir<   r   r   
<listcomp>   s     zYImmutableSentencePieceText.ImmutableSentencePieceIterator.__getitem__.<locals>.<listcomp>r   zpiece index is out of range)
r   slicerangerZ   startstopstep
IndexErrorrY   rU   rS   r   r<   r   __getitem__   s    
(
zEImmutableSentencePieceText.ImmutableSentencePieceIterator.__getitem__c                 C   s   d dd | D S )N
c                 S   s   g | ]}d  t|qS zpieces {{
{}}}rC   rN   r_   r4   r   r   r   ra      s     zUImmutableSentencePieceText.ImmutableSentencePieceIterator.__str__.<locals>.<listcomp>joinr<   r   r   r   rI      s    zAImmutableSentencePieceText.ImmutableSentencePieceIterator.__str__Nr   r   r.   r=   r]   rh   rI   r
   r   r   r   r   ImmutableSentencePieceIterator   s
   	rp   c                 C   s
   t | S r%   )rQ   rp   r<   r   r   r   pieces   s    z!ImmutableSentencePieceText.piecesc                 C   s   |   |  kS r%   rX   rJ   r   r   r   rL      s    z!ImmutableSentencePieceText.__eq__c                 C   s   t |  S r%   rM   rX   r<   r   r   r   rO      s    z#ImmutableSentencePieceText.__hash__c                 C   s$   d | j| jddd | jD S )Nztext: "{}"
score: {}
{}ri   c                 S   s   g | ]}d  t|qS rj   rk   rl   r   r   r   ra      s     z6ImmutableSentencePieceText.__str__.<locals>.<listcomp>)rC   textscorern   rq   r<   r   r   r   rI      s
     z"ImmutableSentencePieceText.__str__N)r   r   r.   r   r   r   r
   r=   r   Z!delete_ImmutableSentencePieceTextrP   rR   rU   rV   rW   rX   rt   ru   rp   rq   rL   rO   rI   r   r   r   r   rQ   q   s$   
rQ   c                   @   s   e Zd Zedd dd ddZeZdd Zej	Z
dd	 Zd
d Zdd ZG dd dZedd Zdd Zdd Zdd ZeZdS )ImmutableNBestSentencePieceTextc                 C   s
   | j  S r%   r2   r3   r   r   r   r5      r6   z(ImmutableNBestSentencePieceText.<lambda>c                 C   s   | j |S r%   r2   r7   r   r   r   r5      r6   r9   r:   c                 C   s   t | t   d S r%   )r   Z(ImmutableNBestSentencePieceText_swiginitZ#new_ImmutableNBestSentencePieceTextr<   r   r   r   r=      s    z(ImmutableNBestSentencePieceText.__init__c                 C   s
   t | S r%   )r   Z,ImmutableNBestSentencePieceText__nbests_sizer<   r   r   r   _nbests_size   s    z,ImmutableNBestSentencePieceText._nbests_sizec                 C   s   t | |S r%   )r   Z'ImmutableNBestSentencePieceText__nbestsrS   r   r   r   _nbests   s    z'ImmutableNBestSentencePieceText._nbestsc                 C   s
   t | S r%   )r   Z1ImmutableNBestSentencePieceText_SerializeAsStringr<   r   r   r   rX      s    z1ImmutableNBestSentencePieceText.SerializeAsStringc                   @   s0   e Zd Zdd Zdd Zdd Zdd ZeZd	S )
zBImmutableNBestSentencePieceText.ImmutableSentencePieceTextIteratorc                 C   s   || _ | j  | _d S r%   )rY   rw   rZ   r[   r   r   r   r=      s    zKImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__init__c                 C   s   | j S r%   r\   r<   r   r   r   r]      s    zJImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__len__c                    sj   t |tr2 fddt jD |j|j|j S |dk rD| j }|dk sV| jkr^td j	|S )Nc                    s   g | ]} j |qS r   )rY   rx   r^   r<   r   r   ra      s     zbImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__getitem__.<locals>.<listcomp>r   znbests index is out of range)
r   rb   rc   rZ   rd   re   rf   rg   rY   rx   rS   r   r<   r   rh      s    
(
zNImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__getitem__c                 C   s   d dd | D S )Nri   c                 S   s   g | ]}d  t|qS znbests {{
{}}}rk   rl   r   r   r   ra      s     z^ImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__str__.<locals>.<listcomp>rm   r<   r   r   r   rI      s    zJImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__str__Nro   r   r   r   r   "ImmutableSentencePieceTextIterator   s
   	rz   c                 C   s
   t | S r%   )rv   rz   r<   r   r   r   nbests   s    z&ImmutableNBestSentencePieceText.nbestsc                 C   s   |   |  kS r%   rr   rJ   r   r   r   rL      s    z&ImmutableNBestSentencePieceText.__eq__c                 C   s   t |  S r%   rs   r<   r   r   r   rO      s    z(ImmutableNBestSentencePieceText.__hash__c                 C   s   d dd | jD S )Nri   c                 S   s   g | ]}d  t|qS ry   rk   rl   r   r   r   ra      s     z;ImmutableNBestSentencePieceText.__str__.<locals>.<listcomp>)rn   r{   r<   r   r   r   rI      s    z'ImmutableNBestSentencePieceText.__str__N)r   r   r.   r   r   r   r
   r=   r   Z&delete_ImmutableNBestSentencePieceTextrP   rw   rx   rX   rz   r{   rL   rO   rI   r   r   r   r   rv      s   
rv   c                   @   s4  e Zd Zedd dd ddZeZdd Zej	Z
dd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Z d4d5 Z!d6d7 Z"d8d9 Z#d:d; Z$d<d= Z%d>d? Z&d@dA Z'dBdC Z(dDdE Z)dFdG Z*dHdI Z+dJdK Z,dLdM Z-dNdO Z.dPdQ Z/dRdS Z0dTdU Z1dVdW Z2dXdY Z3dZd[ Z4d\d] Z5d^d_ Z6d`da Z7dbdc Z8ddde Z9dfdg Z:dhdi Z;djdk Z<dldm Z=dndne>dodododododpdqdpfdrdsZ?ddtduZ@dvdw ZAdxdy ZBdzd{ ZCd|d} ZDdd~dZEdddZFdddZGdddZHdddZIdddZJdddZKdddZLdddZMdddZNdddZOdddZPdddZQdddZReSdnfddZTeSfddZUeSfddZVdddZWdddZXdddZYdddZZdddZdd Z[dd Z\dd Z]dd Z^dd Z_dd Z`dddZadnS )SentencePieceProcessorc                 C   s
   | j  S r%   r2   r3   r   r   r   r5      r6   zSentencePieceProcessor.<lambda>c                 C   s   | j |S r%   r2   r7   r   r   r   r5      r6   r9   r:   c                 C   s   t | t   d S r%   )r   ZSentencePieceProcessor_swiginitZnew_SentencePieceProcessorr<   r   r   r   r=      s    zSentencePieceProcessor.__init__c                 C   s   t | |S r%   )r   Z.SentencePieceProcessor_LoadFromSerializedProto)r   Z
serializedr   r   r   LoadFromSerializedProto   s    z.SentencePieceProcessor.LoadFromSerializedProtoc                 C   s   t | |S r%   )r   Z,SentencePieceProcessor_SetEncodeExtraOptionsr   Zextra_optionr   r   r   SetEncodeExtraOptions   s    z,SentencePieceProcessor.SetEncodeExtraOptionsc                 C   s   t | |S r%   )r   Z,SentencePieceProcessor_SetDecodeExtraOptionsr~   r   r   r   SetDecodeExtraOptions   s    z,SentencePieceProcessor.SetDecodeExtraOptionsc                 C   s   t | |S r%   )r   Z$SentencePieceProcessor_SetVocabulary)r   Zvalid_vocabr   r   r   SetVocabulary  s    z$SentencePieceProcessor.SetVocabularyc                 C   s
   t | S r%   )r   Z&SentencePieceProcessor_ResetVocabularyr<   r   r   r   ResetVocabulary  s    z&SentencePieceProcessor.ResetVocabularyc                 C   s   t | ||S r%   )r   Z%SentencePieceProcessor_LoadVocabulary)r   filename	thresholdr   r   r   LoadVocabulary  s    z%SentencePieceProcessor.LoadVocabularyc                 G   s   t j| f| S r%   )r   Z'SentencePieceProcessor_CalculateEntropy)r   argsr   r   r   CalculateEntropy  s    z'SentencePieceProcessor.CalculateEntropyc                 C   s
   t | S r%   )r   Z#SentencePieceProcessor_GetPieceSizer<   r   r   r   GetPieceSize  s    z#SentencePieceProcessor.GetPieceSizec                 C   s   t | |S r%   )r   Z SentencePieceProcessor_PieceToIdr   rD   r   r   r   	PieceToId  s    z SentencePieceProcessor.PieceToIdc                 C   s   t | |S r%   )r   Z SentencePieceProcessor_IdToPiecer   rE   r   r   r   	IdToPiece  s    z SentencePieceProcessor.IdToPiecec                 C   s   t | |S r%   )r   ZSentencePieceProcessor_GetScorer   r   r   r   GetScore  s    zSentencePieceProcessor.GetScorec                 C   s   t | |S r%   )r   Z SentencePieceProcessor_IsUnknownr   r   r   r   	IsUnknown  s    z SentencePieceProcessor.IsUnknownc                 C   s   t | |S r%   )r   Z SentencePieceProcessor_IsControlr   r   r   r   	IsControl  s    z SentencePieceProcessor.IsControlc                 C   s   t | |S r%   )r   ZSentencePieceProcessor_IsUnusedr   r   r   r   IsUnused   s    zSentencePieceProcessor.IsUnusedc                 C   s   t | |S r%   )r   ZSentencePieceProcessor_IsByter   r   r   r   IsByte#  s    zSentencePieceProcessor.IsBytec                 C   s
   t | S r%   )r   ZSentencePieceProcessor_unk_idr<   r   r   r   unk_id&  s    zSentencePieceProcessor.unk_idc                 C   s
   t | S r%   )r   ZSentencePieceProcessor_bos_idr<   r   r   r   bos_id)  s    zSentencePieceProcessor.bos_idc                 C   s
   t | S r%   )r   ZSentencePieceProcessor_eos_idr<   r   r   r   eos_id,  s    zSentencePieceProcessor.eos_idc                 C   s
   t | S r%   )r   ZSentencePieceProcessor_pad_idr<   r   r   r   pad_id/  s    zSentencePieceProcessor.pad_idc                 C   s
   t | S r%   )r   Z-SentencePieceProcessor_serialized_model_protor<   r   r   r   serialized_model_proto2  s    z-SentencePieceProcessor.serialized_model_protoc                 C   s   t | |S r%   )r   Z#SentencePieceProcessor_LoadFromFiler   argr   r   r   LoadFromFile5  s    z#SentencePieceProcessor.LoadFromFilec	           	      C   s   t | ||||||||	S r%   )r   Z#SentencePieceProcessor__EncodeAsIds	r   rt   enable_sampling
nbest_sizealphaadd_bosadd_eosreverseemit_unk_piecer   r   r   _EncodeAsIds8  s    z#SentencePieceProcessor._EncodeAsIdsc	           	      C   s   t | ||||||||	S r%   )r   Z&SentencePieceProcessor__EncodeAsPiecesr   r   r   r   _EncodeAsPieces;  s    z&SentencePieceProcessor._EncodeAsPiecesc	           	      C   s   t | ||||||||	S r%   )r   Z/SentencePieceProcessor__EncodeAsSerializedProtor   r   r   r   _EncodeAsSerializedProto>  s    z/SentencePieceProcessor._EncodeAsSerializedProtoc	           	      C   s   t | ||||||||	S r%   )r   Z.SentencePieceProcessor__EncodeAsImmutableProtor   r   r   r   _EncodeAsImmutableProtoA  s    z.SentencePieceProcessor._EncodeAsImmutableProtoc
           
      C   s   t | |||||||||	
S r%   )r   Z(SentencePieceProcessor__EncodeAsIdsBatch
r   insnum_threadsr   r   r   r   r   r   r   r   r   r   _EncodeAsIdsBatchD  s    z(SentencePieceProcessor._EncodeAsIdsBatchc
           
      C   s   t | |||||||||	
S r%   )r   Z+SentencePieceProcessor__EncodeAsPiecesBatchr   r   r   r   _EncodeAsPiecesBatchG  s    z+SentencePieceProcessor._EncodeAsPiecesBatchc
           
      C   s   t | |||||||||	
S r%   )r   Z4SentencePieceProcessor__EncodeAsSerializedProtoBatchr   r   r   r   _EncodeAsSerializedProtoBatchJ  s    z4SentencePieceProcessor._EncodeAsSerializedProtoBatchc
           
      C   s   t | |||||||||	
S r%   )r   Z3SentencePieceProcessor__EncodeAsImmutableProtoBatchr   r   r   r   _EncodeAsImmutableProtoBatchM  s    z3SentencePieceProcessor._EncodeAsImmutableProtoBatchc                 C   s   t | |S r%   )r   Z!SentencePieceProcessor__DecodeIdsr   idsr   r   r   
_DecodeIdsP  s    z!SentencePieceProcessor._DecodeIdsc                 C   s   t | |S r%   )r   Z$SentencePieceProcessor__DecodePiecesr   rq   r   r   r   _DecodePiecesS  s    z$SentencePieceProcessor._DecodePiecesc                 C   s   t | |S r%   )r   Z2SentencePieceProcessor__DecodeIdsAsSerializedProtor   r   r   r   _DecodeIdsAsSerializedProtoV  s    z2SentencePieceProcessor._DecodeIdsAsSerializedProtoc                 C   s   t | |S r%   )r   Z5SentencePieceProcessor__DecodePiecesAsSerializedProtor   r   r   r   _DecodePiecesAsSerializedProtoY  s    z5SentencePieceProcessor._DecodePiecesAsSerializedProtoc                 C   s   t | |S r%   )r   Z1SentencePieceProcessor__DecodeIdsAsImmutableProtor   r   r   r   _DecodeIdsAsImmutableProto\  s    z1SentencePieceProcessor._DecodeIdsAsImmutableProtoc                 C   s   t | |S r%   )r   Z4SentencePieceProcessor__DecodePiecesAsImmutableProtor   r   r   r   _DecodePiecesAsImmutableProto_  s    z4SentencePieceProcessor._DecodePiecesAsImmutableProtoc                 C   s   t | ||S r%   )r   Z&SentencePieceProcessor__DecodeIdsBatchr   r   r   r   r   r   _DecodeIdsBatchb  s    z&SentencePieceProcessor._DecodeIdsBatchc                 C   s   t | ||S r%   )r   Z7SentencePieceProcessor__DecodeIdsAsSerializedProtoBatchr   r   r   r    _DecodeIdsAsSerializedProtoBatche  s    z7SentencePieceProcessor._DecodeIdsAsSerializedProtoBatchc                 C   s   t | ||S r%   )r   Z6SentencePieceProcessor__DecodeIdsAsImmutableProtoBatchr   r   r   r   _DecodeIdsAsImmutableProtoBatchh  s    z6SentencePieceProcessor._DecodeIdsAsImmutableProtoBatchc                 C   s   t | ||S r%   )r   Z)SentencePieceProcessor__DecodePiecesBatchr   r   r   r   _DecodePiecesBatchk  s    z)SentencePieceProcessor._DecodePiecesBatchc                 C   s   t | ||S r%   )r   Z:SentencePieceProcessor__DecodePiecesAsSerializedProtoBatchr   r   r   r   #_DecodePiecesAsSerializedProtoBatchn  s    z:SentencePieceProcessor._DecodePiecesAsSerializedProtoBatchc                 C   s   t | ||S r%   )r   Z9SentencePieceProcessor__DecodePiecesAsImmutableProtoBatchr   r   r   r   "_DecodePiecesAsImmutableProtoBatchq  s    z9SentencePieceProcessor._DecodePiecesAsImmutableProtoBatchc              	   C   s   t | ||||||S r%   )r   Z(SentencePieceProcessor__NBestEncodeAsIdsr   rt   r   r   r   r   r   r   r   r   _NBestEncodeAsIdst  s    z(SentencePieceProcessor._NBestEncodeAsIdsc              	   C   s   t | ||||||S r%   )r   Z+SentencePieceProcessor__NBestEncodeAsPiecesr   r   r   r   _NBestEncodeAsPiecesw  s    z+SentencePieceProcessor._NBestEncodeAsPiecesc              	   C   s   t | ||||||S r%   )r   Z4SentencePieceProcessor__NBestEncodeAsSerializedProtor   r   r   r   _NBestEncodeAsSerializedProtoz  s    z4SentencePieceProcessor._NBestEncodeAsSerializedProtoc              	   C   s   t | ||||||S r%   )r   Z3SentencePieceProcessor__NBestEncodeAsImmutableProtor   r   r   r   _NBestEncodeAsImmutableProto}  s    z3SentencePieceProcessor._NBestEncodeAsImmutableProtoc
           
      C   s   t | |||||||||	
S r%   )r   Z1SentencePieceProcessor__SampleEncodeAndScoreAsIds
r   rt   num_samplesr   worinclude_bestr   r   r   r   r   r   r   _SampleEncodeAndScoreAsIds  s    z1SentencePieceProcessor._SampleEncodeAndScoreAsIdsc
           
      C   s   t | |||||||||	
S r%   )r   Z4SentencePieceProcessor__SampleEncodeAndScoreAsPiecesr   r   r   r   _SampleEncodeAndScoreAsPieces  s    z4SentencePieceProcessor._SampleEncodeAndScoreAsPiecesc
           
      C   s   t | |||||||||	
S r%   )r   Z=SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProtor   r   r   r   &_SampleEncodeAndScoreAsSerializedProto  s    z=SentencePieceProcessor._SampleEncodeAndScoreAsSerializedProtoc
           
      C   s   t | |||||||||	
S r%   )r   Z<SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProtor   r   r   r   %_SampleEncodeAndScoreAsImmutableProto  s    z<SentencePieceProcessor._SampleEncodeAndScoreAsImmutableProtoc                 C   s   t | ||S r%   )r   Z(SentencePieceProcessor__CalculateEntropy)r   rt   r   r   r   r   _CalculateEntropy  s    z(SentencePieceProcessor._CalculateEntropyc                 C   s   t | |||S r%   )r   Z-SentencePieceProcessor__CalculateEntropyBatch)r   r   r   r   r   r   r   _CalculateEntropyBatch  s    z-SentencePieceProcessor._CalculateEntropyBatchNFg?c                 C   sX   t |  || _|| _|| _|| _|| _|| _|	| _|
| _|| _	|sF|rT| j
||d dS )a  Initialzie sentencepieceProcessor.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after
          reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout.
                    nbest_size = {0,1}: No sampling is performed.
                    nbest_size > 1: samples from the nbest_size results.
                    nbest_size < 0: assuming that nbest_size is infinite and samples
                      from the all hypothesis (lattice) using
                      forward-filtering-and-backward-sampling algorithm.
        alpha: Soothing parameter for unigram sampling, and dropout probability of
               merge operations for BPE-dropout.
        num_threads: number of threads in batch processing (Default = -1, auto-detected)
      )
model_filemodel_protoN)$_sentencepiece_processor_init_native	_out_type_add_bos_add_eos_reverse_emit_unk_piece_enable_sampling_nbest_size_alpha_num_threadsLoad)r   r   r   out_typer   r   r   r   r   r   r   r   r   r   r   Init  s    "zSentencePieceProcessor.Initc                 C   s  |dkr| j }|dkr| j}|dkr*| j}|dkr8| j}|dkrF| j}|dkrT| j}|dkrb| j}|	dkrp| j}	|
dkr~| j}
|dkr|dks|dks|dks|	dkrt	d|
dkst
|
tk	rt	dt
|tkrp|tkr| ||
|||	||||	S |tkr| ||
|||	||||	S |dks2|dkrL| ||
|||	||||	S |d	krp| ||
|||	||||	S |tkr| ||||	||||S |tkr| ||||	||||S |dks|dkr| ||||	||||S |d	kr| ||||	||||S t	d
|dS )a~  Encode text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after
                 reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout.
                    nbest_size = {0,1}: No sampling is performed.
                    nbest_size > 1: samples from the nbest_size results.
                    nbest_size < 0: assuming that nbest_size is infinite and samples
                    from the all hypothesis (lattice) using
                    forward-filtering-and-backward-sampling algorithm.
        alpha: Soothing parameter for unigram sampling, and merge probability for
               BPE-dropout (probablity 'p' in BPE-dropout paper).
        num_threads: the number of threads used in the batch processing (Default = -1).
      NTr   r   a  When enable_sampling is True, We must specify "nbest_size > 1" or "nbest_size = -1", and "alpha". "nbest_size" is enabled only on unigram mode ignored in BPE-dropout. when "nbest_size = -1" , this method samples from all candidates on the lattice instead of nbest segmentations.num_threads must be intserialized_protorY   immutable_protozunknown out_type={})r   r   r   r   r   r   r   r   r   RuntimeErrorr   intlistr   rN   r   r   r   r   r   r   r   rC   )r   inputr   r   r   r   r   r   r   r   r   r   r   r   Encode  s         
        
    

    

    
    

    zSentencePieceProcessor.Encodec                 K   s   | j f |td|S Nr   r   r   rN   r   r   kwargsr   r   r   EncodeAsPieces   s    z%SentencePieceProcessor.EncodeAsPiecesc                 K   s   | j f |td|S r   r   r   r   r   r   r   EncodeAsIds$  s    z"SentencePieceProcessor.EncodeAsIdsc                 K   s   | j f |dd|S )Nr   r   r   r   r   r   r   EncodeAsSerializedProto(  s    z.SentencePieceProcessor.EncodeAsSerializedProtoc                 K   s   | j f |dd|S )Nr   r   r   r   r   r   r   EncodeAsImmutableProto,  s    z-SentencePieceProcessor.EncodeAsImmutableProtoc                 K   s   | j f |||tdd|S NTr   r   r   r   r   r   r   r   r   r   r   r   r   r   SampleEncodeAsPieces0  s     z+SentencePieceProcessor.SampleEncodeAsPiecesc                 K   s   | j f |||tdd|S r   r   r   r   r   r   SampleEncodeAsIds5  s     z(SentencePieceProcessor.SampleEncodeAsIdsc                 K   s   | j f |||ddd|S )Nr   Tr   r   r   r   r   r   SampleEncodeAsSerializedProto:  s     z4SentencePieceProcessor.SampleEncodeAsSerializedProtoc                 K   s   | j f |||ddd|S )Nr   Tr   r   r   r   r   r   SampleEncodeAsImmutableProto?  s     z3SentencePieceProcessor.SampleEncodeAsImmutableProtoc                    s   dkrj dkrjdkr*jdkr8jdkrFjdkrTjdkr`dfdd t|tkr fdd|D S  |S )a  NBestEncode text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: nbest size
      Nr   r   c                    s   t kr|  S tkr8|  S dksHdkr\|  S dkrx|  S tdd S )Nr   rY   r   zunknown out_type)r   r   rN   r   r   r   r   rt   )r   r   r   r   r   r   r   r   r   _encodeh  s:                z3SentencePieceProcessor.NBestEncode.<locals>._encodec                    s   g | ]} |qS r   r   r_   nr   r   r   ra   y  s     z6SentencePieceProcessor.NBestEncode.<locals>.<listcomp>)r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   NBestEncodeD  s$    z"SentencePieceProcessor.NBestEncodec                 K   s   | j f ||td|S Nr   r   r   )r   rN   r   r   r   r   r   r   r   NBestEncodeAsPieces~  s
    
z*SentencePieceProcessor.NBestEncodeAsPiecesc                 K   s   | j f ||td|S r   )r   r   r   r   r   r   NBestEncodeAsIds  s
    
z'SentencePieceProcessor.NBestEncodeAsIdsc                 K   s   | j f ||dd|S )Nr   r   r   r   r   r   r   NBestEncodeAsSerializedProto  s
    
z3SentencePieceProcessor.NBestEncodeAsSerializedProtoc                 K   s   | j f ||dd|S )Nr   r   r  r   r   r   r   NBestEncodeAsImmutableProto  s
    
z2SentencePieceProcessor.NBestEncodeAsImmutableProtoc              
      s   dkr	j dkr	jdkr*	jdkr8	jdkrF	jdkrRddkr^d
dkrjd
dkrvddkrtdr
std	
f
dd	 t|tkr҇ fd
d|D S  |S )a   SampleEncodeAndScore text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str or 'serialized_proto' or 'immutable_proto'
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        num_samples: How many samples to return (Default = 1)
        alpha: inverse temperature for sampling
        wor: whether to sample without replacement (Default = false)
        include_best: whether to include the best tokenization, requires wor=True (Default = false)
      Nr   g      ?Fr   znum_examples must be positivez8When include_best is True, We must specify "wor = True".c                    s   t kr"| 	 	S tkrD| 	 	S dksTdkrn| 	 	S dkr| 	 	S tdd S )Nr   rY   r   zunknown output type)r   r   rN   r   r   r   r   r   )
r   r   r   r   r   r   r   r   r   r   r   r   r     s:                z<SentencePieceProcessor.SampleEncodeAndScore.<locals>._encodec                    s   g | ]} |qS r   r   r   r   r   r   ra     s     z?SentencePieceProcessor.SampleEncodeAndScore.<locals>.<listcomp>)r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   SampleEncodeAndScore  s4    z+SentencePieceProcessor.SampleEncodeAndScorec                 K   s   | j f |||td|S Nr   r   r   r   )r  rN   r   r   r   r   r   r   r   r   SampleEncodeAndScoreAsPieces  s
    z3SentencePieceProcessor.SampleEncodeAndScoreAsPiecesc                 K   s   | j f |||td|S r  )r  r   r  r   r   r   SampleEncodeAndScoreAsIds  s
    z0SentencePieceProcessor.SampleEncodeAndScoreAsIdsc                 K   s   | j f |||dd|S )Nr   r  r  r  r   r   r   %SampleEncodeAndScoreAsSerializedProto  s
    z<SentencePieceProcessor.SampleEncodeAndScoreAsSerializedProtoc                 K   s   | j f |||dd|S )Nr   r  r
  r  r   r   r   $SampleEncodeAndScoreAsImmutableProto  s
    z;SentencePieceProcessor.SampleEncodeAndScoreAsImmutableProtoc                 C   s  |dkr| j }|dks"t|tk	r*td|s2dS |tkrt|tkrT| |gS t|tkrl| |gS t|tkrt|dkst|d tkr| |S t|d tkr| |S t|d tkrt|d dkst|d d tkr| 	||S t|d d tkr| 
||S |dkrt|tkrB| |gS t|tkr\| |gS t|tkrt|dkst|d tkr| |S t|d tkr| |S t|d tkrt|d dkst|d d tkr| ||S t|d d tkr| ||S |dkrt|tkr<| |gS t|tkrV| |gS t|tkrt|dkst|d tkr| |S t|d tkr| |S t|d tkrt|d dkst|d d tkr| ||S t|d d tkr| ||S tddS )zDecode processed id or token sequences.

      Args:
        out_type: output type. str or 'serialized_proto' or 'immutable_proto' (Default = str)
        num_threads: the number of threads used in the batch processing (Default = -1).
      Nr   r   r   r   r   zunknown output or input type)r   r   r   r   rN   r   r   r   rZ   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   Decode  sj    


$
 

(
 

(zSentencePieceProcessor.Decodec                 K   s   | j f ||d|S r   r  r   r   r   r   r   r   r   DecodePieces@  s    z#SentencePieceProcessor.DecodePiecesc                 K   s   | j f ||d|S r   r  r  r   r   r   	DecodeIdsD  s    z SentencePieceProcessor.DecodeIdsr   c                 K   s   | j f ||d|S r   r  r  r   r   r   DecodePiecesAsSerializedProtoH  s    z4SentencePieceProcessor.DecodePiecesAsSerializedProtoc                 K   s   | j f ||d|S r   r  r  r   r   r   DecodeIdsAsSerializedProtoL  s    z1SentencePieceProcessor.DecodeIdsAsSerializedProtor   c                 K   s   | j f ||d|S r   r  r  r   r   r   DecodePiecesAsImmutableProtoP  s    z3SentencePieceProcessor.DecodePiecesAsImmutableProtoc                 K   s   | j f ||d|S r   r  r  r   r   r   DecodeIdsAsImmutableProtoT  s    z0SentencePieceProcessor.DecodeIdsAsImmutableProtoc                 C   sP   t |tkrD|dkr| j}|dks.t |tk	r6td| |||S | ||S )zCalculate sentence entropyNr   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   X  s    c                 C   s   |   S r%   r   r<   r   r   r   
piece_sized  s    z!SentencePieceProcessor.piece_sizec                 C   s   |   S r%   r  r<   r   r   r   
vocab_sizeh  s    z!SentencePieceProcessor.vocab_sizec                 C   s   |   S r%   )r   r<   r   r   r   __getstate__l  s    z#SentencePieceProcessor.__getstate__c                 C   s   |    | | d S r%   )r=   r}   )r   r   r   r   r   __setstate__p  s    z#SentencePieceProcessor.__setstate__c                 C   s   |   S r%   r  r<   r   r   r   r]   u  s    zSentencePieceProcessor.__len__c                 C   s
   |  |S r%   )r   r   r   r   r   rh   y  s    z"SentencePieceProcessor.__getitem__c                 C   s(   |r|rt d|r| |S | |S )a  Overwride SentencePieceProcessor.Load to support both model_file and model_proto.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto. Either `model_file`
          or `model_proto` must be set.
      z-model_file and model_proto must be exclusive.)r   r}   r   )r   r   r   r   r   r   r   }  s
    
zSentencePieceProcessor.Load)	NNNNNNNNN)NN)NN)NN)NN)NNNNNN)N)N)N)N)	NNNNNNNNN)NN)NN)NN)NN)r   )r   )r   )r   )N)NN)br   r   r.   r   r   r   r
   r=   r   Zdelete_SentencePieceProcessorrP   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r	  r  r  rN   r  r  r  r  r  r  r  r  r  r  r  r]   rh   r   r   r   r   r   r|      s   
2         
^



      
:



         
M



M




r|   c                 C   s
   t | S r%   )r   SetRandomGeneratorSeed)seedr   r   r   r    s    r  c                   @   s   e Zd Zedd dd ddZdd ZeZedd	 Z	ed
d Z
edd Zedd Zedd ZedddZedddZdS )SentencePieceTrainerc                 C   s
   | j  S r%   r2   r3   r   r   r   r5     r6   zSentencePieceTrainer.<lambda>c                 C   s   | j |S r%   r2   r7   r   r   r   r5     r6   r9   r:   c                 O   s   t dd S )NzNo constructor defined)r   )r   r   r   r   r   r   r=     s    zSentencePieceTrainer.__init__c                 C   s
   t | S r%   r   %SentencePieceTrainer__TrainFromStringr   r   r   r   _TrainFromString  s    z%SentencePieceTrainer._TrainFromStringc                 C   s
   t | S r%   r   "SentencePieceTrainer__TrainFromMapr   r   r   r   _TrainFromMap  s    z"SentencePieceTrainer._TrainFromMapc                 C   s   t | |S r%   r   #SentencePieceTrainer__TrainFromMap2r   iterr   r   r   _TrainFromMap2  s    z#SentencePieceTrainer._TrainFromMap2c                 C   s
   t | S r%   r   #SentencePieceTrainer__TrainFromMap3r$  r   r   r   _TrainFromMap3  s    z#SentencePieceTrainer._TrainFromMap3c                 C   s   t | |S r%   r   #SentencePieceTrainer__TrainFromMap4r(  r   r   r   _TrainFromMap4  s    z#SentencePieceTrainer._TrainFromMap4Nc           	      K   s   | dk	rt | tkrt| S dd }d}d}i }| D ]0\}}|dkrP|}q:|dkr^|}q:||||< q:|r|rt||}n
t|}|| n|rt||S t	|S dS )zDTrain Sentencepiece model. Accept both kwargs and legacy string arg.Nc                 S   s^   t | tkrRtjd dkr"t }nt }tj|dd}|dd | D  |	 S t
| S dS )zEncode value to CSV..r      r   )lineterminatorc                 S   s   g | ]}t |qS r   )rN   )r_   r8   r   r   r   ra     s     z@SentencePieceTrainer._Train.<locals>._encode.<locals>.<listcomp>N)r   r   sysr   StringIOBytesIOcsvwriterwriterowgetvaluerN   )r   fr7  r   r   r   r     s    z,SentencePieceTrainer._Train.<locals>._encode)sentence_iteratorZsentence_reader)model_writer)
r   rN   r  r!  itemsr0  r-  writer*  r%  )	r   r   r   r;  r<  Z
new_kwargskeyr   r   r   r   r   _Train  s.    


zSentencePieceTrainer._Trainc              	   K   s.   t |d tjf d| i| W 5 Q R X d S )N)ostreamr   )
_LogStreamr  r@  )r   Z	logstreamr   r   r   r   Train  s    zSentencePieceTrainer.Train)N)NN)r   r   r.   r   r   r=   r   r
   staticmethodr!  r%  r*  r-  r0  r@  rC  r   r   r   r   r    s"   




,r  c                 C   s
   t | S r%   r  r   r   r   r   r    s    r  c                 C   s
   t | S r%   r"  r$  r   r   r   r#    s    r#  c                 C   s   t | |S r%   r&  r(  r   r   r   r'    s    r'  c                 C   s
   t | S r%   r+  r$  r   r   r   r,    s    r,  c                 C   s   t | |S r%   r.  r(  r   r   r   r/    s    r/  )r4  )r5  c                 C   sh   i }| j  D ]6\}}td|rtdd| dd}|||< q| D ]\}}t| || qNdS )z1Added snake_cased method from CammelCased method.z^[A-Z]+z(?<!^)(?=[A-Z])_Zn_bestZnbestN)r'   r=  rematchsublowerreplacesetattr)	classnameZ	snake_mapkr8   Zsnaker   r   r   _add_snake_case  s     
rN  c                    s4   t | |dfdd  fdd}t| || dS )z4Enables batch request for the method classname.name.Nc                    s2   t |tkr(|dk s ||  kr(td | |S )Nr   zpiece id is out of range.)r   r   r  rg   )r8   r   )funcr   r   _func  s     z_batchnize.<locals>._funcc                    s.   t |tkr  fdd|D S  |S d S )Nc                    s   g | ]} |qS r   r   r   )rP  r   r   r   ra     s     z5_batchnize.<locals>._batched_func.<locals>.<listcomp>)r   r   r   )rP  r<   r   _batched_func  s    z!_batchnize.<locals>._batched_func)r   rK  )rL  r   rQ  r   )rP  rO  r   
_batchnize	  s    rR  r=   )r   r   r   r   r   r   r   )__version__c                   @   s&   e Zd ZdddZdd Zdd ZdS )	rB  Nc                 C   s    || _ | j d k	rtj | _d S r%   )rA  r3  stderrfilenoorig_stream_fileno)r   rA  r   r   r   r=   -  s    
z_LogStream.__init__c                 C   s0   | j d k	r,t| j| _t| j  | j d S r%   )rA  osduprV  orig_stream_dupdup2rU  r<   r   r   r   	__enter__2  s    
z_LogStream.__enter__c                 C   s@   | j d k	r<t| j t| j| j t| j | j   d S r%   )rA  rW  closerV  rZ  rY  )r   r   r   	tracebackr   r   r   __exit__7  s
    
z_LogStream.__exit__)N)r   r   r.   r=   r[  r^  r   r   r   r   rB  ,  s   
rB  )7r3  r   Z_swig_python_version_infor   __package__r   r   r   builtinsr   ImportErrorr   r!   r$   r,   r   r-   objectr1   Z>ImmutableSentencePieceText_ImmutableSentencePiece_swigregisterrQ   Z'ImmutableSentencePieceText_swigregisterrv   Z,ImmutableNBestSentencePieceText_swigregisterr|   Z#SentencePieceProcessor_swigregisterr  r  Z!SentencePieceTrainer_swigregisterr  r#  r'  r,  r/  rF  r6  rW  ior4  r5  rN  rR  r=   r   rK  r   r   ZTokenizer  Z
DetokenizemZset_random_generator_seed_versionrS  rB  r   r   r   r   <module>   sl   	/
D
8
     !
O
