U
    &cJq                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlmZ e	e
Zdd Zdd Zdd	 Zd
d Zdd Zdd Zd*ddZdd Zdd Zdd Zdd Zdd Zd+ddZd,d d!Zd"d# Zd$d% Zd&d' Zd(d) ZdS )-a   Very heavily inspired by the official evaluation script for SQuAD version 2.0 which was
modified by XLNet authors to update `find_best_threshold` scripts for SQuAD V2.0

In addition to basic functionality, we also compute additional statistics and
plot precision-recall curves if an additional na_prob.json file is provided.
This file is expected to map question ID's to the model's predicted probability
that a question is unanswerable.
    N)BasicTokenizerc                 C   s4   dd }dd }dd }dd }||||| S )	zALower text and remove punctuation, articles and extra whitespace.c                 S   s   t dt j}t |d| S )Nz\b(a|an|the)\b )recompileUNICODEsub)textregex r
   K/tmp/pip-unpacked-wheel-ymerj3tt/transformers/data/metrics/squad_metrics.pyremove_articles   s    z)normalize_answer.<locals>.remove_articlesc                 S   s   d |  S )Nr   )joinsplitr   r
   r
   r   white_space_fix   s    z)normalize_answer.<locals>.white_space_fixc                    s"   t tj d fdd| D S )N c                 3   s   | ]}| kr|V  qd S Nr
   ).0chexcluder
   r   	<genexpr>$   s      z8normalize_answer.<locals>.remove_punc.<locals>.<genexpr>)setstringpunctuationr   r   r
   r   r   remove_punc"   s    
z%normalize_answer.<locals>.remove_puncc                 S   s   |   S r   )lowerr   r
   r
   r   r   &   s    znormalize_answer.<locals>.lowerr
   )sr   r   r   r   r
   r
   r   normalize_answer   s
    r   c                 C   s   | sg S t |  S r   )r   r   )r   r
   r
   r   
get_tokens,   s    r   c                 C   s   t t| t|kS r   )intr   )a_golda_predr
   r
   r   compute_exact2   s    r#   c           	      C   s   t | }t |}t|t|@ }t| }t|dksHt|dkrTt||kS |dkr`dS d| t| }d| t| }d| | ||  }|S )Nr         ?   )r   collectionsCountersumvalueslenr    )	r!   r"   Z	gold_toksZ	pred_tokscommonZnum_sameZ	precisionZrecallf1r
   r
   r   
compute_f16   s    r-   c                    s   i }i }| D ]v}|j }dd |jD }|s0dg}||krFtd|  q||  t fdd|D ||< t fdd|D ||< q||fS )zV
    Computes the exact and f1 scores from the examples and the model predictions
    c                 S   s    g | ]}t |d  r|d  qS r   )r   )r   Zanswerr
   r
   r   
<listcomp>O   s      z"get_raw_scores.<locals>.<listcomp>r   zMissing prediction for %sc                 3   s   | ]}t | V  qd S r   )r#   r   aZ
predictionr
   r   r   Z   s     z!get_raw_scores.<locals>.<genexpr>c                 3   s   | ]}t | V  qd S r   )r-   r/   r1   r
   r   r   [   s     )qas_idanswersprintmax)examplespredsexact_scores	f1_scoresexampler2   Zgold_answersr
   r1   r   get_raw_scoresF   s    r;   c                 C   sF   i }|   D ]4\}}|| |k}|r8t||  ||< q|||< q|S r   )itemsfloat)scoresna_probsqid_to_has_ansZna_prob_threshZ
new_scoresqidr   Zpred_nar
   r
   r   apply_no_ans_threshold`   s    
rB   c              	      s   |sHt  }tddt   | fddt  | fd|fgS t |}tddt fdd|D  | fddtfdd|D  | fd|fgS d S )Nexact      Y@r,   totalc                 3   s   | ]} | V  qd S r   r
   r   k)r8   r
   r   r   y   s     z!make_eval_dict.<locals>.<genexpr>c                 3   s   | ]} | V  qd S r   r
   rF   )r9   r
   r   r   z   s     )r*   r&   OrderedDictr(   r)   )r8   r9   qid_listrE   r
   )r8   r9   r   make_eval_dictk   s      rJ   c                 C   s"   |D ]}|| | d||f < qd S )Nz%s_%sr
   )	main_evalZnew_evalprefixrG   r
   r
   r   
merge_eval   s    rM   c                    s   t fddD }|}|}d}t  fddd}t|D ]R\}	}
|
|krPq>|
 rb||
 }n| |
 rpd}nd}||7 }||kr>|} |
 }q>d	\}}|D ],}
|
 sq|d
7 }|
|krq|||
 7 }qd| t| |d| | fS )Nc                 3   s   | ]} | sd V  qdS    Nr
   rF   r@   r
   r   r      s      z&find_best_thresh_v2.<locals>.<genexpr>        c                    s    |  S r   r
   rG   r?   r
   r   <lambda>       z%find_best_thresh_v2.<locals>.<lambda>keyr   )r   r   rO   rD   r$   r(   sorted	enumerater*   )r7   r>   r?   r@   
num_no_ans	cur_score
best_scorebest_threshrI   irA   diffZhas_ans_scoreZhas_ans_cntr
   r?   r@   r   find_best_thresh_v2   s4    

rc   c                 C   s\   t ||||\}}}t ||||\}	}
}|| d< || d< |	| d< |
| d< || d< || d< d S )N
best_exactbest_exact_threshbest_f1best_f1_threshhas_ans_exact
has_ans_f1)rc   )rK   r7   	exact_rawf1_rawr?   r@   rd   exact_threshrh   rf   	f1_threshri   r
   r
   r   find_all_best_thresh_v2   s    rn   c                    s   t fddD }|}|}d}t  fddd}t|D ]R\}	}
|
|krPq>|
 rb||
 }n| |
 rpd}nd}||7 }||kr>|} |
 }q>d	| t| |fS )
Nc                 3   s   | ]} | sd V  qdS rN   r
   rF   rP   r
   r   r      s      z#find_best_thresh.<locals>.<genexpr>rQ   c                    s    |  S r   r
   rR   rS   r
   r   rT      rU   z"find_best_thresh.<locals>.<lambda>rV   rX   r   rD   rY   )r7   r>   r?   r@   r\   r]   r^   r_   rI   _rA   ra   r
   rb   r   find_best_thresh   s$    

rp   c           
      C   sH   t ||||\}}t ||||\}}	|| d< || d< || d< |	| d< d S )Nrd   re   rf   rg   )rp   )
rK   r7   rj   rk   r?   r@   rd   rl   rf   rm   r
   r
   r   find_all_best_thresh   s    rq   r$   c                 C   s   dd | D }dd |  D }dd |  D }|d krHdd |D }t| |\}}t||||}	t||||}
t|	|
}|rt|	|
|d}t||d |rt|	|
|d}t||d	 |rt|||||| |S )
Nc                 S   s   i | ]}|j t|jqS r
   )r2   boolr3   )r   r:   r
   r
   r   
<dictcomp>   s      z"squad_evaluate.<locals>.<dictcomp>c                 S   s   g | ]\}}|r|qS r
   r
   r   r2   Z
has_answerr
   r
   r   r.      s      z"squad_evaluate.<locals>.<listcomp>c                 S   s   g | ]\}}|s|qS r
   r
   rt   r
   r
   r   r.      s      c                 S   s   i | ]
}|d qS )rQ   r
   rF   r
   r
   r   rs      s      )rI   ZHasAnsZNoAns)r<   r;   rB   rJ   rM   rq   )r6   r7   Zno_answer_probsZno_answer_probability_thresholdZqas_id_to_has_answerZhas_answer_qidsZno_answer_qidsrC   r,   Zexact_thresholdZf1_thresholdZ
evaluationZhas_ans_evalZno_ans_evalr
   r
   r   squad_evaluate   s.       
ru   Fc                 C   sR  dd }t |d}d||}|| }|dkrN|rJtd| |f  |S |t|  d }||\}	}
||\}}t|	t|kr|rtd|	| |S i }| D ]\}}|||< qd	}||kr|| }||
kr|
| }|d	kr|rtd
 |S d	}||kr || }||
kr |
| }|d	kr>|r:td |S |||d  }|S )z;Project the tokenized prediction back to the original text.c                 S   sP   g }t  }t| D ](\}}|dkr&q||t|< || qd|}||fS )Nr   r   )r&   rH   r[   r*   appendr   )r   Zns_charsZns_to_s_mapr`   cZns_textr
   r
   r   _strip_spaces  s    
z%get_final_text.<locals>._strip_spaces)do_lower_caser   rX   z!Unable to find text: '%s' in '%s'rO   z5Length not equal after stripping spaces: '%s' vs '%s'NzCouldn't map start positionzCouldn't map end position)r   r   tokenizefindloggerinfor*   r<   )Z	pred_text	orig_textry   verbose_loggingrx   	tokenizertok_textZstart_positionZend_positionZorig_ns_textZorig_ns_to_s_mapZtok_ns_textZtok_ns_to_s_mapZtok_s_to_ns_mapr`   Z	tok_indexZorig_start_positionZns_start_positionZorig_end_positionZns_end_positionZoutput_textr
   r
   r   get_final_text   sL    







r   c                 C   sN   t t| dd dd}g }tt|D ]"}||kr6 qJ||| d  q&|S )z"Get the n-best logits from a list.c                 S   s   | d S )NrO   r
   xr
   r
   r   rT   R  rU   z#_get_best_indexes.<locals>.<lambda>TrW   reverser   )rZ   r[   ranger*   rv   )Zlogitsn_best_sizeZindex_and_scoreZbest_indexesr`   r
   r
   r   _get_best_indexesP  s    r   c                 C   s|   | sg S d}| D ]}|dks$||kr|}qg }d}| D ]$}t || }|| ||7 }q6g }|D ]}|||  qd|S )z,Compute softmax probability over raw logits.NrQ   )mathexprv   )r>   Z	max_scoreZscoreZ
exp_scoresZ	total_sumr   probsr
   r
   r   _compute_softmax\  s     

r   c           8      C   sd  |rt d|  |r(t d|  |r@|
r@t d|  tt}|D ]}||j | qNi }|D ]}|||j< qltdddddd	g}t	 }t	 }t	 }t
| D ]\}}|| }g }d
}d}d}d}t
|D ]*\}}||j }t|j|}t|j|}|
rL|jd |jd  } | |k rL| }|}|jd }|jd }|D ]}!|D ]}"|!t|jkrpqX|"t|jkrqX|!|jkrqX|"|jkrqX|j|!dsqX|"|!k rƐqX|"|! d }#|#|krqX||||!|"|j|! |j|" d qXqPq|
r,|||dd||d t|dd dd}tdddd	g}$i }%g }&|D ]}'t|&|krt qB||'j }|'jdkr|j|'j|'jd  }(|j|'j })|j|'j }*|j|)|*d  }+||(},|, },d|, },d|+}-t|,|-||	}.|.|%krq\d|%|.< nd}.d|%|.< |&|$|.|'j|'jd q\|
rd|%krf|&|$d||d t|&dkr|&d|$dddd |&s|&|$dddd t|&dkst g }/d}0|&D ]*}1|/|1j|1j  |0s|1j!r|1}0qt"|/}2g }3t
|&D ]F\}4}1t	 }5|1j!|5d< |2|4 |5d< |1j|5d< |1j|5d	< |3|5 qt|3dks\t |
sv|3d d ||j#< n<||0j |0j }6|6||j#< |6|krd||j#< n|0j!||j#< |3||j#< q|rt$|d}7|7%t&j'|ddd  W 5 Q R X |r&t$|d}7|7%t&j'|ddd  W 5 Q R X |r`|
r`t$|d}7|7%t&j'|ddd  W 5 Q R X |S ) zHWrite final predictions to the json file and log-odds of null if needed.zWriting predictions to: zWriting nbest to: zWriting null_log_odds to: PrelimPredictionfeature_indexstart_index	end_indexstart_logit	end_logit@B r   FrO   )r   r   r   r   r   c                 S   s   | j | j S r   )r   r   r   r
   r
   r   rT     rU   z,compute_predictions_logits.<locals>.<lambda>Tr   NbestPredictionr   r   r   )r   r   r   emptyrQ   Nprobabilityw   indent
)(r|   r}   r&   defaultdictlistexample_indexrv   	unique_id
namedtuplerH   r[   r   start_logits
end_logitsr*   tokenstoken_to_orig_maptoken_is_max_contextgetrZ   r   r   r   
doc_tokensconvert_tokens_to_stringstripr   r   r   r   r   insertAssertionErrorr   r   r2   openwritejsondumps)8all_examplesall_featuresall_resultsr   max_answer_lengthry   output_prediction_fileoutput_nbest_fileoutput_null_log_odds_filer   version_2_with_negativeZnull_score_diff_thresholdr   example_index_to_featuresfeatureunique_id_to_resultresult_PrelimPredictionall_predictionsall_nbest_jsonscores_diff_jsonr   r:   featuresprelim_predictions
score_nullZmin_null_feature_indexZnull_start_logitZnull_end_logitr   Zstart_indexesZend_indexesZfeature_null_scorer   r   length_NbestPredictionseen_predictionsnbestpred
tok_tokensorig_doc_startorig_doc_endorig_tokensr   r~   
final_texttotal_scoresbest_non_null_entryentryr   
nbest_jsonr`   output
score_diffwriterr
   r
   r   compute_predictions_logitss  s   
 





		 

	








"""r   c           8      C   s  t ddddddg}t ddddg}td	| t t}|D ]}||j | qBi }|D ]}|||j< q`t 	 }t 	 }t 	 }t
| D ]\}}|| }g }d
}t
|D ]\}}||j }|j}t||}t|D ]}t|	D ]}|j| }|j| } ||	 | }!|j|! }"|j|! }#| |jd kr2q|#|jd krDq|j| dsVq|#| k rbq|#|  d }$|$|krzq|||| |#||"d qqqt|dd dd}i }%g }&|D ]}'t|&|kr q||'j }|j|'j|'jd  }(|j|'j })|j|'j }*|j|)|*d  }+||(},|, },d|,  },d|+}-t!|drV|j"}.n|j#}.t$|,|-|.|}/|/|%krxqd|%|/< |&||/|'j%|'j&d q|&s|&|dddd g }0d}1|&D ]"}2|0|2j%|2j&  |1s|2}1qt'|0}3g }4t
|&D ]F\}}2t 	 }5|2j(|5d< |3| |5d< |2j%|5d< |2j&|5d< |4|5 qt|4dksTt)|1dk	sbt)|}6|6||j*< |1j(||j*< |4||j*< qt+|d}7|7,t-j.|ddd  W 5 Q R X t+|d}7|7,t-j.|ddd  W 5 Q R X |
rt+|d}7|7,t-j.|ddd  W 5 Q R X |S )z XLNet write prediction logic (more complex than Bert's).
        Write final predictions to the json file and log-odds of null if needed.

        Requires utils_squad_evaluate.py
    r   r   r   r   start_log_probend_log_probr   r   zWriting predictions to: %sr   rO   F)r   r   r   r   r   c                 S   s   | j | j S r   )r   r   r   r
   r
   r   rT     rU   z/compute_predictions_log_probs.<locals>.<lambda>Tr   r   ry   )r   r   r   r   g    .Nr   r   r   r   r   )/r&   r   r|   r}   r   r   r   rv   r   rH   r[   Z
cls_logitsminr   r   Zstart_top_indexr   Zend_top_indexZparagraph_lenr   r   rZ   r*   r   r   r   r   r   r   r   r   r   r   hasattrry   Zdo_lowercase_and_remove_accentr   r   r   r   r   r   r2   r   r   r   r   )8r   r   r   r   r   r   r   r   Zstart_n_topZ	end_n_topr   r   r   r   r   r   r   r   r   r   r   r   r   r:   r   r   r   r   Zcur_null_scorer`   jr   r   Zj_indexr   r   r   r   r   r   r   r   r   r   r   r~   ry   r   r   r   r   r   r   r   r   r   r
   r
   r   compute_predictions_log_probs@  s      










  







"""r   )N)Nr$   )F)__doc__r&   r   loggingr   r   r   Ztransformers.tokenization_bertr   	getLogger__name__r|   r   r   r#   r-   r;   rB   rJ   rM   rc   rn   rp   rq   ru   r   r   r   r   r   r
   r
   r
   r   <module>   s6   


"


^ N