
    \j                         d Z ddlmZmZ ddlmZmZmZ  G d de      Z G d de      Z	 G d d	e	      Z
 G d
 de      Z G d de      Z G d de      Z G d de      Z G d de      Zy)zLanguage Models    )LanguageModel	Smoothing)AbsoluteDiscounting	KneserNey
WittenBellc                       e Zd ZdZddZy)MLEzbClass for providing MLE ngram model scores.

    Inherits initialization from BaseNgramModel.
    Nc                 B    | j                  |      j                  |      S )zReturns the MLE score for a word given a context.

        Args:
        - word is expected to be a string
        - context is expected to be something reasonably convertible to a tuple
        )context_countsfreq)selfwordcontexts      8/root/env/lib/python3.12/site-packages/nltk/lm/models.pyunmasked_scorezMLE.unmasked_score   s      ""7+0066    N)__name__
__module____qualname____doc__r    r   r   r	   r	      s    
7r   r	   c                   *     e Zd ZdZ fdZddZ xZS )LidstonezProvides Lidstone-smoothed scores.

    In addition to initialization arguments from BaseNgramModel also requires
    a number by which to increase the counts, gamma.
    c                 2    t        |   |i | || _        y r   )super__init__gamma)r   r   argskwargs	__class__s       r   r   zLidstone.__init__%       $)&)
r   c                     | j                  |      }||   }|j                         }|| j                  z   |t        | j                        | j                  z  z   z  S )ztAdd-one smoothing: Lidstone or Laplace.

        To see what kind, look at `gamma` attribute on the class.

        )r   Nr   lenvocab)r   r   r   counts
word_count
norm_counts         r   r   zLidstone.unmasked_score)   sR     $$W-D\
XXZ
TZZ'JTZZ4::9U,UVVr   r   r   r   r   r   r   r   __classcell__r!   s   @r   r   r      s    	Wr   r   c                   "     e Zd ZdZ fdZ xZS )LaplacezwImplements Laplace (add one) smoothing.

    Initialization identical to BaseNgramModel because gamma is always 1.
    c                 ,    t        |   dg|i | y )N   )r   r   )r   r   r    r!   s      r   r   zLaplace.__init__;   s    ,T,V,r   r   r   r   r   r   r+   r,   s   @r   r.   r.   5   s    
- -r   r.   c                   ,     e Zd ZdZd fd	ZddZ xZS )StupidBackoffa8  Provides StupidBackoff scores.

    In addition to initialization arguments from BaseNgramModel also requires
    a parameter alpha with which we scale the lower order probabilities.
    Note that this is not a true probability distribution as scores for ngrams
    of the same order do not sum up to unity.
    c                 2    t        |   |i | || _        y r   )r   r   alpha)r   r5   r   r    r!   s       r   r   zStupidBackoff.__init__H   r"   r   c                 >   |r#| j                   dz
  }t        |      |kD  r|| d  }|s%| j                  j                  j	                  |      S | j                  |      }||   }|j                         }|dkD  r||z  S | j                  | j                  ||dd        z  S )Nr0   r   )	orderr%   r'   unigramsr   r   r$   r5   r   )r   r   r   max_ctxr'   r(   r)   s          r   r   zStupidBackoff.unmasked_scoreL   s    jj1nG7|g%!7(),;;'',,T22$$W-D\
XXZ
>
**:: 3 3D'!"+ FFFr   )g?r   r*   r,   s   @r   r3   r3   ?   s    Gr   r3   c                   *     e Zd ZdZ fdZddZ xZS )InterpolatedLanguageModelzLogic common to all interpolated language models.

    The idea to abstract this comes from Chen & Goodman 1995.
    Do not instantiate this class directly!
    c                     |j                  di       }t        |   |fi |  || j                  | j                  fi || _        y )Nparams)popr   r   r&   r'   	estimator)r   smoothing_clsr7   r    r=   r!   s        r   r   z"InterpolatedLanguageModel.__init__g   s@    Hb))&)&tzz4;;I&Ir   c                 $   |r#| j                   dz
  }t        |      |kD  r|| d  }|s| j                  j                  |      S | j                  |   sd\  }}n| j                  j                  ||      \  }}||| j                  ||dd        z  z   S )Nr0   )r   r0   )r7   r%   r?   unigram_scorer'   alpha_gammar   )r   r   r   r9   r5   r   s         r   r   z(InterpolatedLanguageModel.unmasked_scorel   s    jj1nG7|g%!7(),>>//55{{7#  LE5>>55dGDLE5ut224EEEEr   r   r*   r,   s   @r   r;   r;   `   s    J
Fr   r;   c                   "     e Zd ZdZ fdZ xZS )WittenBellInterpolatedz.Interpolated version of Witten-Bell smoothing.c                 0    t        |   t        |fi | y r   )r   r   r   )r   r7   r    r!   s      r   r   zWittenBellInterpolated.__init__   s    U5f5r   r1   r,   s   @r   rE   rE      s    86 6r   rE   c                   $     e Zd ZdZd fd	Z xZS )AbsoluteDiscountingInterpolatedz9Interpolated version of smoothing with absolute discount.c                 8    t        |   t        |fdd|ii| y )Nr=   discount)r   r   r   r   r7   rJ   r    r!   s       r   r   z(AbsoluteDiscountingInterpolated.__init__   s*    	
0:H/E	
IO	
r   )g      ?r1   r,   s   @r   rH   rH      s    C
 
r   rH   c                   $     e Zd ZdZd fd	Z xZS )KneserNeyInterpolatedz-Interpolated version of Kneser-Ney smoothing.c                 ~    d|cxk  rdk  st        d       t        d      t        |   t        |fd||di| y )Nr   r0   zCDiscount must be between 0 and 1 for probabilities to sum to unity.r=   )rJ   r7   )
ValueErrorr   r   r   rK   s       r   r   zKneserNeyInterpolated.__init__   s^    X""U  #U  	u	
2:U%K	
OU	
r   )g?r1   r,   s   @r   rM   rM      s    7
 
r   rM   N)r   nltk.lm.apir   r   nltk.lm.smoothingr   r   r   r	   r   r.   r3   r;   rE   rH   rM   r   r   r   <module>rR      s~     0 H H7- 7 W} W.-h -GM GBF FB66 6
&? 


5 

r   