
    \j1                     J   d Z ddlZddlZddlZddlZddlmZ d Z e       Z	ej                  j                  ej                  j                  d      dd      Zej                  j                  ej                  j                  d      d      Zd	Zd
 Z G d dej"                        Z G d dej"                        Z G d dej"                        Zd Z ede	      Z ede      Z ede      Z ej2                   ee      de        G d dej"                               Zy)z
Unit tests for VerbnetCorpusReader, covering version support (2.1, 3.2, 3.3)
and the longid/shortid bug fix for dash-style numeric identifiers.
    N)VerbnetCorpusReaderc                      t         j                  j                  D ]F  } t        j                  j	                  | dd      }t        j                  j                  |      sD|c S  y)z7Search nltk.data.path for the verbnet corpus directory.corporaverbnetN)nltkdatapathosjoinisdir)
search_dir	candidates     E/root/env/lib/python3.12/site-packages/nltk/test/unit/test_verbnet.py_find_verbnet_in_nltk_datar      sJ    iinn 
GGLLY	B	77==#     ~	Downloadsr   z
verbnet3.3z(?!\.).*\.xmlc                     | d uxrF t         j                  j                  |       xr% t        d t        j                  |       D              S )Nc              3   >   K   | ]  }|j                  d         yw)z.xmlN)endswith).0fs     r   	<genexpr>z$_corpus_available.<locals>.<genexpr>'   s     =q

6"=s   )r
   r	   r   anylistdir)r	   s    r   _corpus_availabler   #   s@    D 	>GGMM$	>=BJJt,<==r   c                   v    e Zd ZdZej
                  Zej                  Zd Z	d Z
d Zd Zd Zd Zd Zd	 Zd
 Zy)TestRegexPatternsz)Test _LONGID_RE and _SHORTID_RE directly.c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y )Nconfess-37.10   confess   37.10	longid_rematchgroupselfms     r   test_longid_simplez$TestRegexPatterns.test_longid_simple5   s@    NN  1QWWQZ9,w1FFF1Fr   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y )Nanimal_sounds-38r!   animal_soundsr#   38r%   r)   s     r   test_longid_underscorez(TestRegexPatterns.test_longid_underscore9   sA    NN  !34QWWQZ?2qwwqzT7III7Ir   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y )N	act-114-1r!   actr#   114-1r%   r)   s     r   test_longid_with_dash_shortidz/TestRegexPatterns.test_longid_with_dash_shortid=   s@    NN  -QWWQZ5(QWWQZ7-BBB-Br   c                 >    | j                   j                  d      J y)z1The bug: '114-1' must NOT be matched as a longid.r5   Nr&   r'   r*   s    r   "test_longid_rejects_numeric_prefixz4TestRegexPatterns.test_longid_rejects_numeric_prefixA   s    ~~##G,444r   c                 >    | j                   j                  d      J y Nr$   r8   r9   s    r   test_longid_rejects_pure_digitsz1TestRegexPatterns.test_longid_rejects_pure_digitsE   s    ~~##G,444r   c                 >    | j                   j                  d      sJ y r<   
shortid_rer'   r9   s    r   test_shortid_dottedz%TestRegexPatterns.test_shortid_dottedI       $$W---r   c                 >    | j                   j                  d      sJ y )Nr5   r?   r9   s    r   test_shortid_dashedz%TestRegexPatterns.test_shortid_dashedL   rB   r   c                 >    | j                   j                  d      sJ y )N
22.2-3-1-1r?   r9   s    r   test_shortid_complexz&TestRegexPatterns.test_shortid_complexO   s    $$\222r   c                 >    | j                   j                  d      J y )Nr    r?   r9   s    r   test_shortid_rejects_alphaz,TestRegexPatterns.test_shortid_rejects_alphaR   s    $$_5===r   N)__name__
__module____qualname____doc__r   
_LONGID_REr&   _SHORTID_REr@   r,   r1   r6   r:   r=   rA   rD   rG   rI    r   r   r   r   .   sK    3#..I$00JGJC55..3>r   r   c                       e Zd ZdZej
                  Zej                  Zd Z	d Z
d Zd Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zy)TestRealEntrieszTest longid/shortid regex matching against real entries from each version.

    These examples are taken directly from VerbNet 2.1, 3.2, and 3.3 corpora
    so we can verify correctness without requiring the files on disk.
    c                    | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ | j                  j                  d      sJ | j                   j                  d      J y)z.VerbNet 2.1: accompany-51.7 (simple dotted id)zaccompany-51.7r!   	accompanyr#   z51.7Nr&   r'   r(   r@   r)   s     r   test_v21_accompany_51_7z'TestRealEntries.test_v21_accompany_51_7f   sq    NN  !12QWWQZ;.1771:3GGG$$V,,,~~##F+333r   c                    | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ | j                  j                  d      sJ | j                   j                  d      J y)z9VerbNet 2.1: admire-31.2-1 (dotted id with dash subclass)zadmire-31.2-1r!   admirer#   z31.2-1NrU   r)   s     r   test_v21_admire_31_2_1z&TestRealEntries.test_v21_admire_31_2_1m   sp    NN  1QWWQZ8+
h0FFF$$X...~~##H-555r   c                    | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ | j                  j                  d      sJ | j                   j                  d      J y)z>VerbNet 2.1: animal_sounds-38 (underscore in name, integer id)r.   r!   r/   r#   r0   NrU   r)   s     r   test_v21_animal_sounds_38z)TestRealEntries.test_v21_animal_sounds_38t   sq    NN  !34QWWQZ?2qwwqzT7III$$T***~~##D)111r   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y)zVerbNet 2.1: weather-57z
weather-57r!   weatherr#   57Nr%   r)   s     r   test_v21_weather_57z#TestRealEntries.test_v21_weather_57{   s@    NN  .QWWQZ9,t1CCC1Cr   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ | j                  j                  d      sJ y)z+VerbNet 2.1: put-9.1-2 (subclass with dash)z	put-9.1-2r!   putr#   z9.1-2NrU   r)   s     r   test_v21_put_9_1_2z"TestRealEntries.test_v21_put_9_1_2   sU    NN  -QWWQZ5(QWWQZ7-BBB$$W---r   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y)zVerbNet 3.2: absorb-39.8zabsorb-39.8r!   absorbr#   z39.8Nr%   r)   s     r   test_v32_absorb_39_8z$TestRealEntries.test_v32_absorb_39_8   @    NN  /QWWQZ8+
f0DDD0Dr   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y)z%VerbNet 3.2: advise-37.9-1 (subclass)zadvise-37.9-1r!   adviser#   z37.9-1Nr%   r)   s     r   test_v32_advise_37_9_1z&TestRealEntries.test_v32_advise_37_9_1   s@    NN  1QWWQZ8+
h0FFF0Fr   c                    | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ | j                  j                  d      sJ | j                   j                  d      J y)z;VerbNet 3.2: amalgamate-22.2-3-1-1 (deeply nested subclass)zamalgamate-22.2-3-1-1r!   
amalgamater#   rF   NrU   r)   s     r   test_v32_amalgamate_22_2_3_1_1z.TestRealEntries.test_v32_amalgamate_22_2_3_1_1   sq    NN  !89QWWQZ</AGGAJ,4NNN$$\222~~##L1999r   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y)z6VerbNet 3.2: body_internal_motion-49 (underscore name)zbody_internal_motion-49r!   body_internal_motionr#   49Nr%   r)   s     r    test_v32_body_internal_motion_49z0TestRealEntries.test_v32_body_internal_motion_49   sB    NN  !:;QWWQZ#99aggajD>PPP>Pr   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y)z<VerbNet 3.2: entity_specific_cos-45.5 (multiple underscores)zentity_specific_cos-45.5r!   entity_specific_cosr#   z45.5Nr%   r)   s     r   !test_v32_entity_specific_cos_45_5z1TestRealEntries.test_v32_entity_specific_cos_45_5   sB    NN  !;<QWWQZ#88QWWQZ6=QQQ=Qr   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y)z&VerbNet 3.3: act-114 (top-level class)zact-114r!   r4   r#   114Nr%   r)   s     r   test_v33_act_114z TestRealEntries.test_v33_act_114   s@    NN  +QWWQZ5(QWWQZ5-@@@-@r   c                    | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ | j                   j                  d      J | j                  j                  d      sJ y)z.VerbNet 3.3: act-114-1 (the original bug case)r3   r!   r4   r#   r5   NrU   r)   s     r   test_v33_act_114_1z"TestRealEntries.test_v33_act_114_1   sp    NN  -QWWQZ5(QWWQZ7-BBB~~##G,444$$W---r   c                    | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ | j                   j                  d      J | j                  j                  d      sJ y)z*VerbNet 3.3: act-114-1-1 (nested subclass)act-114-1-1r!   r4   r#   114-1-1NrU   r)   s     r   test_v33_act_114_1_1z$TestRealEntries.test_v33_act_114_1_1   sp    NN  /QWWQZ5(QWWQZ9-DDD~~##I.666$$Y///r   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y)zVerbNet 3.3: acquiesce-95.1-1zacquiesce-95.1-1r!   	acquiescer#   z95.1-1Nr%   r)   s     r   test_v33_acquiesce_95_1_1z)TestRealEntries.test_v33_acquiesce_95_1_1   sA    NN  !34QWWQZ;.1771:3III3Ir   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y)zVerbNet 3.3: accept-77.1zaccept-77.1r!   acceptr#   z77.1Nr%   r)   s     r   test_v33_accept_77_1z$TestRealEntries.test_v33_accept_77_1   rf   r   c                     | j                   j                  d      }|r(|j                  d      dk(  r|j                  d      dk(  sJ y)zDVerbNet 3.3: entity_specific_modes_being-47.2 (long underscore name)z entity_specific_modes_being-47.2r!   entity_specific_modes_beingr#   z47.2Nr%   r)   s     r   )test_v33_entity_specific_modes_being_47_2z9TestRealEntries.test_v33_entity_specific_modes_being_47_2   sE    NN  !CD!''!* ==!''!*PVBV	
VBVr   N)rJ   rK   rL   rM   r   rN   r&   rO   r@   rV   rY   r[   r_   rb   re   ri   rl   rp   rs   rv   rx   r|   r   r   r   rP   r   r   rR   rR   Z   sw     $..I$00J462D
.E
G
:Q
RA
.0J
E

r   rR   c                       e Zd Zd Zd Zy)TestVersionParameterc                 |    dt         j                  v sJ dt         j                  v sJ dt         j                  v sJ y )N2.13.23.3)r   SUPPORTED_VERSIONSr9   s    r   test_supported_versionsz,TestVersionParameter.test_supported_versions   sA    +>>>>>+>>>>>+>>>>>r   c                     t        j                  t        d      5  t        ddd       d d d        y # 1 sw Y   y xY w)Nznot supported)r'   z/tmpz.*z4.0version)pytestraises
ValueErrorr   r9   s    r   test_invalid_version_raisesz0TestVersionParameter.test_invalid_version_raises   s1    ]]:_= 	=e<	= 	= 	=s   4=N)rJ   rK   rL   r   r   rP   r   r   r   r      s    ?
=r   r   c                      t        j                  t              d  d        G  fddt         j                               }d j	                  dd       |_        |j
                  |_        |S )zAFactory that returns a test class for a specific VerbNet version.zVerbNet z not found at c                       e Zd Ze fd       ZfdZd Zd Zd Zd Z	d Z
d Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zy)"_make_corpus_tests.<locals>._Testsc                 4    t        t              | _        y )Nr   )r   _FILEIDSvn)clsrootr   s    r   
setUpClassz-_make_corpus_tests.<locals>._Tests.setUpClass   s    (xICFr   c                 <    | j                   j                  k(  sJ y )N)r   r   )r*   r   s    r   test_version_propertyz8_make_corpus_tests.<locals>._Tests.test_version_property   s    77??g---r   c                 T    t        | j                  j                               dkD  sJ y Nr   )lenr   classidsr9   s    r   test_classids_nonemptyz9_make_corpus_tests.<locals>._Tests.test_classids_nonempty   s"    tww'')*Q...r   c                 T    t        | j                  j                               dkD  sJ y r   )r   r   lemmasr9   s    r   test_lemmas_nonemptyz7_make_corpus_tests.<locals>._Tests.test_lemmas_nonempty   s     tww~~'(1,,,r   c                 T    t        | j                  j                               dkD  sJ y r   )r   r   
wordnetidsr9   s    r   test_wordnetids_nonemptyz;_make_corpus_tests.<locals>._Tests.test_wordnetids_nonempty   s"    tww))+,q000r   c                 T    t        | j                  j                               dkD  sJ y r   )r   r   fileidsr9   s    r   test_fileids_nonemptyz8_make_corpus_tests.<locals>._Tests.test_fileids_nonempty   s     tww()A---r   c                     | j                   j                         D ]E  }| j                   j                  |      }| j                   j                  |      |k(  r=J d|         y)z7Every longid must survive shortid -> longid round-trip.zround-trip failed for N)r   r   shortidlongid)r*   cidsids      r   test_roundtrip_all_idsz9_make_corpus_tests.<locals>._Tests.test_roundtrip_all_ids   sZ    ww'') Rggooc*ww~~c*c1Q5KC53QQ1Rr   c                     | j                   j                         d   }| j                   j                  |      }|J |j                  d      |k(  sJ y Nr   ID)r   r   vnclassget)r*   r   vcs      r   test_vnclass_by_longidz9_make_corpus_tests.<locals>._Tests.test_vnclass_by_longid   sK    ''""$Q'C%B>!>66$<3&&&r   c                     | j                   j                         d   }| j                   j                  |      }| j                   j                  |      }|J |j	                  d      |k(  sJ y r   )r   r   r   r   r   )r*   r   r   r   s       r   test_vnclass_by_shortidz:_make_corpus_tests.<locals>._Tests.test_vnclass_by_shortid  s]    ''""$Q'C''//#&C%B>!>66$<3&&&r   c                     | j                   j                         d   }| j                   j                  |      }t        |t              sJ y r   )r   r   frames
isinstancelist)r*   r   r   s      r   test_framesz._make_corpus_tests.<locals>._Tests.test_frames	  s;    ''""$Q'CWW^^C(Ffd+++r   c                     | j                   j                         d   }| j                   j                  |      }t        |t              sJ y r   )r   r   	themrolesr   r   )r*   r   roless      r   test_themrolesz1_make_corpus_tests.<locals>._Tests.test_themroles  s=    ''""$Q'CGG%%c*EeT***r   c                     | j                   j                         d   }| j                   j                  |      }t        |t              sJ y r   )r   r   
subclassesr   r   )r*   r   subss      r   test_subclassesz2_make_corpus_tests.<locals>._Tests.test_subclasses  s=    ''""$Q'C77%%c*DdD)))r   c                     | j                   j                         d   }| j                   j                  |      }t        |t              rt        |      dkD  sJ y r   )r   r   pprintr   strr   )r*   r   pps      r   test_pprintz._make_corpus_tests.<locals>._Tests.test_pprint  sG    ''""$Q'C$Bb#&3r7Q;66;r   c                     | j                   j                         d   }| j                   j                  |      }t        |      dkD  sJ y )Nr   )lemma)r   r   r   r   )r*   r   cidss      r   test_classids_by_lemmaz9_make_corpus_tests.<locals>._Tests.test_classids_by_lemma  s>    GGNN$Q'E77##%#0Dt9q= =r   c                     | j                   j                         d   }| j                   j                  |      }t        |      dkD  sJ y )Nr   )fileid)r   r   r   r   )r*   fidr   s      r   test_classids_by_fileidz:_make_corpus_tests.<locals>._Tests.test_classids_by_fileid"  s>    ''//#A&C77##3#/Dt9q= =r   c                     | j                   j                         d   }| j                   j                  |      }t        |      dk(  sJ y )Nr   r!   )r   r   r   r   )r*   r   fidss      r   test_fileids_by_classidz:_make_corpus_tests.<locals>._Tests.test_fileids_by_classid'  s;    ''""$Q'C77??3'Dt9>!>r   N)rJ   rK   rL   classmethodr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   s   r   _Testsr      se     
	J 
	J	.	/	-	1	.	R	'	'	,
	+
	*
	7
	!
	!
	"r   r   TestVerbNet. )unittest
skipUnlessr   TestCasereplacerJ   rL   )r   r   r   s   `` r   _make_corpus_testsr      sw     $8G9N4&!IH""" H"H"T $GOOC$<#=>FO //FMr   r   r   r   zVerbNet 3.3 not found at c                   >    e Zd ZdZed        Zd Zd Zd Zd Z	d Z
y)	TestLongidShortidBugFixz@Regression tests for the longid/shortid bug with dash-style IDs.c                 :    t        t        t        d      | _        y )Nr   r   )r   
_VN33_ROOTr   r   )r   s    r   r   z"TestLongidShortidBugFix.setUpClass?  s    $Z5Ir   c                 D    | j                   j                  d      dk(  sJ y)z5longid('114-1') must return 'act-114-1', not '114-1'.r5   r3   Nr   r   r9   s    r   test_longid_numeric_dashz0TestLongidShortidBugFix.test_longid_numeric_dashC  s    ww~~g&+555r   c                 D    | j                   j                  d      dk(  sJ y )Nr{   rz   r   r9   s    r   test_longid_numeric_dash_nestedz7TestLongidShortidBugFix.test_longid_numeric_dash_nestedG  s    ww~~i(M999r   c                 D    | j                   j                  d      dk(  sJ y )Nr3   r5   r   r   r9   s    r   test_shortid_from_dash_longidz5TestLongidShortidBugFix.test_shortid_from_dash_longidJ  s    ww{+w666r   c                 D    | j                   j                  d      dk(  sJ y)z5A longid passed to longid() should be returned as-is.r3   Nr   r9   s    r   test_longid_passthroughz/TestLongidShortidBugFix.test_longid_passthroughM  s    ww~~k*k999r   c                 D    | j                   j                  d      dk(  sJ y)z7A shortid passed to shortid() should be returned as-is.r5   Nr   r9   s    r   test_shortid_passthroughz0TestLongidShortidBugFix.test_shortid_passthroughQ  s    www'7222r   N)rJ   rK   rL   rM   r   r   r   r   r   r   r   rP   r   r   r   r   9  s5     KJ J6:7:3r   r   )rM   r
   r   r   	nltk.datar   nltk.corpus.reader.verbnetr   r   
_VN21_ROOTr	   r   
expanduser
_VN32_ROOTr   r   r   r   r   rR   r   r   TestVerbNet21TestVerbNet32TestVerbNet33r   r   rP   r   r   <module>r      s#  
 
    : ()
WW\\"'',,S1;	J
WW\\"'',,S1<@
%>)) %>Xn
h'' n
h=8,, =Rj #5*5"5*5"5*5 j!%>zl#K3h// 33r   