# evaluation file - top 1000 terms sorted by: cmpr_cvalue
# annotations: 2 for valid tech term, 1 for valid term but not technology term, 0 for invalid term, - for not annotated yet
# term_id	term_string, annotation 
447	natural language	-
1706	machine translation	-
1922	natural language processing	-
1489	other hand	-
2881	noun phrase	-
9254	speech recognition	-
28246	language model	-
1431	language processing	-
6033	computational linguistics	-
34285	error rate	-
34179	training corpus	-
33684	test set	-
44456	knowledge base	-
37346	parse tree	-
450	information retrieval	-
33622	training set	-
224403	computational linguistics volume	-
33229	word sense	-
8255	target language	-
241	word order	-
34900	machine learning	-
6912	total number	-
42352	wall street journal	-
197	phrase structure	-
27750	data set	-
251	l ~	-
33632	information extraction	-
33165	named entity	-
37782	target word	-
34902	decision tree	-
2276	large number	-
7646	semantic information	-
33690	future work	-
8277	spoken language	-
7846	syntactic structure	-
31	c ~	-
1907	next section	-
36668	penn treebank	-
47085	logical form	-
8304	discourse structure	-
1678	same time	-
224404	linguistics volume	-
33700	mutual information	-
4642	semantic representation	-
8253	source language	-
5851	input sentence	-
33044	feature structure	-
33230	word sense disambiguation	-
35587	previous work	-
8804	translation system	-
2850	data base	-
36555	morphological analysis	-
9602	lexical entry	-
8258	question answering	-
42351	wall street	-
70125	word segmentation	-
1057	first step	-
1056	same way	-
33231	sense disambiguation	-
15121	language understanding	-
42353	street journal	-
1423	syntactic analysis	-
29339	previous section	-
4806	speech act	-
4644	prepositional phrase	-
14350	input string	-
34609	linguistic knowledge	-
2264	syntactic information	-
7399	semantic interpretation	-
34203	word alignment	-
35078	search space	-
210	r ~	-
34284	word error rate	-
31781	test corpus	-
7584	lexical item	-
7941	semantic analysis	-
51662	anaphora resolution	-
34283	word error	-
12532	finite state	-
46834	knowledge representation	-
19275	small number	-
10200	relative clause	-
21333	translation model	-
34534	semantic role	-
33951	statistical machine translation	-
7623	lexical information	-
8803	machine translation system	-
38078	gold standard	-
34578	unknown word	-
35832	natural language generation	-
2473	t ~	-
19504	parsing algorithm	-
34628	mt system	-
34407	probability distribution	-
36048	dynamic programming	-
40471	brown corpus	-
9118	main verb	-
35833	language generation	-
49935	world knowledge	-
2976	direct object	-
38989	feature selection	-
21573	% accuracy	-
1498	english word	-
51291	case frame	-
12225	context-free grammar	-
36526	contextual information	-
13044	data structure	-
34532	language modeling	-
30406	head noun	-
64330	domain knowledge	-
46526	derivation tree	-
12993	average number	-
32759	root node	-
2818	semantic class	-
2302	wide range	-
9776	head word	-
33872	conditional probability	-
9974	verb phrase	-
13459	word sequence	-
55893	natural language understanding	-
3016	dependency tree	-
12714	maximum likelihood	-
7469	new york	-
45912	speech recognizer	-
34241	proposed method	-
33869	statistical machine	-
39933	search engine	-
3018	tree structure	-
36587	morphological analyzer	-
11965	sentence length	-
34905	pos tag	-
37729	named entity recognition	-
44859	elementary tree	-
10541	system performance	-
62299	text generation	-
571	single word	-
52276	reference resolution	-
2862	dependency structure	-
35596	user model	-
67846	discourse segment	-
28960	recognition system	-
9028	linguistic information	-
14683	feature set	-
34473	markov model	-
34943	pattern matching	-
8783	input text	-
4689	semantic structure	-
18781	generation system	-
21346	large corpus	-
106	c \/	-
35251	dialogue system	-
16954	similarity measure	-
34286	translation quality	-
40048	probabilistic model	-
21195	additional information	-
33978	parallel corpus	-
60256	speech recognition system	-
52282	argument structure	-
35477	spoken dialogue	-
1586	text processing	-
34186	source sentence	-
9516	new information	-
39024	dialogue act	-
5631	generation process	-
33779	entity recognition	-
10388	proper noun	-
2989	first sentence	-
33587	% precision	-
15777	syntactic category	-
43573	local context	-
277	~ l	-
34924	coreference resolution	-
38269	part-of-speech tagging	-
1653	computer science	-
48292	user interface	-
36671	data sparseness	-
46652	auxiliary tree	-
35129	spontaneous speech	-
9113	l \	-
37796	semantic similarity	-
4648	surface form	-
9873	given word	-
1472	~ r	-
63	finite set	-
30753	word w	-
31464	normal form	-
36653	sentence level	-
22030	small set	-
21107	right-hand side	-
5804	parsing process	-
22128	next step	-
10240	language system	-
6601	p ~	-
62364	classi cation	-
57360	neural network	-
56247	semantic relation	-
24189	important role	-
7382	proper name	-
12745	statistical model	-
34404	development set	-
61311	lexical choice	-
10043	english sentence	-
47484	text categorization	-
17129	future research	-
34502	baseline system	-
39352	discourse model	-
35338	domain model	-
50847	regular expression	-
9724	new word	-
57000	lexical knowledge	-
61158	chinese word	-
33705	feature vector	-
4641	b ~	-
83110	semantic network	-
5306	phrase structure grammar	-
292	same sentence	-
34287	bleu score	-
3195	n ~	-
881	data collection	-
12004	source text	-
36377	feature space	-
33243	training data	-
34168	edit distance	-
22506	continuous speech	-
57552	trigram model	-
5036	linguistic analysis	-
30310	human language	-
33479	vector space	-
33588	% recall	-
35098	annotation scheme	-
41132	maximum entropy	-
39950	recent work	-
33646	knowledge acquisition	-
37628	related work	-
7936	special case	-
33713	average precision	-
33979	em algorithm	-
34153	parallel corpora	-
13771	wide variety	-
17708	categorial grammar	-
1622	national science foundation	-
5748	language learning	-
4858	real world	-
35814	information structure	-
6679	ambiguous word	-
6753	hierarchical structure	-
21311	current state	-
30848	first stage	-
34580	overall performance	-
1722	high precision	-
43191	semantic knowledge	-
6795	basic idea	-
39503	dialogue manager	-
21214	hand side	-
33047	grammar formalism	-
28418	first order	-
3377	surface structure	-
12753	correct answer	-
2746	same set	-
926	great deal	-
38155	large amount	-
5001	relative frequency	-
2213	same word	-
13043	search algorithm	-
11883	main clause	-
8599	semantic content	-
35192	pronoun resolution	-
33753	annotated corpus	-
101056	lexical rule	-
28698	right hand side	-
148343	natural language interface	-
4882	artificial intelligence	-
21105	left-hand side	-
20686	given sentence	-
35043	success rate	-
36799	large corpora	-
488738	o \	-
7962	linguistic theory	-
8358	~ \	-
51723	query expansion	-
67634	continuous speech recognition	-
46910	rhetorical structure	-
87840	chinese word segmentation	-
47730	qa system	-
382	first word	-
47114	semantic type	-
31371	same type	-
37363	target sentence	-
36617	probability model	-
10239	natural language system	-
38404	chart parsing	-
7924	discourse context	-
33886	sentence pair	-
34828	word form	-
45871	understanding system	-
819	retrieval system	-
19573	internal structure	-
1603	english text	-
42655	broadcast news	-
33795	high level	-
41831	source word	-
36736	language technology	-
27962	grammar rule	-
28567	word list	-
53769	chart parser	-
40417	high accuracy	-
45843	meaning representation	-
37354	time complexity	-
39824	standard deviation	-
33261	window size	-
6326	high frequency	-
42072	relevant information	-
23876	o o o	-
33663	text corpus	-
98191	representation language	-
35383	new domain	-
36559	beam search	-
43721	current word	-
29450	second step	-
35603	current implementation	-
42407	training material	-
45126	error reduction	-
48534	unification grammar	-
12658	initial state	-
125111	text planning	-
581	next word	-
39100	naive bayes	-
34730	statistical language	-
50640	text structure	-
53551	free word order	-
12206	~ t	-
1500	word class	-
2852	following way	-
30201	word level	-
21281	large scale	-
34375	significant improvement	-
9055	sentence boundary	-
17322	t \	-
35409	automatic evaluation	-
36258	supervised learning	-
17697	dependency grammar	-
34879	classification accuracy	-
34904	classification task	-
36335	learning algorithm	-
45356	grammatical function	-
111750	compound noun	-
34431	automatic speech recognition	-
55030	translation process	-
42297	pos tagger	-
37088	objective function	-
51790	semantic processing	-
35161	pos tagging	-
15183	such information	-
3646	low frequency	-
17930	definite noun	-
34629	system using	-
57444	research projects agency	-
33433	lexical sample	-
36915	following section	-
34471	hidden markov	-
54467	bilingual corpus	-
5245	b \	-
4738	constituent structure	-
67497	description length	-
45881	recognition accuracy	-
36680	cross validation	-
833	content word	-
6523	detailed description	-
12328	~ c	-
33855	word pair	-
37480	tree adjoining	-
58840	parse forest	-
28697	right hand	-
5307	structure grammar	-
10252	simple example	-
5666	system \	-
56815	viterbi algorithm	-
10765	o o	-
34961	first experiment	-
92589	expert system	-
1883	~ ~	-
18118	above example	-
3935	other language	-
52825	text summarization	-
32790	typed feature	-
39504	lexical database	-
44163	corpus analysis	-
40213	semantic category	-
41120	second stage	-
7383	definite description	-
42220	high quality	-
34430	automatic speech	-
2937	present paper	-
43586	parsing model	-
51341	entropy model	-
34624	part-of-speech tagger	-
57442	advanced research projects	-
35128	right context	-
79902	word boundary	-
49462	text classification	-
98327	text understanding	-
40206	extraction system	-
45546	shared task	-
70883	text analysis	-
951706	de coling-92	-
37831	distributional similarity	-
52582	lexical ambiguity	-
61322	correct parse	-
137844	reference time	-
1564	document collection	-
6924	input word	-
37188	joint probability	-
1623	science foundation	-
7978	programming language	-
79655	grammar development	-
34511	trigram language model	-
1621	national science	-
55183	language text	-
10192	english translation	-
5045	sentence structure	-
6645	problem solving	-
2841	second sentence	-
47777	unsupervised learning	-
33868	computational complexity	-
35885	dependency parsing	-
41166	high degree	-
47518	discourse representation	-
33908	parameter estimation	-
44265	text corpora	-
156381	n \	-
44851	example sentence	-
27560	+ l	-
42614	lexical cohesion	-
53144	message understanding	-
33906	bigram model	-
241048	text planner	-
34919	current work	-
8893	subordinate clause	-
46560	foot node	-
37986	british national corpus	-
1617	document retrieval	-
8155	error correction	-
10133	relative pronoun	-
17423	c \	-
958	o ~	-
18715	current system	-
9761	morphological information	-
14744	top level	-
33867	ibm model	-
50158	information content	-
27790	processing system	-
14524	feature value	-
34482	n-best list	-
35601	user satisfaction	-
52715	linguistic structure	-
54700	human language technology	-
36008	syntactic parsing	-
41174	prepositional phrase attachment	-
41006	speech synthesis	-
53522	text retrieval	-
67598	real time	-
73101	knowledge source	-
33899	polynomial time	-
96222	context model	-
33480	vector space model	-
68345	context vector	-
148344	language interface	-
19091	background knowledge	-
36215	small amount	-
12621	equivalence class	-
27883	same number	-
30167	left context	-
51302	finite verb	-
58523	% error	-
5110	whole sentence	-
51324	long distance	-
18587	large set	-
34695	statistical significance	-
35230	recognition performance	-
39084	tag set	-
59077	natural language text	-
40538	word frequency	-
41043	pp attachment	-
66657	partial parse	-
22593	other information	-
59076	predicate-argument structure	-
6310	original text	-
40482	further research	-
34472	hidden markov model	-
17550	new approach	-
21078	dependency relation	-
27982	parsing system	-
34302	single sentence	-
18974	maximum number	-
27006	final state	-
50331	term frequency	-
63332	decision list	-
1396	foreign language	-
57526	street journal corpus	-
77717	first phase	-
33033	first argument	-
34535	role labeling	-
42438	common noun	-
32838	type hierarchy	-
35369	system development	-
36255	word accuracy	-
78311	hong kong	-
77235	word recognition	-
50431	human performance	-
12911	same meaning	-
37813	raw text	-
51961	base form	-
35877	parsing strategy	-
96904	nlp system	-
3779	first part	-
38697	similarity score	-
3174	following sentence	-
45648	pitch accent	-
33154	document frequency	-
7797	w ~	-
39996	computational model	-
20769	general purpose	-
27180	useful information	-
48441	definite clause	-
49302	dialog system	-
831	information retrieval system	-
57782	expressive power	-
71637	discourse relation	-
78926	brown et	-
87445	brown et al.	-
12720	k ~	-
36870	language acquisition	-
52236	discourse entity	-
112277	discourse marker	-
61155	speci c	-
10333	rule application	-
36760	average length	-
48616	grammar writer	-
54738	exact match	-
6558	m ~	-
46666	initial tree	-
54192	statistical information	-
23970	word string	-
45360	training corpora	-
33434	lexical sample task	-
18357	current version	-
46871	temporal information	-
34495	baseline model	-
147360	language understanding system	-
17574	new method	-
59850	discourse processing	-
8740	language pair	-
76	linear order	-
8777	empty string	-
47088	answer type	-
32965	lexical category	-
37728	relation extraction	-
51345	subcategorization frame	-
36831	corpus size	-
37343	tree kernel	-
50223	shallow parsing	-
53729	lexical head	-
1520	english language	-
3911	vocabulary size	-
34225	word lattice	-
34343	good performance	-
34891	indirect object	-
57440	defense advanced research	-
1748	latter case	-
10253	semantic feature	-
35152	model using	-
8180	second language	-
54615	tag sequence	-
57234	significant difference	-
57441	advanced research	-
112588	speech understanding	-
21938	general case	-
30705	past participle	-
1035	further work	-
17901	discourse referent	-
33999	probability mass	-
34197	speech translation	-
34476	alignment model	-
695	third person	-
42271	annotated corpora	-
66619	bilingual lexicon	-
70688	propositional content	-
6704	f ~	-
12563	~ b	-
42726	cue phrase	-
44905	active learning	-
271047	x \	-
9942	processing time	-
65781	system architecture	-
2261	generative model	-
2946	transitive verb	-
9335	second experiment	-
293176	text plan	-
33242	cosine similarity	-
17127	query language	-
13434	first time	-
293	same level	-
32740	given input	-
9987	given context	-
39203	inter-annotator agreement	-
54162	information gain	-
42875	free text	-
50124	extraction task	-
128052	resource management	-
157937	intentional structure	-
8817	correct translation	-
21074	natural way	-
46546	derived tree	-
53550	free word	-
87305	tagged corpus	-
2193	g ~	-
48604	logic programming	-
50421	first case	-
54129	memory-based learning	-
102343	joint venture	-
80123	spoken language system	-
34510	trigram language	-
1333	previous research	-
37412	predicate argument	-
39128	context information	-
52776	rhetorical relation	-
39408	dialogue management	-
59900	inverse document frequency	-
39926	support vector	-
63527	statistical approach	-
65625	retrieval performance	-
80714	database query	-
30509	syntactic tree	-
33623	second order	-
37587	classification problem	-
46934	application domain	-
37481	tree adjoining grammar	-
40894	sentence extraction	-
269382	s \	-
1557	language analysis	-
90	second phase	-
3151	current sentence	-
33487	error analysis	-
37715	syntactic parser	-
17931	definite noun phrase	-
20809	particular word	-
27303	given text	-
38852	sentence alignment	-
46092	n-gram model	-
53965	lexical acquisition	-
47117	question answering system	-
26030	semantic distance	-
29547	formal language	-
34966	overall accuracy	-
1759	test collection	-
34659	system output	-
5147	english grammar	-
10520	internal representation	-
53942	thematic role	-
27981	similar way	-
57446	projects agency	-
6420	limited number	-
46354	structural information	-
80364	japanese sentence	-
41079	world wide web	-
14563	character string	-
40899	web page	-
44691	transitive closure	-
10300	ambiguity resolution	-
37319	maximum likelihood estimation	-
1470	u ~	-
38053	correct sense	-
67692	word similarity	-
33055	leaf node	-
7605	first place	-
34668	first pass	-
22327	np \	-
42422	wsj corpus	-
49720	conceptual structure	-
53053	person name	-
118570	information extraction system	-
211694	language processing system	-
24091	new york times	-
43506	same information	-
45296	subject position	-
952096	acres de coling-92	-
35888	dependency graph	-
42042	high performance	-
5259	r \	-
40809	comparable corpora	-
4818	language use	-
12794	p \	-
9930	dictionary entry	-
42535	auxiliary verb	-
56762	word formation	-
28703	left hand side	-
35658	content selection	-
2925	written text	-
1940	different word	-
3971	second part	-
10067	near future	-
57443	research projects	-
37705	dynamic programming algorithm	-
22519	written language	-
44112	n-gram language	-
45212	posterior probability	-
52784	overall system	-
60204	speech signal	-
43987	same entity	-
38789	training text	-
23901	name recognition	-
46064	partial parsing	-
112605	speech input	-
40892	bilingual corpora	-
46287	parallel text	-
27707	linguistic processing	-
52721	previous sentence	-
18765	other work	-
11274	second argument	-
87446	et al.	-
89900	bilingual dictionary	-
330837	default unification	-
449409	y \	-
490394	j \	-
34656	translation probability	-
2920	sentence generation	-
43747	multi-document summarization	-
48524	semantic lexicon	-
39864	training phase	-
26033	semantic space	-
43061	discourse analysis	-
2560	e \	-
81319	plan recognition	-
1090	information system	-
54861	new algorithm	-
41056	broad coverage	-
41175	phrase attachment	-
51314	constraint satisfaction	-
36685	statistical parsing	-
9567	entire sentence	-
61449	syntactic knowledge	-
6754	x x	-
7340	s ~	-
21095	original sentence	-
44330	communicative goal	-
48530	original document	-
53606	linear precedence	-
1334	function word	-
64972	full text	-
2745	same class	-
34110	bigram language model	-
12228	l l	-
35413	specific domain	-
39481	common sense	-
487	last word	-
35683	generation component	-
37987	national corpus	-
45512	verb sense	-
64068	linguistic data consortium	-
30046	second case	-
12738	finite number	-
38502	% improvement	-
19514	further processing	-
27226	semantic classification	-
36749	singular value decomposition	-
3173	sentence analysis	-
1521	statistical analysis	-
34937	important information	-
70563	natural language learning	-
279	~ n	-
36392	parsing accuracy	-
5718	entire corpus	-
58283	inside-outside algorithm	-
66729	semantic frame	-
14821	k \	-
10547	chinese language	-
58146	prototype system	-
102630	sentence planning	-
119073	lr parsing	-
33481	space model	-
35606	based approach	-
1966	complete set	-
46480	development corpus	-
47479	feature function	-
47813	specific information	-
31478	modified version	-
32153	structure analysis	-
2801	generation algorithm	-
54250	first sense	-
1739	subject matter	-
39160	different feature	-
224196	noun group	-
313319	discourse plan	-
225196	computational linguistics computational	-
5900	new language	-
12487	start symbol	-
225198	linguistics computational linguistics	-
45250	automatic acquisition	-
64622	term weighting	-
246	\/ ~	-
37361	syntactic parse	-
156681	semantic interpreter	-
35311	statistical language model	-
34156	training procedure	-
43485	task completion	-
84070	message understanding conference	-
29771	only difference	-
36832	training process	-
1453	th ~	-
52981	enough information	-
37463	kernel function	-
80597	matrix clause	-
110121	language sentence	-
41747	distance measure	-
43867	simple sentence	-
12381	v ~	-
37114	relative importance	-
3084	second type	-
52578	rhetorical structure theory	-
54024	confusion matrix	-
179804	data extraction	-
30842	last year	-
21913	same category	-
8440	k +	-
37449	first set	-
9914	other word	-
33586	dependency parser	-
21957	generative capacity	-
48083	part-of-speech tag	-
23	~ o	-
27089	first column	-
34261	alignment algorithm	-
37982	particular domain	-
3807	correlation coefficient	-
15120	general knowledge	-
17107	current research	-
21985	np vp	-
9266	y ~	-
14501	first element	-
52675	event structure	-
10105	chinese text	-
10118	right side	-
44839	sentence containing	-
46282	statistical parser	-
28702	left hand	-
29704	main difference	-
34807	evaluation method	-
35328	second set	-
8656	grammatical information	-
39613	speech processing	-
54125	labeled training	-
18794	string matching	-
77009	syntax tree	-
36679	10-fold cross validation	-
32684	intermediate representation	-
36672	sparseness problem	-
43509	channel model	-
62993	human intervention	-
73343	control structure	-
34701	test data	-
8287	canonical form	-
17196	given set	-
9875	word length	-
1979	minimum number	-
9760	translation lexicon	-
57277	last section	-
4914	difficult task	-
37551	vector machines	-
37718	linear time	-
39267	% confidence	-
41641	linear combination	-
45115	corpus using	-
18545	time consuming	-
1001	same thing	-
66253	correct analysis	-
6755	x x x	-
2071	rule set	-
3874	different language	-
37483	adjoining grammar	-
42805	web search	-
73937	prosodic information	-
31014	semantic level	-
55539	surface string	-
71931	data model	-
90289	search process	-
159	tree t	-
712	computer program	-
2753	same manner	-
55163	probability p	-
58271	training algorithm	-
52752	discourse representation theory	-
12793	final result	-
51182	complete parse	-
57527	journal corpus	-
206227	attentional state	-
35747	dialogue structure	-
37985	british national	-
44437	intended referent	-
62133	% error rate	-
29433	verb class	-
6518	h ~	-
14255	french word	-
34279	linear interpolation	-
21841	certain threshold	-
37937	% reduction	-
38308	local tree	-
17688	b c	-
39131	full set	-
23877	new set	-
48290	word graph	-
13531	high probability	-
51900	robust parsing	-
52081	syntactic processing	-
33792	syntactic relation	-
47921	discourse level	-
5172	upper case	-
54482	word translation	-
62983	parser output	-
154312	wide scope	-
154689	dr. smith	-
44113	n-gram language model	-
29581	previous example	-
35272	development test	-
42183	ordered list	-
49444	relational database	-
77157	structural ambiguity	-
12863	word meaning	-
36627	set size	-
22017	phrase type	-
43857	target domain	-
47264	second column	-
51338	grammatical relation	-
63483	supervised training	-
68857	new text	-
93518	della pietra	-
14693	semantic component	-
12513	same language	-
34654	phrase translation	-
2936	present work	-
39133	manual annotation	-
73980	relevance feedback	-
148085	dialogue model	-
5400	new model	-
22016	dependency analysis	-
53648	much information	-
34574	alignment error rate	-
1464	same document	-
34250	important feature	-
8490	detailed analysis	-
28239	major problem	-
40912	selection process	-
11987	sample size	-
2617	chinese character	-
961	different corpora	-
48486	single document	-
57439	defense advanced	-
31546	english corpus	-
414	method using	-
37743	parent node	-
41251	boundary detection	-
9695	first example	-
56607	entity type	-
177031	japanese text	-
289331	upper model	-
8070	information processing	-
53359	source document	-
100630	user input	-
6859	relative position	-
36021	recognition problem	-
37148	computational cost	-
38760	logistic regression	-
41156	same corpus	-
54701	language technology conference	-
32533	search strategy	-
1584	automatic text	-
33435	sample task	-
46944	answer key	-
56533	argument position	-
57201	particular type	-
59995	ir system	-
76130	previous utterance	-
82657	sentence planner	-
87718	case study	-
91271	statistical translation	-
98237	air travel	-
38197	null ing	-
2047	information concerning	-
20707	uniform distribution	-
27668	binary classification	-
37358	shallow parser	-
31093	syntactic form	-
41717	cost function	-
42310	error detection	-
50718	van den	-
55795	search procedure	-
59484	accuracy rate	-
89544	precision rate	-
104004	unrestricted text	-
952095	acres de	-
35627	spoken dialogue system	-