<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><meta http-equiv=Content-Type content="text/html; charset=utf-8"><meta name=Generator content="Microsoft Word 14 (filtered medium)"><base target="_blank"><style><!--
/* Font Definitions */
@font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:Verdana;
        panose-1:2 11 6 4 3 5 4 4 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0cm;
        margin-bottom:.0001pt;
        font-size:11.0pt;
        font-family:"Calibri","sans-serif";
        mso-fareast-language:EN-US;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:purple;
        text-decoration:underline;}
p
        {mso-style-priority:99;
        margin:0cm;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";}
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph
        {mso-style-priority:34;
        margin-top:0cm;
        margin-right:0cm;
        margin-bottom:0cm;
        margin-left:36.0pt;
        margin-bottom:.0001pt;
        font-size:11.0pt;
        font-family:"Calibri","sans-serif";
        mso-fareast-language:EN-US;}
span.EmailStyle19
        {mso-style-type:personal;
        font-family:"Calibri","sans-serif";
        color:windowtext;}
span.EmailStyle21
        {mso-style-type:personal-reply;
        font-family:"Calibri","sans-serif";
        color:#1F497D;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;
        font-family:"Calibri","sans-serif";
        mso-fareast-language:EN-US;}
@page WordSection1
        {size:612.0pt 792.0pt;
        margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:1167938951;
        mso-list-type:hybrid;
        mso-list-template-ids:-526476844 134807553 134807555 134807557 134807553 134807555 134807557 134807553 134807555 134807557;}
@list l0:level1
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l0:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:"Courier New";}
@list l0:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l0:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l0:level5
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:"Courier New";}
@list l0:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l0:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l0:level8
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:"Courier New";}
@list l0:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l1
        {mso-list-id:1891113081;
        mso-list-type:hybrid;
        mso-list-template-ids:861948702 134807553 134807555 134807557 134807553 134807555 134807557 134807553 134807555 134807557;}
@list l1:level1
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:38.25pt;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l1:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:74.25pt;
        text-indent:-18.0pt;
        font-family:"Courier New";}
@list l1:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:110.25pt;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l1:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:146.25pt;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l1:level5
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:182.25pt;
        text-indent:-18.0pt;
        font-family:"Courier New";}
@list l1:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:218.25pt;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l1:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:254.25pt;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l1:level8
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:290.25pt;
        text-indent:-18.0pt;
        font-family:"Courier New";}
@list l1:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:326.25pt;
        text-indent:-18.0pt;
        font-family:Wingdings;}
ol
        {margin-bottom:0cm;}
ul
        {margin-bottom:0cm;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]--></head><body lang=EN-GB link=blue vlink=purple><div class=WordSection1><p class=MsoNormal><span style='color:#002060'>DGT-TM is an extraction of the translation memory of the European Institutions for all official EU languages, produced by the European Commission’s <i>Directorate General for Translation</i> (DGT) and distributed by the <i>Joint Research Centre</i> (JRC). Translation memories are sentences and their manually produced translations.<o:p></o:p></span></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal><span style='color:#002060'>The new release is called <b>DGT-TM-2013</b>. It follows the previous releases, DGT-TM (2007), DGT-TM-2011 and DGT-TM-2012. DGT-TM-2013 adds over ten million translation units to the previous 63 million translation units, resulting in <b>73 million translation units in total</b>. <o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'>New features of DGT-TM-2013 are:<o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoListParagraph style='text-indent:-18.0pt;mso-list:l0 level1 lfo2'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>Significantly more data for the Bulgarian, Maltese and Romanian languages;<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-18.0pt;mso-list:l0 level1 lfo2'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>Mostly about 460K new translation units per language.<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-18.0pt;mso-list:l0 level1 lfo2'><![if !supportLists]><span 
style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>Most documents of this release were translated in 2012, but it also contains previously unpublished documents from older years.<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-18.0pt;mso-list:l0 level1 lfo2'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>Unfortunately, no new Irish translation units are part of this release.<o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><b><span style='color:#002060'>Languages:</span></b><span style='color:#002060'>  All 253 language pairs involving the following 23 languages: <o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'>                Bulgarian, Czech, Danish, Dutch, English, Estonian, German, <o:p></o:p></span></p><p class=MsoNormal style='text-indent:36.0pt'><span style='color:#002060'>Greek, Finnish, French, Irish, Hungarian, Italian, Latvian, <o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'>                Lithuanian, Maltese, Polish, Portuguese, Romanian, Slovak, <o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'>                Slovene, Spanish and Swedish.<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:10.0pt;font-family:"Courier New";color:#632523'>            <o:p></o:p></span></p><p class=MsoNormal><b><span style='font-size:10.0pt;font-family:"Courier New";color:#632523'>URL:        </span></b><span style='font-size:10.0pt;font-family:"Courier New";color:#1F497D'><a href="http://ipsc.jrc.ec.europa.eu/?id=197">http://ipsc.jrc.ec.europa.eu/?id=197</a> 
</span><span style='font-size:10.0pt;font-family:"Courier New";color:#632523'> <o:p></o:p></span></p><p class=MsoNormal><b><span style='font-size:10.0pt;font-family:"Courier New";color:#632523'>Creator:    </span></b><span style='font-size:10.0pt;font-family:"Courier New";color:#632523'>European Commission - Directorate General for Translation (<a href="http://ec.europa.eu/dgs/translation/index_en.htm"><span style='color:#00007F'>DGT</span></a>)<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:10.0pt;font-family:"Courier New";color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><u><span style='color:#002060'>WHAT IS DGT-TM<o:p></o:p></span></u></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'>The ‘<a href="http://europa.eu/abc/eurojargon/index_en.htm" target="_new"><span style='color:#002060;text-decoration:none'>Acquis Communautaire</span></a>’ is the entire body of European legislation, comprising all the treaties, regulations and directives adopted by the European Union (EU). Since each new country joining the EU is required to accept the whole Acquis Communautaire, this body of legislation has been translated into 22 official languages. </span><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";color:#000066'>For the 23<sup>rd</sup> official EU language, <strong><span style='font-family:"Verdana","sans-serif";font-weight:normal'>Irish</span></strong>, the Acquis has not been translated on a regular basis, which is why DGT-TM includes only a small amount of data in Irish. The Acquis Communautaire was split into sentences and aligned automatically at sentence level, resulting in the DGT translation memory, DGT-TM. The text data is accompanied by software that allows users to extract all sentences and their translations for any of the 253 possible language pair combinations. 
<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:10.0pt;font-family:"Verdana","sans-serif";color:#000066'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><u><span style='color:#002060'>MOTIVATION FOR THIS RELEASE<o:p></o:p></span></u></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'>The public data release is in line with the general effort of the European Commission to support multilingualism, language diversity and the re-use of Commission information. It follows the release of a number of further multilingual data sets:<o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoListParagraph style='margin-left:38.25pt;text-indent:-18.0pt;mso-list:l1 level1 lfo3'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>the <b>JRC-Acquis</b> parallel corpus in 2006 (over 1 billion words in 22 languages), <o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:38.25pt;text-indent:-18.0pt;mso-list:l1 level1 lfo3'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>the <b>DGT-TM</b> Translation Memory in 2007, <o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:38.25pt;text-indent:-18.0pt;mso-list:l1 level1 lfo3'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>the multilingual named entity resource <b>JRC-Names</b> in 2011, <o:p></o:p></span></p><p class=MsoListParagraph 
style='margin-left:38.25pt;text-indent:-18.0pt;mso-list:l1 level1 lfo3'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>the multilingual multi-label classification tool (and accompanying text data) <b>JRC EuroVoc Indexer (JEX)</b> (22 languages) in 2012, <o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:38.25pt;text-indent:-18.0pt;mso-list:l1 level1 lfo3'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>the ECDC-TM Translation Memory in 2012 (domain: Public Health)<o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:38.25pt;text-indent:-18.0pt;mso-list:l1 level1 lfo3'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>the <b>DGT-Acquis</b> parallel corpus in 2012,<o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:38.25pt;text-indent:-18.0pt;mso-list:l1 level1 lfo3'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>the EAC-TM Translation Memory in 2013 (domain: Education and Culture),<o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:38.25pt;text-indent:-18.0pt;mso-list:l1 level1 lfo3'><![if !supportLists]><span style='font-family:Symbol;color:#002060'><span style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>         </span></span></span><![endif]><span style='color:#002060'>and further smaller multilingual resources. 
<o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'>See <a href="http://ipsc.jrc.ec.europa.eu/?id=61">http://ipsc.jrc.ec.europa.eu/?id=61</a> for more information on these resources.<o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><u><span style='color:#002060'>WHAT DGT-TM CAN BE USED FOR<o:p></o:p></span></u></p><p class=MsoNormal><span style='color:#002060'>                <o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'>DGT-TM can be fed into translation memory software to support human translators in their work. As it is a large parallel corpus in electronic form, it can furthermore be used by specialists in computational linguistics to train statistical machine translation software, to generate multilingual dictionaries, to train and test multilingual information extraction software, and more.<o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><u><span style='color:#002060'>MORE INFORMATION ON DGT-TM <o:p></o:p></span></u></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'>At <a href="http://langtech.jrc.ec.europa.eu/JRC_Publications.html">http://langtech.jrc.ec.europa.eu/JRC_Publications.html</a>, you can find detailed publications on the JRC’s multilingual language technology activity. 
For details on DGT-TM, you can read:<o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p><span style='font-size:10.0pt;font-family:"Courier New";color:#000066'>      Steinberger Ralf, Andreas Eisele, Szymon Klocek, Spyridon Pilos <o:p></o:p></span></p><p><span style='font-size:10.0pt;font-family:"Courier New";color:#000066'>      &amp; Patrick Schlüter (2012). <o:p></o:p></span></p><p><span style='font-size:10.0pt;font-family:"Courier New";color:#000066'>      <a href="http://langtech.jrc.ec.europa.eu/Documents/2012_LREC_DGT-TM_Final.pdf" target="_blank" title="Reference publication for the DGT-Translation Memory DGT-TM"><b><span style='color:#3399CC;text-decoration:none'>DGT-TM: A freely Available Translation Memory in 22 Languages</span></b></a>. <o:p></o:p></span></p><p><span style='font-size:10.0pt;font-family:"Courier New";color:#000066'>      Proceedings of the 8<sup>th</sup> International Conference on Language <o:p></o:p></span></p><p><span style='font-size:10.0pt;font-family:"Courier New";color:#000066'>      Resources and Evaluation (LREC'2012), Istanbul, 21-27 May 2012. 
<o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'> </span><span style='font-size:9.0pt;color:#002060'>               </span><span style='font-size:9.0pt;font-family:"Courier New";color:#632523'><a href="http://langtech.jrc.ec.europa.eu/Documents/2012_LREC_DGT-TM_Final.pdf"><span style='color:#00007F'>http://langtech.jrc.ec.europa.eu/Documents/2012_LREC_DGT-TM_Final.pdf</span></a><o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'>The article ‘</span><strong><i><span style='font-size:9.0pt;font-family:"Verdana","sans-serif";color:#000066;font-weight:normal'>An overview of the European Union's highly multilingual parallel corpora</span></i></strong><span style='color:#002060'>’ (submitted) gives a comparative overview of the various resources distributed by the JRC. <o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><u><span style='color:#002060'>WHAT NEXT?<o:p></o:p></span></u></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'>The JRC and collaborating services at the European Parliament are currently finalising the release of yet another large-scale multilingual parallel corpus. 
<o:p></o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#002060'><o:p> </o:p></span></p><p class=MsoNormal><b><span style='font-size:9.0pt;color:gray'><a href="http://langtech.jrc.ec.europa.eu/RS.html">Ralf Steinberger </a> <br></span></b><span style='font-size:9.0pt;color:gray'>European Commission - Joint Research Centre (JRC)<br>21027 Ispra (VA), Italy<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:9.0pt;color:gray'>URL – Applications: <a href="http://emm.newsbrief.eu/overview.html"><span style='color:gray'>http://emm.newsbrief.eu/overview.html</span></a> <o:p></o:p></span></p><p class=MsoNormal><span style='font-size:9.0pt;color:gray'>URL – Resources: <a href="http://ipsc.jrc.ec.europa.eu/index.php?id=61">http://ipsc.jrc.ec.europa.eu/index.php?id=61</a>  <o:p></o:p></span></p><p class=MsoNormal><o:p> </o:p></p></div></body></html>