<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><meta http-equiv=Content-Type content="text/html; charset=utf-8"><meta name=Generator content="Microsoft Word 15 (filtered medium)"><style><!--
/* Font Definitions
   Names the font families declared for this message. Each rule carries only
   a family name and a panose-1 classification; no rule has a src descriptor,
   so no font file is referenced from this stylesheet.
   Wingdings is used by the bullet list levels defined further down, Calibri
   by the text styles below; Cambria Math is declared but not referenced by
   any rule in this section. */
@font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions
   NOTE(review): the mso-* declarations in these rules look like Microsoft
   Office extensions (the generator meta tag names Microsoft Word) and are
   presumably ignored by other renderers. Confirm before removing any. */
/* Base paragraph style: no margins, 11pt Calibri with a sans-serif fallback. */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
/* Unvisited links: underlined, in the same #0563C1 blue that the body tag
   sets through its link attribute. */
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:#0563C1;
        text-decoration:underline;}
/* List items: same font as the base style, with a half-inch left margin and
   no other margins. The list items in the body override margin-left with an
   inline style. */
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph
        {mso-style-priority:34;
        margin-top:0in;
        margin-right:0in;
        margin-bottom:0in;
        margin-left:.5in;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
/* Character style for composed text: Calibri in the windowtext system color. */
span.EmailStyle17
        {mso-style-type:personal-compose;
        font-family:"Calibri",sans-serif;
        color:windowtext;}
/* Default font for any element carrying the MsoChpDefault class. */
.MsoChpDefault
        {mso-style-type:export-only;
        font-family:"Calibri",sans-serif;}
/* Named page box: 8.5in by 11in with a 1in margin on every side. */
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
/* Assigns the wrapper div of the message body to the named page above. */
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:970208045;
        mso-list-type:hybrid;
        mso-list-template-ids:793954882 -821259178 67698691 67698693 67698689 67698691 67698693 67698689 67698691 67698693;}
@list l0:level1
        {mso-level-start-at:0;
        mso-level-number-format:bullet;
        mso-level-text:-;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Calibri",sans-serif;
        mso-fareast-font-family:Calibri;}
@list l0:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l0:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l0:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l0:level5
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l0:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l0:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l0:level8
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l0:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l1
        {mso-list-id:994068586;
        mso-list-type:hybrid;
        mso-list-template-ids:-1127844808 67698689 67698691 67698693 67698689 67698691 67698693 67698689 67698691 67698693;}
@list l1:level1
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l1:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l1:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l1:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l1:level5
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l1:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l1:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l1:level8
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l1:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l2
        {mso-list-id:1512334296;
        mso-list-type:hybrid;
        mso-list-template-ids:-1780855616 -26863866 67698691 67698693 67698689 67698691 67698693 67698689 67698691 67698693;}
@list l2:level1
        {mso-level-start-at:0;
        mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:23.0pt;
        text-indent:-.25in;
        font-family:Symbol;
        mso-fareast-font-family:Calibri;
        mso-bidi-font-family:Calibri;}
@list l2:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:59.0pt;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l2:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:95.0pt;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l2:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:131.0pt;
        text-indent:-.25in;
        font-family:Symbol;}
@list l2:level5
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:167.0pt;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l2:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:203.0pt;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l2:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:239.0pt;
        text-indent:-.25in;
        font-family:Symbol;}
@list l2:level8
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:275.0pt;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l2:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        margin-left:311.0pt;
        text-indent:-.25in;
        font-family:Wingdings;}
/* Remove the bottom margin from ordered and unordered lists. The lists in
   the body additionally set margin-top:0in through an inline style. */
ol
        {margin-bottom:0in;}
ul
        {margin-bottom:0in;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]--></head><body lang=EN-US link="#0563C1" vlink="#954F72" style='word-wrap:break-word'><div class=WordSection1><p class=MsoNormal>(Apologies for cross-postings)<o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal><b>*** The GUM Corpus - Release 11.0.0 ***<o:p></o:p></b></p><p class=MsoNormal><b>*** Georgetown University Multilayer corpus ***<o:p></o:p></b></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal><a href="https://gucorpling.org/corpling/">Corpling@GU</a> is happy to announce the first release of series 11 of the Georgetown University Multilayer corpus (GUM V11.0.0):<o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal><a href="https://gucorpling.org/gum/">https://gucorpling.org/gum/</a><o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal>New in this version: <o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><ul style='margin-top:0in' type=disc><li class=MsoListParagraph style='margin-left:0in;mso-list:l0 level1 lfo2'>GUM and the out-of-domain test set GENTLE have now merged!<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l0 level1 lfo2'>New documents – the corpus now contains 268,208 tokens<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l0 level1 lfo2'>Five different summaries per document<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l0 level1 lfo2'>Graded salience scores (0-5) for each entity in every document<o:p></o:p></li></ul><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal>GUM is an open source corpus of richly annotated English texts from <b>24 genres</b>: <o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><ul style='margin-top:0in' type=disc><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level1 lfo3'>Main genres: (available in train/dev/test)<o:p></o:p></li><ul style='margin-top:0in' type=circle><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 
level2 lfo3'>academic writing<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>biographies<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>courtroom transcripts<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>essays<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>fiction<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>how-to guides<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>interviews<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>letters<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>news<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>online forum discussions<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>podcasts<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>political speeches<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>spontaneous face-to-face conversations<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>textbooks<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>travel guides<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>vlogs<o:p></o:p></li></ul></ul><p class=MsoNormal><o:p>&nbsp;</o:p></p><ul style='margin-top:0in' type=disc><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level1 lfo3'>Out-of-domain test genres: (test2, aka GENTLE partition)<o:p></o:p></li><ul style='margin-top:0in' type=circle><li class=MsoListParagraph 
style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>dictionary entries<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>live esports commentary<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>legal documents<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>medical notes<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>poetry<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>mathematical proofs<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>course syllabuses<o:p></o:p></li><li class=MsoListParagraph style='margin-left:-13.0pt;mso-list:l2 level2 lfo3'>threat letters<o:p></o:p></li></ul></ul><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal>The corpus is created by students as part of the Computational Linguistics curriculum at Georgetown University and is available under Creative Commons licenses.<o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal>This is the first version of GUM series 11, containing roughly 281 documents annotated for:<o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><ul style='margin-top:0in' type=disc><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Multiple POS tags (100% manual gold PTB, extended PTB, converted CLAWS5 and UPOS) and UD morphological features<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Manually corrected lemmatization and morphological segmentation<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Sentence segmentation and rough speech act (manual)<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Document structure using TEI tags (paragraphs, headings, figures, captions etc., all 
manual)<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Constituent and dependency syntax (manually corrected Universal Dependencies, and PTB parses from gold tags with function labels and enhanced dependencies)<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Construction Grammar annotations following UCxn<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Information status (given-active/inactive, accessible-inferable/common ground/aggregate, and new)<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Entity type, graded salience (0-5) and coreference annotation (including non-named entities, singletons, appositions, cataphora and several types of bridging), as well as Centering Theory annotations<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Entity linking (Wikification) of all named entities with Wikipedia articles, including their non-named and pronominal mentions<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Discourse parses in enhanced Rhetorical Structure Theory (eRST) and discourse dependencies, including multiple concurrent and non-projective relations<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Discourse signal annotations classified into 9 major and 45 minor types indicating how the presence of a relation is marked (based on the Signaling Corpus scheme)<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Shallow discourse relations following the PDTB v3 scheme<o:p></o:p></li><li class=MsoListParagraph style='margin-left:0in;mso-list:l1 level1 lfo1'>Five abstractive summaries for each document following strict, comparable guidelines across genres<o:p></o:p></li></ul><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal>Note on 
Reddit data: token text is not contained in the release but can be downloaded with an included script.<o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal>For more information and to search or download the corpus online, see <a href="https://gucorpling.org/gum/">the corpus website</a>.<o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal>Best wishes,<o:p></o:p></p><p class=MsoNormal>The GUM team<o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal>PS – if you like GUM, also check out our automatically annotated <a href="https://github.com/gucorpling/amalgum/">AMALGUM</a> corpus!<o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal><o:p> </o:p></p><p class=MsoNormal><o:p> </o:p></p></div></body></html>