<a target="_top" href="http://ad.doubleclick.net/click;h=v8/3c3f/0/0/%2a/y;254456627;0-0;0;65352596;3454-728/90;46536741/46553464/1;;%7Esscs=%3fhttp://www.nature.com/content/reviews/KeyAdvances/index.html?WT.mc_id=WBN_KAM2012"><img src="http://s0.2mdn.net/viewad/1437696/23157-03-728-eBook.gif" alt="Click here to find out more!" border="0"></a>

<div id="header" class="constrain v2">

                                                <h1>

                                                                        <a href="http://www.nature.com/srep">Scientific Reports</a>

                                                        </h1>

                <form method="get" action="/search/executeSearch" id="header-search" class="search-form">

                        <fieldset>                        

                                                                                                        <input id="header-keyword" name="sp-q" value="" class="txt" maxlength="200" type="search">

                                                                        <a href="http://www.nature.com/search/adv_search?sp-q-1=srep">Advanced search</a>

                                                        </fieldset>

                </form><br></div>                     <div class="top-links cleared">

                                <p class="article-type"><span class="journal-title">Scientific Reports</span><span class="divider"> | </span>Article <span class="open-access">Open</span></p>

                        </div>

                                                        <h1 class="article-heading">Statistical Laws Governing Fluctuations in Word Use from Word Birth to Word Death</h1><ul class="authors citation-authors"><li class="vcard  c1">

                                <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#auth-1" class="name"><span class="fn">Alexander M. Petersen</span></a><span class="comma">,</span>                    </li><li class="vcard">

                                <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#auth-2" class="name"><span class="fn">Joel Tenenbaum</span></a><span class="comma">,</span>                   </li><li class="vcard no-comma">

                                <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#auth-3" class="name"><span class="fn">Shlomo Havlin</span></a>                      </li><li class="vcard last-author no-comma">

                                & <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#auth-4" class="name"><span class="fn">H. Eugene Stanley</span></a>                    </li></ul>

                                        <ul id="author-links" class="cleared"><li class="first"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#affil-auth">Affiliations</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#contrib-auth">Contributions</a></li>

<li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#corres-auth">Corresponding author</a></li></ul>

        <dl class="citation"><dd class="journal-title">Scientific Reports</dd><dd class="volume">

                        2<span>,</span></dd><dt class="seq">Article number:</dt><dd class="page">313</dd><dd class="doi">doi:10.1038/srep00313</dd></dl>

        <dl class="citation dates"><dt class="received first">Received</dt><dd>

                                                                                                                        17 February 2012

                                                                                                </dd><dt class="accepted">Accepted</dt><dd>

                                                                                                                        24 February 2012

                                                                                                </dd><dt class="published-online">Published</dt><dd>

                                                                                                                        15 March 2012

                                                                                                </dd></dl>

                                                                        <div class="section first no-nav no-title first-no-nav">

                                                <div class="content">

<div class="article-tools">

        <h2 class="hidden">Article tools</h2>

        <ul class="box"><li class="print"><a id="print-link" class="track">Print</a></li><li class="sendtofriend"><a href="http://www.nature.com/srep/foxtrot/svc/mailform?doi=10.1038/srep00313&file=/srep/2012/120315/srep00313/full/srep00313.html">Email</a></li>

<li><h3 id="toggle-download-links"><a title="pdf options">pdf options</a></h3>

                                                <div class="download-popup" id="download-links">

                                                </div>

                                        </li><li class="download-citation"><a href="http://www.nature.com/srep/2012/120315/srep00313/ris/srep00313.ris">Download citation</a></li><li class="order-reprints"><a href="https://s100.copyright.com/AppDispatchServlet?publisherName=NPGR&publication=Scientific+Reports&title=Statistical+Laws+Governing+Fluctuations+in+Word+Use+from+Word+Birth+to+Word+Death&contentID=10.1038%2Fsrep00313&volumeNum=2&issueNum=&numPages=&pageNumbers=pp%24%7BnPage.startPage%7D&orderBeanReset=true&publicationDate=2012-03-15&author=Alexander+M.+Petersen%2C+Joel+Tenenbaum%2C+Shlomo+Havlin%2C+H.+Eugene+Stanley">Order reprints</a></li>

<li class="rights"><a href="https://s100.copyright.com/AppDispatchServlet?publisherName=NPG&publication=Scientific+Reports&title=Statistical+Laws+Governing+Fluctuations+in+Word+Use+from+Word+Birth+to+Word+Death&contentID=10.1038%2Fsrep00313&volumeNum=2&issueNum=&numPages=&pageNumbers=pp%24%7BnPage.startPage%7D&publicationDate=2012-03-15&cc=y&author=Alexander+M.+Petersen%2C+Joel+Tenenbaum%2C+Shlomo+Havlin%2C+H.+Eugene+Stanley">Rights and permissions</a></li>

<li>

                                <h3 id="toggle-bookmarking-links"><a title="Share/bookmark">Share/bookmark</a></h3>

                                <div class="bookmarking-popup" id="bookmarking-links">

                                </div>

                        </li></ul>

</div>

<div class="first-paragraph" id="first-paragraph"><p>We analyze the dynamic properties of 10<sup>7</sup>

 words recorded in English, Spanish and Hebrew over the period 1800–2008

 in order to gain insight into the coevolution of language and culture. 

We report language independent patterns useful as benchmarks for 

theoretical models of language evolution. A significantly decreasing 

(increasing) trend in the birth (death) rate of words indicates a recent

 shift in the selection laws governing word use. For new words, we 

observe a peak in the growth-rate fluctuations around 40 years after 

introduction, consistent with the typical entry time into standard 

dictionaries and the human generational timescale. Pronounced changes in

 the dynamics of language during periods of war show that word 

correlations, occurring across time and between words, are largely 

influenced by coevolutionary social, technological, and political 

factors. We quantify cultural memory by analyzing the long-term 

correlations in the use of individual words using detrended fluctuation 

analysis.</p></div>

                                                                <div class="article-keywords cleared">

                        <h2>Subject terms:</h2>

                        <ul><li class="first"><a href="http://www.nature.com/search/executeSearch?sp-advanced=true&sp-m=0&siteCode=srep&sp-p=all&sp-q-9[SREP]=1&sp-p-2=all&sp-p-3=all&subject=/631/181&sp-s=date_descending&sp-c=25&facets=new">Evolution</a></li>

<li><a href="http://www.nature.com/search/executeSearch?sp-advanced=true&sp-m=0&siteCode=srep&sp-p=all&sp-q-9[SREP]=1&sp-p-2=all&sp-p-3=all&subject=/639/705/531&sp-s=date_descending&sp-c=25&facets=new">Statistics</a></li>

<li><a href="http://www.nature.com/search/executeSearch?sp-advanced=true&sp-m=0&siteCode=srep&sp-p=all&sp-q-9[SREP]=1&sp-p-2=all&sp-p-3=all&subject=/639/766/25&sp-s=date_descending&sp-c=25&facets=new">Applied physics</a></li>

<li class="last"><a href="http://www.nature.com/search/executeSearch?sp-advanced=true&sp-m=0&siteCode=srep&sp-p=all&sp-q-9[SREP]=1&sp-p-2=all&sp-p-3=all&subject=/639/766/530&sp-s=date_descending&sp-c=25&facets=new">Statistical physics, thermodynamics and nonlinear dynamics</a></li>

</ul>

                </div>

<div class="figures-at-a-glance">

        <h2>Figures at a glance</h2>

        <div class="figure-browser">

                <a class="nav left inactive" tabindex="-1"><span>left</span></a><div style="width:568px" class="thumbs masking"><ol style><li style>

        <a><img id="figure-browser-thumb-srep00313-f1" src="http://www.nature.com/srep/2012/120315/srep00313/carousel/srep00313-f1.jpg" alt="" title="Word extinction." class="fig" style="width: 143px; height: 100px;"></a>

                                        </li><li style>

        <a><img id="figure-browser-thumb-srep00313-f2" src="http://www.nature.com/srep/2012/120315/srep00313/carousel/srep00313-f2.jpg" alt="" title="Dramatic shift in the birth rate and death rate of words." class="fig" style="width: 142px; height: 100px;"></a>

                                        </li><li style>

        <a><img id="figure-browser-thumb-srep00313-f3" src="http://www.nature.com/srep/2012/120315/srep00313/carousel/srep00313-f3.jpg" alt="" title="Survival of the fittest in the entry process of words." class="fig" style="width: 147.5px; height: 100px;"></a>

                                        </li><li style>

        <a><img id="figure-browser-thumb-srep00313-f4" src="http://www.nature.com/srep/2012/120315/srep00313/carousel/srep00313-f4.jpg" alt="" title="The significance of historical events on the evolution of language." class="fig" style="width: 134px; height: 100px;"></a>

                                        </li><li style>

        <a><img id="figure-browser-thumb-srep00313-f5" src="http://www.nature.com/srep/2012/120315/srep00313/carousel/srep00313-f5.jpg" alt="" title="Quantifying the tipping point for word use." class="fig" style="width: 65px; height: 100px;"></a>

                                        </li><li style>

        <a><img id="figure-browser-thumb-srep00313-f6" src="http://www.nature.com/srep/2012/120315/srep00313/carousel/srep00313-f6.jpg" alt="" title="Common leptokurtic growth distribution for new words and common words." class="fig" style="width: 59.5px; height: 100px;"></a>

                                        </li><li style>

        <a><img id="figure-browser-thumb-srep00313-f7" src="http://www.nature.com/srep/2012/120315/srep00313/carousel/srep00313-f7.jpg" alt="" title="Scaling in the growth rate fluctuations of words." class="fig" style="width: 76.5px; height: 100px;"></a>

                                        </li></ol></div><a class="nav right"><span>right</span></a>

        </div>    

</div>

                                                        </div>

                        </div>

                                                                        <div class="section   expanded" id="introduction">

                                                        <h1 class="section-heading toggle"><a title="Introduction">Introduction</a></h1>

                                                <div class="content">

                                                                                                                                                <ul class="section-nav cleared"><li class="current first"><span>Introduction</span></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#results">Results</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#discussion">Discussion</a></li>

<li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#methods">Methods</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#references">References</a></li><li>

<a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#acknowledgments">Acknowledgements</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#author-information">Author information</a></li>

<li class="last"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Supplementary information</a></li></ul>

                                                                <p>Statistical laws describing the properties of word use, such as Zipf's law<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref1" title="Zipf, G. K. Human Behaviour and the Principle of Least Effort: An Introduction to Human Ecology (Addison-Wesley, CambridgeMA 1949)." id="ref-link-8">1</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref2" title="Tsonis, A. A., Schultz, C. & Tsonis, P. A. Zipf's law and the structure and evolution of languages. Complexity 3, 12–13 (1997)." id="ref-link-9">2</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref3" title="Serrano, M. Á., Flammini, A. & Menczer, F. Modeling Statistical Properties of Written Text. PLoS ONE 4 (4), e5372 (2009)." id="ref-link-10">3</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref4" title="Ferrer i Cancho, R. & Solé, R. V. Two regimes in the frequency of words and the origin of complex lexicons: Zipf's law revisited. Journal of Quantitative Linguistics 8, 165–173 (2001)." id="ref-link-11">4</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref5" title="Ferrer i Cancho, R. The variation of Zipf's law in human language. Eur. Phys. J. B 44, 249–257 (2005)." id="ref-link-12">5</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref6" title="Ferrer i Cancho, R. & Solé, R. V. Least effort and the origins of scaling in human language. Proc. Natl. Acad. Sci. USA 100, 788–791(2003)." id="ref-link-13">6</a></sup> and Heaps' law<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref7" title="Heaps, H. S. Information Retrieval: Computational and Theoretical Aspects. (Academic Press, New York NY, 1978)." 
id="ref-link-14">7</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref8" title="Bernhardsson, S., Correa da Rocha, L. E. & Minnhagen, P. The meta book and size-dependent properties of written language. New J. of Physics 11, 123015 (2009)." id="ref-link-15">8</a></sup>,

 have been thoroughly tested and modeled. These statistical laws are 

based on static snapshots of written language using empirical data 

aggregated over relatively small time periods and comprised of 

relatively small corpora ranging in size from individual texts<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref1" title="Zipf, G. K. Human Behaviour and the Principle of Least Effort: An Introduction to Human Ecology (Addison-Wesley, CambridgeMA 1949)." id="ref-link-16">1</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref2" title="Tsonis, A. A., Schultz, C. & Tsonis, P. A. Zipf's law and the structure and evolution of languages. Complexity 3, 12–13 (1997)." id="ref-link-17">2</a></sup> to relatively small collections of topical texts<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref3" title="Serrano, M. Á., Flammini, A. & Menczer, F. Modeling Statistical Properties of Written Text. PLoS ONE 4 (4), e5372 (2009)." id="ref-link-18">3</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref4" title="Ferrer i Cancho, R. & Solé, R. V. Two regimes in the frequency of words and the origin of complex lexicons: Zipf's law revisited. Journal of Quantitative Linguistics 8, 165–173 (2001)." id="ref-link-19">4</a></sup>.

 However, language is a fundamentally dynamic complex system, consisting

 of heterogeneous entities at the level of the units (words) and the 

interacting users (us). Hence, we begin this paper with two questions: 

(i) Do languages exhibit dynamical patterns? (ii) Do individual words 

exhibit dynamical patterns?</p><p>The coevolutionary nature of language 

requires analysis both at the macro and micro scale. Here we apply 

interdisciplinary concepts to empirical language data collected in a 

massive book digitization effort by <i>Google Inc.</i>, which recently 

unveiled a database of words in seven languages, after having scanned 

approximately 4% of the world's books. The massive “n-gram” project<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref9" title="Google n-gram project. http://ngrams.googlelabs.com" id="ref-link-20">9</a></sup>

 allows for a novel view into the growth dynamics of word use and the 

birth and death processes of words in accordance with evolutionary 

selection laws<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref10" title="Nowak, M. A. Evolutionary Dynamics: exploring the equations of life (BelknapHarvard, Cambridge MA, 2006)." id="ref-link-21">10</a></sup>.</p>

<p>A recent analysis of this database by Michel et al.<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref11" title="Michel, J.-B. et al. Quantitative Analysis of Culture Using Millions of Digitized Books. Science 331, 176–182 (2011)." id="ref-link-22">11</a></sup>

 addresses numerous well-posed questions rooted in cultural anthropology

 using case studies of individual words. Here we take an alternative 

approach by analyzing the <i>aggregate</i> properties of the language dynamics recorded in the <i>Google Inc.</i>

 data in a systematic way, using the word counts of every word recorded 

over the 209-year time period 1800 – 2008 in the English, Spanish, and 

Hebrew text corpora. This period spans the incredibly rich cultural 

history that includes several international wars, revolutions, and 

numerous technological paradigm shifts. Together, the data comprise over

 1 × 10<sup>7</sup> distinct words. We use concepts from economics to 

gain quantitative insights into the role of exogenous factors on the 

evolution of language, combined with methods from statistical physics to

 quantify the competition arising from correlations between words<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref12" title="Sigman, M. & Cecchi, G. A. Global organization of the Wordnet lexicon. Proc. Natl. Acad. Sci. 99, 1742–1747 (2002)." id="ref-link-23">12</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref13" title="Steyvers, M. & Tenenbaum, J. B. The large-scale structure of semantic networks: statistical analyses and a model of semantic growth. Cogn. Sci. 29 41–78 (2005)." id="ref-link-24">13</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref14" title="Alvarez-Lacalle, E., Dorow, B., Eckmann, J.-P. & Moses, E. Hierarchical structures induce long-range dynamical correlations in written texts. Proc. Natl. Acad. Sci. 103, 7956–7961 (2006)." id="ref-link-25">14</a></sup> and the memory-driven autocorrelations in <i>u<sub>i</sub></i>(<i>t</i>) across time<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref15" title="Montemurro, M. A. & Pury, P. A. Long-range fractal correlations in literary corpora. Fractals 10, 451–461 (2002)." id="ref-link-26">15</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref16" title="Corral, A., Ferrer i Cancho, R. & Diaz-Guilera, A. Universal complex structures in written language. e-print, arXiv:0901.2924v1 (2009)." id="ref-link-27">16</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref17" title="Altmann, E. G., Pierrehumbert, J. B. & Motter, A. E. Beyond word frequency: bursts, lulls, and scaling in the temporal distributions of words. PLoS ONE 4, e7678 (2009)." id="ref-link-28">17</a></sup>.</p>

<p>For

 each corpus comprising millions of distinct words, we use a general 

word-count framework which accounts for the underlying growth of 

language over time. We first define the quantity <i>u<sub>i</sub></i>(<i>t</i>) as the number of uses of word <i>i</i> in year <i>t</i>.

 Since the number of books and the number of distinct words have grown 

dramatically over time, we define the relative word use, <i>f<sub>i</sub></i>(<i>t</i>), as the fraction of uses of word <i>i</i> out of all word uses in the same year, </p><p>    

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m1.jpg" alt="f_i(t) \equiv u_i(t) / N_u(t)" class="align-middle" style="width:466px;height:19px;">

</p><p>where the quantity   

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m15.jpg" alt="N_u(t) \equiv \sum_{i=1}^{N_w(t)} u_i(t)" class="align-middle" style="width:147px;height:25px;">

is the total number of indistinct word uses digitized from books printed in year <i>t</i> and <i>N<sub>w</sub></i>(<i>t</i>) is the total number of distinct words digitized from books printed in year <i>t</i>.

 To quantify the dynamic properties of word prevalence at the micro 

scale and their relation to socio-political factors at the macro scale, 

we analyze the logarithmic growth rate commonly used in finance and 

economics, </p><p>  

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m2.jpg" alt="r_i(t) \equiv \ln f_i(t + \Delta t) - \ln f_i(t) = \ln\left(\frac{f_i(t + \Delta t)}{f_i(t)}\right)" class="align-middle" style="width:413px;height:44px;">

</p><p>Here we analyze the single year growth rates, Δ<i>t</i>≡1.</p><p>The relative use <i>f<sub>i</sub></i>(<i>t</i>)

 depends on the intrinsic grammatical utility of the word (related to 

the number of “proper” sentences that can be constructed using the 

word), the semantic utility of the word (related to the number of 

meanings a given word can convey), and other idiosyncratic details 

related to topical context. Neutral null models for the evolution of 

language define the relative use of a word as its “fitness”<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref18" title="Blythe, R. A. Neutral evolution: a null model for language dynamics. To appear in ACS Advances in Complex Systems." id="ref-link-29">18</a></sup>.

 In such models, the word frequency is the only factor determining the 

survival capacity of a word. In reality, word competition depends on 

more subtle features of language, such as the cognitive aspects of 

efficient communication. For example, the emergence of robust 

categorical naming patterns observed across many cultures is regarded to

 be the result of complex discrimination tactics shared by intelligent 

communicators. This is evident in the finite set of words describing the

 continuous spectrum of color names, emotional states, and other 

categorical sets<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref19" title="Loreto, V., Baronchelli, A., Mukherjee, A., Puglisi, A. & Tria, F. Statistical physics of language dynamics. J. Stat. Mech. 2011, P04006 (2011)." id="ref-link-30">19</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref20" title="Baronchelli, A., Loreto, V. & Steels, L. In-depth analysis of the Naming Game dynamics: the homogenous mixing case. Int. J. of Mod. Phys. C 19, 785–812 (2008)." id="ref-link-31">20</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref21" title="Puglisi, A., Baronchelli, A. & Loreto, V. Cultural route to the emergence of linguistic categories. Proc. Natl. Acad. Sci. 105, 7936–7940 (2008)." id="ref-link-32">21</a></sup>.</p>

<p>In

 our analysis we treat words with equivalent meanings but with different

 spellings (e.g. color versus colour) as distinct words, since we view 

the competition among synonyms and alternative spellings in the 

linguistic arena as a key ingredient in complex evolutionary dynamics<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref10" title="Nowak, M. A. Evolutionary Dynamics: exploring the equations of life (BelknapHarvard, Cambridge MA, 2006)." id="ref-link-33">10</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref22" title="Nowak, M. A., Komarova, N. L. & Niyogi, P. Computational and evolutionary aspects of language. Nature 417, 611–617 (2002)." id="ref-link-34">22</a></sup>.

 For instance, with the advent of automatic spell-checkers in the 

digital era, words recognized by spell-checkers receive a significant 

boost in their “reproductive fitness” at the expense of their misspelled

 or unstandardized counterparts.</p><p>In the linguistic arena, not just “defective” words die; even significantly used words can become extinct. <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f1">Fig. 1</a>

 shows three once-significant words: “Radiogram,” “Roentgenogram,” and 

“Xray”. These words compete for the majority share of nouns referring to

 what is now commonly known as an “X-ray” (note that such dashes are 

discarded in Google's digitization process). The word “Roentgenogram” 

has since become extinct, even though it was the most common term for 

several decades in the 20th century. It is likely that two main factors –

 (i) communication and information efficiency bias toward the use of 

shorter words<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref23" title="Piantadosi, S. T., Tily, H. & Gibson, E. Word lengths are optimized for efficient communication. Proc. Natl. Acad. Sci. USA 108, 3526–3529 (2011)." id="ref-link-35">23</a></sup>

 and (ii) the adoption of English as the leading global language for 

science – secured the eventual success of the word “Xray” by the year 

1980. It goes without saying that there are many social and 

technological factors driving language change.</p>                                                                

        <div class="figure cleared" id="f1">

                                        <span class="legend">

                                Figure 1: Word extinction.                      </span>

                        <div class="figure-content box">

                                                                                                                                                                                                                                                        <a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F1.html" class="fig-link">        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images_article/srep00313-f1.jpg" alt="Word extinction." class="fig" style="width:600px;height:420px;">

</a>

                                <div class="description"><p>The English word “Roentgenogram” derives

 from the Nobel prize winning scientist and discoverer of the X-ray, 

Wilhelm Röntgen (1845–1923). The prevalence of this word was quickly 

challenged by two main competitors, “X-ray” (recorded as “Xray” in the 

database) and “Radiogram.” The arithmetic mean frequency of these three 

time series is relatively constant over the 80-year period 1920–2000, 〈<i> f </i>〉 ≈ 10<sup>–7</sup>,

 illustrating the limited linguistic “market share” that can be achieved

 by any competitor. We conjecture that the main reason “Xray” has a 

higher frequency is due to the “fitness gain” from its efficient short 

word length and also due to the fact that English has become the base 

language for scientific publication.</p></div>

                                        <ul class="resources"><li class="full"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F1.html">Full size image (233 KB)</a></li></ul>

                        </div>

                                                        <ul class="figure-nav"><li class="index"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_ft.html">Figures index</a></li><li class="next down-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f2" title="Dramatic shift in the birth rate and death rate of words.">Next figure</a></li>

</ul>

        </div>

        <p>We begin this paper by analyzing the vocabulary growth of each 

language over time. We then analyze the lifetime growth trajectories of 

the set of words that are new to each language to gain quantitative 

insight into “infant” and “adult” stages of individual words. Using two 

sets of words, (i) the relatively new words, and (ii) the most common 

words, we analyze the statistical properties of word growth. 

Specifically, we calculate the probability density function <i>P</i>(<i>r</i>) of growth rate <i>r</i> and calculate the size-dependence of the standard deviation <i>σ</i>(<i>r</i>)

 of growth rates. In order to gain insight into the long-term cultural 

memory, we conclude the analysis by measuring the autocorrelations in 

word use by applying detrended fluctuation analysis (DFA) to individual <i>f<sub>i</sub></i>(<i>t</i>).</p>

                                                        </div>

                        </div>

                                                                        <div class="section   expanded" id="results">

                                                        <h1 class="section-heading toggle"><a title="Results">Results</a></h1>

                                                <div class="content">

                                                                                                                                                <ul class="section-nav cleared"><li class="first"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#introduction">Introduction</a></li><li class="current"><span>Results</span></li>

<li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#discussion">Discussion</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#methods">Methods</a></li><li>

<a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#references">References</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#acknowledgments">Acknowledgements</a></li>

<li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#author-information">Author information</a></li><li class="last"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Supplementary information</a></li>

</ul>

                                                                <h2>Quantifying the birth rate and the death rate of words</h2><p>Just

 as a new species can be born into an environment, a word can emerge in a

 language. Evolutionary selection laws can apply pressure on the 

sustainability of new words since there are limited resources (topics, 

books, etc.) for the use of words. Along the same lines, old words can 

be driven to extinction when cultural and technological factors limit 

the use of a word, in analogy to the environmental factors that can 

change the survival capacity of a living species by altering its ability

 to survive and reproduce.</p><p>We define the birth year <i>y</i><sub>0,<i>i</i></sub> as the year <i>t</i> corresponding to the first instance of         

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m16.jpg" alt="" class="align-middle" style="width:104px;height:19px;">

, where         

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m17.jpg" alt="" class="align-middle" style="width:21px;height:19px;">

 is the median word use     

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m18.jpg" alt="" class="align-middle" style="width:141px;height:20px;">

 of a given word over its recorded lifetime in the <i>Google</i> database. Similarly, we define the death year <i>y<sub>f,i</sub></i> as the last year <i>t</i> during which the word use satisfies     

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m19.jpg" alt="" class="align-middle" style="width:105px;height:19px;">

. We use the relative word use threshold        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m20.jpg" alt="" class="align-middle" style="width:53px;height:19px;">

 in order to avoid anomalies arising from extreme fluctuations in <i>f<sub>i</sub></i>(<i>t</i>) over the lifetime of the word. The results obtained using threshold        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m21.jpg" alt="" class="align-middle" style="width:53px;height:19px;">

 did not show a significant qualitative difference.</p><p>The significance of word births Δ<i><sub>b</sub></i>(<i>t</i>) and word deaths Δ<i><sub>d</sub></i>(<i>t</i>) for each year <i>t</i> is related to the vocabulary size <i>N<sub>w</sub></i>(<i>t</i>) of a given language. We define the birth rate <i>γ<sub>b</sub></i> and death rate <i>γ<sub>d</sub></i> by normalizing the number of births Δ<i><sub>b</sub></i>(<i>t</i>) and deaths Δ<i><sub>d</sub></i>(<i>t</i>) in a given year <i>t</i> to the total number of distinct words <i>N<sub>w</sub></i>(<i>t</i>) recorded in the same year <i>t</i>, so that </p>

<p>       

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m3.jpg" alt="" class="align-middle" style="width:315px;height:51px;">

</p><p>This definition yields a proxy for the rate of emergence and 

disappearance of words. We restrict our analysis to words with 

birth-death duration <i>y<sub>f,i</sub></i> − <i>y</i><sub>0,<i>i</i></sub> + 1 ≥ 2 years and to words with first recorded use <i>t</i><sub>0,<i>i</i></sub> ≥ 1700, which selects for relatively new words in the history of a language.</p>

<p>The <i>γ<sub>b</sub></i>(<i>t</i>) and <i>γ<sub>d</sub></i>(<i>t</i>) time series plotted in <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f2">Fig. 2</a>

 for the 200-year period 1800–2000 show trends that intensify after 

the 1950s. The modern era of publishing, which is characterized by more 

strict editing procedures at publishing houses, computerized word 

editing and automatic spell-checking technology, shows a drastic 

increase in the death rate of words. Using visual inspection we verify 

most changes to the vocabulary in the last 10–20 years are due to the 

extinction of misspelled words and nonsensical print errors, and to the 

decreased birth rate of new misspelled variations and genuinely new 

words. This phenomenon reflects the decreasing marginal need for new 

words, consistent with the sub-linear Heaps' law observed for all Google

 1-gram corpora in<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref24" title="Petersen, A. M., Tenenbaum, J., Havlin, S. & Stanley, H. E. In: preparation, see the SI materials for the e-print: arXiv:1107.3707 Version 1." id="ref-link-36">24</a></sup>. Moreover, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f3">Fig. 3</a> shows that <i>γ<sub>b</sub></i>(<i>t</i>) is largely comprised of words with relatively large <i>f</i> while <i>γ<sub>d</sub></i>(<i>t</i>) is almost entirely comprised of words with relatively small <i>f</i> (see also <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Fig. S1</a> in the <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Supplementary Information</a> (SI) text). Thus, the new words of tomorrow are likely to be core words that are widely used.</p>

        <div class="figure cleared" id="f2">

                                        <span class="legend">

                                Figure 2: Dramatic shift in the birth rate and death rate of words.                     </span>

                        <div class="figure-content box">

                                                                                                                                                                                                                                                        <a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F2.html" class="fig-link">        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images_article/srep00313-f2.jpg" alt="Dramatic shift in the birth rate and death rate of words." class="fig" style="width:600px;height:423px;">

</a>

                                <div class="description"><p>The word birth rate <i>γ<sub>b</sub></i>(<i>t</i>) and the word death rate <i>γ<sub>d</sub></i>(<i>t</i>)

 show marked underlying changes in word use competition which affects 

the entry rate and the sustainability of existing words. The modern 

print era shows a marked increase in the death rate of words which 

likely correspond to low fitness, misspelled and (technologically) 

outdated words. A simultaneous decrease in the birth rate of new words 

is consistent with the decreasing marginal need for new words indicated 

by the sub-linear allometric scaling between vocabulary size and total 

corpus size (Heaps' law)<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref24" title="Petersen, A. M., Tenenbaum, J., Havlin, S. & Stanley, H. E. In: preparation, see the SI materials for the e-print: arXiv:1107.3707 Version 1." id="ref-link-1">24</a></sup>.

 Interestingly, we quantitatively observe the impact of the Balfour 

Declaration in 1917, the circumstances surrounding which effectively 

rejuvenated Hebrew as a national language, resulting in a 5-fold 

increase in the birth rate of words in the Hebrew corpus.</p></div>

                                        <ul class="resources"><li class="full"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F2.html">Full size image (351 KB)</a></li></ul>

                        </div>

                                                        <ul class="figure-nav"><li class="prev up-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f1" title="Word extinction.">Previous figure</a></li><li class="index"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_ft.html">Figures index</a></li>

<li class="next down-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f3" title="Survival of the fittest in the entry process of words.">Next figure</a></li></ul>

        </div>

        <div class="figure cleared" id="f3">

                                        <span class="legend">

                                Figure 3: Survival of the fittest in the entry process of words.                        </span>

                        <div class="figure-content box">

                                                                                                                                                                                                                                                        <a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F3.html" class="fig-link">        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images_article/srep00313-f3.jpg" alt="Survival of the fittest in the entry process of words." class="fig" style="width:600px;height:407px;">

</a>

                                <div class="description"><p>Trends in the relative uses of words 

that either were born or died in a given year show that the entry-exit 

forces largely depend on the relative use of the word. For the English 

corpus, we calculate the average of the median lifetime relative use, 

〈Med(<i>f<sub>i</sub></i>)〉, for all words born in year <i>t</i> (top panel) and for all words that died in year <i>t</i>

 (bottom panel), which shows a 5-year moving average (dashed black 

line). There is a dramatic increase in the relative use (“utility”) of 

newborn words over the last 20–30 years, likely corresponding to new 

technical terms, which are necessary for the communication of core 

modern technology and ideas. Conversely, with higher editorial standards

 and the recent use of word processors which include spelling 

standardization technology, the words that are dying are those words 

with low relative use. We confirm by visual inspection that the lists of

 dying words contain mostly misspelled and nonsensical words.</p></div>

                                        <ul class="resources"><li class="full"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F3.html">Full size image (250 KB)</a></li></ul>

                        </div>

                                                        <ul class="figure-nav"><li class="prev up-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f2" title="Dramatic shift in the birth rate and death rate of words.">Previous figure</a></li>

<li class="index"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_ft.html">Figures index</a></li><li class="next down-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f4" title="The significance of historical events on the evolution of language.">Next figure</a></li>

</ul>

        </div>

        <p>We note that the main source of error in the calculation of birth 

and death rates is OCR (optical character recognition) errors in the 

digitization process, which could be responsible for a significant 

fraction of misspelled and nonsensical words existing in the data. An 

additional source of error is the variety of orthographic properties of 

language that can make very subtle variations of words, for example 

through the use of hyphens and capitalization, appear as distinct words 

when applying OCR. The digitization of many books in the computer era 

does not require OCR transfer, since the manuscripts are themselves 

digital, and so there may be a bias resulting from this recent paradigm 

shift. We confirm that the statistical patterns found using post-2000 

data are consistent with the patterns that extend back several hundred 

years<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref24" title="Petersen, A. M., Tenenbaum, J., Havlin, S. & Stanley, H. E. In: preparation, see the SI materials for the e-print: arXiv:1107.3707 Version 1." id="ref-link-37">24</a></sup>.</p>

<p>Complementary

 to the death of old words is the birth of new words, which are commonly

 associated with new social and technological trends. Topical words in 

media can display long-term persistence patterns analogous to earthquake

 shocks<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref25" title="Klimek, P., Bayer, W. & Thurner, S. The blogosphere as an excitable social medium: Richter's and Omori's Law in media coverage. Physica A 390, 3870–3875 (2011)." id="ref-link-38">25</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref26" title="Sano, Y., Yamada, K., Watanabe, H., Takayasu, H. & Takayasu, M. Empirical analysis of collective human behavior for extraordinary events in blogosphere. (preprint) arXiv:1107.4730 [physics.soc-ph]." id="ref-link-39">26</a></sup>,

 and can result in a new word having larger fitness than related 

“out-of-date” words (e.g. blog vs. log, email vs. memo). Here we show 

that a comparison of the growth dynamics between different languages can

 also illustrate the local cultural factors that influence different 

regions of the world. <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f4">Fig. 4</a>

 shows how international crises can lead to globalization of language 

through common media attention and increased lexical diffusion. Notably,

 as illustrated in <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f4">Fig. 4(a)</a>,

 we find that international conflict only perturbed the participating 

languages, while minimally affecting the languages of the 

nonparticipating regions, e.g. the Spanish speaking countries during 

WWII.</p>                                                         

        <div class="figure cleared" id="f4">

                                        <span class="legend">

                                Figure 4: The significance of historical events on the evolution of language.                   </span>

                        <div class="figure-content box">

                                                                                                                                                                                                                                                        <a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F4.html" class="fig-link">        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images_article/srep00313-f4.jpg" alt="The significance of historical events on the evolution of language." class="fig" style="width:600px;height:448px;">

</a>

                                <div class="description"><p>The standard deviation <i>σ</i>(<i>t</i>)

 of growth rates demonstrates the sensitivity of language to 

international events (e.g. World War II). For all languages there is an 

overall decreasing trend in <i>σ</i>(<i>t</i>) over the period 1850–2000. However, the increase in <i>σ</i>(<i>t</i>)

 during WWII represents a “globalization” effect, whereby societies are 

brought together by a common event and a unified media. Such contact 

between relatively isolated systems necessarily leads to information 

flow, much as in the case of thermodynamic heat flow between two 

systems, initially at different temperatures, which are then brought 

into contact. (a) <i>σ</i>(<i>t</i>) calculated for the relatively new words with <i>T<sub>i</sub></i> ≥ 100 years. The Spanish corpus does not show an increase in <i>σ</i>(<i>t</i>) during World War II, indicative of the relative isolation of South America and Spain from the European conflict. (b) <i>σ</i>(<i>t</i>) for 4 sets of relatively new words that meet the criteria <i>T<sub>i</sub></i> ≥ <i>T<sub>c</sub></i> and <i>t<sub>i</sub></i><sub>,0</sub> ≥ 1800. The oldest “new” words (<i>T<sub>c</sub></i> = 200) demonstrate the most significant increase in <i>σ</i>(<i>t</i>) during World War II, with a peak around 1945. (c) The standard deviation <i>σ</i>(<i>t</i>)

 for the most common words is decreasing with time, suggesting that they

 have saturated and are being “crowded out” by new competitors. This set

 of words meets the criterion that the average relative use exceeds a 

threshold, 〈<i>f<sub>i</sub></i>〉 ≥ <i>f<sub>c</sub></i>, which we define for each corpus. (d) We compare the variation <i>σ</i>(<i>t</i>) for relatively new English words, using <i>T<sub>i</sub></i> ≥ 100, with the 20-year moving average over the time period 1820–1988. The deviations show that <i>σ</i>(<i>t</i>)

 increases abruptly during times of conflict, such as the American 

Civil War (1861–1865), World War I (1914–1918) and World War II 

(1939–1945), and also during the 1980s and 1990s, possibly as a result 

of new digital media (e.g. the internet) which offer new environments 

for the evolutionary dynamics of word use. <i>D</i>(<i>t</i>) is the difference between the moving average and <i>σ</i>(<i>t</i>).</p></div>

                                        <ul class="resources"><li class="full"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F4.html">Full size image (335 KB)</a></li></ul>

                        </div>

                                                        <ul class="figure-nav"><li class="prev up-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f3" title="Survival of the fittest in the entry process of words.">Previous figure</a></li>

<li class="index"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_ft.html">Figures index</a></li><li class="next down-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f5" title="Quantifying the tipping point for word use.">Next figure</a></li>

</ul>

        </div>

        <h2>The lifetime trajectory of words</h2><p>Between birth and death, 

one contends with the interesting question of how the use of words 

evolves when they are “alive.” We focus our efforts toward quantifying 

the relative change in word use over time, both over the word lifetime 

and throughout the course of history. In order to analyze separately 

these two time frames, we select two sets of words: (i) relatively new 

words with “birth year” <i>t</i><sub>0,<i>i</i></sub> later than 1800, so that the relative age <i>τ</i> ≡ <i>t</i> − <i>t</i><sub>0,<i>i</i></sub> of word <i>i</i> is the number of years after the word's first occurrence in the database, and (ii) relatively common words, typically with <i>t</i><sub>0,<i>i</i></sub> &lt; 1800.</p>

<p>We analyze dataset (i) words (summary statistics in <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Table S1</a>)

 so that we can control for properties of the growth dynamics that are 

related to the various stages of a word's life trajectory (e.g. an 

“infant” phase, an “adolescent” phase, and a “mature” phase). For 

comparison with the young words, we also analyze the growth rates of 

dataset (ii) words in the next section (summary statistics in <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Table S2</a>).

 These words are presumably old enough that they are in a stable mature 

phase. We select dataset (ii) words using the criterion 〈<i>f<sub>i</sub></i>〉 ≥ <i>f<sub>c</sub></i>, where      

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m22.jpg" alt="" class="align-middle" style="width:144px;height:24px;">

 is the average relative use of the word <i>i</i> over the word's lifetime <i>T<sub>i</sub></i> = <i>t</i><sub>0,<i>f</i></sub> − <i>t</i><sub>0,<i>i</i></sub> + 1, and <i>f<sub>c</sub></i> is a cutoff threshold derived from the Zipf rank-frequency distribution<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref1" title="Zipf, G. K. Human Behaviour and the Principle of Least Effort: An Introduction to Human Ecology (Addison-Wesley, CambridgeMA 1949)." id="ref-link-40">1</a></sup> calculated for each corpus<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref24" title="Petersen, A. M., Tenenbaum, J., Havlin, S. & Stanley, H. E. In: preparation, see the SI materials for the e-print: arXiv:1107.3707 Version 1." id="ref-link-41">24</a></sup>. In <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Table S3</a> we summarize the entire data for the 209-year period 1800–2008 for each of the four <i>Google</i> language sets analyzed.</p>

<p>Modern

 words typically are born in relation to technological or cultural 

events, e.g. “Antibiotics.” We ask if there exists a characteristic time

 for a word's general acceptance. In order to search for patterns in the

 growth rates as a function of relative word age, for each new word <i>i</i> at its age <i>τ</i> , we analyze the “use trajectory” <i>f<sub>i</sub></i>(<i>τ</i>) and the “growth rate trajectory” <i>r<sub>i</sub></i>(<i>τ</i>). So that we may combine the individual trajectories of words of varying prevalence, we normalize each <i>f<sub>i</sub></i>(<i>τ</i>) by its average 〈<i>f<sub>i</sub></i>〉, obtaining a normalized use trajectory         

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m23.jpg" alt="" class="align-middle" style="width:115px;height:21px;">

. We perform an analogous normalization procedure for each <i>r<sub>i</sub></i>(<i>τ</i>), normalizing instead by the growth rate standard deviation <i>σ</i>[<i>r<sub>i</sub></i>], so that  

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m24.jpg" alt="" class="align-middle" style="width:124px;height:19px;">

 (see the Methods section for further detailed description).</p><p>Since

 some words will die and other words will increase in use as a result of

 the standardization of language, we hypothesize that the average growth

 rate trajectory will show large fluctuations around the time scale for 

the transition of a word into regular use. In order to quantify this 

transition time scale, we create a subset {<i>i</i> |<i>T<sub>c</sub></i>} of word trajectories <i>i</i> by combining words that meet an age criterion <i>T<sub>i</sub></i> ≥ <i>T<sub>c</sub></i>. Thus, <i>T<sub>c</sub></i>

 is a threshold to distinguish words that were born in different 

historical eras and which have varying longevity. For the values <i>T<sub>c</sub></i> = 25, 50, 100, and 200 years, we select all words that have a lifetime longer than <i>T<sub>c</sub></i> and calculate the average and standard deviation for each set of growth rate trajectories as a function of word age <i>τ</i>.</p>

<p>In <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f5">Fig. 5</a> we plot  

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m25.jpg" alt="" class="align-middle" style="width:76px;height:19px;">

 for the English corpus, which shows a broad peak around <i>τ<sub>c</sub></i> ≈ 30–50 years for each <i>T<sub>c</sub></i>

 subset before the fluctuations saturate after the word enters a stable 

growth phase. A similar peak is observed for each corpus analyzed (<a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Figs. S4–S7</a>).

 This single-peak growth trajectory is consistent with theoretical 

models for logistic spreading and the fixation of words in a population 

of learners<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref27" title="Solé, R. V., Corominas-Murtra, B. & Fortuny, J. Diversity, competition, extinction: the ecophysics of language change. J. R. Soc. Interface 7, 1647–1664 (2010)." id="ref-link-42">27</a></sup>. Also, since we weight the average according to 〈<i>f<sub>i</sub></i>〉, the time scale <i>τ<sub>c</sub></i>

 is likely associated with the characteristic time for a new word to 

reach sufficiently wide acceptance that the word is included in a 

typical dictionary. We note that this time scale is close to the 

generational time scale for humans, corroborating evidence that 

languages require only one generation to drastically evolve<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref27" title="Solé, R. V., Corominas-Murtra, B. & Fortuny, J. Diversity, competition, extinction: the ecophysics of language change. J. R. Soc. Interface 7, 1647–1664 (2010)." id="ref-link-43">27</a></sup>.</p>

        <div class="figure cleared" id="f5">

                                        <span class="legend">

                                Figure 5: Quantifying the tipping point for word use.                   </span>

                        <div class="figure-content box">

                                                                                                                                                                                                                                                        <a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F5.html" class="fig-link">        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images_article/srep00313-f5.jpg" alt="Quantifying the tipping point for word use." class="fig" style="width:600px;height:922px;">

</a>

                                <div class="description"><p>(a) The maximum in the standard deviation <i>σ</i> of growth rates during the “adolescent” period <i>τ</i>

 ≈ 30–50 indicates the characteristic time scale for words being 

incorporated into the standard lexicon, i.e. inclusion in popular 

dictionaries. In <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Fig. S4</a> we plot the average growth rate trajectory 〈<i>r</i>′(<i>τ</i>|<i>T<sub>c</sub></i>)〉 which shows relatively large positive growth rates during approximately the same 20-year period. (b) The first passage time <i>τ</i><sub>1</sub><sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref53" title="Redner, S. A Guide to First-Passage Processes. (Cambridge University Press, New York, 2001)." id="ref-link-2">53</a></sup> is defined as the number of years for the relative use of a new word <i>i</i> to exceed a given <i>f</i>-value for the first time, <i>f<sub>i</sub></i>(<i>τ</i><sub>1</sub>) ≥ <i>f</i>. For relatively new words with <i>T<sub>i</sub></i> ≥ 100 years we calculate the average first-passage time 〈<i>τ</i><sub>1</sub>(<i>f</i>)〉 for a large range of <i>f</i>. We estimate for each language the <i>f<sub>c</sub></i> representing the threshold for a word belonging to the standard “kernel” lexicon<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref4" title="Ferrer i Cancho, R. & Solé, R. V. Two regimes in the frequency of words and the origin of complex lexicons: Zipf's law revisited. Journal of Quantitative Linguistics 8, 165–173 (2001)." id="ref-link-3">4</a></sup>. This method demonstrates that the English corpus threshold <i>f<sub>c</sub></i> ≡ 5 × 10<sup>–8</sup> maps to the first passage time corresponding to the peak period <i>τ</i> ≈ 30 – 50 years in <i>σ</i>(<i>τ</i>) shown in panel (a).</p>

</div>

                                        <ul class="resources"><li class="full"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F5.html">Full size image (502 KB)</a></li></ul>

                        </div>

                                                        <ul class="figure-nav"><li class="prev up-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f4" title="The significance of historical events on the evolution of language.">Previous figure</a></li>

<li class="index"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_ft.html">Figures index</a></li><li class="next down-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f6" title="Common leptokurtic growth distribution for new words and common words.">Next figure</a></li>

</ul>

        </div>

        <h2>Empirical laws quantifying the growth rate distribution</h2><p>How 

much do the growth rates vary from word to word? The answer to this 

question can help distinguish between candidate models for the evolution

 of word utility. Hence, we calculate the probability density function 

(pdf) of        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m26.jpg" alt="" class="align-middle" style="width:147px;height:22px;">

. Using this quantity accounts for the fact that we are aggregating growth rates of words of varying ages. The empirical pdf <i>P</i>(<i>R</i>) shown in <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f6">Fig. 6</a> is leptokurtic and remarkably symmetric around <i>R</i> ≈ 0. These empirical facts are also observed in studies of the growth rates of economic institutions<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref28" title="Amaral, L. A. N. et al. Scaling Behavior in Economics: I. Empirical Results for Company Growth. J. Phys. I France 7, 621–633 (1997)." id="ref-link-44">28</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref29" title="Fu, D. et al. The growth of business firms: Theoretical framework and empirical evidence. Proc. Natl. Acad. Sci. 102, 18801–18806 (2005)." id="ref-link-45">29</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref30" title="Stanley, M. H. R. et al. Scaling behaviour in the growth of companies. Nature 379, 804–806 (1996)." id="ref-link-46">30</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref31" title="Canning, D. et al. Scaling the volatility of gdp growth rates. Economic Letters 60, 335–341 (1998)." id="ref-link-47">31</a></sup>. Since the <i>R</i> values are normalized and detrended according to the age-dependent standard deviation <i>σ</i>[<i>r′</i>(<i>τ</i>|<i>T<sub>c</sub></i>)], the standard deviation is <i>σ</i>(<i>R</i>) = 1 by construction.</p>

        <div class="figure cleared" id="f6">

                                        <span class="legend">

                                Figure 6: Common leptokurtic growth distribution for new words and common words.                        </span>

                        <div class="figure-content box">

                                                                                                                                                                                                                                                        <a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F6.html" class="fig-link">        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images_article/srep00313-f6.jpg" alt="Common leptokurtic growth distribution for new words and common words." class="fig" style="width:600px;height:1005px;">

</a>

                                <div class="description"><p>(a) Independent of language, the growth 

rates of relatively new words are distributed according to the Laplace 

distribution centered around <i>R</i> ≈ 0 defined in Eq. (4). The growth rate <i>R</i>

 defined in Eq. (11) is measured in units of standard deviation, and 

accounts for age-dependent and word-dependent factors. Yet, even with 

these normalizations, we still observe an excess number of |<i>R</i>| ≥ 3<i>σ</i> events. This fact is demonstrated by the leptokurtic form of each <i>P</i>(<i>R</i>),

 which exhibits the excess tail frequencies when compared with a 

unit-variance Gaussian distribution (dashed blue curve). The Gaussian 

distribution is the predicted distribution for the Gibrat proportional 

growth model, which is a candidate neutral null-model for the growth 

dynamics of word use<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref29" title="Fu, D. et al. The growth of business firms: Theoretical framework and empirical evidence. Proc. Natl. Acad. Sci. 102, 18801–18806 (2005)." id="ref-link-4">29</a></sup>.

 The prevalence of large growth rates illustrates the possibility that 

words can have large variations in use even over the course of a year. 

The growth variations are intrinsically related to the dynamics of 

everyday life and reflect the cultural and technological shocks in 

society. We analyze word use data over the time period 1800–2008 for new

 words <i>i</i> with lifetimes <i>T<sub>i</sub></i> ≥ <i>T<sub>c</sub></i>, where we show data calculated for <i>T<sub>c</sub></i> = 100 years. (b) PDF <i>P</i>(<i>r</i>′) of the annual relative growth rate <i>r</i>′ for all words which satisfy 〈<i>f<sub>i</sub></i>〉 ≥ <i>f<sub>c</sub></i>

 (dataset #ii words which are relatively common words). In order to 

select relatively frequently used words, we use the following criteria: <i>T<sub>i</sub></i> ≥ 10 years, 1800 ≤ <i>t</i> ≤ 2008, and 〈<i>f<sub>i</sub></i>〉 ≥ <i>f<sub>c</sub></i>. The growth rate <i>r</i>′

 does not account for age-dependent factors since the common words are 

likely in the mature phase of their lifetime trajectory. In each panel, 

we plot a Laplace distribution with unit variance (solid black lines) 

and the Gaussian distribution with unit variance (dashed blue curve) for

 reference.</p></div>

                                        <ul class="resources"><li class="full"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F6.html">Full size image (501 KB)</a></li></ul>

                        </div>

                                                        <ul class="figure-nav"><li class="prev up-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f5" title="Quantifying the tipping point for word use.">Previous figure</a></li><li class="index">

<a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_ft.html">Figures index</a></li><li class="next down-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f7" title="Scaling in the growth rate fluctuations of words.">Next figure</a></li>

</ul>

        </div>

        <p>A candidate model for the growth rates of word use is the Gibrat proportional growth process<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref29" title="Fu, D. et al. The growth of business firms: Theoretical framework and empirical evidence. Proc. Natl. Acad. Sci. 102, 18801–18806 (2005)." id="ref-link-48">29</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref30" title="Stanley, M. H. R. et al. Scaling behaviour in the growth of companies. Nature 379, 804–806 (1996)." id="ref-link-49">30</a></sup>, which predicts a Gaussian distribution for <i>P</i>(<i>R</i>). However, we observe the “tent-shaped” pdf <i>P</i>(<i>R</i>) which is well-approximated by a Laplace (double-exponential) distribution, defined as </p>

<p>       

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m4.jpg" alt="" class="align-middle" style="width:397px;height:43px;">

</p><p>Here the average growth rate 〈<i>R</i>〉 has two properties: (a) 〈<i>R</i>〉 ≈ 0 and (b) 〈<i>R</i>〉 ≪ <i>σ</i>(<i>R</i>).

 Property (a) arises from the fact that the growth rate of distinct 

words is quite small on the annual basis (the growth rate of books in 

the Google English database is <i>γ<sub>w</sub></i> ≈ 0.011<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref24" title="Petersen, A. M., Tenenbaum, J., Havlin, S. & Stanley, H. E. In: preparation, see the SI materials for the e-print: arXiv:1107.3707 Version 1." id="ref-link-50">24</a></sup>) and property (b) arises from the fact that <i>R</i>

 is defined in units of standard deviation. Being leptokurtic, the 

Laplace distribution predicts an excess number of events > 3<i>σ</i> as compared to the Gaussian distribution. For example, comparing the likelihood of events above the 3<i>σ</i> event threshold, the Laplace distribution displays a five-fold excess in the probability <i>P</i>(|<i>R</i> − 〈<i>R</i>〉| > 3<i>σ</i>), where         

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m27.jpg" alt="" class="align-middle" style="width:298px;height:22px;">

 for the Laplace distribution, whereas  

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m28.jpg" alt="" class="align-middle" style="width:298px;height:22px;">

 for the Gaussian distribution. The large <i>R</i> values correspond to 

periods of rapid growth and decline in the use of words during the 

crucial “infant” and “adolescent” lifetime phases. In <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f6">Fig. 6(b)</a> we also show that the growth rate distribution <i>P</i>(<i>r′</i>) for the relatively common words comprising dataset (ii) is also well-described by the Laplace distribution.</p>

<p>For hierarchical systems consisting of units each with complex internal structure<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref32" title="Amaral, L. A. N. et al. Power Law Scaling for a System of Interacting Units with Complex Internal Structure. Phys. Rev. Lett. 80, 1385–1388 (1998)." id="ref-link-51">32</a></sup>

 (e.g. a given country consists of industries, each of which consists of

 companies, each of which consists of internal subunits), a non-trivial 

scaling relation between the standard deviation of growth rates <i>σ</i>(<i>r</i>|<i>S</i>) and the system size <i>S</i> has the form </p><p>      

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m5.jpg" alt="σ(r|S) ∼ S^(−β)   (5)" class="align-middle" style="width:290px;height:24px;">

</p><p>The theoretical prediction in<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref32" title="Amaral, L. A. N. et al. Power Law Scaling for a System of Interacting Units with Complex Internal Structure. Phys. Rev. Lett. 80, 1385–1388 (1998)." id="ref-link-52">32</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref33" title="Riccaboni, M. et al. The size variance relationship of business firm growth rates. Proc. Natl. Acad. Sci. 105, 19595–19600 (2008)." id="ref-link-53">33</a></sup> that <i>β</i> ∈ [0, 1/2] has been verified for several economic systems, with empirical <i>β</i> values typically in the range 0.1 < <i>β</i> < 0.3<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref33" title="Riccaboni, M. et al. The size variance relationship of business firm growth rates. Proc. Natl. Acad. Sci. 105, 19595–19600 (2008)." id="ref-link-54">33</a></sup>.</p>

<p>Since

 different words have varying lifetime trajectories as well as varying 

relative utilities, we now quantify how the standard deviation <i>σ</i>(<i>r</i>|<i>S<sub>i</sub></i>) of growth rates <i>r</i> depends on the cumulative word frequency </p><p>       

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m6.jpg" alt="" class="align-middle" style="width:290px;height:49px;">

</p><p>of each word. We choose this definition as a proxy for “word size” 

since a writer can learn and recall a given word through any of its 

historical uses. Hence, <i>S<sub>i</sub></i> is also proportional to the number of books in which word <i>i</i>

 appears. This is significantly different than the assumptions of 

replication null models (e.g. the Moran process) which use the 

concurrent frequency <i>f<sub>i</sub></i>(<i>t</i>) as the sole factor determining the likelihood of future replication<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref10" title="Nowak, M. A. Evolutionary Dynamics: exploring the equations of life (BelknapHarvard, Cambridge MA, 2006)." id="ref-link-55">10</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref18" title="Blythe, R. A. Neutral evolution: a null model for language dynamics. To appear in ACS Advances in Complex Systems." id="ref-link-56">18</a></sup>.</p>

<p>We estimate Eq. (5) by grouping words according to <i>S<sub>i</sub></i> and then calculating the growth rate standard deviation <i>σ</i>(<i>r</i>|<i>S<sub>i</sub></i>) for each group. <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f7">Fig. 7(b)</a> shows scaling behavior consistent with Eq. (5) for large <i>S<sub>i</sub></i>, with <i>β</i> ≈ 0.10 – 0.21 depending on the corpus. A positive <i>β</i>

 value means that words with larger cumulative word frequency have 

smaller annual growth rate fluctuations. We conjecture that this 

statistical pattern emerges from the hierarchical organization of 

written language<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref12" title="Sigman, M. & Cecchi, G. A. Global organization of the Wordnet lexicon. Proc. Natl. Acad. Sci. 99, 1742–1747 (2002)." id="ref-link-57">12</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref13" title="Steyvers, M. & Tenenbaum, J. B. The large-scale structure of semantic networks: statistical analyses and a model of semantic growth. Cogn. Sci. 29 41–78 (2005)." id="ref-link-58">13</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref14" title="Alvarez-Lacalle, E., Dorow, B., Eckmann, J.-P. & Moses, E. Hierarchical structures induce long-range dynamical correlations in written texts. Proc. Natl. Acad. Sci. 103, 7956–7961 (2006)." id="ref-link-59">14</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref15" title="Montemurro, M. A. & Pury, P. A. Long-range fractal correlations in literary corpora. Fractals 10, 451–461 (2002)." id="ref-link-60">15</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref16" title="Corral, A., Ferrer i Cancho, R. & Diaz-Guilera, A. Universal complex structures in written language. e-print, arXiv:0901.2924v1 (2009)." id="ref-link-61">16</a></sup> and the social properties of the speakers who use the words<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref8" title="Bernhardsson, S., Correa da Rocha, L. E. & Minnhagen, P. The meta book and size-dependent properties of written language. New J. of Physics 11, 123015 (2009)." id="ref-link-62">8</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref17" title="Altmann, E. G., Pierrehumbert, J. B. & Motter, A. E. Beyond word frequency: bursts, lulls, and scaling in the temporal distributions of words. PLoS ONE 4, e7678 (2009)." 
id="ref-link-63">17</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref34" title="Altmann, E. G., Pierrehumbert, J. B. & Motter, A. E. Niche as a determinant of word fate in online groups. PLoS ONE 6, e19009 (2011)." id="ref-link-64">34</a></sup>. As such, we calculate <i>β</i> values that are consistent with nontrivial correlations in word use, likely related to the basic fact that books are topical<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref3" title="Serrano, M. Á., Flammini, A. & Menczer, F. Modeling Statistical Properties of Written Text. PLoS ONE 4 (4), e5372 (2009)." id="ref-link-65">3</a></sup> and that book topics are correlated with cultural trends.</p>

        <div class="figure cleared" id="f7">

                                        <span class="legend">

                                Figure 7: Scaling in the growth rate fluctuations of words.                     </span>

                        <div class="figure-content box">

                                                                                                                                                                                                                                                        <a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F7.html" class="fig-link">        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images_article/srep00313-f7.jpg" alt="Scaling in the growth rate fluctuations of words." class="fig" style="width:600px;height:784px;">

</a>

                                <div class="description"><p>We show the dependence of growth rates on the cumulative word frequency       

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m38.jpg" alt="" class="align-middle" style="width:101px;height:20px;">

 using words that satisfy the criterion <i>T<sub>i</sub></i> ≥ 10 years. We verify similar results for threshold values <i>T<sub>c</sub></i> = 50, 100, and 200 years. (a) Average growth rate 〈<i>r</i>〉 saturates at relatively constant values for large <i>S</i>. (b) Scaling in the standard deviation of growth rates <i>σ</i>(<i>r</i>|<i>S</i>) ∼ <i>S<sup>–β</sup></i> for words with large <i>S</i>.

 This scaling relation is also observed for the growth rates of large 

economic institutions, ranging in size from companies to entire 

countries<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref31" title="Canning, D. et al. Scaling the volatility of gdp growth rates. Economic Letters 60, 335–341 (1998)." id="ref-link-5">31</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref33" title="Riccaboni, M. et al. The size variance relationship of business firm growth rates. Proc. Natl. Acad. Sci. 105, 19595–19600 (2008)." id="ref-link-6">33</a></sup>. Here this size-variance relation corresponds to scaling exponent values 0.10 < <i>β</i>

 < 0.21, which are related to the non-trivial bursting patterns and 

non-trivial correlation patterns in literature topicality as indicated 

by the quantitative relation to the Hurst exponent, <i>H</i> = 1 – <i>β</i> shown in<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref35" title="Rybski, D. et al. Scaling laws of human interaction activity. Proc. Natl. Acad. Sci. USA 106, 12640–12645 (2009)." id="ref-link-7">35</a></sup>. We calculate <i>β<sub>Eng.</sub></i> ≈ 0.16 ± 0.01, <i>β<sub>Eng.fict</sub></i> ≈ 0.21 ± 0.01, <i>β<sub>Spa.</sub></i> ≈ 0.10 ± 0.01 and <i>β<sub>Heb.</sub></i> ≈ 0.17 ± 0.01.</p>

</div>

                                        <ul class="resources"><li class="full"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_F7.html">Full size image (477 KB)</a></li></ul>

                        </div>

                                                        <ul class="figure-nav"><li class="prev up-arrow"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f6" title="Common leptokurtic growth distribution for new words and common words.">Previous figure</a></li>

<li class="index"><a href="http://www.nature.com/srep/2012/120315/srep00313/fig_tab/srep00313_ft.html">Figures index</a></li></ul>

        </div>

        <h2>Quantifying the long-term cultural memory</h2><p>Recent theoretical work<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref35" title="Rybski, D. et al. Scaling laws of human interaction activity. Proc. Natl. Acad. Sci. USA 106, 12640–12645 (2009)." id="ref-link-66">35</a></sup> shows that there is a fundamental relation between the size-variance exponent <i>β</i> and the Hurst exponent <i>H</i> quantifying the auto-correlations in a stochastic time series. The novel relation <i>H</i> = 1 − <i>β</i>

 indicates that the temporal long-term persistence is intrinsically 

related to the capability of the underlying mechanism to absorb 

stochastic shocks. Hence, positive correlations (<i>H</i> > 1/2) are predicted for non-trivial <i>β</i> values (i.e. 0 ≤ <i>β</i> ≤ 0.5). Note that the Gibrat proportional growth model predicts <i>β</i> = 0 and that a Yule-Simon urn model predicts <i>β</i> = 0.5<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref33" title="Riccaboni, M. et al. The size variance relationship of business firm growth rates. Proc. Natl. Acad. Sci. 105, 19595–19600 (2008)." id="ref-link-67">33</a></sup>. Thus, <i>f<sub>i</sub></i>(<i>τ</i>) belonging to words with large <i>S<sub>i</sub></i> are predicted to show significant positive correlations, <i>H<sub>i</sub></i> > 1/2.</p>

<p>To test this connection between memory correlations and the size-variance scaling, we calculate the Hurst exponent <i>H<sub>i</sub></i>

 for each time series belonging to the relatively common words 

analyzed in dataset (ii) using detrended fluctuation analysis (DFA)<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref35" title="Rybski, D. et al. Scaling laws of human interaction activity. Proc. Natl. Acad. Sci. USA 106, 12640–12645 (2009)." id="ref-link-68">35</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref36" title="Peng, C. K. et al. Mosaic organization of DNA nucleotides. Phys. Rev. E 49, 1685 – 1689 (1994)." id="ref-link-69">36</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref37" title="Hu, K. et al. Effect of Trends on Detrended Fluctuation Analysis. Phys. Rev. E 64, 011114 (2001)." id="ref-link-70">37</a></sup>. We plot in <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Fig. S2</a> the relative use time series <i>f<sub>i</sub></i>(<i>t</i>)

 for the words “polyphony,” “Americanism,” “Repatriation,” and 

“Antibiotics” along with DFA curves from which we calculate each <i>H<sub>i</sub></i>. <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Fig. S2(b)</a> shows that the <i>H<sub>i</sub></i> values for these four words are all significantly greater than <i>H<sub>r</sub></i> = 0.5, which is the expected Hurst exponent for a stochastic time series with no temporal correlations. In <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Fig. S3</a> we plot the distribution of <i>H<sub>i</sub></i> values for the English fiction corpus and the Spanish corpus. Our results are consistent with the theoretical prediction 〈<i>H</i>〉 = 1 − <i>β</i> established in<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref35" title="Rybski, D. et al. Scaling laws of human interaction activity. Proc. Natl. Acad. Sci. USA 106, 12640–12645 (2009)." id="ref-link-71">35</a></sup> relating the variance of growth rates to the underlying temporal correlations in each <i>f<sub>i</sub></i>(<i>t</i>).

 Hence, we show that language evolution is fundamentally related to 

the complex features of cultural memory, i.e. the dynamics of cultural 

topic formation<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref17" title="Altmann, E. G., Pierrehumbert, J. B. & Motter, A. E. Beyond word frequency: bursts, lulls, and scaling in the temporal distributions of words. PLoS ONE 4, e7678 (2009)." id="ref-link-72">17</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref25" title="Klimek, P., Bayer, W. & Thurner, S. The blogosphere as an excitable social medium: Richter's and Omori's Law in media coverage. Physica A 390, 3870–3875 (2011)." id="ref-link-73">25</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref26" title="Sano, Y., Yamada, K., Watanabe, H., Takayasu, H. & Takayasu, M. Empirical analysis of collective human behavior for extraordinary events in blogosphere. (preprint) arXiv:1107.4730 [physics.soc-ph]." id="ref-link-74">26</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref34" title="Altmann, E. G., Pierrehumbert, J. B. & Motter, A. E. Niche as a determinant of word fate in online groups. PLoS ONE 6, e19009 (2011)." id="ref-link-75">34</a></sup> and bursting<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref38" title="Barabási, A. L. The origin of bursts and heavy tails in human dynamics. Nature 435, 207–211 (2005)." id="ref-link-76">38</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref39" title="Crane, R. & Sornette, D. Robust dynamic classes revealed by measuring the response function of a social system. Proc. Natl. Acad. Sci. 105, 15649–15653 (2008)." id="ref-link-77">39</a></sup>.</p>

                                                        </div>

                        </div>

                                                                        <div class="section   expanded" id="discussion">

                                                        <h1 class="section-heading toggle"><a title="Discussion">Discussion</a></h1>

                                                <div class="content">

                                                                                                                                                <ul class="section-nav cleared"><li class="first"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#introduction">Introduction</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#results">Results</a></li>

<li class="current"><span>Discussion</span></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#methods">Methods</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#references">References</a></li>

<li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#acknowledgments">Acknowledgements</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#author-information">Author information</a></li>

<li class="last"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Supplementary information</a></li></ul>

                                                                <p>With the digitization of written language, cultural trend 

analysis based around methods to extract quantitative patterns from word

 counts is an emerging interdisciplinary field that has the potential to

 provide novel insights into human sociology<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref3" title="Serrano, M. Á., Flammini, A. & Menczer, F. Modeling Statistical Properties of Written Text. PLoS ONE 4 (4), e5372 (2009)." id="ref-link-78">3</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref17" title="Altmann, E. G., Pierrehumbert, J. B. & Motter, A. E. Beyond word frequency: bursts, lulls, and scaling in the temporal distributions of words. PLoS ONE 4, e7678 (2009)." id="ref-link-79">17</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref25" title="Klimek, P., Bayer, W. & Thurner, S. The blogosphere as an excitable social medium: Richter's and Omori's Law in media coverage. Physica A 390, 3870–3875 (2011)." id="ref-link-80">25</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref26" title="Sano, Y., Yamada, K., Watanabe, H., Takayasu, H. & Takayasu, M. Empirical analysis of collective human behavior for extraordinary events in blogosphere. (preprint) arXiv:1107.4730 [physics.soc-ph]." id="ref-link-81">26</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref34" title="Altmann, E. G., Pierrehumbert, J. B. & Motter, A. E. Niche as a determinant of word fate in online groups. PLoS ONE 6, e19009 (2011)." id="ref-link-82">34</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref40" title="Golder, S. A. & Macy, M. W. Diurnal and Seasonal Mood Vary with Work, Sleep, and Daylength Across Diverse Cultures. Science 333, 1878–1881 (2011)." id="ref-link-83">40</a></sup>.

 Nevertheless, the amount of metadata extractable from daily internet 

feeds is dizzying. This is highlighted by the practical issue of 

defining objective significance levels to filter out the noise in the 

data deluge. For example, online blogs can be vaguely categorized 

according to the coarse hierarchical schema: “obscure blogs”, “more 

popular blogs”, “pop columns”, and “mainstream news coverage.” In 

contrast, there are well-defined entry requirements for published books 

and magazines, which must meet editorial standards and conform to the 

principles of market supply and demand. However, until recently, the 

vast information captured in the annals of written language was largely 

inaccessible.</p><p>Despite the careful guard of libraries around the 

world, which house the written corpora for almost every written 

language, little is known about the aggregate dynamics of word evolution

 in written history. Inspired by research on the growth patterns 

displayed by a wide range of competition-driven systems - from countries

 and business firms<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref28" title="Amaral, L. A. N. et al. Scaling Behavior in Economics: I. Empirical Results for Company Growth. J. Phys. I France 7, 621–633 (1997)." id="ref-link-84">28</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref29" title="Fu, D. et al. The growth of business firms: Theoretical framework and empirical evidence. Proc. Natl. Acad. Sci. 102, 18801–18806 (2005)." id="ref-link-85">29</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref30" title="Stanley, M. H. R. et al. Scaling behaviour in the growth of companies. Nature 379, 804–806 (1996)." id="ref-link-86">30</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref31" title="Canning, D. et al. Scaling the volatility of gdp growth rates. Economic Letters 60, 335–341 (1998)." id="ref-link-87">31</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref32" title="Amaral, L. A. N. et al. Power Law Scaling for a System of Interacting Units with Complex Internal Structure. Phys. Rev. Lett. 80, 1385–1388 (1998)." id="ref-link-88">32</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref33" title="Riccaboni, M. et al. The size variance relationship of business firm growth rates. Proc. Natl. Acad. Sci. 105, 19595–19600 (2008)." id="ref-link-89">33</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref41" title="Buldyrev, S. V. et al. The growth of business firms: Facts and theory. J. Eur. Econ. Assoc. 5, 574–584 (2007)." id="ref-link-90">41</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref42" title="Podobnik, B. et al. Quantitative relations between risk, return, and firm size. EPL 85, 50003 (2009)." 
id="ref-link-91">42</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref43" title="Liu, Y. et al. The Statistical Properties of the Volatility of Price Fluctuations. Phys. Rev. E 60, 1390–1400 (1999)." id="ref-link-92">43</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref44" title="Lee, Y. et al. Universal Features in the Growth Dynamics of Complex Organizations. Phys. Rev. Lett. 81, 3275–3278 (1998)." id="ref-link-93">44</a></sup> to religious activities<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref45" title="Picoli Jr, S. & Mendes, R. S. Universal features in the growth dynamics of religious activities. Phys. Rev. E 77, 036105 (2008)." id="ref-link-94">45</a></sup>, universities<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref46" title="Plerou, V. et al. Similarities between the growth dynamics of university research and of competitive economic activities. Nature 400, 433–437 (1999)." id="ref-link-95">46</a></sup>, scientific journals<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref47" title="Picoli Jr, S. et al. Scaling behavior in the dynamics of citations to scientific journals. Europhys. Lett. 75, 673–679 (2006)." id="ref-link-96">47</a></sup>, careers<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref48" title="Petersen, A. M. Riccaboni, M., Stanley, H. E. Pammolli, F. Persistence and Uncertainty in the Academic Career. Proc. Natl. Acad. Sci. USA (2012) doi: 10.1073/pnas.1121429109." id="ref-link-97">48</a></sup> and bird populations<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref49" title="Keitt, T. H. & Stanley, H. E. Dynamics of North American breeding bird populations. Nature. 393, 257–260 (1998)." id="ref-link-98">49</a></sup> - here we extend the concepts and methods to word use dynamics.</p>

<p>This

 study provides empirical evidence that words are competing actors in a 

system of finite resources. Just as business firms compete for market 

share, words demonstrate the same growth statistics because they are 

competing for the use of the writer/speaker and for the attention of the

 corresponding reader/listener<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref18" title="Blythe, R. A. Neutral evolution: a null model for language dynamics. To appear in ACS Advances in Complex Systems." id="ref-link-99">18</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref19" title="Loreto, V., Baronchelli, A., Mukherjee, A., Puglisi, A. & Tria, F. Statistical physics of language dynamics. J. Stat. Mech. 2011, P04006 (2011)." id="ref-link-100">19</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref20" title="Baronchelli, A., Loreto, V. & Steels, L. In-depth analysis of the Naming Game dynamics: the homogenous mixing case. Int. J. of Mod. Phys. C 19, 785–812 (2008)." id="ref-link-101">20</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref21" title="Puglisi, A., Baronchelli, A. & Loreto, V. Cultural route to the emergence of linguistic categories. Proc. Natl. Acad. Sci. 105, 7936–7940 (2008)." id="ref-link-102">21</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref27" title="Solé, R. V., Corominas-Murtra, B. & Fortuny, J. Diversity, competition, extinction: the ecophysics of language change. J. R. Soc. Interface 7, 1647–1664 (2010)." id="ref-link-103">27</a></sup>.

 A prime example of fitness-mediated evolutionary competition is the 

case of irregular and regular verb use in English. By analyzing the 

regularization rate of irregular verbs through the history of the 

English language, Lieberman et al.<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref50" title="Lieberman, E. et al. Quantifying the evolutionary dynamics of language. Nature 449, 713–716 (2007)." id="ref-link-104">50</a></sup>

 show that the irregular verbs that are used more frequently are less 

likely to be overcome by their regular verb counterparts. Specifically, 

they find that the irregular verb death rate scales as the inverse 

square root of the word's relative use. A study of word diffusion across

 Indo-European languages shows similar frequency-dependence of word 

replacement rates<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref51" title="Pagel, M., Atkinson, Q. D. & Meade, A. Frequency of word-use predicts rates of lexical evolution throughout Indo-European history. Nature 449, 717–721 (2007)." id="ref-link-105">51</a></sup>.</p>

<p>We

 document the case example of X-ray, which shows how categorically 

related words can compete in a zero-sum game. Moreover, this competition

 does not occur in a vacuum. Instead, the dynamics are significantly 

related to diffusion and technology. Lexical diffusion occurs at many 

scales, both within relatively small groups and across nations<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref27" title="Solé, R. V., Corominas-Murtra, B. & Fortuny, J. Diversity, competition, extinction: the ecophysics of language change. J. R. Soc. Interface 7, 1647–1664 (2010)." id="ref-link-106">27</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref34" title="Altmann, E. G., Pierrehumbert, J. B. & Motter, A. E. Niche as a determinant of word fate in online groups. PLoS ONE 6, e19009 (2011)." id="ref-link-107">34</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref51" title="Pagel, M., Atkinson, Q. D. & Meade, A. Frequency of word-use predicts rates of lexical evolution throughout Indo-European history. Nature 449, 717–721 (2007)." id="ref-link-108">51</a></sup>.

 The technological forces underlying word selection have changed 

significantly over the last 20 years. With the advent of automatic 

spell-checkers in the digital era, words recognized by spell-checkers 

receive a significant boost in their “reproductive fitness” at the 

expense of their “misspelled” or unstandardized counterparts.</p><p>We 

find that the dynamics are influenced by historical context, trends in 

global communication, and the means for standardizing that 

communication. Analogous to recessions and booms in a global economy, 

the marketplace for words waxes and wanes with a global pulse as 

historical events unfold. And in analogy to financial regulations meant 

to limit risk and market domination, standardization technologies such 

as the dictionary and spell-checkers serve as powerful arbiters in 

determining the characteristic properties of word evolution. Context 

matters, and so we anticipate that niches<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref34" title="Altmann, E. G., Pierrehumbert, J. B. & Motter, A. E. Niche as a determinant of word fate in online groups. PLoS ONE 6, e19009 (2011)." id="ref-link-109">34</a></sup>

 in various language ecosystems (ranging from spoken word to 

professionally published documents to various online forms such as 

chats, tweets and blogs) have heterogeneous selection laws that may favor

 a given word in one arena but not another. Moreover, the birth and 

death rate of words and their close associates (misspellings, synonyms, 

abbreviations) depend on factors endogenous to the language domain such 

as correlations in word use to other partner words and polysemous 

contexts<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref12" title="Sigman, M. & Cecchi, G. A. Global organization of the Wordnet lexicon. Proc. Natl. Acad. Sci. 99, 1742–1747 (2002)." id="ref-link-110">12</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref13" title="Steyvers, M. & Tenenbaum, J. B. The large-scale structure of semantic networks: statistical analyses and a model of semantic growth. Cogn. Sci. 29 41–78 (2005)." id="ref-link-111">13</a></sup> as well as exogenous socio-technological factors and demographic aspects of the writers, such as age<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref13" title="Steyvers, M. & Tenenbaum, J. B. The large-scale structure of semantic networks: statistical analyses and a model of semantic growth. Cogn. Sci. 29 41–78 (2005)." id="ref-link-112">13</a></sup> and social niche<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref34" title="Altmann, E. G., Pierrehumbert, J. B. & Motter, A. E. Niche as a determinant of word fate in online groups. PLoS ONE 6, e19009 (2011)." id="ref-link-113">34</a></sup>.</p>

<p>We

 find a pronounced peak in the fluctuations of word growth rates when a 

word has reached approximately 30–50 years of age (see <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f5">Fig. 5</a>).

 We posit that this corresponds to the timescale for a word to be 

accepted into a standardized dictionary which inducts words that are 

used above a threshold frequency, consistent with the first-passage 

times to <i>f<sub>c</sub></i> in <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f5">Fig. 5(b)</a>. This is further corroborated by the characteristic baseline frequencies associated with standardized dictionaries<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref11" title="Michel, J.-B. et al. Quantitative Analysis of Culture Using Millions of Digitized Books. Science 331, 176–182 (2011)." id="ref-link-114">11</a></sup>.

 Another important timescale in evolutionary systems is the reproduction

 age of the interacting gene or meme host. Interestingly, a 30–50 year 

timescale is roughly equal to the characteristic human generational time

 scale. The prominent role of a new generation of speakers in language 

evolution has precedent in linguistics. For example, it has been shown 

that primitive pidgin languages, which are little more than crude mixes 

of parent languages, spontaneously acquire the full range of complex 

syntax and grammar once they are learned by the children of a community 

as a native language. It is at this point a pidgin becomes a creole, in a

 process referred to as nativization<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref22" title="Nowak, M. A., Komarova, N. L. & Niyogi, P. Computational and evolutionary aspects of language. Nature 417, 611–617 (2002)." id="ref-link-115">22</a></sup>.</p>

<p>Nativization

 also had a prominent effect in the revival of the Hebrew language, a 

significant historical event which also manifests prominently in our 

statistical analysis. The birth rate of new words in the Hebrew language

 jumped by a factor of 5 in just a few short years around 1920 following

 the Balfour Declaration of 1917 and the Second Aliyah immigration to 

Israel. The combination of new Hebrew-speaking communities and political

 endorsement of a national homeland for the Jewish people in the 

Palestine Mandate had two resounding effects: (i) the Hebrew language, 

hitherto used largely only for (religious) writing, gained official 

status as a modern spoken language, and (ii) a centralized culture 

emerged from this national community. The unique history of the Hebrew 

language in concert with the <i>Google Inc.</i> books data thus provide 

an unprecedented opportunity to quantitatively study the emerging 

dynamics of what is, in some regards, a new language.</p><p>The impact 

of historical context on language dynamics is not limited to emerging 

languages, but extends to languages that have been active and evolving 

continuously for a thousand years. We find that historical episodes can 

drastically perturb the properties of existing languages over large time

 scales. Moreover, recent studies show evidence for short-timescale 

cascading behavior in blog trends<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref25" title="Klimek, P., Bayer, W. & Thurner, S. The blogosphere as an excitable social medium: Richter's and Omori's Law in media coverage. Physica A 390, 3870–3875 (2011)." id="ref-link-116">25</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref26" title="Sano, Y., Yamada, K., Watanabe, H., Takayasu, H. & Takayasu, M. Empirical analysis of collective human behavior for extraordinary events in blogosphere. (preprint) arXiv:1107.4730 [physics.soc-ph]." id="ref-link-117">26</a></sup>,

 analogous to the aftershocks following earthquakes and the cascades of 

market volatility following financial news announcements<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref52" title="Petersen, A. M., Wang, F., Havlin, S. & Stanley, H. E. Quantitative law describing market dynamics before and after interest-rate change. Phys. Rev. E 81, 066121 (2010)." id="ref-link-118">52</a></sup>.

 The nontrivial autocorrelations and the leptokurtic growth 

distributions demonstrate the significance of exogenous shocks which can

 result in growth rates that significantly exceed the frequencies 

that one would expect from non-interacting proportional growth models<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref29" title="Fu, D. et al. The growth of business firms: Theoretical framework and empirical evidence. Proc. Natl. Acad. Sci. 102, 18801–18806 (2005)." id="ref-link-119">29</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref30" title="Stanley, M. H. R. et al. Scaling behaviour in the growth of companies. Nature 379, 804–806 (1996)." id="ref-link-120">30</a></sup>.</p>

<p>A

 large number of the world's ethnic groups are separated along 

linguistic lines. A language barrier can isolate its speakers by serving

 as a screen to external events, which may further slow the rate of 

language evolution by stalling endogenous change. Nevertheless, we find 

that the distribution of word growth rates significantly broadens during

 times of large-scale conflict, revealed through the sudden increases in

<i>σ</i>(<i>t</i>) for the English, French, German and Russian corpora during World War II<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref24" title="Petersen, A. M., Tenenbaum, J., Havlin, S. & Stanley, H. E. In: preparation, see the SI materials for the e-print: arXiv:1107.3707 Version 1." id="ref-link-121">24</a></sup>.

 This can be understood as manifesting from the unification of public 

consciousness that creates a fertile breeding ground for new topics and 

ideas. During war, people may be more likely to have their attention 

drawn to global issues. Remarkably, the pronounced change during WWII 

was not observed for the Spanish corpus, documenting the relatively 

small roles that Spain and Latin American countries played in the war.</p>

                                                        </div>

                        </div>

                                                                        <div class="section   expanded" id="methods">

                                                        <h1 class="section-heading toggle"><a title="Methods">Methods</a></h1>

                                                <div class="content">

                                                                                                                                                <ul class="section-nav cleared"><li class="first"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#introduction">Introduction</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#results">Results</a></li>

<li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#discussion">Discussion</a></li><li class="current"><span>Methods</span></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#references">References</a></li>

<li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#acknowledgments">Acknowledgements</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#author-information">Author information</a></li>

<li class="last"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Supplementary information</a></li></ul>

                                                                <h2>Quantifying the word use trajectory</h2><p>Once a word is 

introduced into a language, what are the characteristic growth patterns?

 To address this question, we first account for important variations in 

words, as the growth dynamics may depend on the frequency of the word as

 well as social and technological aspects of the time-period during 

which the word was born.</p><p>Here we define the age or trajectory year <i>τ</i> = <i>t</i> – <i>t</i><sub>0,<i>i</i></sub>

 as the number of years after the word's first appearance in the 

database. In order to compare trajectories across time and across 

varying word frequency, we normalize the trajectories for each word <i>i</i> by the average use </p><p> 

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m7.jpg" alt="" class="align-middle" style="width:291px;height:44px;">

</p><p>over the lifetime <i>T<sub>i</sub></i> ≡ <i>t<sub>f,i</sub></i> – <i>t</i><sub>0,<i>i</i></sub> + 1 of the word, leading to the normalized trajectory, </p><p>   

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m8.jpg" alt="" class="align-middle" style="width:361px;height:17px;">

</p><p>By analogy, in order to compare various growth trajectories, we normalize the relative growth rate trajectory        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m29.jpg" alt="" class="align-middle" style="width:24px;height:15px;">

 by the standard deviation over the entire lifetime, </p><p>        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m9.jpg" alt="" class="align-middle" style="width:328px;height:52px;">

</p><p>Hence, the normalized relative growth trajectory is </p><p>      

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m10.jpg" alt="" class="align-middle" style="width:364px;height:16px;">

</p><p><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Figs. S4–S7</a> show the weighted averages 〈<i>f </i>′(<i>τ</i>|<i>T<sub>c</sub></i>)〉 and 〈<i>r</i>′(<i>τ</i> |<i>T<sub>c</sub></i>)〉 and the weighted standard deviations <i>σ</i>[<i>f </i>′(<i>τ</i>|<i>T<sub>c</sub></i>)] and <i>σ</i>[<i>r</i>′(<i>τ</i>|<i>T<sub>c</sub></i>)] calculated using normalized trajectories for new words in each corpus. We compute      

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m30.jpg" alt="" class="align-middle" style="width:25px;height:15px;">

 and    

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m31.jpg" alt="" class="align-middle" style="width:30px;height:15px;">

 for each trajectory year <i>τ</i> using all <i>N<sub>t</sub></i> trajectories (<a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Table S1</a>) that satisfy the criteria <i>T<sub>i</sub></i> ≥ <i>T<sub>c</sub></i> and <i>t</i><sub>0,<i>i</i></sub> ≥ 1800. We compute the weighted average and the weighted standard deviation using 〈<i>f<sub>i</sub></i>〉 as the weight value for word <i>i</i>, so that   

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m32.jpg" alt="" class="align-middle" style="width:25px;height:15px;">

 and    

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m33.jpg" alt="" class="align-middle" style="width:31px;height:16px;">

 reflect the lifetime trajectories of the more common words that are “new” to each corpus.</p><p>Since there is an intrinsic word maturity <i>σ</i>[<i>r</i>′(<i>τ</i>|<i>T<sub>c</sub></i>)] that is not accounted for in the quantity         

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m34.jpg" alt="" class="align-middle" style="width:29px;height:16px;">

, we further define the detrended relative growth </p><p>   

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m11.jpg" alt="" class="align-middle" style="width:302px;height:18px;">

</p><p>which allows us to compare the growth factors for new words at 

various life stages. The result of this normalization is to rescale the 

standard deviations for a given trajectory year <i>τ</i> to unity for all values of        

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m35.jpg" alt="" class="align-middle" style="width:29px;height:15px;">

.</p><h2>Detrended fluctuation analysis of individual <i>f<sub>i</sub></i>(<i>t</i>)</h2><p>Here we outline the DFA method for quantifying temporal autocorrelations in a general time series <i>f<sub>i</sub></i>(<i>t</i>)

 that may have underlying trends, and compare the output with the 

results expected from a time series corresponding to a 1-dimensional 

random walk.</p><p>In a time interval <i>δt</i>, a time series <i>Y</i> (<i>t</i>) deviates from the previous value <i>Y</i> (<i>t</i> – <i>δt</i>) by an amount <i>δY</i> (<i>t</i>) ≡ <i>Y</i> (<i>t</i>) – <i>Y</i> (<i>t</i> – <i>δt</i>).

 A powerful result of the central limit theorem, equivalent to Fick's 

law of diffusion in 1 dimension, is that if the displacements are 

independent (uncorrelated, corresponding to a simple Markov process), 

then the total displacement Δ<i>Y</i> (<i>t</i>) = <i>Y</i> (<i>t</i>) – <i>Y</i> (0) from the initial location <i>Y</i> (0) ≡ 0 scales according to the total time <i>t</i> as </p><p>    

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m12.jpg" alt="" class="align-middle" style="width:299px;height:18px;">

</p><p>However, if there are long-term correlations in the time series <i>Y</i> (<i>t</i>), then the relation is generalized to </p><p> 

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m13.jpg" alt="" class="align-middle" style="width:275px;height:17px;">

</p><p>where <i>H</i> is the Hurst exponent which corresponds to positive correlations for <i>H</i> &gt; 1/2 and negative correlations for <i>H</i> &lt; 1/2.</p><p>Since there may be underlying social, political, and technological trends that influence each time series <i>f<sub>i</sub></i>(<i>t</i>), we use the detrended fluctuation analysis (DFA) method<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref35" title="Rybski, D. et al. Scaling laws of human interaction activity. Proc. Natl. Acad. Sci. USA 106, 12640–12645 (2009)." id="ref-link-122">35</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref36" title="Peng, C. K. et al. Mosaic organization of DNA nucleotides. Phys. Rev. E 49, 1685 – 1689 (1994)." id="ref-link-123">36</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref37" title="Hu, K. et al. Effect of Trends on Detrended Fluctuation Analysis. Phys. Rev. E 64, 011114 (2001)." id="ref-link-124">37</a></sup> to analyze the residual fluctuations Δ<i>f<sub>i</sub></i>(<i>t</i>) after we remove the local trends. The method detrends the time series using time windows of varying length Δ<i>t</i>. The time series       

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m36.jpg" alt="" class="align-middle" style="width:44px;height:18px;">

 corresponds to the locally detrended time series using window size Δ<i>t</i>. We calculate the Hurst exponent <i>H</i> using the relation between the root-mean-square displacement <i>F</i>(Δ<i>t</i>) and the window size Δ<i>t</i><sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref35" title="Rybski, D. et al. Scaling laws of human interaction activity. Proc. Natl. Acad. Sci. USA 106, 12640–12645 (2009)." id="ref-link-125">35</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref36" title="Peng, C. K. et al. Mosaic organization of DNA nucleotides. Phys. Rev. E 49, 1685 – 1689 (1994)." id="ref-link-126">36</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref37" title="Hu, K. et al. Effect of Trends on Detrended Fluctuation Analysis. Phys. Rev. E 64, 011114 (2001)." id="ref-link-127">37</a></sup>, </p>

<p>       

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m14.jpg" alt="" class="align-middle" style="width:331px;height:36px;">

</p><p>Here         

        <img src="http://www.nature.com/srep/2012/120315/srep00313/images/srep00313-m37.jpg" alt="" class="align-middle" style="width:51px;height:18px;">

 is the local deviation from the average trend, analogous to Δ<i>Y</i> (<i>t</i>) defined above.</p><p><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Fig. S2</a> shows 4 different <i>f<sub>i</sub></i>(<i>t</i>) in panel (a), and plots the corresponding <i>F<sub>i</sub></i>(Δ<i>t</i>) in panel (b). The calculated <i>H<sub>i</sub></i> values for these 4 words are all significantly greater than the uncorrelated <i>H</i>

 = 0.5 value, indicating strong positive long-term correlations in the 

use of these words, even after we have removed the local trends using 

DFA. In these example cases, the trends are related to political events 

such as war in the cases of “Americanism” and “Repatriation”, or the 

bursting associated with new technology in the case of “Antibiotics,” or

 new musical trends illustrated in the case of “polyphony.”</p><p>In <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Fig. S3</a> we plot the pdf of <i>H<sub>i</sub></i> values calculated for the relatively common words analyzed in <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#f6">Fig. 6(b)</a>. We also plot the pdf of <i>H<sub>i</sub></i> values calculated from shuffled time series, and these values are centered around 〈<i>H</i>〉

 ≈ 0.5 as expected from the removal of the intrinsic temporal ordering. 

Thus, using this method, we are able to quantify the social memory 

characterized by the Hurst exponent which is related to the bursting 

properties of linguistic trends, and in general, to bursting phenomena 

in human dynamics<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref25" title="Klimek, P., Bayer, W. & Thurner, S. The blogosphere as an excitable social medium: Richter's and Omori's Law in media coverage. Physica A 390, 3870–3875 (2011)." id="ref-link-128">25</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref26" title="Sano, Y., Yamada, K., Watanabe, H., Takayasu, H. & Takayasu, M. Empirical analysis of collective human behavior for extraordinary events in blogosphere. (preprint) arXiv:1107.4730 [physics.soc-ph]." id="ref-link-129">26</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref38" title="Barabási, A. L. The origin of bursts and heavy tails in human dynamics. Nature 435, 207–211 (2005)." id="ref-link-130">38</a>, <a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref39" title="Crane, R. & Sornette, D. Robust dynamic classes revealed by measuring the response function of a social system. Proc. Natl. Acad. Sci. 105, 15649–15653 (2008)." id="ref-link-131">39</a></sup>.

 Recent analysis of Google words data compares the Hurst exponents of 

words describing social phenomena to the Hurst exponents of words 

describing natural phenomena<sup><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#ref54" title="Gao, J., Hu, H., Mao, X. & Perc, M. Culturomics meets random fractal theory: insights into long-range correlations of social and natural phenomena over the past two centuries. J. R. Soc. Interface (2001).doi: 10.1098/rsif.2011.0846." id="ref-link-132">54</a></sup>.

 Interestingly, Gao et al. find that these 2 word classes are described 

by distinct underlying processes, as indicated by the corresponding <i>H<sub>i</sub></i> values.</p>

                                                                </div>

                        </div>

                                                                        <div class="section   collapsed" id="references">

                                                        <h1 class="section-heading toggle"><a title="References">References</a></h1>

                        </div>

                                                                        <div class="section   collapsed" id="acknowledgments">

                                                        <h1 class="section-heading toggle"><a title="Acknowledgements">Acknowledgements</a></h1>

                        </div>

                                                        <h1 class="section-heading toggle"><a title="Author information">Author information</a></h1>

                                                                                                                                                <ul class="section-nav cleared"><li class="first"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#introduction">Introduction</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#results">Results</a></li>

<li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#discussion">Discussion</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#methods">Methods</a></li><li>

<a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#references">References</a></li><li><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#acknowledgments">Acknowledgements</a></li>

<li class="current"><span>Author information</span></li><li class="last"><a href="http://www.nature.com/srep/2012/120315/srep00313/full/srep00313.html#supplementary-information">Supplementary information</a></li></ul>

        <div id="author-affiliations">

                <h2 id="affil-auth">Affiliations</h2>

                <ol class="affiliations"><li id="a1">

                                                <h3>Laboratory for the Analysis of Complex Economic Systems, IMT Lucca Institute for Advanced Studies, Lucca 55100, Italy</h3>

                                                                                                                                                                        <ul class="affiliation-authors"><li>Alexander M. Petersen</li></ul>

                                                                                                                                                </li><li id="a2">

                                                <h3>Center for Polymer Studies and Department of Physics, Boston University, Boston, Massachusetts 02215, USA</h3>

                                                                                                                                                                        <ul class="affiliation-authors"><li>Joel Tenenbaum &</li><li>H. Eugene Stanley</li></ul>

                                                                                                                                                </li><li id="a3">

                                                <h3>Minerva Center and Department of Physics, Bar-Ilan University, Ramat-Gan 52900, Israel</h3>

                                                                                                                                                                        <ul class="affiliation-authors"><li>Shlomo Havlin</li></ul>

                                                                                                                                                </li></ol>

        </div>

        <div id="author-contributions">

                <h2 id="contrib-auth">Contributions</h2>

                <p>A. M. P., J. T., S. H. & H. E. S. designed research, performed

 research, wrote, reviewed and approved the manuscript. A. M. P. and J. 

T. performed the numerical and statistical analysis of the data.</p>

        </div>

        <div id="competing-financial-interests">

                <h2 id="cfi">Competing financial interests</h2>

                                                                                                <p class="conflict">The authors declare no competing financial interests.</p>

                                                                                                        </div>

        <div id="corresponding-authors">

                <h2 id="corres-auth">Corresponding author</h2>

                <p class="correspondence">Correspondence to: </p>

                <ul class="correspondence"><li id="c1"><a href="http://www.nature.com/srep/foxtrot/svc/authoremailform?doi=10.1038/srep00313&file=/srep/2012/120315/srep00313/full/srep00313.html&title=Statistical+Laws+Governing+Fluctuations+in+Word+Use+from+Word+Birth+to+Word+Death&author=Alexander+M.+Petersen" class="contact">Alexander M. Petersen</a></li>

</ul>

        </div><br clear="all"><br>-- <br>=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+<br><br> Harold F. Schiffman<br><br>Professor Emeritus of <br> Dravidian Linguistics and Culture <br>Dept. of South Asia Studies                     <br>University of Pennsylvania<br>

Philadelphia, PA 19104-6305<br><br>Phone:  (215) 898-7475<br>Fax:  (215) 573-2138                                      <br><br>Email:  <a href="mailto:haroldfs@gmail.com">haroldfs@gmail.com</a><br><a href="http://ccat.sas.upenn.edu/~haroldfs/">http://ccat.sas.upenn.edu/~haroldfs/</a>    <br>

<br>-------------------------------------------------<br>