<?xml version="1.0" encoding="UTF-8"?>
<resource xmlns="http://datacite.org/schema/kernel-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd">
  <identifier identifierType="DOI">10.18710/RULYMP</identifier>
  <creators>
    <creator>
      <creatorName nameType="Personal">Kang, Hui</creatorName>
      <givenName>Hui</givenName>
      <familyName>Kang</familyName>
      <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="https://orcid.org">https://orcid.org/0000-0002-5979-1658</nameIdentifier>
      <affiliation>Dalian University of Foreign Languages</affiliation>
    </creator>
    <creator>
      <creatorName nameType="Personal">Xu, Jiajin</creatorName>
      <givenName>Jiajin</givenName>
      <familyName>Xu</familyName>
      <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="https://orcid.org">https://orcid.org/0000-0003-3454-9352</nameIdentifier>
      <affiliation>Beijing Foreign Studies University</affiliation>
    </creator>
  </creators>
  <titles>
    <title>Replication data for: Salience-simplification strategy for markedness of causal subordinators: “because” and “since” in argumentative essays</title>
  </titles>
  <publisher>DataverseNO</publisher>
  <publicationYear>2022</publicationYear>
  <subjects>
    <subject>Arts and Humanities</subject>
    <subject>causal subordinators</subject>
    <subject>"because"</subject>
    <subject>"since"</subject>
    <subject>contextual features</subject>
    <subject>argumentative essays</subject>
    <subject>syntax</subject>
    <subject>English</subject>
  </subjects>
  <contributors>
    <contributor contributorType="Producer">
      <contributorName nameType="Organizational">Dalian University of Foreign Languages</contributorName>
    </contributor>
    <contributor contributorType="Distributor">
      <contributorName nameType="Organizational">The Tromsø Repository of Language and Linguistics (TROLLing)</contributorName>
    </contributor>
    <contributor contributorType="ContactPerson">
      <contributorName nameType="Personal">Kang, Hui</contributorName>
      <givenName>Hui</givenName>
      <familyName>Kang</familyName>
      <affiliation>Dalian University of Foreign Languages</affiliation>
    </contributor>
    <contributor contributorType="HostingInstitution">
      <contributorName nameType="Organizational">Software School/Intelligence Language Research Center</contributorName>
    </contributor>
    <contributor contributorType="ProjectMember">
      <contributorName nameType="Personal">Wang, Luojia</contributorName>
      <givenName>Luojia</givenName>
      <familyName>Wang</familyName>
    </contributor>
    <contributor contributorType="ProjectMember">
      <contributorName nameType="Personal">Zhang, Yaxin</contributorName>
      <givenName>Yaxin</givenName>
      <familyName>Zhang</familyName>
    </contributor>
    <contributor contributorType="ProjectMember">
      <contributorName nameType="Personal">Zhang, Xiaobo</contributorName>
      <givenName>Xiaobo</givenName>
      <familyName>Zhang</familyName>
    </contributor>
  </contributors>
  <dates>
    <date dateType="Created">2021-08-05</date>
    <date dateType="Submitted">2021-08-05</date>
    <date dateType="Available">2022-01-27</date>
    <date dateType="Updated">2023-09-28</date>
    <date dateType="Collected">2019-12-01/2021-05-01</date>
  </dates>
  <resourceType resourceTypeGeneral="Dataset">corpus data</resourceType>
  <relatedIdentifiers>
    <relatedIdentifier relationType="IsSupplementTo" relatedIdentifierType="DOI">10.1016/J.LINGUA.2022.103256</relatedIdentifier>
    <relatedIdentifier relationType="HasPart" relatedIdentifierType="DOI">10.18710/RULYMP/LNHNTU</relatedIdentifier>
    <relatedIdentifier relationType="HasPart" relatedIdentifierType="DOI">10.18710/RULYMP/8LS73Q</relatedIdentifier>
    <relatedIdentifier relationType="HasPart" relatedIdentifierType="DOI">10.18710/RULYMP/NCLF5C</relatedIdentifier>
    <relatedIdentifier relationType="HasPart" relatedIdentifierType="DOI">10.18710/RULYMP/KAHJOG</relatedIdentifier>
    <relatedIdentifier relationType="HasPart" relatedIdentifierType="DOI">10.18710/RULYMP/XF7AB6</relatedIdentifier>
  </relatedIdentifiers>
  <sizes>
    <size>12006</size>
    <size>73198</size>
    <size>1178</size>
    <size>10052</size>
    <size>2810</size>
  </sizes>
  <formats>
    <format>text/plain</format>
    <format>text/csv</format>
    <format>type/x-r-syntax</format>
    <format>type/x-r-syntax</format>
    <format>type/x-r-syntax</format>
  </formats>
  <version>1.2</version>
  <rightsList>
    <rights rightsURI="info:eu-repo/semantics/openAccess"/>
    <rights rightsURI="http://creativecommons.org/licenses/by-nc/4.0" rightsIdentifier="CC-BY-NC-4.0" rightsIdentifierScheme="SPDX" schemeURI="https://spdx.org/licenses/" xml:lang="en">Creative Commons Attribution-NonCommercial 4.0 International License.</rights>
  </rightsList>
  <descriptions>
    <description descriptionType="Abstract">The dataset supports the research article "Salience-simplification strategy to markedness of causal subordinators: The case of “because” and “since” in argumentative essays". In total, the dataset marks features of 976 causal adverbial subordinations retrieved from student argumentative essays. Data points were extracted from three corpora. Specifically, all essays in NESSIE (Native English Speakers’ Similarly or Identically-prompted Essays, created by Xu Jiajin, 781 essays; 291,911 tokens) and argumentative essays in LOCNESS (the Louvain Corpus of Native English Essays, created by Granger, 323 essays; 230,138 tokens) were selected. Native argumentative essays from BAWE’s (British Academic Written English, created by Hilary Nesi) Arts and Humanities disciplinary group were chosen (512 essays; 1,360,932 tokens). In total, 1,616 essays comprising 1,882,981 tokens were examined.
The dataset comprises 976 data points of causal subordinations conjoined by "because" and "since" in students' argumentative essays--488 data points of all "since" subordinations, and 488 randomly selected "because" subordinations. On these data points, ten contextual features that are potential predictors of people's choices between causal subordinators "because" and "since" were annotated.

The ten contextual features annotated are "position", "separation", "embeddedness", "initial adverbials", "sub-clause", "de-ranking", "clause-length ratio", "hedging terms", "clausal relationship", and "bridging".

Overall, fourteen variables, including the ten contextual features, are annotated:
(1) "No." is the ID of each data point (this is one ID marker);
(2) "subordinator" marks the logical subordinators (this categorical variable has two values: "because" and "since");
(3) "position" marks the logical adverbial clause positions compared with the main clause (this categorical variable has two values: "preposed" or "postposed");
(4) "sep" indicates whether a separating punctuation mark exists between the subordinate and main clauses (this categorical variable has two values: "YES" or "NO");
(5) "embeddedness" indicates whether a complex sentence is embedded in a larger complex sentence (this categorical variable has two values: "YES" or "NO");
(6) "ini.adv" denotes whether an initial adverbial exists in the causal subordination (this categorical variable has two values: "YES" or "NO");
(7) "sub-clau" indicates whether the causal subordinate contains sub-clauses of any type (this categorical variable has two values: "YES" or "NO");
(8) "deranking" indicates whether the predicate of the subordinate clause is complete (this categorical variable has two values: "YES" or "NO");
(9) "sub.main.ratio" is the length ratio of the subordinate and main clauses in terms of word count (this numerical variable is converted into ln value for better interpretation);
(10) "hedging" indicates whether a hedging term exists in the subordinate clause (this categorical variable has two values: "YES" or "NO");
(11) "clau.rel" denotes the interclausal relationships on the general level (this categorical variable has two values: "direct" or "indirect");
(12) "spc.clau.rel2" denotes the interclausal relationships on the secondary level (this categorical variable has five values: "im", "rm", "asst", "inpr", and "sugg");
(13) "bridging" indicates whether the subordinate clause contains any information referring back to the preceding clause (this categorical variable has two values: "YES" or "NO");
(14) "source" shows specific corpora the data points come from (this categorical variable has three values: "NESSIE", "LOCNESS", or "BAWE");

This dataset was constructed to explore contextual features that discriminate between causal subordinators of "because" and "since" and to rank the effective features.</description>
    <description descriptionType="TechnicalInfo">AntConc, 3.5.8</description>
    <description descriptionType="TechnicalInfo">R Language, 3.6.2</description>
    <description descriptionType="TechnicalInfo">RStudio Team, 1.1.456</description>
  </descriptions>
  <geoLocations>
    <geoLocation>
      <geoLocationPlace>United States</geoLocationPlace>
    </geoLocation>
    <geoLocation>
      <geoLocationPlace>United Kingdom</geoLocationPlace>
    </geoLocation>
    <geoLocation>
      <geoLocationPlace>Dalian</geoLocationPlace>
    </geoLocation>
  </geoLocations>
  <fundingReferences>
    <fundingReference>
      <funderName>Liaoning Social Science Foundation</funderName>
      <awardNumber>L20BYY016</awardNumber>
    </fundingReference>
    <fundingReference>
      <funderName>National Social Science Fund of China (NSSFC)</funderName>
      <awardNumber>19ZDA319</awardNumber>
    </fundingReference>
  </fundingReferences>
</resource>
