{"dcterms:modified":"2024-03-19","dcterms:creator":"DataverseNO","@type":"ore:ResourceMap","@id":"https://dataverse.no/api/datasets/export?exporter=OAI_ORE&persistentId=https://doi.org/10.18710/QAJKZW","ore:describes":{"citation:keyword":[{"citation:keywordValue":"multi-dimensional analysis"},{"citation:keywordValue":"register variation"},{"citation:keywordValue":"factor analysis"},{"citation:keywordValue":"corpus"},{"citation:keywordValue":"Czech"}],"publication":[{"publicationCitation":"Cvrček, V., Komrsková, Z., Lukeš, D., Poukarová, P., Řehořková, A., & Zasina, A. J. (2018). From extra- to intratextual characteristics: Charting the space of variation in Czech through MDA. Corpus Linguistics and Linguistic Theory.","publicationIDType":"doi","publicationIDNumber":"10.1515/cllt-2018-0020","publicationURL":"https://doi.org/10.1515/cllt-2018-0020"},{"publicationCitation":"Cvrček, V., Komrsková, Z., Lukeš, D., Poukarová, P., Řehořková, A., & Zasina, A. J. (forthcoming). Variabilita češtiny: multidimenzionální analýza. Slovo a slovesnost."}],"contributor":[{"citation:contributorType":"Project Leader","citation:contributorName":"Cvrček, Václav"},{"citation:contributorType":"Project Member","citation:contributorName":"Komrsková, Zuzana"},{"citation:contributorType":"Project Member","citation:contributorName":"Lukeš, David"},{"citation:contributorType":"Project Member","citation:contributorName":"Poukarová, Petra"},{"citation:contributorType":"Project Member","citation:contributorName":"Řehořková, Anna"},{"citation:contributorType":"Project Member","citation:contributorName":"Zasina, Adrian Jan"}],"grantNumber":{"citation:grantNumberAgency":"European Regional Development Fund","citation:grantNumberValue":"CZ.02.1.01/0.0/0.0/16_013/0001758"},"software":[{"citation:softwareName":"R: A Language and Environment for Statistical Computing","citation:softwareVersion":"3.4.3"},{"citation:softwareName":"psych: Procedures for Personality and Psychological Research (R package)","citation:softwareVersion":"1.7.8"}],"citation:producer":{"citation:producerName":"Czech National Corpus","citation:producerAbbreviation":"CNC","citation:producerURL":"https://korpus.cz","citation:producerLogoURL":"https://trnka.korpus.cz/index-doc/logo/CNC-sirka-01-col-RGB-poz.png"},"timePeriodCovered":{"citation:timePeriodCoveredStart":"1990","citation:timePeriodCoveredEnd":"2014"},"citation:distributor":{"citation:distributorName":"The Tromsø Repository of Language and Linguistics (TROLLing)","citation:distributorAbbreviation":"TROLLing","citation:distributorURL":"https://trolling.uit.no/"},"geospatial:geographicCoverage":{"geospatial:country":"Czech Republic"},"author":{"citation:authorName":"Cvrček, Václav","citation:authorAffiliation":"Czech National Corpus","authorIdentifierScheme":"ORCID","authorIdentifier":"0000-0003-3977-2393"},"citation:dsDescription":{"citation:dsDescriptionValue":"
\r\nOriginal data for a general-purpose multi-dimensional analysis model of\r\nregister variation in Czech.\r\n
\r\n\r\n\r\nThis post contains a CSV data set of 137 linguistic features measured on\r\n3428 Czech text chunks, and an R script which performs a factor analysis\r\non this data set. The results of this factor analysis were used as a\r\nbasis for an 8-dimensional model of register variation in Czech (see\r\nRelated Publications), following the methodology introduced by Douglas\r\nBiber (see e.g. his 1988 seminal work\r\n\r\nVariation Across Speech and Writing\r\n\r\nfor details on the methodology, or his 2014 article\r\n\r\n“Using multi-dimensional analysis to explore cross-linguistic universals\r\nof register variation”\r\n\r\nfor a review of MDA results across a variety of languages).\r\n
\r\n\r\n\r\nThe data is derived from the\r\n\r\nKoditex corpus\r\n,\r\nwhich aims to be as diversified as possible, covering various forms of\r\nspoken and written (both print and on-line) Czech. In compiling this\r\ncorpus, the purpose was to provide a solid empirical basis for a\r\ncomprehensive general-purpose model of register variation in Czech.\r\n
\r\n\r\n\r\nApart from this data set and related publications, additional\r\nresources pertaining to the project are available via the\r\n\r\nczcorpus/mda\r\n\r\nGitHub repository.\r\n
","citation:dsDescriptionDate":"2018-10-12"},"citation:datasetContact":{"citation:datasetContactName":"Lukeš, David","citation:datasetContactAffiliation":"Czech National Corpus","citation:datasetContactEmail":"david.lukes@ff.cuni.cz"},"citation:dateOfCollection":{"citation:dateOfCollectionStart":"2017","citation:dateOfCollectionEnd":"2018"},"language":"English","citation:productionPlace":"Prague, Czech Republic","dateOfDeposit":"2018-10-12","citation:depositor":"Lukeš, David","subject":"Arts and Humanities","dataSources":"Koditex corpus (https://wiki.korpus.cz/doku.php/en:cnk:koditex)","kindOfData":"corpus data","title":"Multi-Dimensional Analysis of Czech","citation:productionDate":"2018-10-12","@id":"https://doi.org/10.18710/QAJKZW","@type":["ore:Aggregation","schema:Dataset"],"schema:version":"1.1","schema:name":"Multi-Dimensional Analysis of Czech","schema:dateModified":"Thu Sep 28 20:17:55 GMT 2023","schema:datePublished":"2018-10-30","schema:license":"http://creativecommons.org/publicdomain/zero/1.0","dvcore:fileTermsOfAccess":{"dvcore:fileRequestAccess":true},"schema:includedInDataCatalog":"DataverseNO","schema:isPartOf":{"schema:name":"TROLLing","@id":"https://dataverse.no/dataverse/trolling","schema:description":"Sign Up\n Getting started with TROLLing\n\n\n \n\nNote: No datasets will be curated/published from March 22 to April 1, 2024. More info.
","schema:isPartOf":{"schema:name":"DataverseNO","@id":"https://dataverse.no/dataverse/root"}},"ore:aggregates":[{"schema:description":"Start here.","schema:name":"00_README.docx","dvcore:restricted":false,"schema:version":1,"dvcore:datasetVersionId":3803,"@id":"doi:10.18710/QAJKZW/AYYGNF","schema:sameAs":"https://dataverse.no/api/access/datafile/:persistentId?persistentId=doi:10.18710/QAJKZW/AYYGNF","@type":"ore:AggregatedResource","schema:fileFormat":"application/vnd.openxmlformats-officedocument.wordprocessingml.document","dvcore:filesize":49288,"dvcore:storageIdentifier":"S3://2002-yellow-dataverseno:166c53c3feb-f5ad04ce8bb8","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"4b01482c643e34dc80b32e200a8c74e3"}},{"schema:description":"Start here.","schema:name":"00_README.pdf","dvcore:restricted":false,"schema:version":1,"dvcore:datasetVersionId":3803,"@id":"doi:10.18710/QAJKZW/HAN1ML","schema:sameAs":"https://dataverse.no/api/access/datafile/:persistentId?persistentId=doi:10.18710/QAJKZW/HAN1ML","@type":"ore:AggregatedResource","schema:fileFormat":"application/pdf","dvcore:filesize":627018,"dvcore:storageIdentifier":"S3://2002-yellow-dataverseno:166c53c419a-fde083e7dde4","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"0ac6cbd2495d939cc50087a6334c526e"}},{"schema:description":"Values of linguistic features in individual text chunks. Each row of the table corresponds to a text chunk in the Koditex corpus. Columns represent linguistic features, and additionally text chunk ID, classification metadata (MODE, DIVISION, SUPERCLASS, CLASS) and length (_LEN).\r\n\r\nThe abbreviations for the classification categories (= values in the MODE, DIVISION, SUPERCLASS and CLASS columns) and linguistic feature names (= remaining column names) are explained in 00_README.pdf.\r\n","schema:name":"01_2017-12-05.csv","dvcore:restricted":false,"schema:version":1,"dvcore:datasetVersionId":3803,"@id":"doi:10.18710/QAJKZW/M1I7AP","schema:sameAs":"https://dataverse.no/api/access/datafile/:persistentId?persistentId=doi:10.18710/QAJKZW/M1I7AP&format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":7390578,"dvcore:storageIdentifier":"S3://2002-yellow-dataverseno:1666846d577-377742e1f391","dvcore:currentIngestedName":"01_2017-12-05.csv","dvcore:UNF":"UNF:6:5rqhrfGF8iJspOAQER3OCA==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"daf11ccd0c1673438a5e2ec97466cb7c"}},{"schema:description":"R code for performing factor analysis on the supplied data set.","schema:name":"02_factor_analysis.R","dvcore:restricted":false,"schema:version":1,"dvcore:datasetVersionId":3803,"@id":"doi:10.18710/QAJKZW/LTNR3K","schema:sameAs":"https://dataverse.no/api/access/datafile/:persistentId?persistentId=doi:10.18710/QAJKZW/LTNR3K","@type":"ore:AggregatedResource","schema:fileFormat":"type/x-r-syntax","dvcore:filesize":1007,"dvcore:storageIdentifier":"S3://2002-yellow-dataverseno:166c084b607-e164dc5ec89e","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"4f670b765505156dc3b9cfd53b7d397d"}}],"schema:hasPart":["doi:10.18710/QAJKZW/AYYGNF","doi:10.18710/QAJKZW/HAN1ML","doi:10.18710/QAJKZW/M1I7AP","doi:10.18710/QAJKZW/LTNR3K"]},"@context":{"author":"http://purl.org/dc/terms/creator","authorIdentifier":"http://purl.org/spar/datacite/AgentIdentifier","authorIdentifierScheme":"http://purl.org/spar/datacite/AgentIdentifierScheme","citation":"https://dataverse.org/schema/citation/","contributor":"http://purl.org/dc/terms/contributor","dataSources":"https://www.w3.org/TR/prov-o/#wasDerivedFrom","dateOfDeposit":"http://purl.org/dc/terms/dateSubmitted","dcterms":"http://purl.org/dc/terms/","dvcore":"https://dataverse.org/schema/core#","geospatial":"https://dataverse.no/schema/geospatial#","grantNumber":"https://schema.org/sponsor","kindOfData":"http://rdf-vocabulary.ddialliance.org/discovery#kindOfData","language":"http://purl.org/dc/terms/language","ore":"http://www.openarchives.org/ore/terms/","publication":"http://purl.org/dc/terms/isReferencedBy","publicationCitation":"http://purl.org/dc/terms/bibliographicCitation","publicationIDNumber":"http://purl.org/spar/datacite/ResourceIdentifier","publicationIDType":"http://purl.org/spar/datacite/ResourceIdentifierScheme","publicationURL":"https://schema.org/distribution","schema":"http://schema.org/","software":"https://www.w3.org/TR/prov-o/#wasGeneratedBy","subject":"http://purl.org/dc/terms/subject","timePeriodCovered":"https://schema.org/temporalCoverage","title":"http://purl.org/dc/terms/title"}}