{"id":5093,"identifier":"QAJKZW","persistentUrl":"https://doi.org/10.18710/QAJKZW","protocol":"doi","authority":"10.18710","publisher":"DataverseNO","publicationDate":"2018-10-30","storageIdentifier":"S3://10.18710/QAJKZW","datasetVersion":{"id":3803,"datasetId":5093,"datasetPersistentId":"doi:10.18710/QAJKZW","storageIdentifier":"S3://10.18710/QAJKZW","versionNumber":1,"versionMinorNumber":1,"versionState":"RELEASED","productionDate":"2018-10-12","UNF":"UNF:6:5rqhrfGF8iJspOAQER3OCA==","lastUpdateTime":"2023-09-28T20:17:55Z","releaseTime":"2023-09-28T20:17:55Z","createTime":"2023-09-28T15:48:37Z","publicationDate":"2018-10-30","citationDate":"2018-10-30","license":{"name":"CC0 1.0","uri":"http://creativecommons.org/publicdomain/zero/1.0","iconUri":"https://licensebuttons.net/p/zero/1.0/88x31.png"},"fileAccessRequest":true,"metadataBlocks":{"citation":{"displayName":"Citation Metadata","name":"citation","fields":[{"typeName":"title","multiple":false,"typeClass":"primitive","value":"Multi-Dimensional Analysis of Czech"},{"typeName":"author","multiple":true,"typeClass":"compound","value":[{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Cvrček, Václav"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Czech National Corpus"},"authorIdentifierScheme":{"typeName":"authorIdentifierScheme","multiple":false,"typeClass":"controlledVocabulary","value":"ORCID"},"authorIdentifier":{"typeName":"authorIdentifier","multiple":false,"typeClass":"primitive","value":"0000-0003-3977-2393"}}]},{"typeName":"datasetContact","multiple":true,"typeClass":"compound","value":[{"datasetContactName":{"typeName":"datasetContactName","multiple":false,"typeClass":"primitive","value":"Lukeš, David"},"datasetContactAffiliation":{"typeName":"datasetContactAffiliation","multiple":false,"typeClass":"primitive","value":"Czech National Corpus"},"datasetContactEmail":{"typeName":"datasetContactEmail","multiple":false,"typeClass":"primitive","value":"david.lukes@ff.cuni.cz"}}]},{"typeName":"dsDescription","multiple":true,"typeClass":"compound","value":[{"dsDescriptionValue":{"typeName":"dsDescriptionValue","multiple":false,"typeClass":"primitive","value":"
\r\nOriginal data for a general-purpose multi-dimensional analysis model of\r\nregister variation in Czech.\r\n
\r\n\r\n\r\nThis post contains a CSV data set of 137 linguistic features measured on\r\n3428 Czech text chunks, and an R script which performs a factor analysis\r\non this data set. The results of this factor analysis were used as a\r\nbasis for an 8-dimensional model of register variation in Czech (see\r\nRelated Publications), following the methodology introduced by Douglas\r\nBiber (see e.g. his 1988 seminal work\r\n\r\nVariation Across Speech and Writing\r\n\r\nfor details on the methodology, or his 2014 article\r\n\r\n“Using multi-dimensional analysis to explore cross-linguistic universals\r\nof register variation”\r\n\r\nfor a review of MDA results across a variety of languages).\r\n
\r\n\r\n\r\nThe data is derived from the\r\n\r\nKoditex corpus\r\n,\r\nwhich aims to be as diversified as possible, covering various forms of\r\nspoken and written (both print and on-line) Czech. In compiling this\r\ncorpus, the purpose was to provide a solid empirical basis for a\r\ncomprehensive general-purpose model of register variation in Czech.\r\n
\r\n\r\n\r\nApart from this data set and related publications, additional\r\nresources pertaining to the project are available via the\r\n\r\nczcorpus/mda\r\n\r\nGitHub repository.\r\n
"},"dsDescriptionDate":{"typeName":"dsDescriptionDate","multiple":false,"typeClass":"primitive","value":"2018-10-12"}}]},{"typeName":"subject","multiple":true,"typeClass":"controlledVocabulary","value":["Arts and Humanities"]},{"typeName":"keyword","multiple":true,"typeClass":"compound","value":[{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"multi-dimensional analysis"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"register variation"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"factor analysis"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"corpus"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"Czech"}}]},{"typeName":"publication","multiple":true,"typeClass":"compound","value":[{"publicationCitation":{"typeName":"publicationCitation","multiple":false,"typeClass":"primitive","value":"Cvrček, V., Komrsková, Z., Lukeš, D., Poukarová, P., Řehořková, A., & Zasina, A. J. (2018). From extra- to intratextual characteristics: Charting the space of variation in Czech through MDA. Corpus Linguistics and Linguistic Theory."},"publicationIDType":{"typeName":"publicationIDType","multiple":false,"typeClass":"controlledVocabulary","value":"doi"},"publicationIDNumber":{"typeName":"publicationIDNumber","multiple":false,"typeClass":"primitive","value":"10.1515/cllt-2018-0020"},"publicationURL":{"typeName":"publicationURL","multiple":false,"typeClass":"primitive","value":"https://doi.org/10.1515/cllt-2018-0020"}},{"publicationCitation":{"typeName":"publicationCitation","multiple":false,"typeClass":"primitive","value":"Cvrček, V., Komrsková, Z., Lukeš, D., Poukarová, P., Řehořková, A., & Zasina, A. J. (forthcoming). Variabilita češtiny: multidimenzionální analýza. Slovo a slovesnost."}}]},{"typeName":"language","multiple":true,"typeClass":"controlledVocabulary","value":["English"]},{"typeName":"producer","multiple":true,"typeClass":"compound","value":[{"producerName":{"typeName":"producerName","multiple":false,"typeClass":"primitive","value":"Czech National Corpus"},"producerAbbreviation":{"typeName":"producerAbbreviation","multiple":false,"typeClass":"primitive","value":"CNC"},"producerURL":{"typeName":"producerURL","multiple":false,"typeClass":"primitive","value":"https://korpus.cz"},"producerLogoURL":{"typeName":"producerLogoURL","multiple":false,"typeClass":"primitive","value":"https://trnka.korpus.cz/index-doc/logo/CNC-sirka-01-col-RGB-poz.png"}}]},{"typeName":"productionDate","multiple":false,"typeClass":"primitive","value":"2018-10-12"},{"typeName":"productionPlace","multiple":true,"typeClass":"primitive","value":["Prague, Czech Republic"]},{"typeName":"contributor","multiple":true,"typeClass":"compound","value":[{"contributorType":{"typeName":"contributorType","multiple":false,"typeClass":"controlledVocabulary","value":"Project Leader"},"contributorName":{"typeName":"contributorName","multiple":false,"typeClass":"primitive","value":"Cvrček, Václav"}},{"contributorType":{"typeName":"contributorType","multiple":false,"typeClass":"controlledVocabulary","value":"Project Member"},"contributorName":{"typeName":"contributorName","multiple":false,"typeClass":"primitive","value":"Komrsková, Zuzana"}},{"contributorType":{"typeName":"contributorType","multiple":false,"typeClass":"controlledVocabulary","value":"Project Member"},"contributorName":{"typeName":"contributorName","multiple":false,"typeClass":"primitive","value":"Lukeš, David"}},{"contributorType":{"typeName":"contributorType","multiple":false,"typeClass":"controlledVocabulary","value":"Project Member"},"contributorName":{"typeName":"contributorName","multiple":false,"typeClass":"primitive","value":"Poukarová, Petra"}},{"contributorType":{"typeName":"contributorType","multiple":false,"typeClass":"controlledVocabulary","value":"Project Member"},"contributorName":{"typeName":"contributorName","multiple":false,"typeClass":"primitive","value":"Řehořková, Anna"}},{"contributorType":{"typeName":"contributorType","multiple":false,"typeClass":"controlledVocabulary","value":"Project Member"},"contributorName":{"typeName":"contributorName","multiple":false,"typeClass":"primitive","value":"Zasina, Adrian Jan"}}]},{"typeName":"grantNumber","multiple":true,"typeClass":"compound","value":[{"grantNumberAgency":{"typeName":"grantNumberAgency","multiple":false,"typeClass":"primitive","value":"European Regional Development Fund"},"grantNumberValue":{"typeName":"grantNumberValue","multiple":false,"typeClass":"primitive","value":"CZ.02.1.01/0.0/0.0/16_013/0001758"}}]},{"typeName":"distributor","multiple":true,"typeClass":"compound","value":[{"distributorName":{"typeName":"distributorName","multiple":false,"typeClass":"primitive","value":"The Tromsø Repository of Language and Linguistics (TROLLing)"},"distributorAbbreviation":{"typeName":"distributorAbbreviation","multiple":false,"typeClass":"primitive","value":"TROLLing"},"distributorURL":{"typeName":"distributorURL","multiple":false,"typeClass":"primitive","value":"https://trolling.uit.no/"}}]},{"typeName":"depositor","multiple":false,"typeClass":"primitive","value":"Lukeš, David"},{"typeName":"dateOfDeposit","multiple":false,"typeClass":"primitive","value":"2018-10-12"},{"typeName":"timePeriodCovered","multiple":true,"typeClass":"compound","value":[{"timePeriodCoveredStart":{"typeName":"timePeriodCoveredStart","multiple":false,"typeClass":"primitive","value":"1990"},"timePeriodCoveredEnd":{"typeName":"timePeriodCoveredEnd","multiple":false,"typeClass":"primitive","value":"2014"}}]},{"typeName":"dateOfCollection","multiple":true,"typeClass":"compound","value":[{"dateOfCollectionStart":{"typeName":"dateOfCollectionStart","multiple":false,"typeClass":"primitive","value":"2017"},"dateOfCollectionEnd":{"typeName":"dateOfCollectionEnd","multiple":false,"typeClass":"primitive","value":"2018"}}]},{"typeName":"kindOfData","multiple":true,"typeClass":"primitive","value":["corpus data"]},{"typeName":"software","multiple":true,"typeClass":"compound","value":[{"softwareName":{"typeName":"softwareName","multiple":false,"typeClass":"primitive","value":"R: A Language and Environment for Statistical Computing"},"softwareVersion":{"typeName":"softwareVersion","multiple":false,"typeClass":"primitive","value":"3.4.3"}},{"softwareName":{"typeName":"softwareName","multiple":false,"typeClass":"primitive","value":"psych: Procedures for Personality and Psychological Research (R package)"},"softwareVersion":{"typeName":"softwareVersion","multiple":false,"typeClass":"primitive","value":"1.7.8"}}]},{"typeName":"dataSources","multiple":true,"typeClass":"primitive","value":["Koditex corpus (https://wiki.korpus.cz/doku.php/en:cnk:koditex)"]}]},"geospatial":{"displayName":"Geospatial Metadata","name":"geospatial","fields":[{"typeName":"geographicCoverage","multiple":true,"typeClass":"compound","value":[{"country":{"typeName":"country","multiple":false,"typeClass":"controlledVocabulary","value":"Czech Republic"}}]}]}},"files":[{"description":"Start here.","label":"00_README.docx","restricted":false,"version":1,"datasetVersionId":3803,"dataFile":{"id":5216,"persistentId":"doi:10.18710/QAJKZW/AYYGNF","pidURL":"https://doi.org/10.18710/QAJKZW/AYYGNF","filename":"00_README.docx","contentType":"application/vnd.openxmlformats-officedocument.wordprocessingml.document","filesize":49288,"description":"Start here.","storageIdentifier":"S3://2002-yellow-dataverseno:166c53c3feb-f5ad04ce8bb8","rootDataFileId":-1,"md5":"4b01482c643e34dc80b32e200a8c74e3","checksum":{"type":"MD5","value":"4b01482c643e34dc80b32e200a8c74e3"},"creationDate":"2018-10-30"}},{"description":"Start here.","label":"00_README.pdf","restricted":false,"version":1,"datasetVersionId":3803,"dataFile":{"id":5217,"persistentId":"doi:10.18710/QAJKZW/HAN1ML","pidURL":"https://doi.org/10.18710/QAJKZW/HAN1ML","filename":"00_README.pdf","contentType":"application/pdf","filesize":627018,"description":"Start here.","storageIdentifier":"S3://2002-yellow-dataverseno:166c53c419a-fde083e7dde4","rootDataFileId":-1,"md5":"0ac6cbd2495d939cc50087a6334c526e","checksum":{"type":"MD5","value":"0ac6cbd2495d939cc50087a6334c526e"},"creationDate":"2018-10-30"}},{"description":"Values of linguistic features in individual text chunks. Each row of the table corresponds to a text chunk in the Koditex corpus. Columns represent linguistic features, and additionally text chunk ID, classification metadata (MODE, DIVISION, SUPERCLASS, CLASS) and length (_LEN).\r\n\r\nThe abbreviations for the classification categories (= values in the MODE, DIVISION, SUPERCLASS and CLASS columns) and linguistic feature names (= remaining column names) are explained in 00_README.pdf.\r\n","label":"01_2017-12-05.csv","restricted":false,"version":1,"datasetVersionId":3803,"dataFile":{"id":5095,"persistentId":"doi:10.18710/QAJKZW/M1I7AP","pidURL":"https://doi.org/10.18710/QAJKZW/M1I7AP","filename":"01_2017-12-05.csv","contentType":"text/tab-separated-values","filesize":7420627,"description":"Values of linguistic features in individual text chunks. Each row of the table corresponds to a text chunk in the Koditex corpus. Columns represent linguistic features, and additionally text chunk ID, classification metadata (MODE, DIVISION, SUPERCLASS, CLASS) and length (_LEN).\r\n\r\nThe abbreviations for the classification categories (= values in the MODE, DIVISION, SUPERCLASS and CLASS columns) and linguistic feature names (= remaining column names) are explained in 00_README.pdf.\r\n","storageIdentifier":"S3://2002-yellow-dataverseno:1666846d577-377742e1f391","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":7390578,"originalFileName":"01_2017-12-05.csv","UNF":"UNF:6:5rqhrfGF8iJspOAQER3OCA==","rootDataFileId":-1,"md5":"daf11ccd0c1673438a5e2ec97466cb7c","checksum":{"type":"MD5","value":"daf11ccd0c1673438a5e2ec97466cb7c"},"creationDate":"2018-10-12"}},{"description":"R code for performing factor analysis on the supplied data set.","label":"02_factor_analysis.R","restricted":false,"version":1,"datasetVersionId":3803,"dataFile":{"id":5208,"persistentId":"doi:10.18710/QAJKZW/LTNR3K","pidURL":"https://doi.org/10.18710/QAJKZW/LTNR3K","filename":"02_factor_analysis.R","contentType":"type/x-r-syntax","filesize":1007,"description":"R code for performing factor analysis on the supplied data set.","storageIdentifier":"S3://2002-yellow-dataverseno:166c084b607-e164dc5ec89e","rootDataFileId":-1,"md5":"4f670b765505156dc3b9cfd53b7d397d","checksum":{"type":"MD5","value":"4f670b765505156dc3b9cfd53b7d397d"},"creationDate":"2018-10-29"}}],"citation":"Cvrček, Václav, 2018, \"Multi-Dimensional Analysis of Czech\", https://doi.org/10.18710/QAJKZW, DataverseNO, V1, UNF:6:5rqhrfGF8iJspOAQER3OCA== [fileUNF]"}}