From 03205a8955f961a0b6a7077a27ee60637eea1373 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Tue, 23 Oct 2018 12:32:29 -0400 Subject: [PATCH] Add proper BCP 47 names for various languages Audit language names against BCP 47 validator and rename those which are not valid BCP 47 codes, leaving the old aliases for backward-compatibility. Validated against IANA registry at https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry with the help of the validator at http://schneegans.de/lv/ Add fully-expanded aliases for zh-XX language codes to help compatibility with other BCP 47 users. See also: https://github.com/wikimedia/mediawiki/blob/1113b1203cbc1049fb1d01e975ff9a531d72a408/languages/LanguageCode.php#L56 --- data/langdb.yaml | 48 ++++++++++----- language-data.json | 142 ++++++++++++++++++++++++++++++--------------- 2 files changed, 127 insertions(+), 63 deletions(-) diff --git a/data/langdb.yaml b/data/langdb.yaml index c3d5878..e4082eb 100644 --- a/data/langdb.yaml +++ b/data/langdb.yaml @@ -87,7 +87,8 @@ languages: byn: [Ethi, [AF], ብሊን] ca: [Latn, [EU], català] cak: [Latn, [AM], Kaqchikel] - cbk-zam: [Latn, [AS], Chavacano de Zamboanga] + cbk: [Latn, [AS], Chavacano de Zamboanga] + cbk-zam: [cbk] cdo: [Latn, [AS], Mìng-dĕ̤ng-ngṳ̄] ce: [Cyrl, [EU], нохчийн] ceb: [Latn, [AS], Cebuano] @@ -131,7 +132,8 @@ languages: dag: [Latn, [AF], dagbanli] de-at: [Latn, [EU], Österreichisches Deutsch] de-ch: [Latn, [EU], Schweizer Hochdeutsch] - de-formal: [Latn, [EU], Deutsch (Sie-Form)] + de-formal: [de-x-formal] + de-x-formal: [Latn, [EU], Deutsch (Sie-Form)] de: [Latn, [EU], Deutsch] din: [Latn, [AF], Thuɔŋjäŋ] diq: [Latn, [EU, AS], Zazaki] @@ -147,11 +149,13 @@ languages: eml: [Latn, [EU], emiliàn e rumagnòl] en-ca: [Latn, [AM], Canadian English] en-gb: [Latn, [EU, AS, PA], British English] + en-simple: [Latn, [WW], Simple English] en: [Latn, [EU, AM, AF, ME, AS, PA, WW], English] eo: [Latn, [WW], Esperanto] es-419: [Latn, [AM], español de América Latina] # world? - es-formal: [Latn, [EU, AM, AF, WW], español (formal)] + es-formal: [es-x-formal] + es-x-formal: [Latn, [EU, AM, AF, WW], español (formal)] # world? es: [Latn, [EU, AM, AF, WW, PA], español] es-ni: [Latn, [AM], español nicaragüense] @@ -230,7 +234,8 @@ languages: hsb: [Latn, [EU], hornjoserbsce] hsn: [Hans, [AS], 湘语] ht: [Latn, [AM], Kreyòl ayisyen] - hu-formal: [Latn, [EU], Magyar (magázó)] + hu-formal: [hu-x-formal] + hu-x-formal: [Latn, [EU], Magyar (magázó)] hu: [Latn, [EU], magyar] hy: [Armn, [EU, ME], հայերեն] hyw: [Armn, [EU, ME], արեւմտահայերէն] @@ -258,6 +263,7 @@ languages: jv: [Latn, [AS, PA], Basa Jawa] # For support in webfonts. jv-java: [Java, [AS, PA], ꦧꦱꦗꦮ] + jv-x-bms: [Latn, [AS], Basa Banyumasan] ka: [Geor, [EU], ქართული] kaa: [Latn, [AS], Qaraqalpaqsha] # Can also be Tfng, but the Wikipedia is mostly Latn @@ -343,7 +349,7 @@ languages: # Also Geor, but the incubator is in Latn lzz: [Latn, [EU, ME], Lazuri] mai: [Deva, [AS], मैथिली] - map-bms: [Latn, [AS], Basa Banyumasan] + map-bms: [jv-x-bms] mdf: [Cyrl, [EU], мокшень] mfe: [Latn, [AM], Morisyen] mg: [Latn, [AF], Malagasy] @@ -360,7 +366,7 @@ languages: mnc: [Mong, [AS], ᠮᠠᠨᠵᡠ ᡤᡳᠰᡠᠨ] mni: [Beng, [AS], মেইতেই লোন্] mnw: [Mymr, [AS], ဘာသာ မန်] - mo: [Cyrl, [EU], молдовеняскэ] + mo: [ro-cyrl-md] moe: [Latn, [AM], Innu-aimun] mr: [Deva, [AS, ME], मराठी] mrj: [Cyrl, [EU], кырык мары] @@ -379,6 +385,7 @@ languages: nah: [Latn, [AM], Nāhuatl] nan: [Latn, [AS], Bân-lâm-gú] nap: [Latn, [EU], Napulitano] + nap-x-tara: [Latn, [EU], tarandíne] nb: [Latn, [EU], norsk (bokmål)] nd: [Latn, [AF], siNdebele saseNyakatho] nds-nl: [Latn, [EU], Nedersaksisch] @@ -388,7 +395,8 @@ languages: ng: [Latn, [AF], Oshiwambo] niu: [Latn, [PA], ko e vagahau Niuē] njo: [Latn, [AS], Ao] - nl-informal: [Latn, [EU, AM], Nederlands (informeel)] + nl-informal: [nl-x-informal] + nl-x-informal: [Latn, [EU, AM], Nederlands (informeel)] nl: [Latn, [EU, AM], Nederlands] nn: [Latn, [EU], norsk (nynorsk)] # There's also nb for Bokmål and nn for Nynorsk @@ -397,7 +405,8 @@ languages: nov: [Latn, [WW], Novial] nqo: [Nkoo, [AF], ߒߞߏ] nr: [Latn, [AF], isiNdebele seSewula] - nrm: [Latn, [EU], Nouormand] + nrf: [Latn, [EU], Nouormand] + nrm: [nrf] nso: [Latn, [AF], Sesotho sa Leboa] nv: [Latn, [AM], Diné bizaad] ny: [Latn, [AF], Chi-Chewa] @@ -452,8 +461,9 @@ languages: rmy: [Latn, [EU], Romani] rn: [Latn, [AF], Kirundi] ro: [Latn, [EU], română] + ro-cyrl-md: [Cyrl, [EU], молдовеняскэ] roa-rup: [rup] - roa-tara: [Latn, [EU], tarandíne] + roa-tara: [nap-x-tara] rtm: [Latn, [PA], Faeag Rotuma] # world? ru: [Cyrl, [EU, AS, ME], русский] @@ -490,7 +500,7 @@ languages: shi: [shi-latn] shn: [Mymr, [AS], လိၵ်ႈတႆး] si: [Sinh, [AS], සිංහල] - simple: [Latn, [WW], Simple English] + simple: [en-simple] sjd: [Cyrl, [EU], Кӣллт са̄мь кӣлл] sje: [Latn, [EU], bidumsámegiella] sjo: [Mong, [AS], ᠰᡞᠪᡝ ᡤᡞᠰᡠᠨ] @@ -608,15 +618,21 @@ languages: # world? (may apply to many varieties of Chinese) zh: [Hans, [AS], 中文] zh-classical: [lzh] - zh-cn: [Hans, [AS], 中文(中国大陆)] + zh-cn: [zh-hans-cn] + zh-hans-cn: [Hans, [AS], 中文(中国大陆)] + zh-hans-my: [Hans, [AS], 中文(马来西亚)] + zh-hans-sg: [Hans, [AS], 中文(新加坡)] zh-hans: [Hans, [AS], 中文(简体)] + zh-hant-hk: [Hant, [AS], 中文(香港)] + zh-hant-mo: [Hant, [AS], 中文(澳門)] + zh-hant-tw: [Hant, [AS], 中文(台灣)] zh-hant: [Hant, [AS], 中文(繁體)] - zh-hk: [Hant, [AS], 中文(香港)] + zh-hk: [zh-hant-hk] zh-min-nan: [nan] - zh-mo: [Hant, [AS], 中文(澳門)] - zh-my: [Hans, [AS], 中文(马来西亚)] - zh-sg: [Hans, [AS], 中文(新加坡)] - zh-tw: [Hant, [AS], 中文(台灣)] + zh-mo: [zh-hant-mo] + zh-my: [zh-hans-my] + zh-sg: [zh-hans-sg] + zh-tw: [zh-hant-tw] zh-yue: [yue] zu: [Latn, [AF], isiZulu] zun: [Latn, [AM], "Shiwi'ma"] diff --git a/language-data.json b/language-data.json index 0997e7a..37192f0 100644 --- a/language-data.json +++ b/language-data.json @@ -548,13 +548,16 @@ ], "Kaqchikel" ], - "cbk-zam": [ + "cbk": [ "Latn", [ "AS" ], "Chavacano de Zamboanga" ], + "cbk-zam": [ + "cbk" + ], "cdo": [ "Latn", [ @@ -789,6 +792,9 @@ "Schweizer Hochdeutsch" ], "de-formal": [ + "de-x-formal" + ], + "de-x-formal": [ "Latn", [ "EU" @@ -903,6 +909,13 @@ ], "British English" ], + "en-simple": [ + "Latn", + [ + "WW" + ], + "Simple English" + ], "en": [ "Latn", [ @@ -931,6 +944,9 @@ "español de América Latina" ], "es-formal": [ + "es-x-formal" + ], + "es-x-formal": [ "Latn", [ "EU", @@ -1459,6 +1475,9 @@ "Kreyòl ayisyen" ], "hu-formal": [ + "hu-x-formal" + ], + "hu-x-formal": [ "Latn", [ "EU" @@ -1644,6 +1663,13 @@ ], "ꦧꦱꦗꦮ" ], + "jv-x-bms": [ + "Latn", + [ + "AS" + ], + "Basa Banyumasan" + ], "ka": [ "Geor", [ @@ -2192,11 +2218,7 @@ "मैथिली" ], "map-bms": [ - "Latn", - [ - "AS" - ], - "Basa Banyumasan" + "jv-x-bms" ], "mdf": [ "Cyrl", @@ -2305,11 +2327,7 @@ "ဘာသာ မန်" ], "mo": [ - "Cyrl", - [ - "EU" - ], - "молдовеняскэ" + "ro-cyrl-md" ], "moe": [ "Latn", @@ -2435,6 +2453,13 @@ ], "Napulitano" ], + "nap-x-tara": [ + "Latn", + [ + "EU" + ], + "tarandíne" + ], "nb": [ "Latn", [ @@ -2499,6 +2524,9 @@ "Ao" ], "nl-informal": [ + "nl-x-informal" + ], + "nl-x-informal": [ "Latn", [ "EU", @@ -2556,13 +2584,16 @@ ], "isiNdebele seSewula" ], - "nrm": [ + "nrf": [ "Latn", [ "EU" ], "Nouormand" ], + "nrm": [ + "nrf" + ], "nso": [ "Latn", [ @@ -2935,15 +2966,18 @@ ], "română" ], + "ro-cyrl-md": [ + "Cyrl", + [ + "EU" + ], + "молдовеняскэ" + ], "roa-rup": [ "rup" ], "roa-tara": [ - "Latn", - [ - "EU" - ], - "tarandíne" + "nap-x-tara" ], "rtm": [ "Latn", @@ -3172,11 +3206,7 @@ "සිංහල" ], "simple": [ - "Latn", - [ - "WW" - ], - "Simple English" + "en-simple" ], "sjd": [ "Cyrl", @@ -3951,12 +3981,29 @@ "lzh" ], "zh-cn": [ + "zh-hans-cn" + ], + "zh-hans-cn": [ "Hans", [ "AS" ], "中文(中国大陆)" ], + "zh-hans-my": [ + "Hans", + [ + "AS" + ], + "中文(马来西亚)" + ], + "zh-hans-sg": [ + "Hans", + [ + "AS" + ], + "中文(新加坡)" + ], "zh-hans": [ "Hans", [ @@ -3964,6 +4011,27 @@ ], "中文(简体)" ], + "zh-hant-hk": [ + "Hant", + [ + "AS" + ], + "中文(香港)" + ], + "zh-hant-mo": [ + "Hant", + [ + "AS" + ], + "中文(澳門)" + ], + "zh-hant-tw": [ + "Hant", + [ + "AS" + ], + "中文(台灣)" + ], "zh-hant": [ "Hant", [ @@ -3972,42 +4040,22 @@ "中文(繁體)" ], "zh-hk": [ - "Hant", - [ - "AS" - ], - "中文(香港)" + "zh-hant-hk" ], "zh-min-nan": [ "nan" ], "zh-mo": [ - "Hant", - [ - "AS" - ], - "中文(澳門)" + "zh-hant-mo" ], "zh-my": [ - "Hans", - [ - "AS" - ], - "中文(马来西亚)" + "zh-hans-my" ], "zh-sg": [ - "Hans", - [ - "AS" - ], - "中文(新加坡)" + "zh-hans-sg" ], "zh-tw": [ - "Hant", - [ - "AS" - ], - "中文(台灣)" + "zh-hant-tw" ], "zh-yue": [ "yue"