Add proper BCP 47 names for various languages

Audit language names against a BCP 47 validator and rename those that are
not valid BCP 47 codes, keeping the old names as aliases for backward
compatibility.

Validated against IANA registry at
  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
with the help of the validator at
  http://schneegans.de/lv/

Add fully-expanded forms of the zh-XX language codes (e.g. zh-hans-cn for
zh-cn), keeping the short codes as aliases, to improve compatibility with
other BCP 47 users.

See also:
1113b1203c/languages/LanguageCode.php (L56)
This commit is contained in:
C. Scott Ananian
2018-10-23 12:32:29 -04:00
parent df3429ee65
commit 03205a8955
2 changed files with 127 additions and 63 deletions

View File

@@ -87,7 +87,8 @@ languages:
byn: [Ethi, [AF], ብሊን] byn: [Ethi, [AF], ብሊን]
ca: [Latn, [EU], català] ca: [Latn, [EU], català]
cak: [Latn, [AM], Kaqchikel] cak: [Latn, [AM], Kaqchikel]
cbk-zam: [Latn, [AS], Chavacano de Zamboanga] cbk: [Latn, [AS], Chavacano de Zamboanga]
cbk-zam: [cbk]
cdo: [Latn, [AS], Mìng-dĕ̤ng-ngṳ̄] cdo: [Latn, [AS], Mìng-dĕ̤ng-ngṳ̄]
ce: [Cyrl, [EU], нохчийн] ce: [Cyrl, [EU], нохчийн]
ceb: [Latn, [AS], Cebuano] ceb: [Latn, [AS], Cebuano]
@@ -131,7 +132,8 @@ languages:
dag: [Latn, [AF], dagbanli] dag: [Latn, [AF], dagbanli]
de-at: [Latn, [EU], Österreichisches Deutsch] de-at: [Latn, [EU], Österreichisches Deutsch]
de-ch: [Latn, [EU], Schweizer Hochdeutsch] de-ch: [Latn, [EU], Schweizer Hochdeutsch]
de-formal: [Latn, [EU], Deutsch (Sie-Form)] de-formal: [de-x-formal]
de-x-formal: [Latn, [EU], Deutsch (Sie-Form)]
de: [Latn, [EU], Deutsch] de: [Latn, [EU], Deutsch]
din: [Latn, [AF], Thuɔŋjäŋ] din: [Latn, [AF], Thuɔŋjäŋ]
diq: [Latn, [EU, AS], Zazaki] diq: [Latn, [EU, AS], Zazaki]
@@ -147,11 +149,13 @@ languages:
eml: [Latn, [EU], emiliàn e rumagnòl] eml: [Latn, [EU], emiliàn e rumagnòl]
en-ca: [Latn, [AM], Canadian English] en-ca: [Latn, [AM], Canadian English]
en-gb: [Latn, [EU, AS, PA], British English] en-gb: [Latn, [EU, AS, PA], British English]
en-simple: [Latn, [WW], Simple English]
en: [Latn, [EU, AM, AF, ME, AS, PA, WW], English] en: [Latn, [EU, AM, AF, ME, AS, PA, WW], English]
eo: [Latn, [WW], Esperanto] eo: [Latn, [WW], Esperanto]
es-419: [Latn, [AM], español de América Latina] es-419: [Latn, [AM], español de América Latina]
# world? # world?
es-formal: [Latn, [EU, AM, AF, WW], español (formal)] es-formal: [es-x-formal]
es-x-formal: [Latn, [EU, AM, AF, WW], español (formal)]
# world? # world?
es: [Latn, [EU, AM, AF, WW, PA], español] es: [Latn, [EU, AM, AF, WW, PA], español]
es-ni: [Latn, [AM], español nicaragüense] es-ni: [Latn, [AM], español nicaragüense]
@@ -230,7 +234,8 @@ languages:
hsb: [Latn, [EU], hornjoserbsce] hsb: [Latn, [EU], hornjoserbsce]
hsn: [Hans, [AS], 湘语] hsn: [Hans, [AS], 湘语]
ht: [Latn, [AM], Kreyòl ayisyen] ht: [Latn, [AM], Kreyòl ayisyen]
hu-formal: [Latn, [EU], Magyar (magázó)] hu-formal: [hu-x-formal]
hu-x-formal: [Latn, [EU], Magyar (magázó)]
hu: [Latn, [EU], magyar] hu: [Latn, [EU], magyar]
hy: [Armn, [EU, ME], հայերեն] hy: [Armn, [EU, ME], հայերեն]
hyw: [Armn, [EU, ME], արեւմտահայերէն] hyw: [Armn, [EU, ME], արեւմտահայերէն]
@@ -258,6 +263,7 @@ languages:
jv: [Latn, [AS, PA], Basa Jawa] jv: [Latn, [AS, PA], Basa Jawa]
# For support in webfonts. # For support in webfonts.
jv-java: [Java, [AS, PA], ꦧꦱꦗꦮ] jv-java: [Java, [AS, PA], ꦧꦱꦗꦮ]
jv-x-bms: [Latn, [AS], Basa Banyumasan]
ka: [Geor, [EU], ქართული] ka: [Geor, [EU], ქართული]
kaa: [Latn, [AS], Qaraqalpaqsha] kaa: [Latn, [AS], Qaraqalpaqsha]
# Can also be Tfng, but the Wikipedia is mostly Latn # Can also be Tfng, but the Wikipedia is mostly Latn
@@ -343,7 +349,7 @@ languages:
# Also Geor, but the incubator is in Latn # Also Geor, but the incubator is in Latn
lzz: [Latn, [EU, ME], Lazuri] lzz: [Latn, [EU, ME], Lazuri]
mai: [Deva, [AS], मैथिली] mai: [Deva, [AS], मैथिली]
map-bms: [Latn, [AS], Basa Banyumasan] map-bms: [jv-x-bms]
mdf: [Cyrl, [EU], мокшень] mdf: [Cyrl, [EU], мокшень]
mfe: [Latn, [AM], Morisyen] mfe: [Latn, [AM], Morisyen]
mg: [Latn, [AF], Malagasy] mg: [Latn, [AF], Malagasy]
@@ -360,7 +366,7 @@ languages:
mnc: [Mong, [AS], ᠮᠠᠨᠵᡠ ᡤᡳᠰᡠᠨ] mnc: [Mong, [AS], ᠮᠠᠨᠵᡠ ᡤᡳᠰᡠᠨ]
mni: [Beng, [AS], মেইতেই লোন্] mni: [Beng, [AS], মেইতেই লোন্]
mnw: [Mymr, [AS], ဘာသာ မန်] mnw: [Mymr, [AS], ဘာသာ မန်]
mo: [Cyrl, [EU], молдовеняскэ] mo: [ro-cyrl-md]
moe: [Latn, [AM], Innu-aimun] moe: [Latn, [AM], Innu-aimun]
mr: [Deva, [AS, ME], मराठी] mr: [Deva, [AS, ME], मराठी]
mrj: [Cyrl, [EU], кырык мары] mrj: [Cyrl, [EU], кырык мары]
@@ -379,6 +385,7 @@ languages:
nah: [Latn, [AM], Nāhuatl] nah: [Latn, [AM], Nāhuatl]
nan: [Latn, [AS], Bân-lâm-gú] nan: [Latn, [AS], Bân-lâm-gú]
nap: [Latn, [EU], Napulitano] nap: [Latn, [EU], Napulitano]
nap-x-tara: [Latn, [EU], tarandíne]
nb: [Latn, [EU], norsk (bokmål)] nb: [Latn, [EU], norsk (bokmål)]
nd: [Latn, [AF], siNdebele saseNyakatho] nd: [Latn, [AF], siNdebele saseNyakatho]
nds-nl: [Latn, [EU], Nedersaksisch] nds-nl: [Latn, [EU], Nedersaksisch]
@@ -388,7 +395,8 @@ languages:
ng: [Latn, [AF], Oshiwambo] ng: [Latn, [AF], Oshiwambo]
niu: [Latn, [PA], ko e vagahau Niuē] niu: [Latn, [PA], ko e vagahau Niuē]
njo: [Latn, [AS], Ao] njo: [Latn, [AS], Ao]
nl-informal: [Latn, [EU, AM], Nederlands (informeel)] nl-informal: [nl-x-informal]
nl-x-informal: [Latn, [EU, AM], Nederlands (informeel)]
nl: [Latn, [EU, AM], Nederlands] nl: [Latn, [EU, AM], Nederlands]
nn: [Latn, [EU], norsk (nynorsk)] nn: [Latn, [EU], norsk (nynorsk)]
# There's also nb for Bokmål and nn for Nynorsk # There's also nb for Bokmål and nn for Nynorsk
@@ -397,7 +405,8 @@ languages:
nov: [Latn, [WW], Novial] nov: [Latn, [WW], Novial]
nqo: [Nkoo, [AF], ߒߞߏ] nqo: [Nkoo, [AF], ߒߞߏ]
nr: [Latn, [AF], isiNdebele seSewula] nr: [Latn, [AF], isiNdebele seSewula]
nrm: [Latn, [EU], Nouormand] nrf: [Latn, [EU], Nouormand]
nrm: [nrf]
nso: [Latn, [AF], Sesotho sa Leboa] nso: [Latn, [AF], Sesotho sa Leboa]
nv: [Latn, [AM], Diné bizaad] nv: [Latn, [AM], Diné bizaad]
ny: [Latn, [AF], Chi-Chewa] ny: [Latn, [AF], Chi-Chewa]
@@ -452,8 +461,9 @@ languages:
rmy: [Latn, [EU], Romani] rmy: [Latn, [EU], Romani]
rn: [Latn, [AF], Kirundi] rn: [Latn, [AF], Kirundi]
ro: [Latn, [EU], română] ro: [Latn, [EU], română]
ro-cyrl-md: [Cyrl, [EU], молдовеняскэ]
roa-rup: [rup] roa-rup: [rup]
roa-tara: [Latn, [EU], tarandíne] roa-tara: [nap-x-tara]
rtm: [Latn, [PA], Faeag Rotuma] rtm: [Latn, [PA], Faeag Rotuma]
# world? # world?
ru: [Cyrl, [EU, AS, ME], русский] ru: [Cyrl, [EU, AS, ME], русский]
@@ -490,7 +500,7 @@ languages:
shi: [shi-latn] shi: [shi-latn]
shn: [Mymr, [AS], လိၵ်ႈတႆး] shn: [Mymr, [AS], လိၵ်ႈတႆး]
si: [Sinh, [AS], සිංහල] si: [Sinh, [AS], සිංහල]
simple: [Latn, [WW], Simple English] simple: [en-simple]
sjd: [Cyrl, [EU], Кӣллт са̄мь кӣлл] sjd: [Cyrl, [EU], Кӣллт са̄мь кӣлл]
sje: [Latn, [EU], bidumsámegiella] sje: [Latn, [EU], bidumsámegiella]
sjo: [Mong, [AS], ᠰᡞᠪᡝ ᡤᡞᠰᡠᠨ] sjo: [Mong, [AS], ᠰᡞᠪᡝ ᡤᡞᠰᡠᠨ]
@@ -608,15 +618,21 @@ languages:
# world? (may apply to many varieties of Chinese) # world? (may apply to many varieties of Chinese)
zh: [Hans, [AS], 中文] zh: [Hans, [AS], 中文]
zh-classical: [lzh] zh-classical: [lzh]
zh-cn: [Hans, [AS], 中文(中国大陆)] zh-cn: [zh-hans-cn]
zh-hans-cn: [Hans, [AS], 中文(中国大陆)]
zh-hans-my: [Hans, [AS], 中文(马来西亚)]
zh-hans-sg: [Hans, [AS], 中文(新加坡)]
zh-hans: [Hans, [AS], 中文(简体)] zh-hans: [Hans, [AS], 中文(简体)]
zh-hant-hk: [Hant, [AS], 中文(香港)]
zh-hant-mo: [Hant, [AS], 中文(澳門)]
zh-hant-tw: [Hant, [AS], 中文(台灣)]
zh-hant: [Hant, [AS], 中文(繁體)] zh-hant: [Hant, [AS], 中文(繁體)]
zh-hk: [Hant, [AS], 中文(香港)] zh-hk: [zh-hant-hk]
zh-min-nan: [nan] zh-min-nan: [nan]
zh-mo: [Hant, [AS], 中文(澳門)] zh-mo: [zh-hant-mo]
zh-my: [Hans, [AS], 中文(马来西亚)] zh-my: [zh-hans-my]
zh-sg: [Hans, [AS], 中文(新加坡)] zh-sg: [zh-hans-sg]
zh-tw: [Hant, [AS], 中文(台灣)] zh-tw: [zh-hant-tw]
zh-yue: [yue] zh-yue: [yue]
zu: [Latn, [AF], isiZulu] zu: [Latn, [AF], isiZulu]
zun: [Latn, [AM], "Shiwi'ma"] zun: [Latn, [AM], "Shiwi'ma"]

View File

@@ -548,13 +548,16 @@
], ],
"Kaqchikel" "Kaqchikel"
], ],
"cbk-zam": [ "cbk": [
"Latn", "Latn",
[ [
"AS" "AS"
], ],
"Chavacano de Zamboanga" "Chavacano de Zamboanga"
], ],
"cbk-zam": [
"cbk"
],
"cdo": [ "cdo": [
"Latn", "Latn",
[ [
@@ -789,6 +792,9 @@
"Schweizer Hochdeutsch" "Schweizer Hochdeutsch"
], ],
"de-formal": [ "de-formal": [
"de-x-formal"
],
"de-x-formal": [
"Latn", "Latn",
[ [
"EU" "EU"
@@ -903,6 +909,13 @@
], ],
"British English" "British English"
], ],
"en-simple": [
"Latn",
[
"WW"
],
"Simple English"
],
"en": [ "en": [
"Latn", "Latn",
[ [
@@ -931,6 +944,9 @@
"español de América Latina" "español de América Latina"
], ],
"es-formal": [ "es-formal": [
"es-x-formal"
],
"es-x-formal": [
"Latn", "Latn",
[ [
"EU", "EU",
@@ -1459,6 +1475,9 @@
"Kreyòl ayisyen" "Kreyòl ayisyen"
], ],
"hu-formal": [ "hu-formal": [
"hu-x-formal"
],
"hu-x-formal": [
"Latn", "Latn",
[ [
"EU" "EU"
@@ -1644,6 +1663,13 @@
], ],
"ꦧꦱꦗꦮ" "ꦧꦱꦗꦮ"
], ],
"jv-x-bms": [
"Latn",
[
"AS"
],
"Basa Banyumasan"
],
"ka": [ "ka": [
"Geor", "Geor",
[ [
@@ -2192,11 +2218,7 @@
"मैथिली" "मैथिली"
], ],
"map-bms": [ "map-bms": [
"Latn", "jv-x-bms"
[
"AS"
],
"Basa Banyumasan"
], ],
"mdf": [ "mdf": [
"Cyrl", "Cyrl",
@@ -2305,11 +2327,7 @@
"ဘာသာ မန်" "ဘာသာ မန်"
], ],
"mo": [ "mo": [
"Cyrl", "ro-cyrl-md"
[
"EU"
],
"молдовеняскэ"
], ],
"moe": [ "moe": [
"Latn", "Latn",
@@ -2435,6 +2453,13 @@
], ],
"Napulitano" "Napulitano"
], ],
"nap-x-tara": [
"Latn",
[
"EU"
],
"tarandíne"
],
"nb": [ "nb": [
"Latn", "Latn",
[ [
@@ -2499,6 +2524,9 @@
"Ao" "Ao"
], ],
"nl-informal": [ "nl-informal": [
"nl-x-informal"
],
"nl-x-informal": [
"Latn", "Latn",
[ [
"EU", "EU",
@@ -2556,13 +2584,16 @@
], ],
"isiNdebele seSewula" "isiNdebele seSewula"
], ],
"nrm": [ "nrf": [
"Latn", "Latn",
[ [
"EU" "EU"
], ],
"Nouormand" "Nouormand"
], ],
"nrm": [
"nrf"
],
"nso": [ "nso": [
"Latn", "Latn",
[ [
@@ -2935,15 +2966,18 @@
], ],
"română" "română"
], ],
"ro-cyrl-md": [
"Cyrl",
[
"EU"
],
"молдовеняскэ"
],
"roa-rup": [ "roa-rup": [
"rup" "rup"
], ],
"roa-tara": [ "roa-tara": [
"Latn", "nap-x-tara"
[
"EU"
],
"tarandíne"
], ],
"rtm": [ "rtm": [
"Latn", "Latn",
@@ -3172,11 +3206,7 @@
"සිංහල" "සිංහල"
], ],
"simple": [ "simple": [
"Latn", "en-simple"
[
"WW"
],
"Simple English"
], ],
"sjd": [ "sjd": [
"Cyrl", "Cyrl",
@@ -3951,12 +3981,29 @@
"lzh" "lzh"
], ],
"zh-cn": [ "zh-cn": [
"zh-hans-cn"
],
"zh-hans-cn": [
"Hans", "Hans",
[ [
"AS" "AS"
], ],
"中文(中国大陆)" "中文(中国大陆)"
], ],
"zh-hans-my": [
"Hans",
[
"AS"
],
"中文(马来西亚)"
],
"zh-hans-sg": [
"Hans",
[
"AS"
],
"中文(新加坡)"
],
"zh-hans": [ "zh-hans": [
"Hans", "Hans",
[ [
@@ -3964,6 +4011,27 @@
], ],
"中文(简体)" "中文(简体)"
], ],
"zh-hant-hk": [
"Hant",
[
"AS"
],
"中文(香港)"
],
"zh-hant-mo": [
"Hant",
[
"AS"
],
"中文(澳門)"
],
"zh-hant-tw": [
"Hant",
[
"AS"
],
"中文(台灣)"
],
"zh-hant": [ "zh-hant": [
"Hant", "Hant",
[ [
@@ -3972,42 +4040,22 @@
"中文(繁體)" "中文(繁體)"
], ],
"zh-hk": [ "zh-hk": [
"Hant", "zh-hant-hk"
[
"AS"
],
"中文(香港)"
], ],
"zh-min-nan": [ "zh-min-nan": [
"nan" "nan"
], ],
"zh-mo": [ "zh-mo": [
"Hant", "zh-hant-mo"
[
"AS"
],
"中文(澳門)"
], ],
"zh-my": [ "zh-my": [
"Hans", "zh-hans-my"
[
"AS"
],
"中文(马来西亚)"
], ],
"zh-sg": [ "zh-sg": [
"Hans", "zh-hans-sg"
[
"AS"
],
"中文(新加坡)"
], ],
"zh-tw": [ "zh-tw": [
"Hant", "zh-hant-tw"
[
"AS"
],
"中文(台灣)"
], ],
"zh-yue": [ "zh-yue": [
"yue" "yue"