Add proper BCP 47 names for various languages

Audit language names against a BCP 47 validator and rename those which are
not valid BCP 47 codes, keeping the old names as aliases for backward
compatibility.

Validated against the IANA Language Subtag Registry at
  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
with the help of the validator at
  http://schneegans.de/lv/

Add fully-expanded forms of the zh-XX language codes (e.g. zh-hans-cn for
zh-cn), keeping the short codes as aliases, to help compatibility with other
BCP 47 users.

See also:
1113b1203c/languages/LanguageCode.php (L56)
This commit is contained in:
C. Scott Ananian
2018-10-23 12:32:29 -04:00
parent df3429ee65
commit 03205a8955
2 changed files with 127 additions and 63 deletions

View File

@@ -87,7 +87,8 @@ languages:
byn: [Ethi, [AF], ብሊን]
ca: [Latn, [EU], català]
cak: [Latn, [AM], Kaqchikel]
cbk-zam: [Latn, [AS], Chavacano de Zamboanga]
cbk: [Latn, [AS], Chavacano de Zamboanga]
cbk-zam: [cbk]
cdo: [Latn, [AS], Mìng-dĕ̤ng-ngṳ̄]
ce: [Cyrl, [EU], нохчийн]
ceb: [Latn, [AS], Cebuano]
@@ -131,7 +132,8 @@ languages:
dag: [Latn, [AF], dagbanli]
de-at: [Latn, [EU], Österreichisches Deutsch]
de-ch: [Latn, [EU], Schweizer Hochdeutsch]
de-formal: [Latn, [EU], Deutsch (Sie-Form)]
de-formal: [de-x-formal]
de-x-formal: [Latn, [EU], Deutsch (Sie-Form)]
de: [Latn, [EU], Deutsch]
din: [Latn, [AF], Thuɔŋjäŋ]
diq: [Latn, [EU, AS], Zazaki]
@@ -147,11 +149,13 @@ languages:
eml: [Latn, [EU], emiliàn e rumagnòl]
en-ca: [Latn, [AM], Canadian English]
en-gb: [Latn, [EU, AS, PA], British English]
en-simple: [Latn, [WW], Simple English]
en: [Latn, [EU, AM, AF, ME, AS, PA, WW], English]
eo: [Latn, [WW], Esperanto]
es-419: [Latn, [AM], español de América Latina]
# world?
es-formal: [Latn, [EU, AM, AF, WW], español (formal)]
es-formal: [es-x-formal]
es-x-formal: [Latn, [EU, AM, AF, WW], español (formal)]
# world?
es: [Latn, [EU, AM, AF, WW, PA], español]
es-ni: [Latn, [AM], español nicaragüense]
@@ -230,7 +234,8 @@ languages:
hsb: [Latn, [EU], hornjoserbsce]
hsn: [Hans, [AS], 湘语]
ht: [Latn, [AM], Kreyòl ayisyen]
hu-formal: [Latn, [EU], Magyar (magázó)]
hu-formal: [hu-x-formal]
hu-x-formal: [Latn, [EU], Magyar (magázó)]
hu: [Latn, [EU], magyar]
hy: [Armn, [EU, ME], հայերեն]
hyw: [Armn, [EU, ME], արեւմտահայերէն]
@@ -258,6 +263,7 @@ languages:
jv: [Latn, [AS, PA], Basa Jawa]
# For support in webfonts.
jv-java: [Java, [AS, PA], ꦧꦱꦗꦮ]
jv-x-bms: [Latn, [AS], Basa Banyumasan]
ka: [Geor, [EU], ქართული]
kaa: [Latn, [AS], Qaraqalpaqsha]
# Can also be Tfng, but the Wikipedia is mostly Latn
@@ -343,7 +349,7 @@ languages:
# Also Geor, but the incubator is in Latn
lzz: [Latn, [EU, ME], Lazuri]
mai: [Deva, [AS], मैथिली]
map-bms: [Latn, [AS], Basa Banyumasan]
map-bms: [jv-x-bms]
mdf: [Cyrl, [EU], мокшень]
mfe: [Latn, [AM], Morisyen]
mg: [Latn, [AF], Malagasy]
@@ -360,7 +366,7 @@ languages:
mnc: [Mong, [AS], ᠮᠠᠨᠵᡠ ᡤᡳᠰᡠᠨ]
mni: [Beng, [AS], মেইতেই লোন্]
mnw: [Mymr, [AS], ဘာသာ မန်]
mo: [Cyrl, [EU], молдовеняскэ]
mo: [ro-cyrl-md]
moe: [Latn, [AM], Innu-aimun]
mr: [Deva, [AS, ME], मराठी]
mrj: [Cyrl, [EU], кырык мары]
@@ -379,6 +385,7 @@ languages:
nah: [Latn, [AM], Nāhuatl]
nan: [Latn, [AS], Bân-lâm-gú]
nap: [Latn, [EU], Napulitano]
nap-x-tara: [Latn, [EU], tarandíne]
nb: [Latn, [EU], norsk (bokmål)]
nd: [Latn, [AF], siNdebele saseNyakatho]
nds-nl: [Latn, [EU], Nedersaksisch]
@@ -388,7 +395,8 @@ languages:
ng: [Latn, [AF], Oshiwambo]
niu: [Latn, [PA], ko e vagahau Niuē]
njo: [Latn, [AS], Ao]
nl-informal: [Latn, [EU, AM], Nederlands (informeel)]
nl-informal: [nl-x-informal]
nl-x-informal: [Latn, [EU, AM], Nederlands (informeel)]
nl: [Latn, [EU, AM], Nederlands]
nn: [Latn, [EU], norsk (nynorsk)]
# There's also nb for Bokmål and nn for Nynorsk
@@ -397,7 +405,8 @@ languages:
nov: [Latn, [WW], Novial]
nqo: [Nkoo, [AF], ߒߞߏ]
nr: [Latn, [AF], isiNdebele seSewula]
nrm: [Latn, [EU], Nouormand]
nrf: [Latn, [EU], Nouormand]
nrm: [nrf]
nso: [Latn, [AF], Sesotho sa Leboa]
nv: [Latn, [AM], Diné bizaad]
ny: [Latn, [AF], Chi-Chewa]
@@ -452,8 +461,9 @@ languages:
rmy: [Latn, [EU], Romani]
rn: [Latn, [AF], Kirundi]
ro: [Latn, [EU], română]
ro-cyrl-md: [Cyrl, [EU], молдовеняскэ]
roa-rup: [rup]
roa-tara: [Latn, [EU], tarandíne]
roa-tara: [nap-x-tara]
rtm: [Latn, [PA], Faeag Rotuma]
# world?
ru: [Cyrl, [EU, AS, ME], русский]
@@ -490,7 +500,7 @@ languages:
shi: [shi-latn]
shn: [Mymr, [AS], လိၵ်ႈတႆး]
si: [Sinh, [AS], සිංහල]
simple: [Latn, [WW], Simple English]
simple: [en-simple]
sjd: [Cyrl, [EU], Кӣллт са̄мь кӣлл]
sje: [Latn, [EU], bidumsámegiella]
sjo: [Mong, [AS], ᠰᡞᠪᡝ ᡤᡞᠰᡠᠨ]
@@ -608,15 +618,21 @@ languages:
# world? (may apply to many varieties of Chinese)
zh: [Hans, [AS], 中文]
zh-classical: [lzh]
zh-cn: [Hans, [AS], 中文(中国大陆)]
zh-cn: [zh-hans-cn]
zh-hans-cn: [Hans, [AS], 中文(中国大陆)]
zh-hans-my: [Hans, [AS], 中文(马来西亚)]
zh-hans-sg: [Hans, [AS], 中文(新加坡)]
zh-hans: [Hans, [AS], 中文(简体)]
zh-hant-hk: [Hant, [AS], 中文(香港)]
zh-hant-mo: [Hant, [AS], 中文(澳門)]
zh-hant-tw: [Hant, [AS], 中文(台灣)]
zh-hant: [Hant, [AS], 中文(繁體)]
zh-hk: [Hant, [AS], 中文(香港)]
zh-hk: [zh-hant-hk]
zh-min-nan: [nan]
zh-mo: [Hant, [AS], 中文(澳門)]
zh-my: [Hans, [AS], 中文(马来西亚)]
zh-sg: [Hans, [AS], 中文(新加坡)]
zh-tw: [Hant, [AS], 中文(台灣)]
zh-mo: [zh-hant-mo]
zh-my: [zh-hans-my]
zh-sg: [zh-hans-sg]
zh-tw: [zh-hant-tw]
zh-yue: [yue]
zu: [Latn, [AF], isiZulu]
zun: [Latn, [AM], "Shiwi'ma"]

View File

@@ -548,13 +548,16 @@
],
"Kaqchikel"
],
"cbk-zam": [
"cbk": [
"Latn",
[
"AS"
],
"Chavacano de Zamboanga"
],
"cbk-zam": [
"cbk"
],
"cdo": [
"Latn",
[
@@ -789,6 +792,9 @@
"Schweizer Hochdeutsch"
],
"de-formal": [
"de-x-formal"
],
"de-x-formal": [
"Latn",
[
"EU"
@@ -903,6 +909,13 @@
],
"British English"
],
"en-simple": [
"Latn",
[
"WW"
],
"Simple English"
],
"en": [
"Latn",
[
@@ -931,6 +944,9 @@
"español de América Latina"
],
"es-formal": [
"es-x-formal"
],
"es-x-formal": [
"Latn",
[
"EU",
@@ -1459,6 +1475,9 @@
"Kreyòl ayisyen"
],
"hu-formal": [
"hu-x-formal"
],
"hu-x-formal": [
"Latn",
[
"EU"
@@ -1644,6 +1663,13 @@
],
"ꦧꦱꦗꦮ"
],
"jv-x-bms": [
"Latn",
[
"AS"
],
"Basa Banyumasan"
],
"ka": [
"Geor",
[
@@ -2192,11 +2218,7 @@
"मैथिली"
],
"map-bms": [
"Latn",
[
"AS"
],
"Basa Banyumasan"
"jv-x-bms"
],
"mdf": [
"Cyrl",
@@ -2305,11 +2327,7 @@
"ဘာသာ မန်"
],
"mo": [
"Cyrl",
[
"EU"
],
"молдовеняскэ"
"ro-cyrl-md"
],
"moe": [
"Latn",
@@ -2435,6 +2453,13 @@
],
"Napulitano"
],
"nap-x-tara": [
"Latn",
[
"EU"
],
"tarandíne"
],
"nb": [
"Latn",
[
@@ -2499,6 +2524,9 @@
"Ao"
],
"nl-informal": [
"nl-x-informal"
],
"nl-x-informal": [
"Latn",
[
"EU",
@@ -2556,13 +2584,16 @@
],
"isiNdebele seSewula"
],
"nrm": [
"nrf": [
"Latn",
[
"EU"
],
"Nouormand"
],
"nrm": [
"nrf"
],
"nso": [
"Latn",
[
@@ -2935,15 +2966,18 @@
],
"română"
],
"ro-cyrl-md": [
"Cyrl",
[
"EU"
],
"молдовеняскэ"
],
"roa-rup": [
"rup"
],
"roa-tara": [
"Latn",
[
"EU"
],
"tarandíne"
"nap-x-tara"
],
"rtm": [
"Latn",
@@ -3172,11 +3206,7 @@
"සිංහල"
],
"simple": [
"Latn",
[
"WW"
],
"Simple English"
"en-simple"
],
"sjd": [
"Cyrl",
@@ -3951,12 +3981,29 @@
"lzh"
],
"zh-cn": [
"zh-hans-cn"
],
"zh-hans-cn": [
"Hans",
[
"AS"
],
"中文(中国大陆)"
],
"zh-hans-my": [
"Hans",
[
"AS"
],
"中文(马来西亚)"
],
"zh-hans-sg": [
"Hans",
[
"AS"
],
"中文(新加坡)"
],
"zh-hans": [
"Hans",
[
@@ -3964,6 +4011,27 @@
],
"中文(简体)"
],
"zh-hant-hk": [
"Hant",
[
"AS"
],
"中文(香港)"
],
"zh-hant-mo": [
"Hant",
[
"AS"
],
"中文(澳門)"
],
"zh-hant-tw": [
"Hant",
[
"AS"
],
"中文(台灣)"
],
"zh-hant": [
"Hant",
[
@@ -3972,42 +4040,22 @@
"中文(繁體)"
],
"zh-hk": [
"Hant",
[
"AS"
],
"中文(香港)"
"zh-hant-hk"
],
"zh-min-nan": [
"nan"
],
"zh-mo": [
"Hant",
[
"AS"
],
"中文(澳門)"
"zh-hant-mo"
],
"zh-my": [
"Hans",
[
"AS"
],
"中文(马来西亚)"
"zh-hans-my"
],
"zh-sg": [
"Hans",
[
"AS"
],
"中文(新加坡)"
"zh-hans-sg"
],
"zh-tw": [
"Hant",
[
"AS"
],
"中文(台灣)"
"zh-hant-tw"
],
"zh-yue": [
"yue"