Merge "Updates for language database (~75 codes added)."

This commit is contained in:
Siebrand Mazeland
2012-07-03 15:00:15 +00:00
committed by Gerrit Code Review
3 changed files with 127 additions and 49 deletions

File diff suppressed because one or more lines are too long

View File

@@ -2,8 +2,14 @@ languages:
aa: [Latn, [AF]]
ab: [Cyrl, [EU]]
ace: [Latn, [AS, PA]]
ady-cyrl: [Cyrl, [EU]]
ady-latn: [Latn, [EU]]
ady: [Cyrl, [EU]]
aeb: [Arab, [AF]]
af: [Latn, [AF]]
ahr: [Deva, [AS]]
ak: [Latn, [AF]]
akz: [Latn, [NA]]
aln: [Latn, [EU]]
am: [Ethi, [AF]]
an: [Latn, [EU]]
@@ -12,9 +18,12 @@ languages:
ar: [Arab, [ME]]
arc: [Syrc, [ME]]
arn: [Latn, [SA]]
aro: [Latn, [LA]]
arq: [Latn, [AF]]
ary: [Latn, [ME]]
arz: [Arab, [ME]]
as: [Beng, [AS]]
ase: [Sgnw, [NA]]
ast: [Latn, [EU]]
av: [Cyrl, [EU]]
avk: [Latn, [WW]]
@@ -23,11 +32,15 @@ languages:
az: [Latn, [EU, ME]]
ba: [Cyrl, [EU]]
bar: [Latn, [EU]]
bbc-latn: [Latn, [AS]]
bbc: [Batk, [AS]]
bcc: [Arab, [AS, ME]]
bcl: [Latn, [AS]]
be: [Cyrl, [EU]]
be-tarask: [Cyrl, [EU]]
be-x-old: [Cyrl, [EU]]
be: [Cyrl, [EU]]
bew: [Latn, [AS]]
bfq: [Taml, [AS]]
bg: [Cyrl, [EU]]
bh: [Deva, [AS]]
bho: [Deva, [AS]]
@@ -41,6 +54,7 @@ languages:
br: [Latn, [EU]]
brh: [Latn, [ME, AS]]
bs: [Latn, [EU]]
bto: [Latn, [AS]]
bug: [Bugi, [AS]]
bxr: [Cyrl, [AS]]
ca: [Latn, [EU]]
@@ -57,35 +71,40 @@ languages:
cps: [Latn, [AS]]
# Also Latn
cr: [Cans, [NA]]
crh-cyrl: [Cyrl, [EU]]
crh-latn: [Latn, [EU]]
# Latn is default, Cyrl is common IRL
crh: [Latn, [EU]]
crh-latn: [Latn, [EU]]
crh-cyrl: [Cyrl, [EU]]
cs: [Latn, [EU]]
csb: [Latn, [EU]]
cu: [Cyrl, [EU]]
cv: [Cyrl, [EU]]
cy: [Latn, [EU]]
da: [Latn, [EU]]
de: [Latn, [EU]]
de-at: [Latn, [EU]]
de-ch: [Latn, [EU]]
de-formal: [Latn, [EU]]
de: [Latn, [EU]]
diq: [Latn, [EU, AS]]
dsb: [Latn, [EU]]
dtp: [Latn, [AS]]
dv: [Thaa, [AS]]
dz: [Tibt, [AS]]
ee: [Latn, [AF]]
egl: [Latn, [EU]]
el: [Grek, [EU]]
eml: [Latn, [EU]]
# world?
en: [Latn, [EU, NA, AU, AF, ME, AS, PA, WW]]
en-ca: [Latn, [NA]]
en-gb: [Latn, [EU, AS, AU]]
# world?
en: [Latn, [EU, NA, AU, AF, ME, AS, PA, WW]]
eo: [Latn, [WW]]
es-419: [Latn, [LA]]
# world?
es-formal: [Latn, [EU, NA, LA, SA, AF, WW]]
# world?
es: [Latn, [EU, NA, LA, SA, AF, WW]]
esu: [Latn, [NA]]
et: [Latn, [EU]]
eu: [Latn, [EU]]
ext: [Latn, [EU]]
@@ -103,18 +122,26 @@ languages:
fy: [Latn, [EU]]
ga: [Latn, [EU]]
gag: [Latn, [EU]]
gan: [Hant, [AS]]
gah: [Latn, [AS]]
gan-hans: [Hans, [AS]]
gan-hant: [Hant, [AS]]
gan: [Hant, [AS]]
gbz: [Latn, [AS]]
gcf: [Latn, [LA]]
gd: [Latn, [EU]]
gl: [Latn, [EU]]
glk: [Arab, [ME]]
gn: [Latn, [LA]]
gom-deva: [Deva, [AS]]
gom-latn: [Latn, [AS]]
gom: [Deva, [AS]]
# hmph
got: [Goth, [EU]]
grc: [Grek, [EU]]
gsw: [Latn, [EU]]
gu: [Gujr, [AS]]
guc: [Latn, [LA]]
gur: [Latn, [AF]]
gv: [Latn, [EU]]
# The name in Names.php is Arabic, but everything else is Latn
ha: [Latn, [AF]]
@@ -123,14 +150,18 @@ languages:
he: [Hebr, [ME]]
# Or maybe world?
hi: [Deva, [AS]]
hif: [Latn, [PA, AU, AS]]
hif-deva: [Deva, [AS]]
hif-latn: [Latn, [PA, AU, AS]]
hif: [Latn, [PA, AU, AS]]
hil: [Latn, [AS]]
hne: [Deva, [AS]]
ho: [Latn, [PA]]
hr: [Latn, [EU]]
hsb: [Latn, [EU]]
hsn: [Hans, [AS]]
# Haitian Creole. North America, right?
ht: [Latn, [NA]]
hu-formal: [Latn, [EU]]
hu: [Latn, [EU]]
hy: [Armn, [EU, ME]]
hz: [Latn, [AF]]
@@ -142,6 +173,7 @@ languages:
ik: [Latn, [NA]]
ike-cans: [Cans, [NA]]
ike-latn: [Latn, [NA]]
ike: [Latn, [NA]]
ilo: [Latn, [AS]]
inh: [Cyrl, [EU]]
io: [Latn, [WW]]
@@ -158,40 +190,45 @@ languages:
kaa: [Latn, [AS]]
# Can also be Tfng, but the Wikipedia is mostly Latn
kab: [Latn, [AF, EU]]
kbd: [Cyrl, [EU, ME]]
kbd-cyrl: [Cyrl, [EU, ME]]
kbd-latn: [Latn, [EU]]
kbd: [Cyrl, [EU, ME]]
kea: [Latn, [AF]]
kg: [Latn, [AF]]
kgp: [Latn, [LA]]
khw: [Arab, [ME, AS]]
ki: [Latn, [AF]]
kiu: [Latn, [EU, ME]]
kj: [Latn, [AF]]
kk: [Cyrl, [EU, AS]]
kk-arab: [Arab, [EU, AS]]
kk-cyrl: [Cyrl, [EU, AS]]
kk-latn: [Latn, [EU, AS, ME]]
kk-cn: [Arab, [EU, AS, ME]]
kk-cyrl: [Cyrl, [EU, AS]]
kk-kz: [Cyrl, [EU, AS]]
kk-latn: [Latn, [EU, AS, ME]]
kk-tr: [Latn, [EU, AS, ME]]
kk: [Cyrl, [EU, AS]]
kl: [Latn, [NA, EU]]
km: [Khmr, [AS]]
kn: [Knda, [AS]]
# Kore is an alias for Hangul+Han. Maybe Hang is more appropriate?
ko: [Kore, [AS]]
# Here Hang may be even more appropriate, because kp has more resistance to Han
ko-kp: [Kore, [AS]]
# Kore is an alias for Hangul+Han. Maybe Hang is more appropriate?
ko: [Kore, [AS]]
koi: [Cyrl, [EU]]
kr: [Latn, [AF]]
krc: [Cyrl, [EU]]
kri: [Latn, [AF]]
krj: [Latn, [ME, EU]]
# Just because it's the current default in the Wikipedia. Deva may be needed, too.
ks: [Arab, [AS]]
krl: [Latn, [EU]]
ks-arab: [Arab, [AS]]
ks-deva: [Deva, [AS]]
# Just because it's the current default in the Wikipedia. Deva may be needed, too.
ks: [Arab, [AS]]
ksf: [Latn, [AF]]
ksh: [Latn, [EU]]
ku: [Latn, [EU, ME]]
ku-latn: [Latn, [EU, ME]]
ku-arab: [Arab, [EU, ME]]
ku-latn: [Latn, [EU, ME]]
ku: [Latn, [EU, ME]]
kv: [Cyrl, [EU]]
kw: [Latn, [EU]]
ky: [Cyrl, [AS]]
@@ -208,6 +245,7 @@ languages:
li: [Latn, [EU]]
lij: [Latn, [EU]]
liv: [Latn, [EU]]
lld: [Latn, [EU]]
lmo: [Latn, [EU]]
ln: [Latn, [AF]]
lo: [Laoo, [AS]]
@@ -217,26 +255,34 @@ languages:
lus: [Latn, [AS]]
lv: [Latn, [EU]]
lzh: [Hant, [AS]]
lzz: [Latn, [EU, ME]] # Also Geor, but the incubator is in Latn
# Also Geor, but the incubator is in Latn
lzz: [Latn, [EU, ME]]
mai: [Deva, [AS]]
map-bms: [Latn, [AS]]
mdf: [Cyrl, [EU]]
mfe: [Latn, [LA]]
mg: [Latn, [AF]]
mh: [Latn, [PA]]
mhr: [Cyrl, [EU]]
mi: [Latn, [PA, AU]]
mic: [Latn, [NA]]
min: [Latn, [AS]]
mk: [Cyrl, [EU]]
ml: [Mlym, [AS, ME]]
# Hmm, can also have Mong some day in some way
mn: [Cyrl, [AS]]
mnc: [Mong, [AS]]
mni: [Beng, [AS]]
mnw: [Mymr, [AS]]
mo: [Cyrl, [EU]]
mr: [Deva, [AS, ME]]
mrj: [Cyrl, [EU]]
ms: [Latn, [AS]]
mt: [Latn, [EU]]
mui: [Latn, [AS]]
mus: [Latn, [NA]]
mwl: [Latn, [EU]]
mwv: [Latn, [AS]]
my: [Mymr, [AS]]
myv: [Cyrl, [EU]]
mzn: [Arab, [ME, AS]]
@@ -245,17 +291,19 @@ languages:
nan: [Latn, [AS]]
nap: [Latn, [EU]]
nb: [Latn, [EU]]
nds: [Latn, [EU]]
nds-nl: [Latn, [EU]]
nds: [Latn, [EU]]
ne: [Deva, [AS]]
new: [Deva, [AS]]
ng: [Latn, [AF]]
niu: [Latn, [PA]]
nl: [Latn, [EU, SA]]
njo: [Latn, [AS]]
nl-informal: [Latn, [EU, SA]]
nl: [Latn, [EU, SA]]
nn: [Latn, [EU]]
no: [Latn, [EU]]
"no": [Latn, [EU]]
nov: [Latn, [WW]]
nqo: [Nkoo, [AF]]
nrm: [Latn, [EU]]
nso: [Latn, [AF]]
nv: [Latn, [NA]]
@@ -274,37 +322,48 @@ languages:
pfl: [Latn, [EU]]
pi: [Deva, [AS]]
pih: [Latn, [PA]]
pis: [Latn, [PA]]
pko: [Latn, [AF]]
pl: [Latn, [EU]]
pms: [Latn, [EU]]
pnb: [Arab, [AS, ME]]
pnt: [Grek, [EU]]
ppl: [Latn, [LA]]
prg: [Latn, [EU]]
pru: [Latn, [EU]]
ps: [Arab, [AS, ME]]
pt-br: [Latn, [SA, LA]]
# world?
pt: [Latn, [EU, LA, AS, PA, AF]]
pt-br: [Latn, [SA, LA]]
qu: [Latn, [SA]]
qug: [Latn, [SA]]
rap: [Latn, [LA]]
rgn: [Latn, [EU]]
rif: [Latn, [AF]]
rki: [Mymr, [AS]]
rm: [Latn, [EU]]
rmy: [Latn, [EU]]
rn: [Latn, [AF]]
ro: [Latn, [EU]]
roa-rup: [Latn, [EU]]
roa-tara: [Latn, [EU]]
# World?
rtm: [Latn, [PA]]
# world?
ru: [Cyrl, [EU, AS, ME]]
rue: [Cyrl, [EU]]
rup: [Latn, [EU]]
# ruq: [, []]
# ruq-cyrl: [, []]
## 'ruq-grek' => 'Βλαεστε', # Megleno-Romanian (Greek script)
# ruq-latn: [, []]
ruq: [Latn, [EU]]
ruq-cyrl: [Cyrl, [EU]]
ruq-grek: [Grek, [EU]]
ruq-latn: [Latn, [EU]]
rw: [Latn, [AF]]
ryu: [Kana, [AS]]
sa: [Deva, [AS]]
sah: [Cyrl, [EU, AS]] # Russian Far East - Europe, Asia, or both?
sat: [Latn, [AS]] # Currently Latn, potentially Olck
# Russian Far East - Europe, Asia, or both?
sah: [Cyrl, [EU, AS]]
# Currently Latn, potentially Olck
sat: [Latn, [AS]]
saz: [Saur, [AS]]
sc: [Latn, [EU]]
scn: [Latn, [EU]]
sco: [Latn, [EU]]
@@ -315,22 +374,25 @@ languages:
sg: [Latn, [AF]]
sgs: [Latn, [EU]]
sh: [Latn, [EU]]
shi: [Latn, [AF]]
shi-tfng: [Tfng, [AF]]
shi-latn: [Latn, [AF]]
shi-tfng: [Tfng, [AF]]
shi: [Latn, [AF]]
shn: [Mymr, [AS]]
si: [Sinh, [AS]]
simple: [Latn, [WW]]
sk: [Latn, [EU]]
sl: [Latn, [EU]]
sli: [Latn, [EU]]
slr: [Latn, [AS]]
sly: [Latn, [AS]]
sm: [Latn, [PA]]
sma: [Latn, [EU]]
sn: [Latn, [AF]]
so: [Latn, [AF]]
sq: [Latn, [EU]]
sr: [Cyrl, [EU]]
sr-ec: [Cyrl, [EU]]
sr-el: [Latn, [EU]]
sr: [Cyrl, [EU]]
srn: [Latn, [SA, NA, EU]]
ss: [Latn, [AF]]
st: [Latn, [AF]]
@@ -338,17 +400,20 @@ languages:
su: [Latn, [AS]]
sv: [Latn, [EU]]
sw: [Latn, [AF]]
swb: [Latn, [AF]]
sxu: [Latn, [EU]]
szl: [Latn, [EU]]
ta: [Taml, [AS]]
tcy: [Knda, [AS]]
te: [Telu, [AS]]
tet: [Latn, [AS, PA]]
tg: [Cyrl, [AS]]
tg-cyrl: [Cyrl, [AS]]
tg-latn: [Latn, [AS]]
tg: [Cyrl, [AS]]
th: [Thai, [AS]]
ti: [Ethi, [AF]]
tk: [Latn, [AS]]
tkr: [Cyrl, [AS]]
tl: [Latn, [AS]]
# A very complicated case. Names.php is Cyrl. In TWN they argue about Cyrl, Latn, and Arab. I can't find reliable external sources. --Amir
tly: [Cyrl, [EU, AS, ME]]
@@ -357,18 +422,24 @@ languages:
tokipona: [Latn, [WW]]
tpi: [Latn, [PA, AS]]
tr: [Latn, [EU, ME]]
trp: [Latn, [AS]]
tru: [Latn, [AS]]
ts: [Latn, [AF]]
tt: [Cyrl, [EU]]
tsd: [Grek, [EU]]
tt-cyrl: [Cyrl, [EU]]
tt-latn: [Latn, [EU]]
tt: [Cyrl, [EU]]
ttt: [Cyrl, [AS]]
tum: [Latn, [AF]]
tw: [Latn, [AF]]
twd: [Latn, [EU]]
ty: [Latn, [PA]]
tyv: [Cyrl, [AS]]
tzm: [Tfng, [AF]]
udm: [Cyrl, [EU]]
ug: [Arab, [AS]]
ug-arab: [Arab, [AS]]
ug-latn: [Latn, [AS]]
ug: [Arab, [AS]]
uk: [Cyrl, [EU]]
ur: [Arab, [AS, ME]]
uz: [Latn, [AS]]
@@ -383,18 +454,22 @@ languages:
vro: [Latn, [EU]]
wa: [Latn, [EU]]
war: [Latn, [AS]]
wls: [Latn, [PA]]
wo: [Latn, [AF]]
wuu: [Hans, [EU]]
xal: [Cyrl, [EU]]
xh: [Latn, [AF]]
xmf: [Geor, [EU]]
ydd: [Hebr, [AS, EU]]
yi: [Hebr, [ME, EU, NA, SA]]
yo: [Latn, [AF]]
# World?
yrk: [Cyrl, [AS]]
yrl: [Latn, [LA]]
yua: [Latn, [NA, LA]]
# world?
yue: [Hant, [AS]]
za: [Latn, [AS]]
zea: [Latn, [EU]]
zh: [Hans, [AS]]
zh-classical: [Hant, [AS]]
zh-cn: [Hans, [AS]]
zh-hans: [Hans, [AS]]
@@ -402,10 +477,11 @@ languages:
zh-hk: [Hant, [AS]]
zh-min-nan: [Latn, [AS]]
zh-mo: [Hant, [AS]]
# zh-my: [, [AS]] # What is it, Myanmar?
zh-my: [Hans, [AS]]
zh-sg: [Hans, [AS]]
zh-tw: [Hant, [AS]]
zh-yue: [Hans, [AS]]
zh: [Hans, [AS]]
zu: [Latn, [AF]]
# The codes are taken from http://unicode.org/iso15924/iso15924-codes.html .
@@ -427,7 +503,7 @@ scriptgroups:
# Maybe Thaana can be moved here from SouthAsian.
# Maybe it can be unified with African.
MiddleEastern: [Hebr, Syrc]
African: [Ethi, Tfng]
African: [Ethi, Nkoo, Tfng]
# India, Nepal, Bangladesh, Sri-Lanka, Bhutan, Maldives.
#
# Thaana (Thaa, the script of Maldives) is here, even though it's RTL,
@@ -437,9 +513,11 @@ scriptgroups:
# Tibetan (Tibt) is here, even though it's classified as "Central Asian" by
# Unicode, because linguistically and geographically it's closely related to
# the Brahmic family.
SouthAsian: [Beng, Deva, Gujr, Guru, Knda, Mlym, Orya, Sinh, Taml, Telu, Tibt, Thaa]
SouthEastAsian: [Bugi, Java, Khmr, Laoo, Mymr, Thai]
CJK: [Hans, Hant, Kore, Jpan, Yiii]
SouthAsian: [Beng, Deva, Gujr, Guru, Knda, Mlym, Orya, Saur, Sinh, Taml, Telu, Tibt, Thaa]
SouthEastAsian: [Batk, Bugi, Java, Khmr, Laoo, Mymr, Thai]
Mongolian: [Mong]
SignWriting: [Sgnw]
CJK: [Hans, Hant, Kana, Kore, Jpan, Yiii]
NativeAmerican: [Cher, Cans]
regiongroups:

View File

@@ -55,14 +55,14 @@ test( "-- $.uls.data testing", function() {
var allLanguagesByRegionAndScript = $.uls.data.allLanguagesByRegionAndScript();
deepEqual( allLanguagesByRegionAndScript['3']['AS']['SouthEastAsian']['Bugi'], ['bug'], 'All languages in the Buginese script in Asia were selected' );
deepEqual( $.uls.data.languagesInRegion( 'AU' ), ["en", "en-gb", "hif", "hif-latn", "mi", "na"], "languages of region AU are selected correctly" );
deepEqual( $.uls.data.languagesInRegion( 'AU' ), ["en-gb", "en", "hif-latn", "hif", "mi", "na"], "languages of region AU are selected correctly" );
deepEqual( $.uls.data.languagesInRegions( ['NA', 'WW'] ),
[
"avk", "cho", "chr", "chy", "cr", "en", "en-ca", "eo", "es",
"haw", "ht", "ia", "ie", "ik", "ike-cans", "ike-latn", "io",
"iu", "jam", "jbo", "kl", "lfn", "mus", "nah", "nov", "nv",
"akz", "ase", "avk", "cho", "chr", "chy", "cr", "en-ca", "en", "eo", "es-formal", "es", "esu",
"haw", "ht", "ia", "ie", "ik", "ike-cans", "ike-latn", "ike", "io",
"iu", "jam", "jbo", "kl", "lfn", "mic", "mus", "nah", "nov", "nv",
"pdc", "pdt", "sei", "simple", "srn", "tokipona",
"vo", "yi"
"vo", "yi", "yua"
],
"languages of regions NA and WW are selected correctly"
);