Gave script groups clear names and sorted them. Documentation in comments. Add tests to check that no script has been left behind. Corrected Lath to Latn (thanks to testing). Rename langdb.js to ext.uls.data.js Add ResourceLoader module ext.uls.data Change-Id: I91fafa94ffd1eaf2d12c954fe3a71064276533f9
471 lines
11 KiB
YAML
471 lines
11 KiB
YAML
languages:
|
|
aa: [Latn, [AF]]
|
|
ab: [Cyrl, [EU, AS]]
|
|
ace: [Latn, [PA]]
|
|
af: [Latn, [AF]]
|
|
ak: [Latn, [AF]]
|
|
aln: [Latn, [EU]]
|
|
am: [Ethi, [AF]]
|
|
an: [Latn, [EU]]
|
|
ang: [Latn, [EU]]
|
|
anp: [Deva, [AS]]
|
|
ar: [Arab, [ME]]
|
|
arc: [Syrc, [ME]]
|
|
arn: [Latn, [SA]]
|
|
ary: [Latn, [ME]]
|
|
arz: [Arab, [ME]]
|
|
as: [Beng, [AS]]
|
|
ast: [Latn, [EU]]
|
|
av: [Cyrl, [EU]]
|
|
avk: [Latn, [WW]]
|
|
ay: [Latn, [SA]]
|
|
# also Arab, and in the past - Cyrl
|
|
az: [Latn, [EU, ME]]
|
|
ba: [Cyrl, [EU]]
|
|
bar: [Latn, [EU]]
|
|
bcc: [Arab, [AS, ME]]
|
|
bcl: [Latn, [AS]]
|
|
be: [Cyrl, [EU]]
|
|
be-tarask: [Cyrl, [EU]]
|
|
be-x-old: [Cyrl, [EU]]
|
|
bg: [Cyrl, [EU]]
|
|
bh: [Deva, [AS]]
|
|
bho: [Deva, [AS]]
|
|
bi: [Latn, [PA]]
|
|
bjn: [Latn, [AS]]
|
|
bm: [Latn, [AF]]
|
|
bn: [Beng, [AS]]
|
|
bo: [Tibt, [AS]]
|
|
bpy: [Beng, [AS]]
|
|
bqi: [Arab, [ME]]
|
|
br: [Latn, [EU]]
|
|
brh: [Latn, [ME, AS]]
|
|
bs: [Latn, [EU]]
|
|
bug: [Bugi, [AS]]
|
|
bxr: [Cyrl, [AS]]
|
|
ca: [Latn, [EU]]
|
|
cbk-zam: [Latn, [AS]]
|
|
cdo: [Latn, [AS]]
|
|
ce: [Cyrl, [EU]]
|
|
ceb: [Latn, [AS]]
|
|
ch: [Latn, [PA]]
|
|
cho: [Latn, [NA]]
|
|
chr: [Cher, [NA]]
|
|
chy: [Latn, [NA]]
|
|
ckb: [Arab, [ME]]
|
|
co: [Latn, [EU]]
|
|
cps: [Latn, [AS]]
|
|
# Also Latn
|
|
cr: [Cans, [NA]]
|
|
# Latn is default, Cyrl is common IRL
|
|
crh: [Latn, [EU]]
|
|
crh-latn: [Latn, [EU]]
|
|
crh-cyrl: [Cyrl, [EU]]
|
|
cs: [Latn, [EU]]
|
|
csb: [Latn, [EU]]
|
|
cu: [Cyrl, [EU]]
|
|
cv: [Cyrl, [EU]]
|
|
cy: [Latn, [EU]]
|
|
da: [Latn, [EU]]
|
|
de: [Latn, [EU]]
|
|
de-at: [Latn, [EU]]
|
|
de-ch: [Latn, [EU]]
|
|
de-formal: [Latn, [EU]]
|
|
diq: [Latn, [EU, AS]]
|
|
dsb: [Latn, [EU]]
|
|
dtp: [Latn, [AS]]
|
|
dv: [Thaa, [AS]]
|
|
dz: [Tibt, [AS]]
|
|
ee: [Latn, [AF]]
|
|
el: [Grek, [EU]]
|
|
eml: [Latn, [EU]]
|
|
# world?
|
|
en: [Latn, [EU, NA, AU, AF, ME, AS, PA, WW]]
|
|
en-ca: [Latn, [NA]]
|
|
en-gb: [Latn, [EU, AS, AU]]
|
|
eo: [Latn, [WW]]
|
|
# world?
|
|
es: [Latn, [EU, NA, LA, SA, AF, WW]]
|
|
et: [Latn, [EU]]
|
|
eu: [Latn, [EU]]
|
|
ext: [Latn, [EU]]
|
|
fa: [Arab, [ME]]
|
|
ff: [Latn, [AF]]
|
|
fi: [Latn, [EU]]
|
|
fit: [Latn, [EU]]
|
|
fj: [Latn, [PA]]
|
|
fo: [Latn, [EU]]
|
|
fr: [Latn, [EU]]
|
|
frc: [Latn, [EU]]
|
|
frp: [Latn, [EU]]
|
|
frr: [Latn, [EU]]
|
|
fur: [Latn, [EU]]
|
|
fy: [Latn, [EU]]
|
|
ga: [Latn, [EU]]
|
|
gag: [Latn, [EU]]
|
|
gan: [Hant, [AS]]
|
|
gan-hans: [Hans, [AS]]
|
|
gan-hant: [Hant, [AS]]
|
|
gd: [Latn, [EU]]
|
|
gl: [Latn, [EU]]
|
|
glk: [Arab, [ME]]
|
|
gn: [Latn, [LA]]
|
|
# hmph
|
|
got: [Goth, [EU]]
|
|
grc: [Grek, [EU]]
|
|
gsw: [Latn, [EU]]
|
|
gu: [Gujr, [AS]]
|
|
gv: [Latn, [EU]]
|
|
# The name in Names.php is Arabic, but everything else is Latn
|
|
ha: [Latn, [AF]]
|
|
hak: [Latn, [AS]]
|
|
haw: [Latn, [NA, PA]]
|
|
he: [Hebr, [ME]]
|
|
# Or maybe world?
|
|
hi: [Deva, [AS]]
|
|
hif: [Latn, [PA, AU, AS]]
|
|
hif-latn: [Latn, [PA, AU, AS]]
|
|
hil: [Latn, [AS]]
|
|
ho: [Latn, [PA]]
|
|
hr: [Latn, [EU]]
|
|
hsb: [Latn, [EU]]
|
|
# Haitian Creole. North America, right?
|
|
ht: [Latn, [NA]]
|
|
hu: [Latn, [EU]]
|
|
hy: [Armn, [EU, ME]]
|
|
hz: [Latn, [AF]]
|
|
ia: [Latn, [WW]]
|
|
id: [Latn, [AS]]
|
|
ie: [Latn, [WW]]
|
|
ig: [Latn, [AF]]
|
|
ii: [Yiii, [AS]]
|
|
ik: [Latn, [NA]]
|
|
ike-cans: [Cans, [NA]]
|
|
ike-latn: [Latn, [NA]]
|
|
ilo: [Latn, [AS]]
|
|
inh: [Cyrl, [EU]]
|
|
io: [Latn, [WW]]
|
|
is: [Latn, [EU]]
|
|
it: [Latn, [EU]]
|
|
iu: [Cans, [NA]]
|
|
ja: [Jpan, [AS]]
|
|
jam: [Latn, [NA]]
|
|
jbo: [Latn, [WW]]
|
|
jut: [Latn, [EU]]
|
|
# also in the Javanese script (Java), but the Wikipedia is in Latn
|
|
jv: [Latn, [AS, PA]]
|
|
ka: [Geor, [EU]]
|
|
kaa: [Latn, [AS]]
|
|
# Can also be Tfng, but the Wikipedia is mostly Latn
|
|
kab: [Latn, [AF, EU]]
|
|
kbd: [Cyrl, [EU, ME]]
|
|
kbd-cyrl: [Cyrl, [EU, ME]]
|
|
kg: [Latn, [AF]]
|
|
khw: [Arab, [ME, AS]]
|
|
ki: [Latn, [AF]]
|
|
kiu: [Latn, [EU, ME]]
|
|
kj: [Latn, [AF]]
|
|
kk: [Cyrl, [EU, AS]]
|
|
kk-arab: [Arab, [EU, AS]]
|
|
kk-cyrl: [Cyrl, [EU, AS]]
|
|
kk-latn: [Latn, [EU, AS, ME]]
|
|
kk-cn: [Arab, [EU, AS, ME]]
|
|
kk-kz: [Cyrl, [EU, AS]]
|
|
kk-tr: [Latn, [EU, AS, ME]]
|
|
kl: [Latn, [NA, EU]]
|
|
km: [Khmr, [AS]]
|
|
kn: [Knda, [AS]]
|
|
# Kore is an alias for Hangul+Han. Maybe Hang is more appropriate?
|
|
ko: [Kore, [AS]]
|
|
# Here Hang may be even more appropriate, because kp has more resistance to Han
|
|
ko-kp: [Kore, [AS]]
|
|
koi: [Cyrl, [EU]]
|
|
kr: [Latn, [AF]]
|
|
krc: [Cyrl, [EU]]
|
|
kri: [Latn, [AF]]
|
|
krj: [Latn, [AS]]
|
|
# Just because it's the current default in the Wikipedia. Deva may be needed, too.
|
|
ks: [Arab, [AS]]
|
|
ks-arab: [Arab, [AS]]
|
|
ks-deva: [Deva, [AS]]
|
|
ksh: [Latn, [EU]]
|
|
ku: [Latn, [EU, ME]]
|
|
ku-latn: [Latn, [EU, ME]]
|
|
ku-arab: [Arab, [EU, ME]]
|
|
kv: [Cyrl, [EU]]
|
|
kw: [Latn, [EU]]
|
|
ky: [Cyrl, [AS]]
|
|
la: [Latn, [EU]]
|
|
# Most identified with Turkey, Bulgaria, Greece, Spain and Israel,
|
|
# but also spoken in Latin America and elsewhere.
|
|
# Wikipedia is mostly in Latn, but also in Hebr. (Comparable to az.)
|
|
lad: [Latn, [ME, EU, LA]]
|
|
lb: [Latn, [EU]]
|
|
lbe: [Cyrl, [EU]]
|
|
lez: [Cyrl, [EU]]
|
|
lfn: [Latn, [WW]]
|
|
lg: [Latn, [AF]]
|
|
li: [Latn, [EU]]
|
|
lij: [Latn, [EU]]
|
|
liv: [Latn, [EU]]
|
|
lmo: [Latn, [EU]]
|
|
ln: [Latn, [AF]]
|
|
lo: [Laoo, [AS]]
|
|
loz: [Latn, [AF]]
|
|
lt: [Latn, [EU]]
|
|
ltg: [Latn, [EU]]
|
|
lus: [Latn, [AS]]
|
|
lv: [Latn, [EU]]
|
|
lzh: [Hant, [AS]]
|
|
lzz: [Latn, [EU, ME]] # Also Geor, but the incubator is in Latn
|
|
mai: [Deva, [AS]]
|
|
map-bms: [Latn, [AS]]
|
|
mdf: [Cyrl, [EU]]
|
|
mg: [Latn, [AF]]
|
|
mh: [Latn, [PA]]
|
|
mhr: [Cyrl, [EU]]
|
|
mi: [Latn, [PA, AU]]
|
|
min: [Latn, [AS]]
|
|
mk: [Cyrl, [EU]]
|
|
ml: [Mlym, [AS, ME]]
|
|
# Hmm, can also have Mong some day in some way
|
|
mn: [Cyrl, [AS]]
|
|
mo: [Cyrl, [EU]]
|
|
mr: [Deva, [AS, ME]]
|
|
mrj: [Cyrl, [EU]]
|
|
ms: [Latn, [AS]]
|
|
mt: [Latn, [EU]]
|
|
mus: [Latn, [NA]]
|
|
mwl: [Latn, [EU]]
|
|
my: [Mymr, [AS]]
|
|
myv: [Cyrl, [EU]]
|
|
mzn: [Arab, [ME, AS]]
|
|
na: [Latn, [PA, AU]]
|
|
nah: [Latn, [NA, LA]]
|
|
nan: [Latn, [AS]]
|
|
nap: [Latn, [EU]]
|
|
nb: [Latn, [EU]]
|
|
nds: [Latn, [EU]]
|
|
nds-nl: [Latn, [EU]]
|
|
ne: [Deva, [AS]]
|
|
new: [Deva, [AS]]
|
|
ng: [Latn, [AF]]
|
|
niu: [Latn, [PA]]
|
|
nl: [Latn, [EU, SA]]
|
|
nl-informal: [Latn, [EU, SA]]
|
|
nn: [Latn, [EU]]
|
|
no: [Latn, [EU]]
|
|
nov: [Latn, [WW]]
|
|
nrm: [Latn, [EU]]
|
|
nso: [Latn, [AF]]
|
|
nv: [Latn, [NA]]
|
|
ny: [Latn, [AF]]
|
|
oc: [Latn, [EU]]
|
|
om: [Latn, [AF]]
|
|
or: [Orya, [AS]]
|
|
os: [Cyrl, [EU]]
|
|
pa: [Guru, [AS]]
|
|
pag: [Latn, [AS]]
|
|
pam: [Latn, [AS]]
|
|
pap: [Latn, [LA]]
|
|
pcd: [Latn, [EU]]
|
|
pdc: [Latn, [EU, NA, SA]]
|
|
pdt: [Latn, [EU, NA, SA]]
|
|
pfl: [Latn, [EU]]
|
|
pi: [Deva, [AS]]
|
|
pih: [Latn, [PA]]
|
|
pl: [Latn, [EU]]
|
|
pms: [Latn, [EU]]
|
|
pnb: [Arab, [AS, ME]]
|
|
pnt: [Grek, [EU]]
|
|
prg: [Latn, [EU]]
|
|
ps: [Arab, [AS, ME]]
|
|
# world?
|
|
pt: [Latn, [EU, LA, AS, PA, AF]]
|
|
pt-br: [Latn, [SA, LA]]
|
|
qu: [Latn, [SA]]
|
|
qug: [Latn, [SA]]
|
|
rgn: [Latn, [EU]]
|
|
rif: [Latn, [AF]]
|
|
rm: [Latn, [EU]]
|
|
rmy: [Latn, [EU]]
|
|
rn: [Latn, [AF]]
|
|
ro: [Latn, [EU]]
|
|
roa-rup: [Latn, [EU]]
|
|
roa-tara: [Latn, [EU]]
|
|
# World?
|
|
ru: [Cyrl, [EU, AS, ME]]
|
|
rue: [Cyrl, [EU, NA]]
|
|
rup: [Latn, [EU]]
|
|
# ruq: [, []]
|
|
# ruq-cyrl: [, []]
|
|
## 'ruq-grek' => 'Βλαεστε', # Megleno-Romanian (Greek script)
|
|
# ruq-latn: [, []]
|
|
rw: [Latn, [AF]]
|
|
sa: [Deva, [AS]]
|
|
sah: [Cyrl, [EU, AS]] # Russian Far East - Europe, Asia, or both?
|
|
sat: [Latn, [AS]] # Currently Latn, potentially Olck
|
|
sc: [Latn, [EU]]
|
|
scn: [Latn, [EU]]
|
|
sco: [Latn, [EU]]
|
|
sd: [Arab, [AS]]
|
|
sdc: [Latn, [EU]]
|
|
se: [Latn, [EU]]
|
|
sei: [Latn, [NA, LA]]
|
|
sg: [Latn, [AF]]
|
|
sgs: [Latn, [EU]]
|
|
sh: [Latn, [EU]]
|
|
shi: [Latn, [AF]]
|
|
shi-tfng: [Tfng, [AF]]
|
|
shi-latn: [Latn, [AF]]
|
|
si: [Sinh, [AS]]
|
|
simple: [Latn, [WW]]
|
|
sk: [Latn, [EU]]
|
|
sl: [Latn, [EU]]
|
|
sli: [Latn, [EU]]
|
|
sm: [Latn, [PA]]
|
|
sma: [Latn, [EU]]
|
|
sn: [Latn, [AF]]
|
|
so: [Latn, [AF]]
|
|
sq: [Latn, [EU]]
|
|
sr: [Cyrl, [EU]]
|
|
sr-ec: [Cyrl, [EU]]
|
|
sr-el: [Latn, [EU]]
|
|
srn: [Latn, [SA, NA, EU]]
|
|
ss: [Latn, [AF]]
|
|
st: [Latn, [AF]]
|
|
stq: [Latn, [EU]]
|
|
su: [Latn, [AS]]
|
|
sv: [Latn, [EU]]
|
|
sw: [Latn, [AF]]
|
|
szl: [Latn, [EU]]
|
|
ta: [Taml, [AS]]
|
|
tcy: [Knda, [AS]]
|
|
te: [Telu, [AS]]
|
|
tet: [Latn, [AS, PA]]
|
|
tg: [Cyrl, [AS]]
|
|
tg-cyrl: [Cyrl, [AS]]
|
|
tg-latn: [Latn, [AS]]
|
|
th: [Thai, [AS]]
|
|
ti: [Ethi, [AF]]
|
|
tk: [Latn, [AS]]
|
|
tl: [Latn, [AS]]
|
|
# A very complicated case. Names.php is Cyrl. In TWN they argue about Cyrl, Latn, and Arab. I can't find reliable external sources. --Amir
|
|
tly: [Cyrl, [EU, AS, ME]]
|
|
tn: [Latn, [AF]]
|
|
to: [Latn, [PA]]
|
|
tokipona: [Latn, [WW]]
|
|
tpi: [Latn, [PA, AS]]
|
|
tr: [Latn, [EU, ME]]
|
|
ts: [Latn, [AF]]
|
|
tt: [Cyrl, [EU]]
|
|
tt-cyrl: [Cyrl, [EU]]
|
|
tt-latn: [Latn, [EU]]
|
|
tum: [Latn, [AF]]
|
|
tw: [Latn, [AF]]
|
|
ty: [Latn, [PA]]
|
|
tyv: [Cyrl, [AS]]
|
|
udm: [Cyrl, [EU]]
|
|
ug: [Arab, [AS]]
|
|
ug-arab: [Arab, [AS]]
|
|
ug-latn: [Latn, [AS]]
|
|
uk: [Cyrl, [EU, NA]]
|
|
ur: [Arab, [AS, ME]]
|
|
uz: [Latn, [AS]]
|
|
ve: [Latn, [AF]]
|
|
vec: [Latn, [EU]]
|
|
vep: [Latn, [EU]]
|
|
vi: [Latn, [AS]]
|
|
vls: [Latn, [EU]]
|
|
vmf: [Latn, [EU]]
|
|
vo: [Latn, [WW]]
|
|
vot: [Latn, [EU]]
|
|
vro: [Latn, [EU]]
|
|
wa: [Latn, [EU]]
|
|
war: [Latn, [AS]]
|
|
wo: [Latn, [AF]]
|
|
wuu: [Hans, [AS]]
|
|
xal: [Cyrl, [EU]]
|
|
xh: [Latn, [AF]]
|
|
xmf: [Geor, [EU]]
|
|
yi: [Hebr, [ME, EU, NA, SA]]
|
|
yo: [Latn, [AF]]
|
|
# World?
|
|
yue: [Hant, [AS]]
|
|
za: [Latn, [AS]]
|
|
zea: [Latn, [EU]]
|
|
zh: [Hans, [AS]]
|
|
zh-classical: [Hant, [AS]]
|
|
zh-cn: [Hans, [AS]]
|
|
zh-hans: [Hans, [AS]]
|
|
zh-hant: [Hant, [AS]]
|
|
zh-hk: [Hant, [AS]]
|
|
zh-min-nan: [Latn, [AS]]
|
|
zh-mo: [Hant, [AS]]
|
|
# zh-my: [, [AS]] # What is it, Myanmar?
|
|
zh-sg: [Hans, [AS]]
|
|
zh-tw: [Hant, [AS]]
|
|
zh-yue: [Hant, [AS]]
|
|
zu: [Latn, [AF]]
|
|
|
|
# The codes are taken from http://unicode.org/iso15924/iso15924-codes.html .
|
|
#
|
|
# The classification is roughly based on http://www.unicode.org/charts/
|
|
# with some practical corrections.
|
|
scriptgroups:
|
|
# Other is reserved
|
|
# Large groups, one script in each
|
|
Cyrillic: [Cyrl]
|
|
Arabic: [Arab]
|
|
# It's probably different enough from Latin and Cyrillic, but user testing
|
|
# may prove otherwise.
|
|
Greek: [Grek]
|
|
# Couldn't find a better place for Goth.
|
|
Latin: [Latn, Goth]
|
|
WestCaucasian: [Armn, Geor]
|
|
# Maybe it can be unified with Arabic.
|
|
# Maybe Thaana can be moved here from SouthAsian.
|
|
# Maybe it can be unified with African.
|
|
MiddleEastern: [Hebr, Syrc]
|
|
African: [Ethi, Tfng]
|
|
# India, Nepal, Bangladesh, Sri Lanka, Bhutan, Maldives.
|
|
#
|
|
# Thaana (Thaa, the script of Maldives) is here, even though it's RTL,
|
|
# because it's closer geographically to India. Maybe it should be moved
|
|
# to MiddleEastern or to Arabic, if that would be easier for users.
|
|
#
|
|
# Tibetan (Tibt) is here, even though it's classified as "Central Asian" by
|
|
# Unicode, because linguistically and geographically it's closely related to
|
|
# the Brahmic family.
|
|
SouthAsian: [Beng, Deva, Gujr, Guru, Knda, Mlym, Orya, Sinh, Taml, Telu, Tibt, Thaa]
|
|
SouthEastAsian: [Bugi, Java, Khmr, Laoo, Mymr, Thai]
|
|
CJK: [Hans, Hant, Kore, Jpan, Yiii]
|
|
NativeAmerican: [Cher, Cans]
|
|
|
|
regiongroups:
|
|
# north-america
|
|
NA: 1
|
|
# latin-america
|
|
LA: 1
|
|
# south-america
|
|
SA: 1
|
|
# middle-east
|
|
ME: 2
|
|
# africa
|
|
AF: 2
|
|
# europe
|
|
EU: 2
|
|
# asia
|
|
AS: 3
|
|
# australia
|
|
AU: 3
|
|
# pacific
|
|
PA: 3
|
|
# world wide, international
|
|
WW: 4
|
|
|
|
# Importance in the region, 100 = official language or everybody knows it
|
|
regions:
|
|
fi: { FI: 100, SE: 10 }
|
|
ru: { RU: 100, FI: 40 }
|