Files
mediawiki-extensions-Univer…/data/langdb.yaml
Amir E. Aharoni 03df48a609 Created script groups, introduce $.uls.data
Gave script groups clear names and sorted them.
Documentation in comments.
Add tests to check that no script has been left behind.
Corrected Lath to Latn (thanks to testing).

Rename langdb.js to ext.uls.data.js

Add ResourceLoader module ext.uls.data

Change-Id: I91fafa94ffd1eaf2d12c954fe3a71064276533f9
2012-06-27 15:08:07 +05:30

471 lines
11 KiB
YAML

# Language database.
# Each entry maps a language code to a two-item list:
#   [script code, [region codes]]
# The script code is the script the language is listed under (see the
# per-entry comments where more than one script is in use). Region codes
# are the keys defined under `regiongroups`.
languages:
  aa: [Latn, [AF]]
  ab: [Cyrl, [EU, AS]]
  ace: [Latn, [PA]]
  af: [Latn, [AF]]
  ak: [Latn, [AF]]
  aln: [Latn, [EU]]
  am: [Ethi, [AF]]
  an: [Latn, [EU]]
  ang: [Latn, [EU]]
  anp: [Deva, [AS]]
  ar: [Arab, [ME]]
  arc: [Syrc, [ME]]
  arn: [Latn, [SA]]
  ary: [Latn, [ME]]
  arz: [Arab, [ME]]
  as: [Beng, [AS]]
  ast: [Latn, [EU]]
  av: [Cyrl, [EU]]
  avk: [Latn, [WW]]
  ay: [Latn, [SA]]
  # Also written in Arab and, in the past, in Cyrl
  az: [Latn, [EU, ME]]
  ba: [Cyrl, [EU]]
  bar: [Latn, [EU]]
  bcc: [Arab, [AS, ME]]
  bcl: [Latn, [AS]]
  be: [Cyrl, [EU]]
  be-tarask: [Cyrl, [EU]]
  be-x-old: [Cyrl, [EU]]
  bg: [Cyrl, [EU]]
  bh: [Deva, [AS]]
  bho: [Deva, [AS]]
  bi: [Latn, [PA]]
  bjn: [Latn, [AS]]
  bm: [Latn, [AF]]
  bn: [Beng, [AS]]
  bo: [Tibt, [AS]]
  bpy: [Beng, [AS]]
  bqi: [Arab, [ME]]
  br: [Latn, [EU]]
  brh: [Latn, [ME, AS]]
  bs: [Latn, [EU]]
  bug: [Bugi, [AS]]
  bxr: [Cyrl, [AS]]
  ca: [Latn, [EU]]
  cbk-zam: [Latn, [AS]]
  cdo: [Latn, [AS]]
  ce: [Cyrl, [EU]]
  ceb: [Latn, [AS]]
  ch: [Latn, [PA]]
  cho: [Latn, [NA]]
  chr: [Cher, [NA]]
  chy: [Latn, [NA]]
  ckb: [Arab, [ME]]
  co: [Latn, [EU]]
  cps: [Latn, [AS]]
  # Also Latn
  cr: [Cans, [NA]]
  # Latn is default, Cyrl is common IRL
  crh: [Latn, [EU]]
  crh-latn: [Latn, [EU]]
  crh-cyrl: [Cyrl, [EU]]
  cs: [Latn, [EU]]
  csb: [Latn, [EU]]
  cu: [Cyrl, [EU]]
  cv: [Cyrl, [EU]]
  cy: [Latn, [EU]]
  da: [Latn, [EU]]
  de: [Latn, [EU]]
  de-at: [Latn, [EU]]
  de-ch: [Latn, [EU]]
  de-formal: [Latn, [EU]]
  diq: [Latn, [EU, AS]]
  dsb: [Latn, [EU]]
  dtp: [Latn, [AS]]
  dv: [Thaa, [AS]]
  dz: [Tibt, [AS]]
  ee: [Latn, [AF]]
  el: [Grek, [EU]]
  eml: [Latn, [EU]]
  # world?
  en: [Latn, [EU, NA, AU, AF, ME, AS, PA, WW]]
  en-ca: [Latn, [NA]]
  en-gb: [Latn, [EU, AS, AU]]
  eo: [Latn, [WW]]
  # world?
  es: [Latn, [EU, NA, LA, SA, AF, WW]]
  et: [Latn, [EU]]
  eu: [Latn, [EU]]
  ext: [Latn, [EU]]
  fa: [Arab, [ME]]
  ff: [Latn, [AF]]
  fi: [Latn, [EU]]
  fit: [Latn, [EU]]
  fj: [Latn, [PA]]
  fo: [Latn, [EU]]
  fr: [Latn, [EU]]
  # Cajun French, so North America rather than Europe
  frc: [Latn, [NA]]
  frp: [Latn, [EU]]
  frr: [Latn, [EU]]
  fur: [Latn, [EU]]
  fy: [Latn, [EU]]
  ga: [Latn, [EU]]
  gag: [Latn, [EU]]
  gan: [Hant, [AS]]
  gan-hans: [Hans, [AS]]
  gan-hant: [Hant, [AS]]
  gd: [Latn, [EU]]
  gl: [Latn, [EU]]
  glk: [Arab, [ME]]
  gn: [Latn, [LA]]
  # hmph
  got: [Goth, [EU]]
  grc: [Grek, [EU]]
  gsw: [Latn, [EU]]
  gu: [Gujr, [AS]]
  gv: [Latn, [EU]]
  # The name in Names.php is Arabic, but everything else is Latn
  ha: [Latn, [AF]]
  hak: [Latn, [AS]]
  haw: [Latn, [NA, PA]]
  he: [Hebr, [ME]]
  # Or maybe world?
  hi: [Deva, [AS]]
  hif: [Latn, [PA, AU, AS]]
  hif-latn: [Latn, [PA, AU, AS]]
  hil: [Latn, [AS]]
  ho: [Latn, [PA]]
  hr: [Latn, [EU]]
  hsb: [Latn, [EU]]
  # Haitian Creole. North America, right?
  ht: [Latn, [NA]]
  hu: [Latn, [EU]]
  hy: [Armn, [EU, ME]]
  hz: [Latn, [AF]]
  ia: [Latn, [WW]]
  # Region was "A", which is not a key of `regiongroups`; AS (asia) intended
  id: [Latn, [AS]]
  ie: [Latn, [WW]]
  ig: [Latn, [AF]]
  ii: [Yiii, [AS]]
  ik: [Latn, [NA]]
  ike-cans: [Cans, [NA]]
  ike-latn: [Latn, [NA]]
  ilo: [Latn, [AS]]
  inh: [Cyrl, [EU]]
  io: [Latn, [WW]]
  is: [Latn, [EU]]
  it: [Latn, [EU]]
  iu: [Cans, [NA]]
  ja: [Jpan, [AS]]
  jam: [Latn, [NA]]
  jbo: [Latn, [WW]]
  jut: [Latn, [EU]]
  # Also in the Javanese script (Java), but the Wikipedia is in Latn
  jv: [Latn, [AS, PA]]
  ka: [Geor, [EU]]
  kaa: [Latn, [AS]]
  # Can also be Tfng, but the Wikipedia is mostly Latn
  kab: [Latn, [AF, EU]]
  kbd: [Cyrl, [EU, ME]]
  kbd-cyrl: [Cyrl, [EU, ME]]
  kg: [Latn, [AF]]
  khw: [Arab, [ME, AS]]
  ki: [Latn, [AF]]
  kiu: [Latn, [EU, ME]]
  kj: [Latn, [AF]]
  kk: [Cyrl, [EU, AS]]
  kk-arab: [Arab, [EU, AS]]
  kk-cyrl: [Cyrl, [EU, AS]]
  kk-latn: [Latn, [EU, AS, ME]]
  kk-cn: [Arab, [EU, AS, ME]]
  kk-kz: [Cyrl, [EU, AS]]
  kk-tr: [Latn, [EU, AS, ME]]
  kl: [Latn, [NA, EU]]
  km: [Khmr, [AS]]
  kn: [Knda, [AS]]
  # Kore is an alias for Hangul+Han. Maybe Hang is more appropriate?
  ko: [Kore, [AS]]
  # Here Hang may be even more appropriate, because kp has more resistance
  # to Han
  ko-kp: [Kore, [AS]]
  koi: [Cyrl, [EU]]
  kr: [Latn, [AF]]
  krc: [Cyrl, [EU]]
  kri: [Latn, [AF]]
  # Kinaray-a is spoken in the Philippines, so Asia
  krj: [Latn, [AS]]
  # Just because it's the current default in the Wikipedia.
  # Deva may be needed, too.
  ks: [Arab, [AS]]
  ks-arab: [Arab, [AS]]
  ks-deva: [Deva, [AS]]
  ksh: [Latn, [EU]]
  ku: [Latn, [EU, ME]]
  ku-latn: [Latn, [EU, ME]]
  ku-arab: [Arab, [EU, ME]]
  kv: [Cyrl, [EU]]
  kw: [Latn, [EU]]
  ky: [Cyrl, [AS]]
  la: [Latn, [EU]]
  # Most identified with Turkey, Bulgaria, Greece, Spain and Israel,
  # but also spoken in Latin America and elsewhere.
  # Wikipedia is mostly in Latn, but also in Hebr. (Comparable to az.)
  lad: [Latn, [ME, EU, LA]]
  lb: [Latn, [EU]]
  lbe: [Cyrl, [EU]]
  lez: [Cyrl, [EU]]
  lfn: [Latn, [WW]]
  lg: [Latn, [AF]]
  li: [Latn, [EU]]
  lij: [Latn, [EU]]
  liv: [Latn, [EU]]
  lmo: [Latn, [EU]]
  ln: [Latn, [AF]]
  lo: [Laoo, [AS]]
  loz: [Latn, [AF]]
  lt: [Latn, [EU]]
  ltg: [Latn, [EU]]
  lus: [Latn, [AS]]
  lv: [Latn, [EU]]
  lzh: [Hant, [AS]]
  lzz: [Latn, [EU, ME]]  # Also Geor, but the incubator is in Latn
  mai: [Deva, [AS]]
  map-bms: [Latn, [AS]]
  mdf: [Cyrl, [EU]]
  mg: [Latn, [AF]]
  mh: [Latn, [PA]]
  mhr: [Cyrl, [EU]]
  mi: [Latn, [PA, AU]]
  min: [Latn, [AS]]
  mk: [Cyrl, [EU]]
  ml: [Mlym, [AS, ME]]
  # Hmm, can also have Mong some day in some way
  mn: [Cyrl, [AS]]
  mo: [Cyrl, [EU]]
  mr: [Deva, [AS, ME]]
  mrj: [Cyrl, [EU]]
  ms: [Latn, [AS]]
  mt: [Latn, [EU]]
  mus: [Latn, [NA]]
  mwl: [Latn, [EU]]
  my: [Mymr, [AS]]
  myv: [Cyrl, [EU]]
  mzn: [Arab, [ME, AS]]
  na: [Latn, [PA, AU]]
  nah: [Latn, [NA, LA]]
  nan: [Latn, [AS]]
  nap: [Latn, [EU]]
  nb: [Latn, [EU]]
  nds: [Latn, [EU]]
  nds-nl: [Latn, [EU]]
  ne: [Deva, [AS]]
  new: [Deva, [AS]]
  ng: [Latn, [AF]]
  niu: [Latn, [PA]]
  nl: [Latn, [EU, SA]]
  nl-informal: [Latn, [EU, SA]]
  nn: [Latn, [EU]]
  # Quoted on purpose: YAML 1.1 parsers read a bare `no` as boolean false,
  # which would silently replace the Norwegian language code.
  'no': [Latn, [EU]]
  nov: [Latn, [WW]]
  nrm: [Latn, [EU]]
  nso: [Latn, [AF]]
  nv: [Latn, [NA]]
  ny: [Latn, [AF]]
  oc: [Latn, [EU]]
  om: [Latn, [AF]]
  or: [Orya, [AS]]
  os: [Cyrl, [EU]]
  pa: [Guru, [AS]]
  pag: [Latn, [AS]]
  pam: [Latn, [AS]]
  pap: [Latn, [LA]]
  pcd: [Latn, [EU]]
  pdc: [Latn, [EU, NA, SA]]
  pdt: [Latn, [EU, NA, SA]]
  pfl: [Latn, [EU]]
  pi: [Deva, [AS]]
  pih: [Latn, [PA]]
  pl: [Latn, [EU]]
  pms: [Latn, [EU]]
  pnb: [Arab, [AS, ME]]
  pnt: [Grek, [EU]]
  prg: [Latn, [EU]]
  ps: [Arab, [AS, ME]]
  # world?
  pt: [Latn, [EU, LA, AS, PA, AF]]
  pt-br: [Latn, [SA, LA]]
  qu: [Latn, [SA]]
  qug: [Latn, [SA]]
  rgn: [Latn, [EU]]
  rif: [Latn, [AF]]
  rm: [Latn, [EU]]
  rmy: [Latn, [EU]]
  rn: [Latn, [AF]]
  ro: [Latn, [EU]]
  roa-rup: [Latn, [EU]]
  roa-tara: [Latn, [EU]]
  # World?
  ru: [Cyrl, [EU, AS, ME]]
  rue: [Cyrl, [EU, NA]]
  rup: [Latn, [EU]]
  # ruq: [, []]
  # ruq-cyrl: [, []]
  ## 'ruq-grek' => 'Βλαεστε', # Megleno-Romanian (Greek script)
  # ruq-latn: [, []]
  rw: [Latn, [AF]]
  sa: [Deva, [AS]]
  sah: [Cyrl, [EU, AS]]  # Russian Far East - Europe, Asia, or both?
  sat: [Latn, [AS]]  # Currently Latn, potentially Olck
  sc: [Latn, [EU]]
  scn: [Latn, [EU]]
  sco: [Latn, [EU]]
  sd: [Arab, [AS]]
  sdc: [Latn, [EU]]
  se: [Latn, [EU]]
  sei: [Latn, [NA, LA]]
  sg: [Latn, [AF]]
  sgs: [Latn, [EU]]
  sh: [Latn, [EU]]
  shi: [Latn, [AF]]
  shi-tfng: [Tfng, [AF]]
  shi-latn: [Latn, [AF]]
  si: [Sinh, [AS]]
  simple: [Latn, [WW]]
  sk: [Latn, [EU]]
  sl: [Latn, [EU]]
  sli: [Latn, [EU]]
  sm: [Latn, [PA]]
  sma: [Latn, [EU]]
  sn: [Latn, [AF]]
  so: [Latn, [AF]]
  sq: [Latn, [EU]]
  sr: [Cyrl, [EU]]
  sr-ec: [Cyrl, [EU]]
  sr-el: [Latn, [EU]]
  srn: [Latn, [SA, NA, EU]]
  ss: [Latn, [AF]]
  st: [Latn, [AF]]
  stq: [Latn, [EU]]
  su: [Latn, [AS]]
  sv: [Latn, [EU]]
  sw: [Latn, [AF]]
  szl: [Latn, [EU]]
  ta: [Taml, [AS]]
  tcy: [Knda, [AS]]
  te: [Telu, [AS]]
  tet: [Latn, [AS, PA]]
  tg: [Cyrl, [AS]]
  tg-cyrl: [Cyrl, [AS]]
  tg-latn: [Latn, [AS]]
  th: [Thai, [AS]]
  ti: [Ethi, [AF]]
  tk: [Latn, [AS]]
  tl: [Latn, [AS]]
  # A very complicated case. Names.php is Cyrl. In TWN they argue about
  # Cyrl, Latn, and Arab. I can't find reliable external sources. --Amir
  tly: [Cyrl, [EU, AS, ME]]
  tn: [Latn, [AF]]
  to: [Latn, [PA]]
  tokipona: [Latn, [WW]]
  tpi: [Latn, [PA, AS]]
  tr: [Latn, [EU, ME]]
  ts: [Latn, [AF]]
  tt: [Cyrl, [EU]]
  tt-cyrl: [Cyrl, [EU]]
  tt-latn: [Latn, [EU]]
  tum: [Latn, [AF]]
  tw: [Latn, [AF]]
  ty: [Latn, [PA]]
  tyv: [Cyrl, [AS]]
  udm: [Cyrl, [EU]]
  ug: [Arab, [AS]]
  ug-arab: [Arab, [AS]]
  ug-latn: [Latn, [AS]]
  uk: [Cyrl, [EU, NA]]
  ur: [Arab, [AS, ME]]
  uz: [Latn, [AS]]
  ve: [Latn, [AF]]
  vec: [Latn, [EU]]
  vep: [Latn, [EU]]
  vi: [Latn, [AS]]
  vls: [Latn, [EU]]
  vmf: [Latn, [EU]]
  vo: [Latn, [WW]]
  vot: [Latn, [EU]]
  vro: [Latn, [EU]]
  wa: [Latn, [EU]]
  war: [Latn, [AS]]
  wo: [Latn, [AF]]
  # Wu Chinese is spoken in China, so Asia
  wuu: [Hans, [AS]]
  xal: [Cyrl, [EU]]
  xh: [Latn, [AF]]
  xmf: [Geor, [EU]]
  yi: [Hebr, [ME, EU, NA, SA]]
  yo: [Latn, [AF]]
  # World?
  yue: [Hant, [AS]]
  za: [Latn, [AS]]
  zea: [Latn, [EU]]
  zh: [Hans, [AS]]
  zh-classical: [Hant, [AS]]
  zh-cn: [Hans, [AS]]
  zh-hans: [Hans, [AS]]
  zh-hant: [Hant, [AS]]
  zh-hk: [Hant, [AS]]
  zh-min-nan: [Latn, [AS]]
  zh-mo: [Hant, [AS]]
  # zh-my: [, [AS]] # What is it, Myanmar?
  zh-sg: [Hans, [AS]]
  zh-tw: [Hant, [AS]]
  # Kept in the same script as the `yue` entry
  zh-yue: [Hant, [AS]]
  zu: [Latn, [AF]]
# Script groups: every group name maps to the list of script codes that
# belong to it.
#
# Script codes come from http://unicode.org/iso15924/iso15924-codes.html .
#
# The grouping roughly follows http://www.unicode.org/charts/ ,
# with some practical corrections.
scriptgroups:
  # The group name "Other" is reserved.

  # Large groups, one script in each.
  Cyrillic: [Cyrl]
  Arabic: [Arab]
  # Greek is probably different enough from Latin and Cyrillic, but user
  # testing may prove otherwise.
  Greek: [Grek]
  # Couldn't find a better place for Goth.
  Latin: [Latn, Goth]
  WestCaucasian: [Armn, Geor]
  # Open questions for MiddleEastern:
  # - maybe it can be unified with Arabic;
  # - maybe Thaana can be moved here from SouthAsian;
  # - maybe it can be unified with African.
  MiddleEastern: [Hebr, Syrc]
  African: [Ethi, Tfng]
  # India, Nepal, Bangladesh, Sri Lanka, Bhutan, Maldives.
  #
  # Thaana (Thaa, the script of Maldives) is listed here even though it is
  # RTL, because it is geographically closer to India. It could be moved to
  # MiddleEastern or to Arabic if that turns out to be easier for users.
  #
  # Tibetan (Tibt) is listed here even though Unicode classifies it as
  # "Central Asian", because linguistically and geographically it is closely
  # related to the Brahmic family.
  SouthAsian: [Beng, Deva, Gujr, Guru, Knda, Mlym, Orya, Sinh, Taml, Telu, Tibt, Thaa]
  SouthEastAsian: [Bugi, Java, Khmr, Laoo, Mymr, Thai]
  CJK: [Hans, Hant, Kore, Jpan, Yiii]
  NativeAmerican: [Cher, Cans]
# Region codes (as used in the `languages` entries), each mapped to a
# group number.
# NOTE(review): the meaning of the group numbers is not stated in this
# file - confirm with the code that consumes it.
regiongroups:
  NA: 1  # north-america
  LA: 1  # latin-america
  SA: 1  # south-america
  ME: 2  # middle-east
  AF: 2  # africa
  EU: 2  # europe
  AS: 3  # asia
  AU: 3  # australia
  PA: 3  # pacific
  WW: 4  # world wide, international
# Per-language importance within a region.
# A value of 100 means the language is official there, or everybody knows it.
regions:
  fi:
    FI: 100
    SE: 10
  ru:
    RU: 100
    FI: 40