Merge pull request #271 from santhoshtr/language-data-node-module
Use wikimedia/language-data node module for data
This commit is contained in:
@@ -98,7 +98,7 @@ $( '.uls-trigger' ).uls( {
|
|||||||
|
|
||||||
Features
|
Features
|
||||||
--------
|
--------
|
||||||
jQuery.uls has an elaborative language information collection. It knows about
|
jQuery.uls has an elaborate language information collection, based on https://github.com/wikimedia/language-data.git. It knows about
|
||||||
|
|
||||||
1. The script in which a language is written.
|
1. The script in which a language is written.
|
||||||
2. The script code
|
2. The script code
|
||||||
@@ -151,4 +151,3 @@ Coding style
|
|||||||
-------------
|
-------------
|
||||||
|
|
||||||
Please follow [jQuery coding guidelines](http://docs.jquery.com/JQuery_Core_Style_Guidelines)
|
Please follow [jQuery coding guidelines](http://docs.jquery.com/JQuery_Core_Style_Guidelines)
|
||||||
|
|
||||||
|
|||||||
1161
data/Spyc.php
1161
data/Spyc.php
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
654
data/langdb.yaml
654
data/langdb.yaml
@@ -1,654 +0,0 @@
|
|||||||
languages:
|
|
||||||
aa: [Latn, [AF], Qafár af]
|
|
||||||
ab: [Cyrl, [EU], Аҧсшәа]
|
|
||||||
abs: [Latn, [AS], Bahasa Ambon]
|
|
||||||
ace: [Latn, [AS, PA], Acèh]
|
|
||||||
acf: [Latn, [AM], kwéyòl]
|
|
||||||
ady: [Cyrl, [EU, ME], Адыгабзэ]
|
|
||||||
ady-cyrl: [ady]
|
|
||||||
ady-latn: [Latn, [EU, ME], Adygabze]
|
|
||||||
aeb: [aeb-arab]
|
|
||||||
aeb-arab: [Arab, [AF], تونسي]
|
|
||||||
aeb-latn: [Latn, [AF], Tûnsî]
|
|
||||||
af: [Latn, [AF], Afrikaans]
|
|
||||||
ahr: [Deva, [AS], अहिराणी]
|
|
||||||
ais: [Latn, [AS], Sakizaya]
|
|
||||||
ak: [Latn, [AF], Akan]
|
|
||||||
akz: [Latn, [AM], Albaamo innaaɬiilka]
|
|
||||||
aln: [Latn, [EU], Gegë]
|
|
||||||
am: [Ethi, [AF], አማርኛ]
|
|
||||||
ami: [Latn, [AS], Pangcah]
|
|
||||||
an: [Latn, [EU], aragonés]
|
|
||||||
ang: [Latn, [EU], Ænglisc]
|
|
||||||
anp: [Deva, [AS], अङ्गिका]
|
|
||||||
ar: [Arab, [ME], العربية]
|
|
||||||
arc: [Syrc, [ME], ܐܪܡܝܐ]
|
|
||||||
arn: [Latn, [AM], mapudungun]
|
|
||||||
aro: [Latn, [AM], Araona]
|
|
||||||
arq: [Arab, [AF], جازايرية]
|
|
||||||
ary: [Latn, [ME], Maġribi]
|
|
||||||
arz: [Arab, [ME], مصرى]
|
|
||||||
as: [Beng, [AS], অসমীয়া]
|
|
||||||
ase: [Sgnw, [AM], American sign language]
|
|
||||||
ast: [Latn, [EU], asturianu]
|
|
||||||
atj: [Latn, [AM], atikamekw]
|
|
||||||
av: [Cyrl, [EU], авар]
|
|
||||||
avk: [Latn, [WW], Kotava]
|
|
||||||
ay: [Latn, [AM], Aymar aru]
|
|
||||||
# The Latin script is the default in the North Azerbaijani Wikipedia,
|
|
||||||
# which uses the macro code 'az'. Its own standard code is 'azj'.
|
|
||||||
# The Arabic script is the default in the South Azerbaijani Wikipedia,
|
|
||||||
# which correctly uses the code 'azb'.
|
|
||||||
# CLDR uses az-latn and az-arab.
|
|
||||||
az: [az-latn]
|
|
||||||
az-arab: [Arab, [AS, ME], تۆرکجه]
|
|
||||||
az-latn: [Latn, [EU, ME], azərbaycanca]
|
|
||||||
az-cyrl: [Cyrl, [EU, ME], азәрбајҹанҹа]
|
|
||||||
azb: [az-arab]
|
|
||||||
azj: [az-latn]
|
|
||||||
ba: [Cyrl, [EU], башҡортса]
|
|
||||||
ban: [Bali, [AS], ᬩᬲᬩᬮᬶ]
|
|
||||||
bar: [Latn, [EU], Boarisch]
|
|
||||||
bat-smg: [sgs]
|
|
||||||
bbc-latn: [Latn, [AS], Batak Toba]
|
|
||||||
bbc-batk: [Batk, [AS], ᯅᯖᯂ᯲ ᯖᯬᯅ]
|
|
||||||
bbc: [bbc-latn]
|
|
||||||
bcc: [Arab, [AS, ME], جهلسری بلوچی]
|
|
||||||
bcl: [Latn, [AS], Bikol Central]
|
|
||||||
be-tarask: [Cyrl, [EU], беларуская (тарашкевіца)]
|
|
||||||
be-x-old: [be-tarask]
|
|
||||||
be: [Cyrl, [EU], беларуская]
|
|
||||||
bew: [Latn, [AS], Bahasa Betawi]
|
|
||||||
bfa: [Latn, [AF], Bari]
|
|
||||||
bft: [Arab, [AS], بلتی]
|
|
||||||
bfq: [Taml, [AS], படகா]
|
|
||||||
bg: [Cyrl, [EU], български]
|
|
||||||
bgn: [Arab, [AS, ME], روچ کپتین بلوچی]
|
|
||||||
# FIXME - currently says Bhojpuri, but it's a macrolanguage.
|
|
||||||
bh: [bho]
|
|
||||||
bho: [Deva, [AS], भोजपुरी]
|
|
||||||
bi: [Latn, [PA], Bislama]
|
|
||||||
bjn: [Latn, [AS], Bahasa Banjar]
|
|
||||||
bm: [Latn, [AF], bamanankan]
|
|
||||||
bn: [Beng, [AS], বাংলা]
|
|
||||||
bo: [Tibt, [AS], བོད་ཡིག]
|
|
||||||
bpy: [Beng, [AS], বিষ্ণুপ্রিয়া মণিপুরী]
|
|
||||||
bqi: [Arab, [AS, ME], بختیاری]
|
|
||||||
br: [Latn, [EU], brezhoneg]
|
|
||||||
brh: [Latn, [ME, AS], Bráhuí]
|
|
||||||
brx: [Deva, [AS], बड़ो]
|
|
||||||
bs: [Latn, [EU], bosanski]
|
|
||||||
bto: [Latn, [AS], Iriga Bicolano]
|
|
||||||
bug: [Bugi, [AS], ᨅᨔ ᨕᨘᨁᨗ]
|
|
||||||
bxr: [Cyrl, [AS], буряад]
|
|
||||||
byn: [Ethi, [AF], ብሊን]
|
|
||||||
ca: [Latn, [EU], català]
|
|
||||||
cbk-zam: [Latn, [AS], Chavacano de Zamboanga]
|
|
||||||
cdo: [Latn, [AS], Mìng-dĕ̤ng-ngṳ̄]
|
|
||||||
ce: [Cyrl, [EU], нохчийн]
|
|
||||||
ceb: [Latn, [AS], Cebuano]
|
|
||||||
ch: [Latn, [PA], Chamoru]
|
|
||||||
# FIXME chm is actually a macro language that includes
|
|
||||||
# mhr and mrj, but CLDR Territory-Language information
|
|
||||||
# uses chm instead of mhr, so for practical reasons
|
|
||||||
# it should redirect there.
|
|
||||||
# A better fix would be this:
|
|
||||||
# https://phabricator.wikimedia.org/T136164
|
|
||||||
chm: [mhr]
|
|
||||||
cho: [Latn, [AM], Choctaw]
|
|
||||||
chr: [Cher, [AM], ᏣᎳᎩ]
|
|
||||||
chy: [Latn, [AM], Tsetsêhestâhese]
|
|
||||||
ciw: [Latn, [AM], Ojibwemowin]
|
|
||||||
cjy: [cjy-hant]
|
|
||||||
cjy-hans: [Hans, [AS], 晋语(简化字)]
|
|
||||||
cjy-hant: [Hant, [AS], 晉語]
|
|
||||||
ckb: [Arab, [ME], کوردی]
|
|
||||||
cnh: [Latn, [AS], Lai holh]
|
|
||||||
co: [Latn, [EU], corsu]
|
|
||||||
cop: [Copt, [AF, ME], ϯⲙⲉⲧⲣⲉⲙⲛ̀ⲭⲏⲙⲓ]
|
|
||||||
cps: [Latn, [AS], Capiceño]
|
|
||||||
cr: [Cans, [AM], ᓀᐦᐃᔭᐍᐏᐣ]
|
|
||||||
cr-cans: [cr]
|
|
||||||
cr-latn: [Latn, [AM], Nēhiyawēwin]
|
|
||||||
crh: [Latn, [EU], qırımtatarca]
|
|
||||||
crh-cyrl: [Cyrl, [EU], къырымтатарджа]
|
|
||||||
crh-latn: [crh]
|
|
||||||
cs: [Latn, [EU], čeština]
|
|
||||||
csb: [Latn, [EU], kaszëbsczi]
|
|
||||||
# FIXME: which script to prefer?..
|
|
||||||
cu: [Cyrl, [EU], словѣньскъ / ⰔⰎⰑⰂⰡⰐⰠⰔⰍⰟ]
|
|
||||||
cv: [Cyrl, [EU], Чӑвашла]
|
|
||||||
cy: [Latn, [EU], Cymraeg]
|
|
||||||
da: [Latn, [EU], dansk]
|
|
||||||
dag: [Latn, [AF], dagbanli]
|
|
||||||
de-at: [Latn, [EU], Österreichisches Deutsch]
|
|
||||||
de-ch: [Latn, [EU], Schweizer Hochdeutsch]
|
|
||||||
de-formal: [Latn, [EU], Deutsch (Sie-Form)]
|
|
||||||
de: [Latn, [EU], Deutsch]
|
|
||||||
din: [Latn, [AF], Thuɔŋjäŋ]
|
|
||||||
diq: [Latn, [EU, AS], Zazaki]
|
|
||||||
doi: [Deva, [AS], डोगरी]
|
|
||||||
dsb: [Latn, [EU], dolnoserbski]
|
|
||||||
dtp: [Latn, [AS], Dusun Bundu-liwan]
|
|
||||||
dty: [Deva, [AS], डोटेली]
|
|
||||||
dv: [Thaa, [AS], ދިވެހިބަސް]
|
|
||||||
dz: [Tibt, [AS], ཇོང་ཁ]
|
|
||||||
ee: [Latn, [AF], eʋegbe]
|
|
||||||
egl: [Latn, [EU], Emiliàn]
|
|
||||||
el: [Grek, [EU], Ελληνικά]
|
|
||||||
eml: [Latn, [EU], emiliàn e rumagnòl]
|
|
||||||
en-ca: [Latn, [AM], Canadian English]
|
|
||||||
en-gb: [Latn, [EU, AS, PA], British English]
|
|
||||||
en: [Latn, [EU, AM, AF, ME, AS, PA, WW], English]
|
|
||||||
eo: [Latn, [WW], Esperanto]
|
|
||||||
es-419: [Latn, [AM], español de America Latina]
|
|
||||||
# world?
|
|
||||||
es-formal: [Latn, [EU, AM, AF, WW], español (formal)]
|
|
||||||
# world?
|
|
||||||
es: [Latn, [EU, AM, AF, WW], español]
|
|
||||||
es-ni: [Latn, [AM], español nicaragüense]
|
|
||||||
esu: [Latn, [AM], "Yup'ik"]
|
|
||||||
et: [Latn, [EU], eesti]
|
|
||||||
eu: [Latn, [EU], euskara]
|
|
||||||
ext: [Latn, [EU], estremeñu]
|
|
||||||
fa: [Arab, [AS, ME], فارسی]
|
|
||||||
fax: [Latn, [EU], Fala]
|
|
||||||
ff: [Latn, [AF], Fulfulde]
|
|
||||||
fi: [Latn, [EU], suomi]
|
|
||||||
fil: [tl]
|
|
||||||
fit: [Latn, [EU], meänkieli]
|
|
||||||
fiu-vro: [vro]
|
|
||||||
fj: [Latn, [PA], Na Vosa Vakaviti]
|
|
||||||
fo: [Latn, [EU], føroyskt]
|
|
||||||
fr: [Latn, [EU, AM, WW], français]
|
|
||||||
frc: [Latn, [AM], français cadien]
|
|
||||||
frp: [Latn, [EU], arpetan]
|
|
||||||
frr: [Latn, [EU], Nordfriisk]
|
|
||||||
fur: [Latn, [EU], furlan]
|
|
||||||
fy: [Latn, [EU], Frysk]
|
|
||||||
ga: [Latn, [EU], Gaeilge]
|
|
||||||
gaa: [Latn, [AF], Ga]
|
|
||||||
gag: [Latn, [EU], Gagauz]
|
|
||||||
gah: [Latn, [AS], Alekano]
|
|
||||||
gan-hans: [Hans, [AS], 赣语(简体)]
|
|
||||||
gan-hant: [gan]
|
|
||||||
gan: [Hant, [AS], 贛語]
|
|
||||||
gbz: [Latn, [AS], Dari-e Mazdeyasnā]
|
|
||||||
gcf: [Latn, [AM], Guadeloupean Creole French]
|
|
||||||
gd: [Latn, [EU], Gàidhlig]
|
|
||||||
gl: [Latn, [EU], galego]
|
|
||||||
glk: [Arab, [AS, ME], گیلکی]
|
|
||||||
gn: [Latn, [AM], "Avañe'ẽ"]
|
|
||||||
gom: [gom-deva]
|
|
||||||
gom-deva: [Deva, [AS], गोंयची कोंकणी]
|
|
||||||
gom-latn: [Latn, [AS], Gõychi Konknni]
|
|
||||||
got: [Goth, [EU], 𐌲𐌿𐍄𐌹𐍃𐌺]
|
|
||||||
grc: [Grek, [EU], Ἀρχαία ἑλληνικὴ]
|
|
||||||
gsw: [Latn, [EU], Alemannisch]
|
|
||||||
gu: [Gujr, [AS], ગુજરાતી]
|
|
||||||
guc: [Latn, [AM], Wayúu]
|
|
||||||
gum: [Latn, [AM], Namtrik]
|
|
||||||
gur: [Latn, [AF], Gurenɛ]
|
|
||||||
gv: [Latn, [EU], Gaelg]
|
|
||||||
# CLDR uses ha-latn and ha-arab. Latin is more common and is used in Wikipedia.
|
|
||||||
ha-arab: [Arab, [AF], هَوُسَ]
|
|
||||||
ha-latn: [Latn, [AF], Hausa]
|
|
||||||
ha: [ha-latn]
|
|
||||||
hak: [Latn, [AS], Hak-kâ-fa]
|
|
||||||
haw: [Latn, [AM, PA], Hawai`i]
|
|
||||||
he: [Hebr, [ME], עברית]
|
|
||||||
hak-hans: [Hans, [AS], 客家语(简体)]
|
|
||||||
hak-hant: [Hant, [AS], 客家語(繁體)]
|
|
||||||
hi: [Deva, [AS], हिन्दी]
|
|
||||||
hif: [Latn, [PA, AS], Fiji Hindi]
|
|
||||||
hif-deva: [Deva, [AS], फ़ीजी हिन्दी]
|
|
||||||
hif-latn: [hif]
|
|
||||||
hil: [Latn, [AS], Ilonggo]
|
|
||||||
hne: [Deva, [AS], छत्तीसगढ़ी]
|
|
||||||
ho: [Latn, [PA], Hiri Motu]
|
|
||||||
hoc: [Wara, [AS], 𑢹𑣉𑣉]
|
|
||||||
hr: [Latn, [EU], hrvatski]
|
|
||||||
hrx: [Latn, [AM], Hunsrik]
|
|
||||||
hsb: [Latn, [EU], hornjoserbsce]
|
|
||||||
hsn: [Hans, [AS], 湘语]
|
|
||||||
ht: [Latn, [AM], Kreyòl ayisyen]
|
|
||||||
hu-formal: [Latn, [EU], Magyar (magázó)]
|
|
||||||
hu: [Latn, [EU], magyar]
|
|
||||||
hy: [Armn, [EU, ME], Հայերեն]
|
|
||||||
hz: [Latn, [AF], Otsiherero]
|
|
||||||
ia: [Latn, [WW], interlingua]
|
|
||||||
id: [Latn, [AS], Bahasa Indonesia]
|
|
||||||
ie: [Latn, [WW], Interlingue]
|
|
||||||
ig: [Latn, [AF], Igbo]
|
|
||||||
ii: [Yiii, [AS], ꆇꉙ]
|
|
||||||
ik: [Latn, [AM], Iñupiak]
|
|
||||||
ike-cans: [Cans, [AM], ᐃᓄᒃᑎᑐᑦ]
|
|
||||||
ike-latn: [Latn, [AM], inuktitut]
|
|
||||||
ilo: [Latn, [AS], Ilokano]
|
|
||||||
inh: [Cyrl, [EU], ГӀалгӀай]
|
|
||||||
io: [Latn, [WW], Ido]
|
|
||||||
is: [Latn, [EU], íslenska]
|
|
||||||
it: [Latn, [EU], italiano]
|
|
||||||
iu: [ike-cans]
|
|
||||||
ja: [Jpan, [AS], 日本語]
|
|
||||||
jam: [Latn, [AM], Patois]
|
|
||||||
jbo: [Latn, [WW], lojban]
|
|
||||||
jdt: [jdt-cyrl]
|
|
||||||
jdt-cyrl: [Cyrl, [EU, AS], жугьури]
|
|
||||||
jut: [Latn, [EU], jysk]
|
|
||||||
jv: [Latn, [AS, PA], Basa Jawa]
|
|
||||||
# For support in webfonts.
|
|
||||||
jv-java: [Java, [AS, PA], ꦧꦱꦗꦮ]
|
|
||||||
ka: [Geor, [EU], ქართული]
|
|
||||||
kaa: [Latn, [AS], Qaraqalpaqsha]
|
|
||||||
# Can also be Tfng, but the Wikipedia is mostly Latn
|
|
||||||
kab: [Latn, [AF, EU], Taqbaylit]
|
|
||||||
kac: [Latn, [AS], Jinghpaw]
|
|
||||||
kbd-cyrl: [kbd]
|
|
||||||
kbd-latn: [Latn, [EU], Qabardjajəbza]
|
|
||||||
kbd: [Cyrl, [EU, ME], Адыгэбзэ]
|
|
||||||
kbp: [Latn, [AF], Kabɩyɛ]
|
|
||||||
kea: [Latn, [AF], Kabuverdianu]
|
|
||||||
kg: [Latn, [AF], Kongo]
|
|
||||||
kgp: [Latn, [AM], Kaingáng]
|
|
||||||
khw: [Arab, [ME, AS], کھوار]
|
|
||||||
ki: [Latn, [AF], Gĩkũyũ]
|
|
||||||
kiu: [Latn, [EU, ME], Kırmancki]
|
|
||||||
kj: [Latn, [AF], Kwanyama]
|
|
||||||
kjh: [Cyrl, [AS], хакас]
|
|
||||||
kk: [kk-cyrl]
|
|
||||||
kk-arab: [Arab, [EU, AS], قازاقشا (تٶتە)]
|
|
||||||
kk-cn: [kk-arab]
|
|
||||||
kk-cyrl: [Cyrl, [EU, AS], қазақша]
|
|
||||||
kk-kz: [kk-cyrl]
|
|
||||||
kk-latn: [Latn, [EU, AS, ME], qazaqşa]
|
|
||||||
kk-tr: [kk-latn]
|
|
||||||
kl: [Latn, [AM, EU], kalaallisut]
|
|
||||||
km: [Khmr, [AS], ភាសាខ្មែរ]
|
|
||||||
kn: [Knda, [AS], ಕನ್ನಡ]
|
|
||||||
knn: [Deva, [AS], महाराष्ट्रीय कोंकणी]
|
|
||||||
# Here Hang may be even more appropriate, because kp has more resistance to Han
|
|
||||||
ko-kp: [Kore, [AS], 한국어 (조선)]
|
|
||||||
# Kore is an alias for Hangul+Han. Maybe Hang is more appropriate?
|
|
||||||
ko: [Kore, [AS], 한국어]
|
|
||||||
koi: [Cyrl, [EU], Перем Коми]
|
|
||||||
kr: [Latn, [AF], Kanuri]
|
|
||||||
krc: [Cyrl, [EU], къарачай-малкъар]
|
|
||||||
kri: [Latn, [AF], Krio]
|
|
||||||
krj: [Latn, [ME, EU], Kinaray-a]
|
|
||||||
krl: [Latn, [EU], Karjala]
|
|
||||||
ks-arab: [Arab, [AS], کٲشُر]
|
|
||||||
ks-deva: [Deva, [AS], कॉशुर]
|
|
||||||
ks: [ks-arab]
|
|
||||||
ksf: [Latn, [AF], Bafia]
|
|
||||||
ksh: [Latn, [EU], Ripoarisch]
|
|
||||||
# CLDR uses ku-latn and ku-arab. Latin is more common and is used in Wikipedia.
|
|
||||||
ku: [ku-latn]
|
|
||||||
ku-arab: [Arab, [EU, ME], كوردي]
|
|
||||||
ku-latn: [Latn, [EU, ME], Kurdî]
|
|
||||||
kv: [Cyrl, [EU], коми]
|
|
||||||
kw: [Latn, [EU], kernowek]
|
|
||||||
ky: [Cyrl, [AS], Кыргызча]
|
|
||||||
la: [Latn, [EU], Latina]
|
|
||||||
lad: [lad-latn]
|
|
||||||
lad-latn: [Latn, [ME, EU, AM], Ladino]
|
|
||||||
lad-hebr: [Hebr, [ME, EU, AM], לאדינו]
|
|
||||||
lag: [Latn, [AF], Kilaangi]
|
|
||||||
lb: [Latn, [EU], Lëtzebuergesch]
|
|
||||||
lbe: [Cyrl, [EU], лакку]
|
|
||||||
lez: [Cyrl, [EU], лезги]
|
|
||||||
lfn: [Latn, [WW], Lingua Franca Nova]
|
|
||||||
lg: [Latn, [AF], Luganda]
|
|
||||||
li: [Latn, [EU], Limburgs]
|
|
||||||
lij: [Latn, [EU], Ligure]
|
|
||||||
liv: [Latn, [EU], Līvõ kēļ]
|
|
||||||
lki: [Arab, [AS, ME], لەکی]
|
|
||||||
lkt: [Latn, [AM], Lakȟótiyapi]
|
|
||||||
lld: [Latn, [EU], Ladin]
|
|
||||||
lmo: [Latn, [EU], lumbaart]
|
|
||||||
ln: [Latn, [AF], lingála]
|
|
||||||
lo: [Laoo, [AS], ລາວ]
|
|
||||||
loz: [Latn, [AF], Silozi]
|
|
||||||
lt: [Latn, [EU], lietuvių]
|
|
||||||
lrc: [Arab, [AS, ME], لۊری شومالی]
|
|
||||||
ltg: [Latn, [EU], latgaļu]
|
|
||||||
lud: [Latn, [EU], lüüdi]
|
|
||||||
lus: [Latn, [AS], Mizo ţawng]
|
|
||||||
lut: [Latn, [AM], dxʷləšucid]
|
|
||||||
luz: [Arab, [ME], لئری دوٙمینی]
|
|
||||||
lv: [Latn, [EU], latviešu]
|
|
||||||
lzh: [Hant, [AS], 文言]
|
|
||||||
# Also Geor, but the incubator is in Latn
|
|
||||||
lzz: [Latn, [EU, ME], Lazuri]
|
|
||||||
mai: [Deva, [AS], मैथिली]
|
|
||||||
map-bms: [Latn, [AS], Basa Banyumasan]
|
|
||||||
mdf: [Cyrl, [EU], мокшень]
|
|
||||||
mfe: [Latn, [AM], Morisyen]
|
|
||||||
mg: [Latn, [AF], Malagasy]
|
|
||||||
mh: [Latn, [PA], Ebon]
|
|
||||||
mhr: [Cyrl, [EU], олык марий]
|
|
||||||
mi: [Latn, [PA], Māori]
|
|
||||||
mic: [Latn, [AM], "Mi'kmaq"]
|
|
||||||
min: [Latn, [AS], Baso Minangkabau]
|
|
||||||
miq: [Latn, [AM], Mískitu]
|
|
||||||
mk: [Cyrl, [EU], македонски]
|
|
||||||
ml: [Mlym, [AS, ME], മലയാളം]
|
|
||||||
# Hmm, can also have Mong some day in some way
|
|
||||||
mn: [Cyrl, [AS], монгол]
|
|
||||||
mnc: [Mong, [AS], ᠮᠠᠨᠵᡠ ᡤᡳᠰᡠᠨ]
|
|
||||||
mni: [Beng, [AS], মেইতেই লোন্]
|
|
||||||
mnw: [Mymr, [AS], ဘာသာ မန်]
|
|
||||||
mo: [Cyrl, [EU], молдовеняскэ]
|
|
||||||
mr: [Deva, [AS, ME], मराठी]
|
|
||||||
mrj: [Cyrl, [EU], кырык мары]
|
|
||||||
ms: [Latn, [AS], Bahasa Melayu]
|
|
||||||
mt: [Latn, [EU], Malti]
|
|
||||||
mui: [Latn, [AS], Musi]
|
|
||||||
mus: [Latn, [AM], Mvskoke]
|
|
||||||
mwl: [Latn, [EU], Mirandés]
|
|
||||||
mwv: [Latn, [AS], Behase Mentawei]
|
|
||||||
mww: [mww-latn]
|
|
||||||
mww-latn: [Latn, [AS], Hmoob Dawb]
|
|
||||||
my: [Mymr, [AS], မြန်မာဘာသာ]
|
|
||||||
myv: [Cyrl, [EU], эрзянь]
|
|
||||||
mzn: [Arab, [ME, AS], مازِرونی]
|
|
||||||
na: [Latn, [PA], Dorerin Naoero]
|
|
||||||
nah: [Latn, [AM], Nāhuatl]
|
|
||||||
nan: [Latn, [AS], Bân-lâm-gú]
|
|
||||||
nap: [Latn, [EU], Napulitano]
|
|
||||||
nb: [Latn, [EU], norsk (bokmål)]
|
|
||||||
nds-nl: [Latn, [EU], Nedersaksisch]
|
|
||||||
nds: [Latn, [EU], Plattdüütsch]
|
|
||||||
ne: [Deva, [AS], नेपाली]
|
|
||||||
new: [Deva, [AS], नेपाल भाषा]
|
|
||||||
ng: [Latn, [AF], Oshiwambo]
|
|
||||||
niu: [Latn, [PA], ko e vagahau Niuē]
|
|
||||||
njo: [Latn, [AS], Ao]
|
|
||||||
nl-informal: [Latn, [EU, AM], Nederlands (informeel)]
|
|
||||||
nl: [Latn, [EU, AM], Nederlands]
|
|
||||||
nn: [Latn, [EU], norsk (nynorsk)]
|
|
||||||
# There's also nb for Bokmål and nn for Nynorsk
|
|
||||||
"no": [Latn, [EU], norsk]
|
|
||||||
nod: [Thai, [AS], คำเมือง]
|
|
||||||
nov: [Latn, [WW], Novial]
|
|
||||||
nqo: [Nkoo, [AF], ߒߞߏ]
|
|
||||||
nrm: [Latn, [EU], Nouormand]
|
|
||||||
nso: [Latn, [AF], Sesotho sa Leboa]
|
|
||||||
nv: [Latn, [AM], Diné bizaad]
|
|
||||||
ny: [Latn, [AF], Chi-Chewa]
|
|
||||||
nys: [Latn, [PA], Nyungar]
|
|
||||||
oc: [Latn, [EU], occitan]
|
|
||||||
olo: [Latn, [AS, EU], Livvinkarjala]
|
|
||||||
om: [Latn, [AF], Oromoo]
|
|
||||||
or: [Orya, [AS], ଓଡ଼ିଆ]
|
|
||||||
os: [Cyrl, [EU], Ирон]
|
|
||||||
# Bug: 60815
|
|
||||||
ota: [Arab, [AS, EU], لسان عثمانى]
|
|
||||||
ovd: [Latn, [EU], övdalsk]
|
|
||||||
pa: [pa-guru]
|
|
||||||
pa-guru: [Guru, [AS], ਪੰਜਾਬੀ]
|
|
||||||
pag: [Latn, [AS], Pangasinan]
|
|
||||||
pam: [Latn, [AS], Kapampangan]
|
|
||||||
pap: [Latn, [AM], Papiamentu]
|
|
||||||
pbb: [Latn, [AM], Nasa Yuwe]
|
|
||||||
pcd: [Latn, [EU], Picard]
|
|
||||||
pdc: [Latn, [EU, AM], Deitsch]
|
|
||||||
pdt: [Latn, [EU, AM], Plautdietsch]
|
|
||||||
pfl: [Latn, [EU], Pälzisch]
|
|
||||||
pi: [Deva, [AS], पालि]
|
|
||||||
pih: [Latn, [PA], Norfuk / Pitkern]
|
|
||||||
pis: [Latn, [PA], Pijin]
|
|
||||||
pko: [Latn, [AF], Pökoot]
|
|
||||||
pl: [Latn, [EU], polski]
|
|
||||||
pms: [Latn, [EU], Piemontèis]
|
|
||||||
pnb: [Arab, [AS, ME], پنجابی]
|
|
||||||
pnt: [Grek, [EU], Ποντιακά]
|
|
||||||
ppl: [Latn, [AM], Nawat]
|
|
||||||
prg: [Latn, [EU], Prūsiskan]
|
|
||||||
prs: [Arab, [AS, ME], دری]
|
|
||||||
ps: [Arab, [AS, ME], پښتو]
|
|
||||||
pt-br: [Latn, [AM], português do Brasil]
|
|
||||||
pt: [Latn, [EU, AM, AS, PA, AF, WW], português]
|
|
||||||
qu: [Latn, [AM], Runa Simi]
|
|
||||||
qug: [Latn, [AM], Runa shimi]
|
|
||||||
rap: [Latn, [AM], arero rapa nui]
|
|
||||||
rcf: [Latn, [AF], Kreol Réyoné]
|
|
||||||
rgn: [Latn, [EU], Rumagnôl]
|
|
||||||
rif: [Latn, [AF], Tarifit]
|
|
||||||
rki: [Mymr, [AS], ရခိုင်]
|
|
||||||
rm: [Latn, [EU], rumantsch]
|
|
||||||
# Also known as Fíntika Rómma
|
|
||||||
rmf: [Latn, [EU], kaalengo tšimb]
|
|
||||||
rmy: [Latn, [EU], Romani]
|
|
||||||
rn: [Latn, [AF], Kirundi]
|
|
||||||
ro: [Latn, [EU], română]
|
|
||||||
roa-rup: [rup]
|
|
||||||
roa-tara: [Latn, [EU], tarandíne]
|
|
||||||
rtm: [Latn, [PA], Faeag Rotuma]
|
|
||||||
# world?
|
|
||||||
ru: [Cyrl, [EU, AS, ME], русский]
|
|
||||||
rue: [Cyrl, [EU], русиньскый]
|
|
||||||
rup: [Latn, [EU], armãneashti]
|
|
||||||
ruq: [Cyrl, [EU], Влахесте]
|
|
||||||
ruq-cyrl: [ruq]
|
|
||||||
# FIXME: broken autonym
|
|
||||||
ruq-grek: [Grek, [EU], Megleno-Romanian (Greek script)]
|
|
||||||
ruq-latn: [Latn, [EU], Vlăheşte]
|
|
||||||
rut: [Cyrl, [EU], мыхаӀбишды]
|
|
||||||
rw: [Latn, [AF], Kinyarwanda]
|
|
||||||
# Bug: 60815
|
|
||||||
rwr: [Deva, [AS], मारवाड़ी]
|
|
||||||
ryu: [Kana, [AS], ʔucināguci]
|
|
||||||
sa: [Deva, [AS], संस्कृतम्]
|
|
||||||
sah: [Cyrl, [EU, AS], саха тыла]
|
|
||||||
# Currently Latn, potentially Olck
|
|
||||||
sat: [Latn, [AS], Santali]
|
|
||||||
saz: [Saur, [AS], ꢱꣃꢬꢵꢯ꣄ꢡ꣄ꢬꢵ]
|
|
||||||
sc: [Latn, [EU], sardu]
|
|
||||||
scn: [Latn, [EU], sicilianu]
|
|
||||||
sco: [Latn, [EU], Scots]
|
|
||||||
sd: [Arab, [AS], سنڌي]
|
|
||||||
sdc: [Latn, [EU], Sassaresu]
|
|
||||||
sdh: [Arab, [ME], کوردی خوارگ]
|
|
||||||
se: [Latn, [EU], sámegiella]
|
|
||||||
ses: [Latn, [AF], Koyraboro Senni]
|
|
||||||
sei: [Latn, [AM], Cmique Itom]
|
|
||||||
sg: [Latn, [AF], Sängö]
|
|
||||||
sgs: [Latn, [EU], žemaitėška]
|
|
||||||
sh: [Latn, [EU], srpskohrvatski]
|
|
||||||
shi-latn: [Latn, [AF], Tašlḥiyt]
|
|
||||||
shi-tfng: [Tfng, [AF], ⵜⴰⵛⵍⵃⵉⵜ]
|
|
||||||
shi: [shi-latn]
|
|
||||||
shn: [Mymr, [AS], လိၵ်ႈတႆး]
|
|
||||||
si: [Sinh, [AS], සිංහල]
|
|
||||||
simple: [Latn, [WW], Simple English]
|
|
||||||
sjo: [Mong, [AS], ᠰᡞᠪᡝ ᡤᡞᠰᡠᠨ]
|
|
||||||
sk: [Latn, [EU], slovenčina]
|
|
||||||
sl: [Latn, [EU], slovenščina]
|
|
||||||
sli: [Latn, [EU], Schläsch]
|
|
||||||
slr: [Latn, [AS], Salırça]
|
|
||||||
sly: [Latn, [AS], Bahasa Selayar]
|
|
||||||
skr-arab: [Arab, [AS], سرائیکی]
|
|
||||||
skr: [skr-arab]
|
|
||||||
syc: [Syrc, [ME], ܣܘܪܝܝܐ]
|
|
||||||
sm: [Latn, [PA], Gagana Samoa]
|
|
||||||
sma: [Latn, [EU], åarjelsaemien]
|
|
||||||
smj: [Latn, [EU], julevsámegiella]
|
|
||||||
smn: [Latn, [EU], anarâškielâ]
|
|
||||||
sms: [Latn, [EU], sää´mǩiõll]
|
|
||||||
sn: [Latn, [AF], chiShona]
|
|
||||||
so: [Latn, [AF], Soomaaliga]
|
|
||||||
son: [Latn, [AF], soŋay]
|
|
||||||
sq: [Latn, [EU], shqip]
|
|
||||||
sr: [sr-cyrl]
|
|
||||||
sr-ec: [sr-cyrl]
|
|
||||||
sr-cyrl: [Cyrl, [EU], српски]
|
|
||||||
sr-el: [sr-latn]
|
|
||||||
sr-latn: [Latn, [EU], srpski]
|
|
||||||
srn: [Latn, [AM, EU], Sranantongo]
|
|
||||||
ss: [Latn, [AF], SiSwati]
|
|
||||||
st: [Latn, [AF], Sesotho]
|
|
||||||
stq: [Latn, [EU], Seeltersk]
|
|
||||||
sty: [Cyrl, [EU, AS], себертатар]
|
|
||||||
su: [Latn, [AS], Basa Sunda]
|
|
||||||
sv: [Latn, [EU], svenska]
|
|
||||||
sw: [Latn, [AF], Kiswahili]
|
|
||||||
swb: [Latn, [AF], Shikomoro]
|
|
||||||
sxu: [Latn, [EU], Säggssch]
|
|
||||||
szl: [Latn, [EU], ślůnski]
|
|
||||||
ta: [Taml, [AS], தமிழ்]
|
|
||||||
tay: [Latn, [AS], Tayal]
|
|
||||||
tcy: [Knda, [AS], ತುಳು]
|
|
||||||
te: [Telu, [AS], తెలుగు]
|
|
||||||
tet: [Latn, [AS, PA], tetun]
|
|
||||||
tg-cyrl: [Cyrl, [AS], тоҷикӣ]
|
|
||||||
tg-latn: [Latn, [AS], tojikī]
|
|
||||||
tg: [tg-cyrl]
|
|
||||||
th: [Thai, [AS], ไทย]
|
|
||||||
ti: [Ethi, [AF], ትግርኛ]
|
|
||||||
tig: [Ethi, [AF], ትግረ]
|
|
||||||
tk: [Latn, [AS], Türkmençe]
|
|
||||||
tkr: [Cyrl, [AS], ЦӀаӀхна миз]
|
|
||||||
tl: [Latn, [AS], Tagalog]
|
|
||||||
# A very complicated case. Names.php is Cyrl. In TWN they argue about Cyrl, Latn, and Arab. I can't find reliable external sources. --Amir
|
|
||||||
tly: [Cyrl, [EU, AS, ME], толышә зывон]
|
|
||||||
tmr: [Hebr, [ME, EU, AM], ארמית בבלית]
|
|
||||||
tn: [Latn, [AF], Setswana]
|
|
||||||
to: [Latn, [PA], lea faka-Tonga]
|
|
||||||
tokipona: [Latn, [WW], Toki Pona]
|
|
||||||
tpi: [Latn, [PA, AS], Tok Pisin]
|
|
||||||
tr: [Latn, [EU, ME], Türkçe]
|
|
||||||
trp: [Latn, [AS], Kokborok (Tripuri)]
|
|
||||||
tru: [Latn, [AS], Ṫuroyo]
|
|
||||||
trv: [Latn, [AS], Sediq Taroko]
|
|
||||||
ts: [Latn, [AF], Xitsonga]
|
|
||||||
tsd: [Grek, [EU], Τσακωνικά]
|
|
||||||
tt: [Cyrl, [EU], татарча]
|
|
||||||
tt-cyrl: [tt]
|
|
||||||
tt-latn: [Latn, [EU], tatarça]
|
|
||||||
ttt: [Cyrl, [AS], Tati]
|
|
||||||
tum: [Latn, [AF], chiTumbuka]
|
|
||||||
tw: [Latn, [AF], Twi]
|
|
||||||
twd: [Latn, [EU], Tweants]
|
|
||||||
ty: [Latn, [PA], reo tahiti]
|
|
||||||
tyv: [Cyrl, [AS], тыва дыл]
|
|
||||||
tzl: [Latn, [WW], Talossan]
|
|
||||||
tzm: [Tfng, [AF], ⵜⴰⵎⴰⵣⵉⵖⵜ]
|
|
||||||
udm: [Cyrl, [EU], удмурт]
|
|
||||||
# CLDR uses ug-arab, ug-latn and ug-cyrl.
|
|
||||||
# Arabic seems to have the largest number of users and is the main script in the Wikipedia.
|
|
||||||
ug: [ug-arab]
|
|
||||||
ug-arab: [Arab, [AS], ئۇيغۇرچە]
|
|
||||||
ug-latn: [Latn, [AS], uyghurche]
|
|
||||||
ug-cyrl: [Cyrl, [AS], уйғурчә]
|
|
||||||
uk: [Cyrl, [EU], українська]
|
|
||||||
ur: [Arab, [AS, ME], اردو]
|
|
||||||
uz: [Latn, [AS], oʻzbekcha]
|
|
||||||
ve: [Latn, [AF], Tshivenda]
|
|
||||||
vec: [Latn, [EU], vèneto]
|
|
||||||
vep: [Latn, [EU], vepsän kel’]
|
|
||||||
vi: [Latn, [AS], Tiếng Việt]
|
|
||||||
vls: [Latn, [EU], West-Vlams]
|
|
||||||
vmf: [Latn, [EU], Mainfränkisch]
|
|
||||||
vo: [Latn, [WW], Volapük]
|
|
||||||
vot: [Latn, [EU], Vaďďa]
|
|
||||||
vro: [Latn, [EU], Võro]
|
|
||||||
wa: [Latn, [EU], walon]
|
|
||||||
war: [Latn, [AS], Winaray]
|
|
||||||
wls: [Latn, [PA], "Faka'uvea"]
|
|
||||||
wo: [Latn, [AF], Wolof]
|
|
||||||
wuu: [Hans, [AS], 吴语]
|
|
||||||
xal: [Cyrl, [EU], хальмг]
|
|
||||||
xh: [Latn, [AF], isiXhosa]
|
|
||||||
xmf: [Geor, [EU], მარგალური]
|
|
||||||
xsy: [Latn, [AS], SaiSiyat]
|
|
||||||
ydd: [Hebr, [AS, EU], Eastern Yiddish]
|
|
||||||
yi: [Hebr, [ME, EU, AM], ייִדיש]
|
|
||||||
yo: [Latn, [AF], Yorùbá]
|
|
||||||
yrk: [Cyrl, [AS], Ненэцяʼ вада]
|
|
||||||
yrl: [Latn, [AM], "ñe'engatú"]
|
|
||||||
yua: [Latn, [AM], "Maaya T'aan"]
|
|
||||||
yue: [Hant, [AS], 粵語]
|
|
||||||
za: [Latn, [AS], Vahcuengh]
|
|
||||||
zea: [Latn, [EU], Zeêuws]
|
|
||||||
zgh: [Tfng, [AF], ⵜⴰⵎⴰⵣⵉⵖⵜ ⵜⴰⵏⴰⵡⴰⵢⵜ]
|
|
||||||
# world? (may apply to many varieties of Chinese)
|
|
||||||
zh: [Hans, [AS], 中文]
|
|
||||||
zh-classical: [lzh]
|
|
||||||
zh-cn: [Hans, [AS], 中文(中国大陆)]
|
|
||||||
zh-hans: [Hans, [AS], 中文(简体)]
|
|
||||||
zh-hant: [Hant, [AS], 中文(繁體)]
|
|
||||||
zh-hk: [Hant, [AS], 中文(香港)]
|
|
||||||
zh-min-nan: [nan]
|
|
||||||
zh-mo: [Hant, [AS], 中文(澳門)]
|
|
||||||
zh-my: [Hans, [AS], 中文(马来西亚)]
|
|
||||||
zh-sg: [Hans, [AS], 中文(新加坡)]
|
|
||||||
zh-tw: [Hant, [AS], 中文(台灣)]
|
|
||||||
zh-yue: [yue]
|
|
||||||
zu: [Latn, [AF], isiZulu]
|
|
||||||
|
|
||||||
# All the supported scripts, grouped logically.
|
|
||||||
#
|
|
||||||
# The codes are taken from http://unicode.org/iso15924/iso15924-codes.html .
|
|
||||||
#
|
|
||||||
# The classification is roughly based on http://www.unicode.org/charts/
|
|
||||||
# with some practical corrections.
|
|
||||||
#
|
|
||||||
# The order of the groups affects display. Pau Giner suggested the order;
|
|
||||||
# the rationale of the order is to distance the largest groups from
|
|
||||||
# one another to improve discoverability.
|
|
||||||
#
|
|
||||||
# The group name "Other" is reserved.
|
|
||||||
scriptgroups:
|
|
||||||
# It's hard to find a better place for Goth except the Latin group.
|
|
||||||
Latin: [Latn, Goth]
|
|
||||||
# Greek is probably different enough from Latin and Cyrillic, but user testing
|
|
||||||
# may prove otherwise.
|
|
||||||
Greek: [Grek]
|
|
||||||
WestCaucasian: [Armn, Geor]
|
|
||||||
Arabic: [Arab]
|
|
||||||
# Maybe MiddleEastern can be unified with Arabic.
|
|
||||||
# Maybe Thaana can be moved here from SouthAsian.
|
|
||||||
# Maybe it can be unified with African.
|
|
||||||
MiddleEastern: [Hebr, Syrc]
|
|
||||||
African: [Ethi, Nkoo, Tfng, Copt]
|
|
||||||
# India, Nepal, Bangladesh, Sri-Lanka, Bhutan, Maldives.
|
|
||||||
#
|
|
||||||
# Thaana (Thaa, the script of Maldives) is here, even though it's RTL,
|
|
||||||
# because it's closer geographically to India. Maybe it should be moved
|
|
||||||
# to MiddleEastern or to Arabic, if that would be easier to users.
|
|
||||||
#
|
|
||||||
# Tibetan (Tibt) is here, even though it's classified as "Central Asian" by
|
|
||||||
# Unicode, because linguistically and geographically it's closely related to
|
|
||||||
# the Brahmic family.
|
|
||||||
SouthAsian: [Beng, Deva, Gujr, Guru, Knda, Mlym, Orya, Saur, Sinh, Taml, Telu, Tibt, Thaa, Wara]
|
|
||||||
Cyrillic: [Cyrl]
|
|
||||||
CJK: [Hans, Hant, Kana, Kore, Jpan, Yiii]
|
|
||||||
SouthEastAsian: [Bali, Batk, Bugi, Java, Khmr, Laoo, Mymr, Thai]
|
|
||||||
Mongolian: [Mong]
|
|
||||||
SignWriting: [Sgnw]
|
|
||||||
NativeAmerican: [Cher, Cans]
|
|
||||||
# Undetermined script
|
|
||||||
Special: [Zyyy]
|
|
||||||
|
|
||||||
rtlscripts:
|
|
||||||
[Arab, Hebr, Syrc, Nkoo, Thaa]
|
|
||||||
|
|
||||||
# The numbers are also used in HTML id attributes
|
|
||||||
regiongroups:
|
|
||||||
# Worldwide, international
|
|
||||||
WW: 1
|
|
||||||
# Special languages. To be shown near Worldwide when relevant.
|
|
||||||
SP: 1
|
|
||||||
# America
|
|
||||||
AM: 2
|
|
||||||
# Europe
|
|
||||||
EU: 3
|
|
||||||
# Middle East
|
|
||||||
ME: 3
|
|
||||||
# Africa
|
|
||||||
AF: 3
|
|
||||||
# Asia
|
|
||||||
AS: 4
|
|
||||||
# Pacific
|
|
||||||
PA: 4
|
|
||||||
@@ -1,130 +0,0 @@
|
|||||||
<?php
|
|
||||||
/**
|
|
||||||
* Script to create the language data in JSON format for ULS.
|
|
||||||
*
|
|
||||||
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
|
||||||
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
|
||||||
* contributors. See CREDITS for a list.
|
|
||||||
*
|
|
||||||
* UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
|
|
||||||
* have to do anything special to choose one license or the other and you don't
|
|
||||||
* have to notify anyone which license you are using. You are free to use
|
|
||||||
* UniversalLanguageSelector in commercial projects as long as the copyright
|
|
||||||
* header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
|
|
||||||
*
|
|
||||||
* @file
|
|
||||||
* @ingroup Extensions
|
|
||||||
* @licence GNU General Public Licence 2.0 or later
|
|
||||||
* @licence MIT License
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Spyc provides spyc_load(), the YAML parser used below. Nothing in this
// script can work without it, so stop immediately (require) instead of
// continuing after a warning (include).
require __DIR__ . '/Spyc.php';

print "Reading langdb.yaml...\n";
// Note: unlike the require above, this path is resolved against the current
// working directory, not against the directory of this script.
$yamlLangdb = file_get_contents( 'langdb.yaml' );
if ( $yamlLangdb === false ) {
	// file_get_contents() returns false when the file is missing or
	// unreadable; parsing that value would silently produce an empty langdb.
	die( "Failed to read langdb.yaml.\n" );
}
$parsedLangdb = spyc_load( $yamlLangdb );
|
|
||||||
|
|
||||||
// Download CLDR's supplementalData.xml into the current working directory.
$supplementalDataFilename = 'supplementalData.xml';
$supplementalDataUrl =
	"http://unicode.org/repos/cldr/trunk/common/supplemental/$supplementalDataFilename";

// Open the destination first: if it cannot be written there is no point in
// starting the transfer, and cURL must not be handed an invalid handle.
$supplementalDataFile = fopen( $supplementalDataFilename, 'w' );
if ( $supplementalDataFile === false ) {
	die( "Failed to open $supplementalDataFilename for writing.\n" );
}

$curl = curl_init( $supplementalDataUrl );
if ( $curl === false ) {
	fclose( $supplementalDataFile );
	die( "Failed to download CLDR data from $supplementalDataUrl.\n" );
}

// Stream the response body straight into the file, without HTTP headers.
curl_setopt( $curl, CURLOPT_FILE, $supplementalDataFile );
curl_setopt( $curl, CURLOPT_HEADER, 0 );
// Without this, curl_exec() reports success for HTTP 4xx/5xx responses and
// the error page ends up saved as if it were the CLDR data.
curl_setopt( $curl, CURLOPT_FAILONERROR, true );
// Follow redirects so that a moved resource is fetched instead of the
// redirect response body.
curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );

print "Trying to download $supplementalDataUrl...\n";
$curlSuccess = curl_exec( $curl );

// Release both handles before deciding whether to abort, so they are closed
// on the failure path as well.
curl_close( $curl );
fclose( $supplementalDataFile );

if ( !$curlSuccess ) {
	die( "Failed to download CLDR data from $supplementalDataUrl.\n" );
}
print "Downloaded $supplementalDataFilename, trying to parse...\n";
|
|
||||||
|
|
||||||
// Parse the downloaded CLDR file. Anything other than a SimpleXMLElement
// means the file could not be loaded as XML, which is fatal for this script.
$supplementalData = simplexml_load_file( $supplementalDataFilename );

$supplementalData instanceof SimpleXMLElement
	or die( "Attempt to load CLDR data from $supplementalDataFilename failed.\n" );

print "CLDR supplemental data parsed successfully, reading territories info...\n";
|
|
||||||
// Build $parsedLangdb['territories']: territory code => list of language
// codes, read from the <territory>/<languagePopulation> records of the
// parsed CLDR supplemental data.
$parsedLangdb['territories'] = array();

foreach ( $supplementalData->territoryInfo->territory as $territoryNode ) {
	$territoryAttrs = $territoryNode->attributes();
	$territoryCode = (string) $territoryAttrs['type'][0];

	// Collect the codes for this territory locally and store them in one go.
	$territoryLanguages = array();

	foreach ( $territoryNode->languagePopulation as $populationNode ) {
		$populationAttrs = $populationNode->attributes();
		$cldrCode = (string) $populationAttrs['type'][0];

		// Lower case is a convention for language codes in ULS.
		// CLDR joins compound codes with '_'; ULS uses '-' instead.
		$languageCode = str_replace( '_', '-', strtolower( $cldrCode ) );
		$territoryLanguages[] = $languageCode;

		// For codes with variants also add the base code, because ULS might
		// treat them as separate languages, e.g. zh, zh-hant and zh-hans.
		$dashPosition = strpos( $languageCode, '-' );
		if ( $dashPosition !== false ) {
			$territoryLanguages[] = substr( $languageCode, 0, $dashPosition );
		}
	}

	$parsedLangdb['territories'][$territoryCode] = $territoryLanguages;
}
|
|
||||||
|
|
||||||
// Post-process every territory's language list: drop codes that langdb does
// not know, replace redirect entries by their target, remove territories
// that end up empty, and de-duplicate what is left.
foreach ( $parsedLangdb['territories'] as $territoryCode => $languages ) {
	$resolved = array();

	foreach ( $languages as $language ) {
		if ( !isset( $parsedLangdb['languages'][$language] ) ) {
			echo "Unknown language $language for territory $territoryCode\n";
			continue;
		}

		$record = $parsedLangdb['languages'][$language];
		if ( count( $record ) === 1 ) {
			// A single-element entry is a redirect to another language code.
			echo "Redirect for language $language to {$record[0]} territory $territoryCode\n";
			$resolved[] = $record[0];
		} else {
			$resolved[] = $language;
		}
	}

	// Clean-up to save space
	if ( count( $resolved ) === 0 ) {
		unset( $parsedLangdb['territories'][$territoryCode] );
		continue;
	}

	// Remove duplicates the redirects and base codes might have created, and
	// renumber from zero: otherwise the JSON conversion would emit these
	// lists as objects instead of arrays.
	$parsedLangdb['territories'][$territoryCode] =
		array_values( array_unique( $resolved ) );
}
|
|
||||||
|
|
||||||
print "Writing JSON langdb...\n";
|
|
||||||
$jsonVerbose = json_encode( $parsedLangdb, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE );
|
|
||||||
$jsonSlim = json_encode( $parsedLangdb, JSON_UNESCAPED_UNICODE );
|
|
||||||
$js = <<<JAVASCRIPT
|
|
||||||
// Please do not edit. This file is generated from data/langdb.yaml by ulsdata2json.php
|
|
||||||
( function ( $ ) {
|
|
||||||
'use strict';
|
|
||||||
$.uls = $.uls || {};
|
|
||||||
//noinspection JSHint
|
|
||||||
$.uls.data = $jsonSlim;
|
|
||||||
} ( jQuery ) );
|
|
||||||
|
|
||||||
JAVASCRIPT;
|
|
||||||
file_put_contents( '../src/jquery.uls.data.js', $js );
|
|
||||||
// For making diff review easier.
|
|
||||||
file_put_contents( 'generated-langdb.json', $jsonVerbose );
|
|
||||||
|
|
||||||
print "Done.\n";
|
|
||||||
32
scripts/fetch-language-data.sh
Executable file
32
scripts/fetch-language-data.sh
Executable file
@@ -0,0 +1,32 @@
|
|||||||
|
#!/bin/bash
#
# Fetches the latest wikimedia/language-data checkout into
# vendor/language-data and converts its language-data.json into
# src/jquery.uls.data.js by running browserify with scripts/transform.js.
#
# All paths are derived from the location of this script, so it can be
# started from any working directory.

# Stop at the first failing command (git, npm or browserify) instead of
# carrying on and printing "Done." after a failure.
set -euo pipefail

BASEDIR=$(dirname "$0")
BASEDIR="$BASEDIR/.."

# No trailing slash: the file name is appended with its own "/" below.
DEST="$BASEDIR/src"
CLONEDIR="$BASEDIR/vendor/language-data"

UPSTREAM="https://github.com/wikimedia/language-data.git"

echo "Getting latest language-data from $UPSTREAM"

# Update an existing clone, or create it on the first run.
if [ -d "$CLONEDIR" ]; then
	pushd "$CLONEDIR"
	git pull
	popd
else
	git clone "$UPSTREAM" "$CLONEDIR"
fi

if [ -d "$BASEDIR"/node_modules/browserify ]; then
	echo "browserify already installed"
else
	echo "Installing browserify"
	# Install relative to BASEDIR, because that is where the check above and
	# the invocation below look for it; a plain "npm install" would use the
	# caller's current directory. The subshell keeps our own cwd untouched.
	( cd "$BASEDIR" && npm install browserify )
fi

echo "Transforming language-data"

"$BASEDIR"/node_modules/browserify/bin/cmd.js "$CLONEDIR"/language-data.json -t "$BASEDIR"/scripts/transform.js -o "$DEST"/jquery.uls.data.js

echo "language-data wrote to $DEST/jquery.uls.data.js"
echo "Done."
|
||||||
15
scripts/transform.js
Normal file
15
scripts/transform.js
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
var through = require( 'through' );
|
||||||
|
|
||||||
|
module.exports = function ( file ) {
|
||||||
|
var data, end, write;
|
||||||
|
data = '( function ( $ ) {\n\t$.uls = $.uls || {};\n\t$.uls.data = ';
|
||||||
|
write = function ( buf ) {
|
||||||
|
return data += buf;
|
||||||
|
};
|
||||||
|
end = function () {
|
||||||
|
data += '\n} ( jQuery ) );';
|
||||||
|
this.queue( data );
|
||||||
|
return this.queue( null );
|
||||||
|
};
|
||||||
|
return through( write, end );
|
||||||
|
};
|
||||||
File diff suppressed because one or more lines are too long
@@ -20,207 +20,8 @@
|
|||||||
|
|
||||||
module( 'jquery.uls' );
|
module( 'jquery.uls' );
|
||||||
|
|
||||||
var orphanScripts, badRedirects, doubleRedirects, doubleAutonyms, languagesWithoutAutonym;
|
|
||||||
|
|
||||||
/*
 * Walks every language code in $.uls.data.languages and collects the
 * script of each language whose script group is reported as 'Other'.
 * A script is added once per language that uses it, so it may repeat.
 *
 * @return {Array} Scripts that fall into the 'Other' group.
 */
orphanScripts = function () {
	var code, languageScript,
		orphans = [];

	for ( code in $.uls.data.languages ) {
		languageScript = $.uls.data.getScript( code );

		// Scripts that belong to a named group are of no interest here.
		if ( $.uls.data.getGroupOfScript( languageScript ) !== 'Other' ) {
			continue;
		}

		orphans.push( languageScript );
	}

	return orphans;
};
|
|
||||||
|
|
||||||
/*
 * Walks every language code in $.uls.data.languages and collects the
 * codes that redirect to a target which has no entry of its own in
 * $.uls.data.languages.
 *
 * @return {Array} Language codes whose redirect target is missing.
 */
badRedirects = function () {
	var code, redirectTarget,
		broken = [];

	for ( code in $.uls.data.languages ) {
		redirectTarget = $.uls.data.isRedirect( code );

		// Skip non-redirects and redirects whose target is known.
		if ( !redirectTarget || $.uls.data.languages[redirectTarget] ) {
			continue;
		}

		broken.push( code );
	}

	return broken;
};
|
|
||||||
|
|
||||||
/*
 * Walks every non-redirect language code in $.uls.data.languages and
 * collects the codes whose autonym was already seen for an earlier code.
 * The first language carrying a given autonym is not reported.
 *
 * @return {Array} Language codes that repeat an earlier autonym.
 */
doubleAutonyms = function () {
	var code, name,
		seen = [],
		repeated = [];

	for ( code in $.uls.data.languages ) {
		// Redirects share the autonym of their target by design.
		if ( !$.uls.data.isRedirect( code ) ) {
			name = $.uls.data.getAutonym( code );

			if ( $.inArray( name, seen ) !== -1 ) {
				repeated.push( code );
			}

			seen.push( name );
		}
	}

	return repeated;
};
|
|
||||||
|
|
||||||
/*
 * Walks every language code in $.uls.data.languages and collects the
 * codes whose redirect target is itself a redirect. A redirect is
 * expected to point straight at a language.
 *
 * @return {Array} Language codes that redirect to another redirect.
 */
doubleRedirects = function () {
	var code, redirectTarget,
		chained = [];

	for ( code in $.uls.data.languages ) {
		redirectTarget = $.uls.data.isRedirect( code );

		// Not a redirect at all: nothing to check.
		if ( !redirectTarget ) {
			continue;
		}

		if ( $.uls.data.isRedirect( redirectTarget ) ) {
			chained.push( code );
		}
	}

	return chained;
};
|
|
||||||
|
|
||||||
/*
 * Walks every language code in $.uls.data.languages and collects the
 * codes for which getAutonym() does not return a string.
 *
 * @return {Array} Language codes without a string autonym.
 */
languagesWithoutAutonym = function () {
	var code,
		missing = [];

	for ( code in $.uls.data.languages ) {
		if ( typeof $.uls.data.getAutonym( code ) === 'string' ) {
			continue;
		}

		missing.push( code );
	}

	return missing;
};
|
|
||||||
|
|
||||||
// Sanity check: the jQuery plugin must be registered before anything else
// is tested.
test( '-- Initial check', 1, function ( assert ) {
	var plugin = $.fn.uls;

	assert.ok( plugin, '$.fn.uls is defined' );
} );
|
||||||
|
|
||||||
/*
 * Exercises the $.uls.data API: adding and deleting a language at run time,
 * redirects, autonyms, scripts and script groups, regions, sorting,
 * directionality and territories, plus the consistency checks implemented
 * by the helper functions defined earlier in this file.
 */
test( '-- $.uls.data testing', 31, function ( assert ) {
	var autonyms,
		languagesToGroup, groupedLanguages;

	// Add a language in run time.
	// This is done early to make sure that it doesn't break other functions.
	$.uls.data.addLanguage( 'qqq', {
		script: 'Latn',
		regions: ['SP'],
		autonym: 'Language documentation'
	} );

	// strictEqual rather than ok: assert.ok( value, message ) would take the
	// expected autonym as the message and pass for any truthy value.
	assert.strictEqual( $.uls.data.getAutonym( 'qqq' ), 'Language documentation', 'Language qqq was added with the correct autonym' );

	assert.strictEqual( $.uls.data.isRedirect( 'sr-ec' ), 'sr-cyrl', '"sr-ec" is a redirect to "sr-cyrl"' );
	autonyms = $.uls.data.getAutonyms();
	assert.strictEqual( autonyms['zu'], 'isiZulu', 'Correct autonym is returned for Zulu using getAutonyms().' );
	assert.strictEqual( autonyms['pa'], undefined, 'Language "pa" is not listed in autonyms, because it is a redirect' );
	assert.strictEqual( autonyms['pa-guru'], 'ਪੰਜਾਬੀ', 'Language "pa-guru" has the correct autonym' );

	// This test assumes that we don't want any scripts to be in the 'Other'
	// group. Actually, this may become wrong some day.
	assert.deepEqual( orphanScripts(), [], 'All scripts belong to script groups.' );

	assert.deepEqual( badRedirects(), [], 'All redirects have valid targets.' );
	assert.deepEqual( doubleRedirects(), [], 'There are no double redirects.' );
	assert.deepEqual( languagesWithoutAutonym(), [], 'All languages have autonyms.' );
	assert.deepEqual( doubleAutonyms(), [], 'All languages have distinct autonyms.' );

	assert.strictEqual(
		$.uls.data.getGroupOfScript( 'Beng' ),
		'SouthAsian',
		'Bengali script belongs to the SouthAsian group.'
	);
	// 'iu' is the code for Inuktitut ('ik' would be Inupiaq).
	assert.strictEqual(
		$.uls.data.getScriptGroupOfLanguage( 'iu' ),
		'NativeAmerican',
		'The script of the Inuktitut language belongs to the NativeAmerican group.'
	);

	assert.strictEqual( $.uls.data.getScript( 'no-such-language' ), 'Zyyy', 'A script for an unknown language is Zyyy - undetermined' );
	assert.strictEqual( $.uls.data.getScript( 'ii' ), 'Yiii', 'Correct script of the Yi language was selected' );
	assert.deepEqual( $.uls.data.getRegions( 'lzz' ), [
		'EU', 'ME'
	], 'Correct regions of the Laz language were selected' );
	assert.strictEqual( $.uls.data.getRegions( 'no-such-language' ), 'UNKNOWN', 'The region of an invalid language is "UNKNOWN"' );

	assert.deepEqual( $.uls.data.getLanguagesInScript( 'Knda' ), [
		'kn', 'tcy'
	], 'languages in script Knda are selected correctly' );
	assert.deepEqual( $.uls.data.getLanguagesInScript( 'Guru' ),
		['pa-guru'],
		'"pa-guru" is written in script Guru, and "pa" is skipped as a redirect'
	);
	assert.deepEqual( $.uls.data.getLanguagesInScripts( ['Geor', 'Armn'] ),
		['hy', 'ka', 'xmf'],
		'languages in scripts Geor and Armn are selected correctly'
	);

	assert.strictEqual( $.uls.data.getAutonym( 'pa' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected using code pa.' );
	assert.strictEqual( $.uls.data.getAutonym( 'pa-guru' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected using code pa-guru.' );

	// Input mixes aliases and their targets in both orders, to check that
	// grouping does not depend on which of the two comes first.
	languagesToGroup = {
		'en': 'English',
		'fiu-vro': 'Võro', // Alias before target
		'ru': 'русский',
		'sr': 'српски', // Alias before target
		'sr-cyrl': 'српски', // Target before alias
		'sr-latn': 'srpski', // Target before alias
		'sr-el': 'srpski', // Alias after target
		'vro': 'Võro' // Target after alias
	};
	groupedLanguages = {
		Latin: [ 'en', 'fiu-vro', 'sr-latn', 'sr-el', 'vro' ],
		Cyrillic: [ 'ru', 'sr', 'sr-cyrl' ]
	};

	assert.deepEqual( $.uls.data.getLanguagesByScriptGroup( languagesToGroup ), groupedLanguages,
		'A custom list of languages is grouped correctly using getLanguagesByScriptGroup.' );

	// autonyms: gn: avañe'ẽ, de: deutsch, hu: magyar, fi: suomi
	assert.deepEqual( ['de', 'fi', 'gn', 'hu'].sort( $.uls.data.sortByAutonym ), [
		'gn', 'de', 'hu', 'fi'
	], 'Languages are correctly sorted by autonym' );

	assert.strictEqual( $.uls.data.isRtl( 'te' ), false, 'Telugu language is not RTL' );
	assert.strictEqual( $.uls.data.isRtl( 'dv' ), true, 'Divehi language is RTL' );
	assert.strictEqual( $.uls.data.getDir( 'mzn' ), 'rtl', 'Mazandarani language is RTL' );
	assert.strictEqual( $.uls.data.getDir( 'uk' ), 'ltr', 'Ukrainian language is LTR' );

	assert.ok(
		$.inArray( 'sah', $.uls.data.getLanguagesInTerritory( 'RU' ) ) > -1,
		'Sakha language is spoken in Russia'
	);

	// Clean up the language added at the top, and check the failure path of
	// deleteLanguage with a code that was never added.
	assert.ok( $.uls.data.deleteLanguage( 'qqq' ), 'Deleting language qqq, which was added earlier, returns true.' );
	assert.strictEqual( $.uls.data.languages['qqq'], undefined, 'Data about qqq is undefined after being deleted.' );
	assert.ok( !$.uls.data.deleteLanguage( 'qqr' ), 'Deleting language qqr, which was never added, returns false.' );
} );
|
|
||||||
}( jQuery ) );
|
}( jQuery ) );
|
||||||
|
|||||||
Reference in New Issue
Block a user