Merge pull request #59 from amire80/arrange_aliases

Aliases for all the languages that need them
This commit is contained in:
Santhosh Thottingal
2013-01-02 02:33:22 -08:00
3 changed files with 63 additions and 58 deletions

View File

@@ -2,9 +2,8 @@ languages:
aa: [Latn, [AF], Qafár af]
ab: [Cyrl, [EU], Аҧсшәа]
ace: [Latn, [AS, PA], Acèh]
# FIXME: alias
ady: [Cyrl, [EU, ME], Адыгэбзэ]
ady-cyrl: [Cyrl, [EU, ME], Адыгэбзэ]
ady-cyrl: [ady]
ady-latn: [Latn, [EU, ME], Adygabze]
aeb: [Arab, [AF], زَوُن]
af: [Latn, [AF], Afrikaans]
@@ -30,21 +29,23 @@ languages:
avk: [Latn, [WW], Kotava]
ay: [Latn, [AM], Aymar aru]
# Latin is the default in Azerbaijani Wikipedia
# FIXME: alias
az: [Latn, [EU, ME], azərbaycanca]
# CLDR uses az-latn and az-arab
az: [az-latn]
az-latn: [Latn, [EU, ME], azərbaycanca]
az-arab: [Arab, [EU, ME], آذربايجانجا]
az-cyrl: [Cyrl, [EU, ME], азәрбајҹанҹа]
# CLDR uses az-arab for the Arabic-script Azerbaijani of Iran
azb: [az-arab]
ba: [Cyrl, [EU], башҡортса]
bar: [Latn, [EU], Boarisch]
bbc-latn: [Latn, [AS], Batak Toba]
# FIXME
bbc: [Batk, [AS], Batak Toba/Batak autonym unknown]
# FIXME - autonym in the Batak script is unknown
bbc-batk: [Batk, [AS], Batak Toba]
bbc: [Latn, [AS], Batak Toba]
bcc: [Arab, [AS, ME], بلوچی مکرانی]
bcl: [Latn, [AS], Bikol Central]
# FIXME: alias
be-tarask: [Cyrl, [EU], беларуская (тарашкевіца)]
be-x-old: [Cyrl, [EU], беларуская (тарашкевіца)]
be-x-old: [be-tarask]
be: [Cyrl, [EU], беларуская]
bew: [Latn, [AS], Bahasa Betawi]
bfq: [Taml, [AS], படகா]
@@ -77,17 +78,15 @@ languages:
ckb: [Arab, [ME], کوردی]
co: [Latn, [EU], corsu]
cps: [Latn, [AS], Capiceño]
# FIXME: alias
cr: [Cans, [AM], ᓀᐦᐃᔭᐍᐏᐣ]
cr-cans: [Cans, [AM], ᓀᐦᐃᔭᐍᐏᐣ]
cr-cans: [cr]
cr-latn: [Latn, [AM], Nēhiyawēwin]
# FIXME: alias
crh: [Latn, [EU], qırımtatarca]
crh-cyrl: [Cyrl, [EU], къырымтатарджа]
crh-latn: [Latn, [EU], qırımtatarca]
crh-latn: [crh]
cs: [Latn, [EU], česky]
csb: [Latn, [EU], kaszëbsczi]
# FIXME: what script?
# FIXME: which script to prefer?..
cu: [Cyrl, [EU], словѣ́ньскъ / ⰔⰎⰑⰂⰡⰐⰠⰔⰍⰟ]
cv: [Cyrl, [EU], Чӑвашла]
cy: [Latn, [EU], Cymraeg]
@@ -133,9 +132,8 @@ languages:
ga: [Latn, [EU], Gaeilge]
gag: [Latn, [EU], Gagauz]
gah: [Latn, [AS], Alekano]
# FIXME: alias
gan-hans: [Hans, [AS], 赣语(简体)]
gan-hant: [Hant, [AS], 贛語(繁體)]
gan-hans: [Hans, [AS], 赣语(简体)]
gan-hant: [gan]
gan: [Hant, [AS], 贛語]
gbz: [Latn, [AS], Dari]
gcf: [Latn, [AM], Guadeloupean Creole French]
@@ -143,9 +141,8 @@ languages:
gl: [Latn, [EU], galego]
glk: [Arab, [ME], گیلکی]
gn: [Latn, [AM], "Avañe'ẽ"]
# FIXME: alias
gom: [Deva, [AS], कोंकणी]
gom-deva: [Deva, [AS], कोंकणी]
gom-deva: [gom]
gom-latn: [Latn, [AS], Konknni]
got: [Goth, [EU], 𐌲𐌿𐍄𐌹𐍃𐌺]
grc: [Grek, [EU], Ἀρχαία ἑλληνικὴ]
@@ -154,25 +151,23 @@ languages:
guc: [Latn, [AM], Wayúu]
gur: [Latn, [AF], Gurenɛ]
gv: [Latn, [EU], Gaelg]
# FIXME: alias
# CLDR uses ha-latn and ha-arab. Latin is more common and is used in Wikipedia.
ha-arab: [Arab, [AF], هَوُسَ]
ha-latn: [Latn, [AF], Hausa]
ha: [Latn, [AF], Hausa]
ha: [ha-latn]
hak: [Latn, [AS], Hak-kâ-fa]
haw: [Latn, [AM, PA], Hawai`i]
he: [Hebr, [ME], עברית]
hi: [Deva, [AS], हिन्दी]
# FIXME: alias
hif: [Latn, [PA, AS], Fiji Hindi]
hif-deva: [Deva, [AS], फ़ीजी हिन्दी]
hif-latn: [Latn, [PA, AS], Fiji Hindi]
hif-latn: [hif]
hil: [Latn, [AS], Ilonggo]
hne: [Deva, [AS], छत्तीसगढ़ी]
ho: [Latn, [PA], Hiri Motu]
hr: [Latn, [EU], hrvatski]
hsb: [Latn, [EU], hornjoserbsce]
hsn: [Hans, [AS], 湘语]
# Haitian Creole. North America, right?
ht: [Latn, [AM], Kreyòl ayisyen]
hu-formal: [Latn, [EU], Magyar (magázó)]
hu: [Latn, [EU], magyar]
@@ -204,7 +199,7 @@ languages:
kaa: [Latn, [AS], Qaraqalpaqsha]
# Can also be Tfng, but the Wikipedia is mostly Latn
kab: [Latn, [AF, EU], Taqbaylit]
kbd-cyrl: [Cyrl, [EU, ME], Адыгэбзэ]
kbd-cyrl: [kbd]
kbd-latn: [Latn, [EU], Qabardjajəbza]
kbd: [Cyrl, [EU, ME], Адыгэбзэ]
kea: [Latn, [AF], Kabuverdianu]
@@ -214,14 +209,13 @@ languages:
ki: [Latn, [AF], Gĩkũyũ]
kiu: [Latn, [EU, ME], Kırmancki]
kj: [Latn, [AF], Kwanyama]
# FIXME: alias
kk: [Cyrl, [EU, AS], қазақша]
kk: [kk-cyrl]
kk-arab: [Arab, [EU, AS], قازاقشا (تٴوتە)]
kk-cn: [Arab, [EU, AS, ME], قازاقشا (جۇنگو)]
kk-cn: [kk-arab]
kk-cyrl: [Cyrl, [EU, AS], қазақша]
kk-kz: [Cyrl, [EU, AS], қазақша (Қазақстан)]
kk-kz: [kk-cyrl]
kk-latn: [Latn, [EU, AS, ME], qazaqşa]
kk-tr: [Latn, [EU, AS, ME], qazaqşa (Türkïya)]
kk-tr: [kk-latn]
kl: [Latn, [AM, EU], kalaallisut]
km: [Khmr, [AS], ភាសាខ្មែរ]
kn: [Knda, [AS], ಕನ್ನಡ]
@@ -237,20 +231,18 @@ languages:
krl: [Latn, [EU], Karjala]
ks-arab: [Arab, [AS], کٲشُر]
ks-deva: [Deva, [AS], कॉशुर]
# FIXME: alias
ks: [Arab, [AS], کٲشُر]
ksf: [Latn, [AF], Bafia]
ksh: [Latn, [EU], Ripoarisch]
# FIXME: alias
ku: [Latn, [EU, ME], Kurdî]
# CLDR uses ku-latn and ku-arab. Latin is more common and is used in Wikipedia.
ku: [ku-latn]
ku-arab: [Arab, [EU, ME], كوردي]
ku-latn: [Latn, [EU, ME], Kurdî]
kv: [Cyrl, [EU], коми]
kw: [Latn, [EU], kernowek]
ky: [Cyrl, [AS], Кыргызча]
la: [Latn, [EU], Latina]
# FIXME: alias
lad: [Latn, [ME, EU, AM], Ladino]
lad: [lad-latn]
lad-latn: [Latn, [ME, EU, AM], Ladino]
lad-hebr: [Hebr, [ME, EU, AM], לאדינו]
lb: [Latn, [EU], Lëtzebuergesch]
@@ -368,15 +360,14 @@ languages:
ru: [Cyrl, [EU, AS, ME], русский]
rue: [Cyrl, [EU], русиньскый]
rup: [Latn, [EU], Armãneashce]
ruq: [Latn, [EU], Vlăheşte]
ruq-cyrl: [Cyrl, [EU], Влахесте]
ruq: [Cyrl, [EU], Влахесте]
ruq-cyrl: [ruq]
# FIXME: broken autonym
ruq-grek: [Grek, [EU], Megleno-Romanian (Greek script)]
ruq-latn: [Latn, [EU], Vlăheşte]
rw: [Latn, [AF], Kinyarwanda]
ryu: [Kana, [AS], ʔucināguci]
sa: [Deva, [AS], संस्कृतम्]
# Russian Far East - Europe, Asia, or both?
sah: [Cyrl, [EU, AS], саха тыла]
# Currently Latn, potentially Olck
sat: [Latn, [AS], Santali]
@@ -390,14 +381,10 @@ languages:
sei: [Latn, [AM], Cmique Itom]
sg: [Latn, [AF], Sängö]
sgs: [Latn, [EU], žemaitėška]
sh-cyrl: [Cyrl, [EU], српскохрватски]
sh-latn: [Latn, [EU], srpskohrvatski]
# FIXME: alias
sh: [Latn, [EU], srpskohrvatski]
shi-latn: [Latn, [AF], Tašlḥiyt]
shi-tfng: [Tfng, [AF], ⵜⴰⵛⵍⵃⵉⵜ]
# FIXME: which one to pick and alias?
shi: [Latn, [AF], Tašlḥiyt / ⵜⴰⵛⵍⵃⵉⵜ]
shi: [shi-latn]
shn: [Mymr, [AS], လိၵ်ႈတႆး]
si: [Sinh, [AS], සිංහල]
simple: [Latn, [WW], Simple English]
@@ -414,8 +401,7 @@ languages:
sn: [Latn, [AF], chiShona]
so: [Latn, [AF], Soomaaliga]
sq: [Latn, [EU], shqip]
# FIXME: do we need the double name here?
sr: [Cyrl, [EU], српски]
sr: [sr-cyrl]
sr-ec: [sr-cyrl]
sr-cyrl: [Cyrl, [EU], српски]
sr-el: [sr-latn]
@@ -453,11 +439,8 @@ languages:
tru: [Latn, [AS], Ṫuroyo]
ts: [Latn, [AF], Xitsonga]
tsd: [Grek, [EU], Τσακωνικά]
tt-cyrl: [Cyrl, [EU], татарча]
tt-latn: [Latn, [EU], tatarça]
# FIXME: alias
tt: [Cyrl, [EU], татарча]
tt-cyrl: [Cyrl, [EU], татарча]
tt-cyrl: [tt]
tt-latn: [Latn, [EU], tatarça]
ttt: [Cyrl, [AS], Tati]
tum: [Latn, [AF], chiTumbuka]
@@ -467,10 +450,12 @@ languages:
tyv: [Cyrl, [AS], тыва дыл]
tzm: [Tfng, [AF], ⵜⴰⵎⴰⵣⵉⵖⵜ]
udm: [Cyrl, [EU], удмурт]
# FIXME: alias
ug: [Arab, [AS], ئۇيغۇرچە ]
ug-arab: [Arab, [AS], ئۇيغۇرچە ]
ug-latn: [Latn, [AS], Uyghurche ]
# CLDR uses ug-arab, ug-latn and ug-cyrl.
# Arabic seems to have the largest number of users and is the main script in the Wikipedia.
ug: [ug-arab]
ug-arab: [Arab, [AS], ئۇيغۇرچە]
ug-latn: [Latn, [AS], uyghurche]
ug-cyrl: [Cyrl, [AS], уйғурчә]
uk: [Cyrl, [EU], українська]
ur: [Arab, [AS, ME], اردو]
uz: [Latn, [AS], oʻzbekcha]

File diff suppressed because one or more lines are too long

View File

@@ -51,6 +51,24 @@
return result;
};
/*
 * Runs over all languages and collects the double redirects: language
 * codes whose redirect target is itself a redirect.
 * A redirect should point straight at a real language entry, so the
 * returned array is expected to be empty.
 *
 * @return {Array} Codes of the languages that are double redirects.
 */
var doubleRedirects = function () {
	var language, target,
		languages = $.uls.data.languages,
		result = [];

	for ( language in languages ) {
		// for...in also visits enumerable properties inherited through the
		// prototype chain; only the database's own keys are language codes.
		if ( !Object.prototype.hasOwnProperty.call( languages, language ) ) {
			continue;
		}

		// isRedirect() yields the target code for a redirect; a falsy value
		// is treated here as "not a redirect".
		target = $.uls.data.isRedirect( language );
		if ( target && $.uls.data.isRedirect( target ) ) {
			result.push( language );
		}
	}

	return result;
};
/*
* Runs over all script codes mentioned in langdb and checks whether
* they have something that looks like an autonym.
@@ -71,7 +89,7 @@
assert.ok( $.fn.uls, "$.fn.uls is defined" );
} );
test( "-- $.uls.data testing", 40, function ( assert ) {
test( "-- $.uls.data testing", 41, function ( assert ) {
assert.strictEqual( $.uls.data.isRedirect( 'sr-ec' ), 'sr-cyrl', "'sr-ec' is a redirect to 'sr-cyrl'" );
var autonyms = $.uls.data.getAutonyms();
@@ -82,7 +100,9 @@
// This test assumes that we don't want any scripts to be in the 'Other'
// group. Actually, this may become wrong some day.
assert.deepEqual( orphanScripts(), [], 'All scripts belong to script groups.' );
assert.deepEqual( badRedirects(), [], 'All redirects have valid targets.' );
assert.deepEqual( doubleRedirects(), [], 'There are no double redirects.' );
assert.deepEqual( languagesWithoutAutonym(), [], 'All languages have autonyms.' );
assert.strictEqual(
@@ -116,17 +136,17 @@
assert.deepEqual( $.uls.data.getLanguagesInRegion( "PA" ),
[
"ace", "bi", "ch", "en-gb", "en", "fj", "haw", "hif", "hif-latn", "ho", "jv", "jv-java",
"ace", "bi", "ch", "en-gb", "en", "fj", "haw", "hif", "ho", "jv", "jv-java",
"mh", "mi", "na", "niu", "pih", "pis", "pt", "rtm", "sm", "tet",
"to", "tpi", "ty", "wls"
],
"languages of region PA are selected correctly" );
assert.deepEqual( $.uls.data.getLanguagesInRegions( ["AM", "WW"] ),
[
"akz", "arn", "aro", "ase", "avk", "ay", "cho", "chr", "chy", "cr", "cr-cans", "cr-latn",
"akz", "arn", "aro", "ase", "avk", "ay", "cho", "chr", "chy", "cr", "cr-latn",
"en-ca", "en", "eo", "es-419", "es-formal", "es", "esu", "fr", "gcf", "gn",
"guc", "haw", "ht", "ia", "ie", "ik", "ike-cans", "ike-latn", "io", "iu", "jam",
"jbo", "kgp", "kl", "lad", "lad-latn", "lad-hebr", "lfn", "mfe", "mic", "mus", "nah", "nl-informal", "nl",
"jbo", "kgp", "kl", "lad-latn", "lad-hebr", "lfn", "mfe", "mic", "mus", "nah", "nl-informal", "nl",
"nov", "nv", "pap", "pdc", "pdt", "ppl", "pt-br", "pt", "qu", "qug", "rap", "sei",
"simple", "srn", "tokipona", "vo", "yi", "yrl", "yua"
],
@@ -157,7 +177,7 @@
var languagesByScriptInAM = $.uls.data.getLanguagesByScriptInRegion( "AM" );
assert.deepEqual( languagesByScriptInAM['Cans'], [
"cr", "cr-cans", "ike-cans", "iu"
"cr", "ike-cans", "iu"
], "correct languages in Cans in AM selected" );
var languagesByScriptInEU = $.uls.data.getLanguagesByScriptInRegion( "EU" );