Make all autonyms unique and add a test for this

In a list of languages to select from, autonyms must be unique so that
every user knows which one to select.

* Batak Toba language in the Batak writing variant (bbc-batk) has
  an autonym in the right script now. This is taken from the Indonesian
  Wikipedia article "Surat Batak".
* Similarly to the practice with pa -> pa-guru:
  * iu for Inuktitut in the Canadian script redirects to ike-cans.
  * ks for Kashmiri in the Arabic writing redirects to ks-arab.
  * tg for Tajik in the Cyrillic script redirects to tg-cyrl.
* zgh has an updated autonym, taken from Ethnologue.
* zh-classical redirects to lzh, similarly to zh-yue -> yue.
This commit is contained in:
Amire80
2017-06-09 20:35:47 +03:00
parent e01c2e6440
commit 777d71e0d2
4 changed files with 50 additions and 46 deletions

View File

@@ -316,14 +316,10 @@
[
"AS"
],
"Batak Toba"
"ᯅᯖᯂ᯲ ᯖᯬᯅ"
],
"bbc": [
"Latn",
[
"AS"
],
"Batak Toba"
"bbc-latn"
],
"bcc": [
"Arab",
@@ -1445,11 +1441,7 @@
"italiano"
],
"iu": [
"Cans",
[
"AM"
],
"ᐃᓄᒃᑎᑐᑦ"
"ike-cans"
],
"ja": [
"Jpan",
@@ -1756,11 +1748,7 @@
"कॉशुर"
],
"ks": [
"Arab",
[
"AS"
],
"کٲشُر"
"ks-arab"
],
"ksf": [
"Latn",
@@ -3239,11 +3227,7 @@
"tojikī"
],
"tg": [
"Cyrl",
[
"AS"
],
"тоҷикӣ"
"tg-cyrl"
],
"th": [
"Thai",
@@ -3700,7 +3684,7 @@
[
"AF"
],
"ⵜⴰⵎⴰⵣⵉⵖⵜ"
"ⵜⴰⵎⴰⵣⵉⵖⵜ ⵜⴰⵏⴰⵡⴰⵢⵜ"
],
"zh": [
"Hans",
@@ -3710,11 +3694,7 @@
"中文"
],
"zh-classical": [
"Hant",
[
"AS"
],
"文言"
"lzh"
],
"zh-cn": [
"Hans",
@@ -4076,7 +4056,7 @@
"pdt",
"cr",
"yi",
"iu",
"ike-cans",
"atj"
],
"CC": [
@@ -4356,7 +4336,7 @@
"zh",
"el",
"it",
"ks",
"ks-arab",
"gd",
"yi",
"ml",
@@ -4483,7 +4463,7 @@
"bug",
"bjn",
"ace",
"bbc",
"bbc-latn",
"zh-hant",
"zh",
"sly",
@@ -4531,7 +4511,7 @@
"hne",
"ne",
"sat",
"ks",
"ks-arab",
"gom-deva",
"sd",
"doi",
@@ -4984,10 +4964,10 @@
"brh",
"fa",
"bgn",
"tg",
"tg-cyrl",
"bft",
"khw",
"ks"
"ks-arab"
],
"PL": [
"pl",
@@ -5242,7 +5222,7 @@
"shn"
],
"TJ": [
"tg",
"tg-cyrl",
"ru",
"fa",
"ar"

View File

@@ -51,9 +51,8 @@ languages:
bar: [Latn, [EU], Boarisch]
bat-smg: [sgs]
bbc-latn: [Latn, [AS], Batak Toba]
# FIXME - autonym in the Batak script is unknown
bbc-batk: [Batk, [AS], Batak Toba]
bbc: [Latn, [AS], Batak Toba]
bbc-batk: [Batk, [AS], ᯅᯖᯂ᯲ ᯖᯬᯅ]
bbc: [bbc-latn]
bcc: [Arab, [AS, ME], جهلسری بلوچی]
bcl: [Latn, [AS], Bikol Central]
be-tarask: [Cyrl, [EU], беларуская (тарашкевіца)]
@@ -228,8 +227,7 @@ languages:
io: [Latn, [WW], Ido]
is: [Latn, [EU], íslenska]
it: [Latn, [EU], italiano]
# For variants ike-* is used
iu: [Cans, [AM], ᐃᓄᒃᑎᑐᑦ]
iu: [ike-cans]
ja: [Jpan, [AS], 日本語]
jam: [Latn, [AM], Patois]
jbo: [Latn, [WW], lojban]
@@ -279,7 +277,7 @@ languages:
krl: [Latn, [EU], Karjala]
ks-arab: [Arab, [AS], کٲشُر]
ks-deva: [Deva, [AS], कॉशुर]
ks: [Arab, [AS], کٲشُر]
ks: [ks-arab]
ksf: [Latn, [AF], Bafia]
ksh: [Latn, [EU], Ripoarisch]
# CLDR uses ku-latn and ku-arab. Latin is more common and is used in Wikipedia.
@@ -502,7 +500,7 @@ languages:
tet: [Latn, [AS, PA], tetun]
tg-cyrl: [Cyrl, [AS], тоҷикӣ]
tg-latn: [Latn, [AS], tojikī]
tg: [Cyrl, [AS], тоҷикӣ]
tg: [tg-cyrl]
th: [Thai, [AS], ไทย]
ti: [Ethi, [AF], ትግርኛ]
tig: [Ethi, [AF], ትግረ]
@@ -570,10 +568,10 @@ languages:
yue: [Hant, [AS], 粵語]
za: [Latn, [AS], Vahcuengh]
zea: [Latn, [EU], Zeêuws]
zgh: [Tfng, [AF], ⵜⴰⵎⴰⵣⵉⵖⵜ]
zgh: [Tfng, [AF], ⵜⴰⵎⴰⵣⵉⵖⵜ ⵜⴰⵏⴰⵡⴰⵢⵜ]
# world? (may apply to many varieties of Chinese)
zh: [Hans, [AS], 中文]
zh-classical: [Hant, [AS], 文言]
zh-classical: [lzh]
zh-cn: [Hans, [AS], 中文(中国大陆)]
zh-hans: [Hans, [AS], 中文(简体)]
zh-hant: [Hant, [AS], 中文(繁體)]

File diff suppressed because one or more lines are too long

View File

@@ -20,7 +20,7 @@
module( 'jquery.uls' );
var orphanScripts, badRedirects, doubleRedirects, languagesWithoutAutonym;
var orphanScripts, badRedirects, doubleRedirects, doubleAutonyms, languagesWithoutAutonym;
/*
* Runs over all script codes mentioned in langdb and checks whether
@@ -59,6 +59,31 @@
return result;
};
/*
 * Collects every non-redirect language code whose autonym was already
 * used by a language visited earlier in $.uls.data.languages.
 * An empty result means that all autonyms are unique.
 */
doubleAutonyms = function () {
	var code, name,
		seen = [],
		repeated = [];

	for ( code in $.uls.data.languages ) {
		// Redirect entries are not checked; only real language entries are compared.
		if ( !$.uls.data.isRedirect( code ) ) {
			name = $.uls.data.getAutonym( code );

			if ( seen.indexOf( name ) !== -1 ) {
				repeated.push( code );
			}

			// Recorded even when it is a repeat, exactly like the first occurrence.
			seen.push( name );
		}
	}

	return repeated;
};
/*
* Runs over all languages and checks that all redirects point to a language.
* There's no reason to have double redirects.
@@ -99,7 +124,7 @@
assert.ok( $.fn.uls, '$.fn.uls is defined' );
} );
test( '-- $.uls.data testing', 30, function ( assert ) {
test( '-- $.uls.data testing', 31, function ( assert ) {
var autonyms,
languagesToGroup, groupedLanguages;
@@ -126,6 +151,7 @@
assert.deepEqual( badRedirects(), [], 'All redirects have valid targets.' );
assert.deepEqual( doubleRedirects(), [], 'There are no double redirects.' );
assert.deepEqual( languagesWithoutAutonym(), [], 'All languages have autonyms.' );
assert.deepEqual( doubleAutonyms(), [], 'All languages have distinct autonyms.' );
assert.strictEqual(
$.uls.data.getGroupOfScript( 'Beng' ),