A very simple mechanism for importing per-country language lists from CLDR into ULS's langdb. If I understand correctly, we only need the languages spoken in a country, ordered by number of speakers. The CLDR data already provides this, and it should be mostly useful as is. Also added a utility function and a test.

Some tweaks to override the CLDR data are still needed:
* The data as it is omits some useful languages. For example, Amharic is not listed in Eritrea.
* Some countries have a very large number of languages. Ideally this is right, but it is not practical currently; for example, India has 75. Hand-picking or limiting the choice to the top X languages may be useful, but requires thought.
* Some language codes are standard, but differ from Wikipedia practice, for example "pa_Guru" (we just write "pa"). A mapping of codes may be needed.

Change-Id: I3c0cd5a9118997ba39a4f3695978e359f3de6956
136 lines
5.3 KiB
JavaScript
136 lines
5.3 KiB
JavaScript
/**
 * QUnit tests for ULS.
 *
 * Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
 * Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
 * contributors. See CREDITS for a list.
 *
 * UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
 * have to do anything special to choose one license or the other and you don't
 * have to notify anyone which license you are using. You are free to use
 * UniversalLanguageSelector in commercial projects as long as the copyright
 * header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
 *
 * @file
 * @ingroup Extensions
 * @licence GNU General Public Licence 2.0 or later
 * @licence MIT License
 */
|
|
|
|
( function () {
|
|
"use strict";
|
|
|
|
module( "ext.uls", QUnit.newMwEnvironment() );
|
|
|
|
/*
 * Runs over all languages mentioned in langdb, looks up the script of each
 * one with $.uls.data.script() and collects the scripts for which
 * $.uls.data.groupOfScript() answers 'Other'.
 *
 * Every offending script is reported only once, no matter how many
 * languages are written in it, so that a failing assertion stays readable.
 *
 * @return {Array} Distinct script codes that belong to the 'Other' group, in
 *  the order in which they were first encountered. Empty if there are none.
 */
var orphanScripts = function () {
	var language, script,
		languages = $.uls.data.languages,
		hasOwn = Object.prototype.hasOwnProperty,
		seen = {},
		result = [];

	for ( language in languages ) {
		// Ignore keys that are inherited through the prototype chain;
		// they are not langdb entries.
		if ( !hasOwn.call( languages, language ) ) {
			continue;
		}

		script = $.uls.data.script( language );

		if ( $.uls.data.groupOfScript( script ) === 'Other' && !hasOwn.call( seen, script ) ) {
			seen[script] = true;
			result.push( script );
		}
	}

	return result;
};
|
|
|
|
/*
 * Runs over all language codes mentioned in langdb and checks whether
 * they have something that looks like an autonym, that is, whether
 * $.uls.data.autonym() returns a string for them.
 *
 * @return {Array} Language codes for which no string autonym was returned,
 *  in langdb iteration order. Empty if every language has one.
 */
var languagesWithoutAutonym = function () {
	var language,
		languages = $.uls.data.languages,
		result = [];

	for ( language in languages ) {
		// Ignore keys that are inherited through the prototype chain;
		// they are not langdb entries.
		if ( !Object.prototype.hasOwnProperty.call( languages, language ) ) {
			continue;
		}

		if ( typeof $.uls.data.autonym( language ) !== 'string' ) {
			result.push( language );
		}
	}

	return result;
};
|
|
|
|
// Smoke test: checks that the ULS jQuery plugin entry point, $.fn.uls,
// is defined.
test( "-- Initial check", function () {
	// Exactly one assertion is made below.
	expect( 1 );

	ok( $.fn.uls, "$.fn.uls is defined" );
} );
|
|
|
|
// Exercises the $.uls.data lookup helpers (autonyms, scripts, script groups,
// regions, sorting, directionality and territories) against the langdb data.
test( "-- $.uls.data testing", function () {
	// Must match the number of assertions made in this callback.
	expect( 24 );

	strictEqual( $.uls.data.autonyms()['he'], 'עברית', 'Correct autonym is returned for Hebrew using autonyms().' );

	// This test assumes that we don't want any scripts to be in the 'Other'
	// group. Actually, this may become wrong some day.
	deepEqual( orphanScripts(), [], 'All scripts belong to script groups.' );
	deepEqual( languagesWithoutAutonym(), [], 'All languages have autonyms.' );

	strictEqual(
		$.uls.data.groupOfScript( 'Beng' ),
		'SouthAsian',
		'Bengali script belongs to the SouthAsian group.'
	);
	// 'iu' is the code of Inuktitut; Inupiaq is 'ik'.
	strictEqual(
		$.uls.data.scriptGroupOfLanguage( 'iu' ),
		'NativeAmerican',
		'The script of the Inuktitut language belongs to the NativeAmerican group.'
	);

	strictEqual( $.uls.data.script( 'ii' ), 'Yiii', 'Correct script of the Yi language was selected' );
	deepEqual( $.uls.data.regions( 'lzz' ), [ 'EU', 'ME' ], 'Correct regions of the Laz language were selected' );
	strictEqual( $.uls.data.regions( 'no-such-language' ), 'UNKNOWN', "The region of an invalid language is 'UNKNOWN'" );

	var allLanguagesByRegionAndScript = $.uls.data.allLanguagesByRegionAndScript();
	deepEqual( allLanguagesByRegionAndScript['3']['AS']['SouthEastAsian']['Bugi'], ['bug'], 'All languages in the Buginese script in Asia were selected' );

	deepEqual( $.uls.data.languagesInRegion( 'AU' ), ["en-gb", "en", "hif-latn", "hif", "mi", "na"], "languages of region AU are selected correctly" );
	deepEqual( $.uls.data.languagesInRegions( ['NA', 'WW'] ),
		[
			"akz", "ase", "avk", "cho", "chr", "chy", "cr-cans", "cr-latn", "cr",
			"en-ca", "en", "eo", "es-formal", "es", "esu", "fr",
			"haw", "ht", "ia", "ie", "ik", "ike-cans", "ike-latn", "io",
			"iu", "jam", "jbo", "kl", "lfn", "mic", "mus", "nah", "nov", "nv",
			"pdc", "pdt", "sei", "simple", "srn", "tokipona",
			"vo", "yi", "yua"
		],
		"languages of regions NA and WW are selected correctly"
	);

	deepEqual( $.uls.data.languagesInScript( 'Knda' ), ["kn", "tcy"], "languages in script Knda are selected correctly" );
	deepEqual( $.uls.data.languagesInScripts( ['Geor', 'Armn'] ),
		["hy", "ka", "xmf"],
		"languages in scripts Geor and Armn are selected correctly"
	);

	deepEqual( $.uls.data.regionsInGroup( 1 ), ["NA", "LA", "SA"], "regions in group 1 are selected correctly" );
	deepEqual( $.uls.data.regionsInGroup( 4 ), ["WW"], "regions in group 4 are selected correctly" );

	var languagesByScriptInNA = $.uls.data.languagesByScriptInRegion( 'NA' );
	deepEqual( languagesByScriptInNA['Cans'], ["cr-cans", "cr", "ike-cans", "iu"], "correct languages in Cans in NA selected" );

	strictEqual( $.uls.data.autonym( 'pa' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected' );

	var languagesByScriptGroupInEMEA = $.uls.data.languagesByScriptGroupInRegions( $.uls.data.regionsInGroup( 2 ) );
	deepEqual( languagesByScriptGroupInEMEA['WestCaucasian'], ['hy', 'ka', 'xmf'], 'Correct languages in WestCaucasian script group selected' );

	var allLanguagesByScriptGroup = $.uls.data.allLanguagesByScriptGroup();
	deepEqual( allLanguagesByScriptGroup['Greek'], ['el', 'grc', 'pnt', 'ruq-grek', 'tsd'], 'All languages in the Greek script found' );

	deepEqual( $.uls.data.allRegions(), ['NA', 'LA', 'SA', 'EU', 'ME', 'AF', 'AS', 'PA', 'AU', 'WW'], 'All regions found' );

	// autonyms: gn: avañe'ẽ, de: deutsch, hu: magyar, fi: suomi
	deepEqual( ['de', 'fi', 'gn', 'hu'].sort( $.uls.data.sortByAutonym ), ['gn', 'de', 'hu', 'fi'], 'Languages are correctly sorted by autonym' );

	strictEqual( $.uls.data.isRtl( "te" ), false, "Telugu language is not RTL" );
	strictEqual( $.uls.data.isRtl( "dv" ), true, "Divehi language is RTL" );

	ok( $.inArray( "sah", $.uls.data.languagesInTerritory( "RU" ) ) > -1, "Sakha language is spoken in Russia" );
} );
|
|
|
|
}() );
|