Add a PHP interface to work with the language data
Additional changes in this PR include: * Added composer.json * Refactored the folder structure. * Added editorconfig. * Added PHPCS and formatted existing code. * Changes to use a single license - GPL-2.0-or-later * ESLint related fixes Bug: T218639
This commit is contained in:
161
tests/js/index.js
Normal file
161
tests/js/index.js
Normal file
@@ -0,0 +1,161 @@
|
||||
var languageData = require( __dirname + '/../../src/index' ),
|
||||
assert = require( 'assert' );
|
||||
|
||||
describe( 'languagedata', function () {
	var orphanScripts, badRedirects, doubleRedirects, doubleAutonyms, languagesWithoutAutonym;

	/*
	 * Runs over all languages in langdb and collects the script of every
	 * language whose script belongs to the 'Other' group.
	 * The same script is reported once per language that uses it.
	 */
	orphanScripts = function () {
		var language, script,
			result = [];

		for ( language in languageData.getLanguages() ) {
			script = languageData.getScript( language );
			if ( languageData.getGroupOfScript( script ) === 'Other' ) {
				result.push( script );
			}
		}

		return result;
	};

	/*
	 * Runs over all languages and collects the redirects whose target
	 * is not itself an entry in the language list.
	 */
	badRedirects = function () {
		var language, target,
			// Fetched once: the list does not change while we iterate over it.
			languages = languageData.getLanguages(),
			result = [];

		for ( language in languages ) {
			target = languageData.isRedirect( language );
			if ( target && !languages[ target ] ) {
				result.push( language );
			}
		}

		return result;
	};

	/*
	 * Runs over all languages and collects the redirects whose target
	 * is a redirect as well. There's no reason to have double redirects.
	 */
	doubleRedirects = function () {
		var language, target,
			result = [];

		for ( language in languageData.getLanguages() ) {
			target = languageData.isRedirect( language );
			if ( target && languageData.isRedirect( target ) ) {
				result.push( language );
			}
		}

		return result;
	};

	/*
	 * Runs over all non-redirect languages and collects those whose autonym
	 * was already seen for an earlier language, i.e. the autonym is not unique.
	 */
	doubleAutonyms = function () {
		var language, autonym,
			autonyms = [],
			duplicateAutonyms = [];

		for ( language in languageData.getLanguages() ) {
			// A redirect is skipped so that it is not compared with its own target.
			if ( languageData.isRedirect( language ) ) {
				continue;
			}

			autonym = languageData.getAutonym( language );

			if ( autonyms.indexOf( autonym ) > -1 ) {
				duplicateAutonyms.push( language );
			}

			autonyms.push( autonym );
		}

		return duplicateAutonyms;
	};

	/*
	 * Runs over all language codes mentioned in langdb and collects those
	 * that don't have something that looks like an autonym (a string).
	 */
	languagesWithoutAutonym = function () {
		var language,
			result = [];

		for ( language in languageData.getLanguages() ) {
			if ( typeof languageData.getAutonym( language ) !== 'string' ) {
				result.push( language );
			}
		}

		return result;
	};

	it( 'language tags', function () {
		assert.ok( languageData.isKnown( 'ar' ), 'Language is unknown' );
		assert.ok( !languageData.isKnown( 'unknownLanguageCode!' ), 'Language is known' );
	} );

	it( 'autonyms', function () {
		var autonyms;

		// Add a language in run time.
		// This is done early to make sure that it doesn't break other functions.
		languageData.addLanguage( 'qqq', {
			script: 'Latn',
			regions: [ 'SP' ],
			autonym: 'Language documentation'
		} );
		// strictEqual, not ok: assert.ok() would treat the expected value as the
		// failure message and only check that the autonym is truthy.
		assert.strictEqual( languageData.getAutonym( 'qqq' ), 'Language documentation', 'Language qqq was added with the correct autonym' );

		autonyms = languageData.getAutonyms();
		assert.strictEqual( autonyms.zu, 'isiZulu', 'Correct autonym is returned for Zulu using getAutonyms().' );
		assert.strictEqual( autonyms.pa, undefined, 'Language "pa" is not listed in autonyms, because it is a redirect' );
		assert.strictEqual( autonyms[ 'pa-guru' ], 'ਪੰਜਾਬੀ', 'Language "pa-guru" has the correct autonym' );
		assert.deepStrictEqual( languagesWithoutAutonym(), [], 'All languages have autonyms.' );
		assert.deepStrictEqual( doubleAutonyms(), [], 'All languages have distinct autonyms.' );
		assert.strictEqual( languageData.getAutonym( 'pa' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected using code pa.' );
		assert.strictEqual( languageData.getAutonym( 'pa-guru' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected using code pa-guru.' );

		// autonyms: gn: avañe'ẽ, de: deutsch, hu: magyar, fi: suomi
		assert.deepStrictEqual( [ 'de', 'fi', 'gn', 'hu' ].sort( languageData.sortByAutonym ), [
			'gn', 'de', 'hu', 'fi'
		], 'Languages are correctly sorted by autonym' );
	} );

	it( 'regions and groups', function () {
		var languagesAM;

		assert.deepStrictEqual( languageData.getRegions( 'lzz' ), [
			'EU', 'ME'
		], 'Correct regions of the Laz language were selected' );
		assert.strictEqual( languageData.getRegions( 'no-such-language' ), 'UNKNOWN', 'The region of an invalid language is "UNKNOWN"' );
		assert.ok( languageData.getLanguagesInTerritory( 'RU' ).includes( 'sah' ), 'Sakha language is spoken in Russia' );

		languagesAM = [ 'atj', 'chr', 'chy', 'cr', 'en', 'es', 'fr', 'gn', 'haw', 'ike-cans', 'ik', 'kl', 'nl', 'pt', 'qu', 'srn', 'yi' ];
		assert.deepStrictEqual(
			languageData.sortByScriptGroup( languagesAM.sort( languageData.sortByAutonym ) ),
			[ 'atj', 'gn', 'en', 'es', 'fr', 'haw', 'ik', 'kl', 'nl', 'pt', 'qu', 'srn', 'chy', 'yi', 'ike-cans', 'cr', 'chr' ],
			'languages in region AM are ordered correctly by script group'
		);
	} );

	it( 'scripts', function () {
		// This test assumes that we don't want any scripts to be in the 'Other'
		// group. Actually, this may become wrong some day.
		assert.deepStrictEqual( orphanScripts(), [], 'All scripts belong to script groups.' );
		assert.deepStrictEqual( languageData.getLanguagesInScript( 'Guru' ), [ 'pa-guru' ], '"pa-guru" is written in script Guru, and "pa" is skipped as a redirect' );
		assert.deepStrictEqual( languageData.getLanguagesInScripts( [ 'Geor', 'Armn' ] ), [ 'hy', 'hyw', 'ka', 'xmf' ], 'languages in scripts Geor and Armn are selected correctly' );
		assert.deepStrictEqual( languageData.getLanguagesInScript( 'Knda' ), [
			'kn', 'tcy'
		], 'languages in script Knda are selected correctly' );
		assert.strictEqual( languageData.getGroupOfScript( 'Beng' ), 'SouthAsian', 'Bengali script belongs to the SouthAsian group.' );
		assert.strictEqual( languageData.getScriptGroupOfLanguage( 'iu' ), 'NativeAmerican', 'The script of the Inuktitut language belongs to the NativeAmerican group.' );
		assert.strictEqual( languageData.getScript( 'no-such-language' ), 'Zyyy', 'A script for an unknown language is Zyyy - undetermined' );
		assert.strictEqual( languageData.getScript( 'ii' ), 'Yiii', 'Correct script of the Yi language was selected' );
	} );

	it( 'redirects', function () {
		assert.strictEqual( languageData.isRedirect( 'sr-ec' ), 'sr-cyrl', '"sr-ec" is a redirect to "sr-cyrl"' );
		assert.deepStrictEqual( badRedirects(), [], 'All redirects have valid targets.' );
		assert.deepStrictEqual( doubleRedirects(), [], 'There are no double redirects.' );
	} );

	it( 'directionality', function () {
		assert.strictEqual( languageData.isRtl( 'te' ), false, 'Telugu language is not RTL' );
		assert.strictEqual( languageData.isRtl( 'dv' ), true, 'Divehi language is RTL' );
		assert.strictEqual( languageData.getDir( 'mzn' ), 'rtl', 'Mazandarani language is RTL' );
		assert.strictEqual( languageData.getDir( 'uk' ), 'ltr', 'Ukrainian language is LTR' );
	} );
} );
|
||||
275
tests/php/LanguageDataTest.php
Normal file
275
tests/php/LanguageDataTest.php
Normal file
@@ -0,0 +1,275 @@
|
||||
<?php
|
||||
require __DIR__ . '/../../src/LanguageData.php';
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Wikimedia\LanguageData;
|
||||
|
||||
/**
|
||||
* @coversDefaultClass \Wikimedia\LanguageData
|
||||
*/
|
||||
class LanguageDataTest extends TestCase {
	/**
	 * Instance under test, obtained through LanguageData::get() before each test.
	 *
	 * @var LanguageData
	 */
	protected $languageData;

	/**
	 * A code the tests expect to be absent from the language data.
	 * It is also reused below as a script code that does not exist.
	 */
	private const UNKNOWN_LANGUAGE_CODE = 'xyz';

	/**
	 * Unsorted input shared by the sorting tests; ends with the unknown code.
	 */
	private const SORT_INPUT = [
		'atj', 'chr', 'chy',
		'cr', 'en', 'es',
		'fr', 'gn', 'haw',
		'ike-cans', 'ik', 'kl',
		'nl', 'pt', 'qu',
		'srn', 'yi', self::UNKNOWN_LANGUAGE_CODE
	];

	protected function setUp(): void {
		parent::setUp();
		$this->languageData = LanguageData::get();
	}

	/**
	 * @covers ::isKnown
	 */
	public function testIsKnown(): void {
		$this->assertTrue( $this->languageData->isKnown( 'en' ) );
		$this->assertFalse( $this->languageData->isKnown( self::UNKNOWN_LANGUAGE_CODE ) );
	}

	/**
	 * isRedirect() is expected to return false for a regular language and
	 * the target code for a redirect.
	 *
	 * @covers ::isRedirect
	 */
	public function testIsRedirect(): void {
		$this->assertFalse( $this->languageData->isRedirect( 'en' ) );
		$this->assertSame( 'aeb-arab', $this->languageData->isRedirect( 'aeb' ) );
	}

	/**
	 * @covers ::getScript
	 */
	public function testGetScript(): void {
		$this->assertSame( 'Latn', $this->languageData->getScript( 'en' ) );
		$this->assertFalse( $this->languageData->getScript( self::UNKNOWN_LANGUAGE_CODE ) );
	}

	/**
	 * @covers ::getRegions
	 */
	public function testGetRegions(): void {
		$this->assertFalse( $this->languageData->getRegions( self::UNKNOWN_LANGUAGE_CODE ) );
		$this->assertEquals( [ 'AF' ], $this->languageData->getRegions( 'aeb' ) );

		// Only membership is checked for English; the full list and its order are not pinned.
		$regions = $this->languageData->getRegions( 'en' );
		foreach ( [ 'EU', 'AM', 'AS' ] as $expectedRegion ) {
			$this->assertContains( $expectedRegion, $regions );
		}
	}

	/**
	 * @covers ::getAutonym
	 */
	public function testGetAutonym(): void {
		$this->assertFalse( $this->languageData->getAutonym( self::UNKNOWN_LANGUAGE_CODE ) );
		$this->assertSame(
			'تونسي',
			$this->languageData->getAutonym( 'aeb' ),
			'Redirects return proper value in getAutonym.'
		);

		$this->assertSame( 'English', $this->languageData->getAutonym( 'en' ) );
	}

	/**
	 * @covers ::getAutonyms
	 */
	public function testGetAutonyms(): void {
		$autonyms = $this->languageData->getAutonyms();
		$this->assertSame( 'English', $autonyms['en'] );
		$this->assertFalse(
			isset( $autonyms['aeb'] ),
			'Redirects are not present in getAutonyms.'
		);
	}

	/**
	 * @covers ::getLanguagesInScripts
	 */
	public function testGetLanguagesInScripts(): void {
		// The unknown language code doubles as a script code that does not exist.
		$this->assertEmpty(
			$this->languageData->getLanguagesInScripts( [ self::UNKNOWN_LANGUAGE_CODE ] )
		);

		$languages = $this->languageData->getLanguagesInScripts( [ 'Latn', 'Grek' ] );

		$this->assertContains( 'zu', $languages );
		$this->assertContains( 'pnt', $languages );
		$this->assertNotContains(
			'sr-el',
			$languages,
			'Redirects are not present when fetching languages in scripts.'
		);
	}

	/**
	 * @covers ::getGroupOfScript
	 */
	public function testGetGroupOfScript(): void {
		$this->assertSame( 'Latin', $this->languageData->getGroupOfScript( 'Latn' ) );
		// The unknown language code doubles as a script code that does not exist.
		$this->assertSame(
			LanguageData::OTHER_SCRIPT_GROUP,
			$this->languageData->getGroupOfScript( self::UNKNOWN_LANGUAGE_CODE )
		);
	}

	/**
	 * @covers ::getScriptGroupOfLanguage
	 */
	public function testGetScriptGroupOfLanguage(): void {
		$this->assertSame(
			LanguageData::OTHER_SCRIPT_GROUP,
			$this->languageData->getScriptGroupOfLanguage( self::UNKNOWN_LANGUAGE_CODE )
		);

		$this->assertSame(
			'Latin',
			$this->languageData->getScriptGroupOfLanguage( 'en' )
		);
	}

	/**
	 * @covers ::getLanguagesByScriptGroup
	 */
	public function testGetLanguagesByScriptGroup(): void {
		$actuals = $this->languageData->getLanguagesByScriptGroup( [ 'en', 'sr-el', 'tt-cyrl' ] );

		$this->assertContains( 'tt-cyrl', $actuals['Cyrillic'] );
		$this->assertContains( 'en', $actuals['Latin'] );
		$this->assertContains( 'sr-el', $actuals['Latin'] );
	}

	/**
	 * @covers ::getLanguagesByScriptGroupInRegions
	 */
	public function testGetLanguagesByScriptGroupInRegions(): void {
		$actuals = $this->languageData->getLanguagesByScriptGroupInRegions( [ 'AS', 'PA' ] );

		$this->assertContains( 'tpi', $actuals['Latin'] );
		$this->assertContains( 'ug-arab', $actuals['Arabic'] );
		$this->assertContains( 'zh-sg', $actuals['CJK'] );
		$this->assertNotContains(
			'azb',
			$actuals['Arabic'],
			'Redirects are not present when languages grouped by script in a region.'
		);
	}

	/**
	 * The unknown code in the input is expected to be missing from the result.
	 *
	 * @covers ::sortByAutonym
	 */
	public function testSortByAutonym(): void {
		$sorted = $this->languageData->sortByAutonym( self::SORT_INPUT );

		$this->assertEquals(
			[
				'atj', 'gn', 'en',
				'es', 'fr', 'haw',
				'ik', 'kl', 'nl',
				'pt', 'qu', 'srn',
				'chy', 'yi', 'chr',
				'ike-cans', 'cr'
			],
			$sorted
		);
	}

	/**
	 * The unknown code in the input is expected to be missing from the result.
	 *
	 * NOTE(review): the expected order is the same as in testSortByAutonym();
	 * confirm that grouping by script is not supposed to reorder this list.
	 *
	 * @covers ::sortByScriptGroup
	 */
	public function testSortByScriptGroup(): void {
		$sorted = $this->languageData->sortByScriptGroup(
			$this->languageData->sortByAutonym( self::SORT_INPUT )
		);

		$this->assertEquals(
			[
				'atj', 'gn', 'en',
				'es', 'fr', 'haw',
				'ik', 'kl', 'nl',
				'pt', 'qu', 'srn',
				'chy', 'yi', 'chr',
				'ike-cans', 'cr'
			],
			$sorted
		);
	}

	/**
	 * @covers ::isRtl
	 */
	public function testIsRtl(): void {
		$this->assertFalse( $this->languageData->isRtl( 'en' ) );
		$this->assertFalse( $this->languageData->isRtl( self::UNKNOWN_LANGUAGE_CODE ) );
		$this->assertTrue( $this->languageData->isRtl( 'he' ) );
	}

	/**
	 * @covers ::getDir
	 */
	public function testGetDir(): void {
		$this->assertSame( 'ltr', $this->languageData->getDir( 'en' ) );
		$this->assertSame( 'rtl', $this->languageData->getDir( 'he' ) );
		$this->assertFalse( $this->languageData->getDir( self::UNKNOWN_LANGUAGE_CODE ) );
	}

	/**
	 * @covers ::getLanguagesInTerritory
	 */
	public function testGetLanguagesInTerritory(): void {
		$actualsAFG = $this->languageData->getLanguagesInTerritory( 'AF' );
		$actualsAT = $this->languageData->getLanguagesInTerritory( 'AT' );

		$this->assertContains( 'de', $actualsAT );
		$this->assertContains( 'bar', $actualsAT );
		$this->assertNotContains( 'he', $actualsAT );

		$this->assertContains( 'ug-arab', $actualsAFG );
		$this->assertContains( 'tk', $actualsAFG );
		$this->assertNotContains( 'de', $actualsAFG );
	}

	/**
	 * Registers the unknown code as a Latin-script language of region AF and
	 * checks that it becomes known and shows up in that region's Latin group.
	 *
	 * NOTE(review): if LanguageData::get() hands out a shared instance, the
	 * added language stays registered for any test that runs after this one,
	 * and those tests rely on the code being unknown - confirm.
	 *
	 * @covers ::addLanguage
	 */
	public function testAddLanguage(): void {
		$this->assertFalse( $this->languageData->isKnown( self::UNKNOWN_LANGUAGE_CODE ) );
		$this->assertNotContains(
			self::UNKNOWN_LANGUAGE_CODE,
			$this->languageData->getLanguagesByScriptGroupInRegion( 'AF' )['Latin']
		);

		$this->languageData->addLanguage( self::UNKNOWN_LANGUAGE_CODE, [
			'script' => 'Latn',
			'regions' => [
				'AF'
			],
			'autonym' => 'Test Language'
		] );

		$this->assertTrue( $this->languageData->isKnown( self::UNKNOWN_LANGUAGE_CODE ) );
		$this->assertContains(
			self::UNKNOWN_LANGUAGE_CODE,
			$this->languageData->getLanguagesByScriptGroupInRegion( 'AF' )['Latin']
		);
	}
}
|
||||
Reference in New Issue
Block a user