Add a PHP interface to work with the language data

Additional changes in this PR include,

  * Added composer.json
  * Refactored the folder structure.
  * Added editorconfig.
  * Added PHPCS and formatted existing code.
  * Changes to use a single license - GPL-2.0-or-later
  * ESLint related fixes

Bug: T218639
This commit is contained in:
Abijeet
2020-01-22 00:53:38 +05:30
parent 6ca93c0966
commit 09ab6024fe
16 changed files with 2585 additions and 151 deletions

161
tests/js/index.js Normal file
View File

@@ -0,0 +1,161 @@
var languageData = require( __dirname + '/../../src/index' ),
assert = require( 'assert' );
describe( 'languagedata', function () {
var orphanScripts, badRedirects, doubleRedirects, doubleAutonyms, languagesWithoutAutonym;
/*
 * Walks every language in langdb, looks up its script, and collects the
 * scripts whose script group resolves to 'Other'.
 * A script is reported once per language that uses it.
 */
orphanScripts = function () {
	var languages = languageData.getLanguages(),
		orphans = [],
		code, scriptCode;

	for ( code in languages ) {
		scriptCode = languageData.getScript( code );
		// Anything that has a real group is fine; skip it.
		if ( languageData.getGroupOfScript( scriptCode ) !== 'Other' ) {
			continue;
		}
		orphans.push( scriptCode );
	}

	return orphans;
};
/*
 * Collects every language code that is a redirect whose target is not
 * itself present in the language list.
 */
badRedirects = function () {
	var dangling = [],
		code, redirectTarget, targetExists;

	for ( code in languageData.getLanguages() ) {
		redirectTarget = languageData.isRedirect( code );
		// Not a redirect at all: nothing to validate.
		if ( !redirectTarget ) {
			continue;
		}
		targetExists = Boolean( languageData.getLanguages()[ redirectTarget ] );
		if ( !targetExists ) {
			dangling.push( code );
		}
	}

	return dangling;
};
/*
 * Collects every language code whose redirect target is itself a redirect.
 * Redirects are expected to point straight at a real language, so a chain
 * of two redirects is reported as an error.
 */
doubleRedirects = function () {
	var chained = [],
		code, firstHop;

	for ( code in languageData.getLanguages() ) {
		firstHop = languageData.isRedirect( code );
		if ( !firstHop ) {
			continue;
		}
		// The target must be a final destination, not another redirect.
		if ( languageData.isRedirect( firstHop ) ) {
			chained.push( code );
		}
	}

	return chained;
};
/*
 * Runs over all non-redirect languages and checks that all autonyms are
 * unique.
 *
 * Returns the codes of the languages whose autonym was already seen on an
 * earlier language, in iteration order. The first language carrying a given
 * autonym is never reported.
 */
doubleAutonyms = function () {
	var language, autonym,
		// A Set keeps the membership test constant-time instead of
		// rescanning an ever-growing array for every language.
		seenAutonyms = new Set(),
		duplicateAutonyms = [];
	for ( language in languageData.getLanguages() ) {
		// Redirects are skipped so that only real language entries are compared.
		if ( languageData.isRedirect( language ) ) {
			continue;
		}
		autonym = languageData.getAutonym( language );
		if ( seenAutonyms.has( autonym ) ) {
			duplicateAutonyms.push( language );
		}
		seenAutonyms.add( autonym );
	}
	return duplicateAutonyms;
};
/*
 * Lists every language code in langdb for which getAutonym() does not
 * return a string, i.e. languages that have nothing usable as an autonym.
 */
languagesWithoutAutonym = function () {
	return Object.keys( languageData.getLanguages() ).filter( function ( code ) {
		return typeof languageData.getAutonym( code ) !== 'string';
	} );
};
it( 'language tags', function () {
	var realCode = 'ar',
		bogusCode = 'unknownLanguageCode!';

	// A code that is in the data must be reported as known...
	assert.ok( languageData.isKnown( realCode ), 'Language is unknown' );
	// ...and a made-up code must not be.
	assert.ok( !languageData.isKnown( bogusCode ), 'Language is known' );
} );
it( 'autonyms', function () {
	var autonyms;

	// Add a language in run time.
	// This is done early to make sure that it doesn't break other functions.
	languageData.addLanguage( 'qqq', {
		script: 'Latn',
		regions: [ 'SP' ],
		autonym: 'Language documentation'
	} );
	// Must be strictEqual: assert.ok( value, message ) would treat the expected
	// autonym as the failure message and pass for any truthy return value.
	assert.strictEqual( languageData.getAutonym( 'qqq' ), 'Language documentation', 'Language qqq was added with the correct autonym' );

	// getAutonyms() returns a code => autonym map that leaves out redirects.
	autonyms = languageData.getAutonyms();
	assert.strictEqual( autonyms.zu, 'isiZulu', 'Correct autonym is returned for Zulu using getAutonyms().' );
	assert.strictEqual( autonyms.pa, undefined, 'Language "pa" is not listed in autonyms, because it is a redirect' );
	assert.strictEqual( autonyms[ 'pa-guru' ], 'ਪੰਜਾਬੀ', 'Language "pa-guru" has the correct autonym' );
	assert.deepEqual( languagesWithoutAutonym(), [], 'All languages have autonyms.' );

	// getAutonym() resolves redirects, so "pa" yields the autonym of "pa-guru".
	assert.strictEqual( languageData.getAutonym( 'pa' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected using code pa.' );
	assert.strictEqual( languageData.getAutonym( 'pa-guru' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected using code pa-guru.' );

	// autonyms: gn: avañe'ẽ, de: deutsch, hu: magyar, fi: suomi
	assert.deepEqual( [ 'de', 'fi', 'gn', 'hu' ].sort( languageData.sortByAutonym ), [
		'gn', 'de', 'hu', 'fi'
	], 'Languages are correctly sorted by autonym' );
} );
it( 'regions and groups', function () {
	var languagesAM = [ 'atj', 'chr', 'chy', 'cr', 'en', 'es', 'fr', 'gn', 'haw', 'ike-cans', 'ik', 'kl', 'nl', 'pt', 'qu', 'srn', 'yi' ],
		sortedByAutonym;

	// This check presumes no script should land in the 'Other' group;
	// that assumption may stop holding one day.
	assert.deepEqual( orphanScripts(), [], 'All scripts belong to script groups.' );

	assert.deepEqual(
		languageData.getRegions( 'lzz' ),
		[ 'EU', 'ME' ],
		'Correct regions of the Laz language were selected'
	);
	assert.strictEqual(
		languageData.getRegions( 'no-such-language' ),
		'UNKNOWN',
		'The region of an invalid language is "UNKNOWN"'
	);
	assert.ok(
		languageData.getLanguagesInTerritory( 'RU' ).includes( 'sah' ),
		'Sakha language is spoken in Russia'
	);

	// Order by autonym first (sort() works in place), then by script group.
	sortedByAutonym = languagesAM.sort( languageData.sortByAutonym );
	assert.deepEqual(
		languageData.sortByScriptGroup( sortedByAutonym ),
		[ 'atj', 'gn', 'en', 'es', 'fr', 'haw', 'ik', 'kl', 'nl', 'pt', 'qu', 'srn', 'chy', 'yi', 'ike-cans', 'cr', 'chr' ],
		'languages in region AM are ordered correctly by script group'
	);
} );
it( 'scripts', function () {
	// This test assumes that we don't want any scripts to be in the 'Other'
	// group. Actually, this may become wrong some day.
	assert.deepEqual( orphanScripts(), [], 'All scripts belong to script groups.' );

	// Script => languages lookups; redirects are expected to be left out.
	assert.deepEqual( languageData.getLanguagesInScript( 'Guru' ), [ 'pa-guru' ], '"pa-guru" is written in script Guru, and "pa" is skipped as a redirect' );
	assert.deepEqual( languageData.getLanguagesInScripts( [ 'Geor', 'Armn' ] ), [ 'hy', 'hyw', 'ka', 'xmf' ], 'languages in scripts Geor and Armn are selected correctly' );
	assert.deepEqual( languageData.getLanguagesInScript( 'Knda' ), [
		'kn', 'tcy'
	], 'languages in script Knda are selected correctly' );

	// Script => group and language => group lookups.
	assert.strictEqual( languageData.getGroupOfScript( 'Beng' ), 'SouthAsian', 'Bengali script belongs to the SouthAsian group.' );
	// 'iu' is the code for Inuktitut ('ik' is Inupiaq).
	assert.strictEqual( languageData.getScriptGroupOfLanguage( 'iu' ), 'NativeAmerican', 'The script of the Inuktitut language belongs to the NativeAmerican group.' );
} );
it( 'redirects', function () {
	var redirectTarget, scriptOfUnknown, scriptOfYi;

	// isRedirect() hands back the target code for a redirecting language.
	redirectTarget = languageData.isRedirect( 'sr-ec' );
	assert.strictEqual( redirectTarget, 'sr-cyrl', '"sr-ec" is a redirect to "sr-cyrl"' );

	// Whole-database consistency checks.
	assert.deepEqual( badRedirects(), [], 'All redirects have valid targets.' );
	assert.deepEqual( doubleRedirects(), [], 'There are no double redirects.' );
	assert.deepEqual( doubleAutonyms(), [], 'All languages have distinct autonyms.' );

	// Script lookups, including the fallback for an unknown code.
	scriptOfUnknown = languageData.getScript( 'no-such-language' );
	assert.strictEqual( scriptOfUnknown, 'Zyyy', 'A script for an unknown language is Zyyy - undetermined' );
	scriptOfYi = languageData.getScript( 'ii' );
	assert.strictEqual( scriptOfYi, 'Yiii', 'Correct script of the Yi language was selected' );
} );
it( 'directionality', function () {
	// Each entry: [ language code, expected result, failure message ].
	var rtlFlagCases = [
			[ 'te', false, 'Telugu language is not RTL' ],
			[ 'dv', true, 'Divehi language is RTL' ]
		],
		dirCases = [
			[ 'mzn', 'rtl', 'Mazandarani language is RTL' ],
			[ 'uk', 'ltr', 'Ukrainian language is LTR' ]
		];

	// Boolean form of the check.
	rtlFlagCases.forEach( function ( testCase ) {
		assert.strictEqual( languageData.isRtl( testCase[ 0 ] ), testCase[ 1 ], testCase[ 2 ] );
	} );

	// String form of the check ('rtl' / 'ltr').
	dirCases.forEach( function ( testCase ) {
		assert.strictEqual( languageData.getDir( testCase[ 0 ] ), testCase[ 1 ], testCase[ 2 ] );
	} );
} );
} );

View File

@@ -0,0 +1,275 @@
<?php
require __DIR__ . '/../../src/LanguageData.php';
use PHPUnit\Framework\TestCase;
use Wikimedia\LanguageData;
/**
 * PHPUnit tests for the PHP LanguageData interface.
 *
 * Every test works on the instance returned by LanguageData::get().
 *
 * @coversDefaultClass \Wikimedia\LanguageData
 */
class LanguageDataTest extends TestCase {
	/**
	 * Instance under test; assigned in setUp().
	 *
	 * @var LanguageData
	 */
	protected $languageData;

	/**
	 * A code the tests treat as absent from the language data
	 * (until testAddLanguage registers it).
	 */
	private const UNKNOWN_LANGUAGE_CODE = 'xyz';

	protected function setUp(): void {
		parent::setUp();
		$this->languageData = LanguageData::get();
	}

	/**
	 * A listed code is known; an unlisted one is not.
	 *
	 * @covers ::isKnown
	 */
	public function testIsKnown() {
		$this->assertTrue( $this->languageData->isKnown( 'en' ) );
		$this->assertFalse( $this->languageData->isKnown( self::UNKNOWN_LANGUAGE_CODE ) );
	}

	/**
	 * isRedirect() gives false for a regular language and the target code for a redirect.
	 *
	 * @covers ::isRedirect
	 */
	public function testIsRedirect() {
		$this->assertFalse( $this->languageData->isRedirect( 'en' ) );
		// Expected value goes first so that failure output is labelled correctly.
		$this->assertEquals( 'aeb-arab', $this->languageData->isRedirect( 'aeb' ) );
	}

	/**
	 * getScript() gives the script code, or false for an unknown language.
	 *
	 * @covers ::getScript
	 */
	public function testGetScript() {
		// Expected value goes first so that failure output is labelled correctly.
		$this->assertEquals( 'Latn', $this->languageData->getScript( 'en' ) );
		$this->assertFalse( $this->languageData->getScript( self::UNKNOWN_LANGUAGE_CODE ) );
	}

	/**
	 * getRegions() gives the region list, or false for an unknown language.
	 *
	 * @covers ::getRegions
	 */
	public function testGetRegions() {
		$this->assertFalse( $this->languageData->getRegions( self::UNKNOWN_LANGUAGE_CODE ) );
		$this->assertEquals( [ 'AF' ], $this->languageData->getRegions( 'aeb' ) );

		// Only a subset is checked for 'en', so additional regions do not fail the test.
		$expected = [ 'EU', 'AM', 'AS' ];
		$regions = $this->languageData->getRegions( 'en' );
		foreach ( $expected as $region ) {
			$this->assertContains( $region, $regions );
		}
	}

	/**
	 * getAutonym() follows redirects and gives false for an unknown language.
	 *
	 * @covers ::getAutonym
	 */
	public function testGetAutonym() {
		$this->assertFalse( $this->languageData->getAutonym( self::UNKNOWN_LANGUAGE_CODE ) );
		$this->assertEquals(
			'تونسي',
			$this->languageData->getAutonym( 'aeb' ),
			'Redirects return proper value in getAutonym.'
		);
		$this->assertEquals( 'English', $this->languageData->getAutonym( 'en' ) );
	}

	/**
	 * getAutonyms() is keyed by language code and leaves out redirects.
	 *
	 * @covers ::getAutonyms
	 */
	public function testGetAutonyms() {
		$autonyms = $this->languageData->getAutonyms();
		$this->assertEquals( 'English', $autonyms['en'] );
		$this->assertFalse(
			isset( $autonyms['aeb'] ),
			'Redirects are not present in getAutonyms.'
		);
	}

	/**
	 * getLanguagesInScripts() merges several scripts and leaves out redirects.
	 *
	 * @covers ::getLanguagesInScripts
	 */
	public function testGetLanguagesInScripts() {
		// The unknown language code doubles as a script code that matches nothing.
		$this->assertEmpty(
			$this->languageData->getLanguagesInScripts( [ self::UNKNOWN_LANGUAGE_CODE ] )
		);

		$languages = $this->languageData->getLanguagesInScripts( [ 'Latn', 'Grek' ] );
		$this->assertContains( 'zu', $languages );
		$this->assertContains( 'pnt', $languages );
		$this->assertNotContains(
			'sr-el',
			$languages,
			'Redirects are not present when fetching languages in scripts.'
		);
	}

	/**
	 * getGroupOfScript() falls back to OTHER_SCRIPT_GROUP for an unlisted script.
	 *
	 * @covers ::getGroupOfScript
	 */
	public function testGetGroupOfScript() {
		$this->assertEquals( 'Latin', $this->languageData->getGroupOfScript( 'Latn' ) );
		$this->assertEquals(
			LanguageData::OTHER_SCRIPT_GROUP,
			$this->languageData->getGroupOfScript( self::UNKNOWN_LANGUAGE_CODE )
		);
	}

	/**
	 * getScriptGroupOfLanguage() falls back to OTHER_SCRIPT_GROUP for an unknown language.
	 *
	 * @covers ::getScriptGroupOfLanguage
	 */
	public function testGetScriptGroupOfLanguage() {
		$this->assertEquals(
			LanguageData::OTHER_SCRIPT_GROUP,
			$this->languageData->getScriptGroupOfLanguage( self::UNKNOWN_LANGUAGE_CODE )
		);
		$this->assertEquals(
			'Latin',
			$this->languageData->getScriptGroupOfLanguage( 'en' )
		);
	}

	/**
	 * getLanguagesByScriptGroup() buckets the given codes by script group name.
	 *
	 * @covers ::getLanguagesByScriptGroup
	 */
	public function testGetLanguagesByScriptGroup() {
		$actuals = $this->languageData->getLanguagesByScriptGroup( [ 'en', 'sr-el', 'tt-cyrl' ] );
		$this->assertContains( 'tt-cyrl', $actuals['Cyrillic'] );
		$this->assertContains( 'en', $actuals['Latin'] );
		$this->assertContains( 'sr-el', $actuals['Latin'] );
	}

	/**
	 * getLanguagesByScriptGroupInRegions() groups a set of regions' languages and leaves out redirects.
	 *
	 * @covers ::getLanguagesByScriptGroupInRegions
	 */
	public function testGetLanguagesByScriptGroupInRegions() {
		$actuals = $this->languageData->getLanguagesByScriptGroupInRegions( [ 'AS', 'PA' ] );
		$this->assertContains( 'tpi', $actuals['Latin'] );
		$this->assertContains( 'ug-arab', $actuals['Arabic'] );
		$this->assertContains( 'zh-sg', $actuals['CJK'] );
		$this->assertNotContains(
			'azb',
			$actuals['Arabic'],
			'Redirects are not present when languages grouped by script in a region.'
		);
	}

	/**
	 * sortByAutonym() orders codes by autonym; the unknown code is absent from the expected result.
	 *
	 * @covers ::sortByAutonym
	 */
	public function testSortByAutonym() {
		$sorted = $this->languageData->sortByAutonym(
			[
				'atj', 'chr', 'chy',
				'cr', 'en', 'es',
				'fr', 'gn', 'haw',
				'ike-cans', 'ik', 'kl',
				'nl', 'pt', 'qu',
				'srn', 'yi', self::UNKNOWN_LANGUAGE_CODE
			]
		);
		$this->assertEquals(
			[
				'atj', 'gn', 'en',
				'es', 'fr', 'haw',
				'ik', 'kl', 'nl',
				'pt', 'qu', 'srn',
				'chy', 'yi', 'chr',
				'ike-cans', 'cr'
			],
			$sorted
		);
	}

	/**
	 * sortByScriptGroup() applied to an autonym-sorted list.
	 *
	 * @covers ::sortByScriptGroup
	 */
	public function testSortByScriptGroup() {
		$sorted = $this->languageData->sortByScriptGroup(
			$this->languageData->sortByAutonym(
				[
					'atj', 'chr', 'chy',
					'cr', 'en', 'es',
					'fr', 'gn', 'haw',
					'ike-cans', 'ik', 'kl',
					'nl', 'pt', 'qu',
					'srn', 'yi', self::UNKNOWN_LANGUAGE_CODE
				]
			)
		);
		$this->assertEquals(
			[
				'atj', 'gn', 'en',
				'es', 'fr', 'haw',
				'ik', 'kl', 'nl',
				'pt', 'qu', 'srn',
				'chy', 'yi', 'chr',
				'ike-cans', 'cr'
			],
			$sorted
		);
	}

	/**
	 * isRtl() is false for LTR and unknown languages, true for an RTL language.
	 *
	 * @covers ::isRtl
	 */
	public function testIsRtl() {
		$this->assertFalse( $this->languageData->isRtl( 'en' ) );
		$this->assertFalse( $this->languageData->isRtl( self::UNKNOWN_LANGUAGE_CODE ) );
		$this->assertTrue( $this->languageData->isRtl( 'he' ) );
	}

	/**
	 * getDir() gives 'ltr' or 'rtl', or false for an unknown language.
	 *
	 * @covers ::getDir
	 */
	public function testGetDir() {
		$this->assertEquals( 'ltr', $this->languageData->getDir( 'en' ) );
		$this->assertEquals( 'rtl', $this->languageData->getDir( 'he' ) );
		$this->assertFalse( $this->languageData->getDir( self::UNKNOWN_LANGUAGE_CODE ) );
	}

	/**
	 * getLanguagesInTerritory() lists the languages of a territory code.
	 *
	 * @covers ::getLanguagesInTerritory
	 */
	public function testGetLanguagesInTerritory() {
		$actualsAFG = $this->languageData->getLanguagesInTerritory( 'AF' );
		$actualsAT = $this->languageData->getLanguagesInTerritory( 'AT' );

		$this->assertContains( 'de', $actualsAT );
		$this->assertContains( 'bar', $actualsAT );
		$this->assertNotContains( 'he', $actualsAT );

		$this->assertContains( 'ug-arab', $actualsAFG );
		$this->assertContains( 'tk', $actualsAFG );
		$this->assertNotContains( 'de', $actualsAFG );
	}

	/**
	 * addLanguage() makes a new code known and visible in region lookups.
	 *
	 * NOTE(review): LanguageData::get() presumably returns a shared instance, in
	 * which case the added language stays registered for any test that runs
	 * afterwards and relies on UNKNOWN_LANGUAGE_CODE being unknown - confirm, and
	 * keep this test last or add a cleanup if so.
	 *
	 * @covers ::addLanguage
	 */
	public function testAddLanguage() {
		$this->assertFalse( $this->languageData->isKnown( self::UNKNOWN_LANGUAGE_CODE ) );
		$this->assertNotContains(
			self::UNKNOWN_LANGUAGE_CODE,
			$this->languageData->getLanguagesByScriptGroupInRegion( 'AF' )['Latin']
		);

		$this->languageData->addLanguage( self::UNKNOWN_LANGUAGE_CODE, [
			'script' => 'Latn',
			'regions' => [
				'AF'
			],
			'autonym' => 'Test Language'
		] );

		$this->assertTrue( $this->languageData->isKnown( self::UNKNOWN_LANGUAGE_CODE ) );
		$this->assertContains(
			self::UNKNOWN_LANGUAGE_CODE,
			$this->languageData->getLanguagesByScriptGroupInRegion( 'AF' )['Latin']
		);
	}
}