Add a PHP interface to work with the language data
Additional changes in this PR include: added composer.json; refactored the folder structure; added editorconfig; added PHPCS and formatted existing code; switched to a single license (GPL-2.0-or-later); ESLint-related fixes. Bug: T218639
This commit is contained in:
371
src/LanguageData.php
Normal file
371
src/LanguageData.php
Normal file
@@ -0,0 +1,371 @@
|
||||
<?php
|
||||
/**
|
||||
* Contains a utility class to query the language data.
|
||||
*
|
||||
* @file
|
||||
* @license GPL-2.0-or-later
|
||||
*/
|
||||
|
||||
namespace Wikimedia;
|
||||
|
||||
/**
 * Utility class to query the language data.
 *
 * The data is read once from the bundled JSON file and kept in memory as the
 * object tree produced by json_decode(). Every entry under "languages" is
 * either a one-element array [ target ] (a redirect to another language code)
 * or an array [ script, regions, autonym ] in which the autonym is optional.
 */
class LanguageData {
	/** @var LanguageData|null Shared instance, created lazily by get() */
	private static $instance;

	public const OTHER_SCRIPT_GROUP = 'Other';

	/** Location of the JSON data, relative to the directory of this file */
	private const LANGUAGE_DATA_PATH = '../data/language-data.json';

	/** @var \stdClass|null Decoded language data, populated by loadData() */
	private $data;

	/**
	 * Returns an instance of the class
	 * @return LanguageData
	 * @throws \RuntimeException If the language data cannot be loaded
	 */
	public static function get(): LanguageData {
		if ( self::$instance === null ) {
			// Only cache the instance once its data has been loaded successfully.
			$instance = new LanguageData();
			$instance->loadData();
			self::$instance = $instance;
		}

		return self::$instance;
	}

	/**
	 * Reads and decodes the JSON language data file.
	 * @throws \RuntimeException If the file is unreadable or has no usable content
	 */
	private function loadData() {
		$path = __DIR__ . '/' . self::LANGUAGE_DATA_PATH;

		$json = file_get_contents( $path );
		if ( $json === false ) {
			throw new \RuntimeException( "Unable to read language data from $path" );
		}

		$data = json_decode( $json );
		if ( !is_object( $data ) || !isset( $data->languages ) ) {
			throw new \RuntimeException(
				"Language data in $path is not valid JSON or has no \"languages\" entry"
			);
		}

		$this->data = $data;
	}

	/**
	 * Checks if a language code is valid
	 * @param string $languageCode
	 * @return bool
	 */
	public function isKnown( string $languageCode ): bool {
		return isset( $this->data->languages->$languageCode );
	}

	/**
	 * Is this language a redirect to another language?
	 * @param string $languageCode Language code
	 * @return string|bool Target language code if it's a redirect or false if it's not
	 */
	public function isRedirect( string $languageCode ) {
		if ( !$this->isKnown( $languageCode ) ) {
			return false;
		}

		// A redirect is stored as a single-element array holding the target code.
		$language = $this->getLanguage( $languageCode );

		return count( $language ) === 1 ? $language[0] : false;
	}

	/**
	 * Get all the languages
	 * @return object
	 */
	public function getLanguages() {
		return $this->data->languages;
	}

	/**
	 * Returns the script of the language or false
	 * @param string $languageCode
	 * @return string|bool Language script if its a known language, else false.
	 */
	public function getScript( string $languageCode ) {
		if ( !$this->isKnown( $languageCode ) ) {
			return false;
		}

		$targetCode = $this->isRedirect( $languageCode );
		if ( $targetCode ) {
			return $this->getScript( $targetCode );
		}

		return $this->getLanguage( $languageCode )[0];
	}

	/**
	 * Returns the regions in which a language is spoken.
	 * @param string $languageCode
	 * @return string[]|bool Array of regions or false if language is unknown.
	 */
	public function getRegions( string $languageCode ) {
		if ( !$this->isKnown( $languageCode ) ) {
			return false;
		}

		$targetCode = $this->isRedirect( $languageCode );
		if ( $targetCode ) {
			return $this->getRegions( $targetCode );
		}

		return $this->getLanguage( $languageCode )[1];
	}

	/**
	 * Returns the autonym of the language.
	 * @param string $languageCode Language code
	 * @return string|bool Autonym, the language code itself when no autonym is
	 *  stored, or false if the language is unknown.
	 */
	public function getAutonym( $languageCode ) {
		if ( !$this->isKnown( $languageCode ) ) {
			return false;
		}

		$targetCode = $this->isRedirect( $languageCode );
		if ( $targetCode ) {
			return $this->getAutonym( $targetCode );
		}

		// The autonym is the optional third element, so it must be tested for
		// directly: an entry with only script and regions has no index 2.
		return $this->getLanguage( $languageCode )[2] ?? $languageCode;
	}

	/**
	 * Returns all language codes and corresponding autonyms
	 * @return array Autonyms keyed by language code; redirects are left out
	 */
	public function getAutonyms(): array {
		$languageAutonyms = [];
		foreach ( $this->getLanguages() as $languageCode => $languageData ) {
			if ( $this->isRedirect( $languageCode ) ) {
				continue;
			}
			$languageAutonyms[$languageCode] = $this->getAutonym( $languageCode );
		}

		return $languageAutonyms;
	}

	/**
	 * Returns all languages written in the given scripts.
	 * @param string[] $scripts
	 * @return string[] Language codes; redirects are left out
	 */
	public function getLanguagesInScripts( array $scripts ): array {
		$languagesInScripts = [];
		foreach ( $this->getLanguages() as $languageCode => $languageData ) {
			if ( $this->isRedirect( $languageCode ) ) {
				continue;
			}

			// Strict comparison so that only identical script codes match.
			if ( in_array( $this->getScript( $languageCode ), $scripts, true ) ) {
				$languagesInScripts[] = $languageCode;
			}
		}

		return $languagesInScripts;
	}

	/**
	 * Returns all languages written in the given script.
	 * @param string $script
	 * @return string[]
	 */
	public function getLanguagesInScript( string $script ): array {
		return $this->getLanguagesInScripts( [ $script ] );
	}

	/**
	 * Returns the script group of a script or 'Other' if it doesn't
	 * belong to any group.
	 * @param string $script Script code
	 * @return string script group name
	 */
	public function getGroupOfScript( string $script ): string {
		foreach ( $this->data->scriptgroups as $scriptGroup => $scriptGroupData ) {
			if ( in_array( $script, $scriptGroupData, true ) ) {
				return $scriptGroup;
			}
		}

		return self::OTHER_SCRIPT_GROUP;
	}

	/**
	 * Returns the script group of a language.
	 * @param string $languageCode Language code
	 * @return string script group name; 'Other' if the language is unknown
	 */
	public function getScriptGroupOfLanguage( string $languageCode ): string {
		$script = $this->getScript( $languageCode );
		if ( !is_string( $script ) ) {
			// getScript() yields false for unknown languages; do not depend on
			// that value being coerced into a string argument.
			return self::OTHER_SCRIPT_GROUP;
		}

		return $this->getGroupOfScript( $script );
	}

	/**
	 * Return the list of languages passed, grouped by script.
	 * @param string[] $languageCodes Array of language codes to group
	 * @return array Array of language codes grouped by script; unknown codes
	 *  are skipped
	 */
	public function getLanguagesByScriptGroup( array $languageCodes ): array {
		$languagesByScriptGroup = [];

		foreach ( $languageCodes as $languageCode ) {
			if ( !$this->isKnown( $languageCode ) ) {
				continue;
			}

			// The group is looked up through the redirect target, but the code
			// that was passed in is the one that is listed.
			$targetLanguageCode = $this->isRedirect( $languageCode );
			if ( $targetLanguageCode === false ) {
				$targetLanguageCode = $languageCode;
			}

			$langScriptGroup = $this->getScriptGroupOfLanguage( $targetLanguageCode );
			$languagesByScriptGroup[$langScriptGroup][] = $languageCode;
		}

		return $languagesByScriptGroup;
	}

	/**
	 * Returns an associative array of languages in several regions,
	 * grouped by script group.
	 * @param string[] $regions array of region codes
	 * @return array Language codes keyed by script group name; each language
	 *  appears at most once
	 */
	public function getLanguagesByScriptGroupInRegions( array $regions ): array {
		$languagesByScriptGroupInRegions = [];

		foreach ( $this->getLanguages() as $languageCode => $languageData ) {
			if ( $this->isRedirect( $languageCode ) ) {
				continue;
			}

			$languageRegions = $this->getRegions( $languageCode );
			if ( !is_array( $languageRegions ) ) {
				// Entry without a usable region list; nothing to match against.
				continue;
			}

			foreach ( $regions as $region ) {
				if ( !in_array( $region, $languageRegions, true ) ) {
					continue;
				}

				$langScriptGroup = $this->getScriptGroupOfLanguage( $languageCode );
				$languagesByScriptGroupInRegions[$langScriptGroup][] = $languageCode;

				// One match is enough: without this, a language spoken in
				// several of the requested regions would be listed repeatedly.
				break;
			}
		}

		return $languagesByScriptGroupInRegions;
	}

	/**
	 * Returns an associative array of languages in a region, grouped by their script.
	 * @param string $region Region code
	 * @return array
	 */
	public function getLanguagesByScriptGroupInRegion( $region ): array {
		return $this->getLanguagesByScriptGroupInRegions( [ $region ] );
	}

	/**
	 * Return the list of languages sorted by script groups.
	 * @param string[] $languageCodes Array of language codes to sort
	 * @return string[] Array of language codes
	 */
	public function sortByScriptGroup( array $languageCodes ) {
		$groupedLanguageData = $this->getLanguagesByScriptGroup( $languageCodes );
		ksort( $groupedLanguageData, SORT_STRING | SORT_FLAG_CASE );

		// The leading empty array keeps the call valid when there are no groups;
		// array_values() drops the group names so the lists can be unpacked.
		return array_merge( [], ...array_values( $groupedLanguageData ) );
	}

	/**
	 * Sort languages by their autonym.
	 * @param string[] $languageCodes
	 * @return string[] Known language codes ordered by autonym; unknown codes
	 *  are dropped
	 */
	public function sortByAutonym( array $languageCodes ): array {
		$sortedLanguages = [];
		foreach ( $languageCodes as $languageCode ) {
			$autonym = $this->getAutonym( $languageCode );
			if ( $autonym !== false ) {
				$sortedLanguages[$languageCode] = $autonym;
			}
		}

		asort( $sortedLanguages, SORT_STRING | SORT_FLAG_CASE );

		return array_keys( $sortedLanguages );
	}

	/**
	 * Check if a language is right-to-left.
	 * @param string $languageCode Language code
	 * @return bool
	 */
	public function isRtl( string $languageCode ): bool {
		return in_array( $this->getScript( $languageCode ), $this->data->rtlscripts, true );
	}

	/**
	 * Return the direction of the language. Returns false if the direction is unknown.
	 * @param string $languageCode Language code
	 * @return string|bool 'rtl' or 'ltr', or false for an unknown language
	 */
	public function getDir( string $languageCode ) {
		if ( !$this->isKnown( $languageCode ) ) {
			return false;
		}

		return $this->isRtl( $languageCode ) ? 'rtl' : 'ltr';
	}

	/**
	 * Returns the languages spoken in a territory.
	 * @param string $territory Territory code
	 * @return string[]|bool list of language codes, or false if the territory
	 *  is not in the data
	 */
	public function getLanguagesInTerritory( string $territory ) {
		return $this->data->territories->$territory ?? false;
	}

	/**
	 * Adds a language in run time and sets its options as provided.
	 * If the target option is provided, the language is defined as a redirect.
	 * Other possible options are script, regions and autonym.
	 * @param string $languageCode New language code.
	 * @param array $options Language properties.
	 */
	public function addLanguage( string $languageCode, array $options ): void {
		$languages = $this->getLanguages();

		if ( isset( $options['target'] ) ) {
			$languages->$languageCode = [ $options['target'] ];
			return;
		}

		// Missing options get explicit defaults instead of triggering
		// undefined-index errors; all three slots are always written so the
		// entry can never be mistaken for a one-element redirect.
		$languages->$languageCode = [
			$options['script'] ?? null,
			$options['regions'] ?? [],
			$options['autonym'] ?? null,
		];
	}

	/**
	 * Return the language data based on language code. Performs no check, meant for
	 * internal use only.
	 * @param string $languageCode
	 * @return array
	 */
	private function getLanguage( string $languageCode ): array {
		return $this->data->languages->$languageCode;
	}
}
|
||||
305
src/index.js
Normal file
305
src/index.js
Normal file
@@ -0,0 +1,305 @@
|
||||
var languageData = require( '../data/language-data.json' );
|
||||
|
||||
/**
|
||||
* Utility functions for querying language data.
|
||||
*/
|
||||
|
||||
/**
 * Check whether the languageCode is known to the language database.
 * For practical purposes it may be same as checking if given language code is valid,
 * but not guaranteed that all valid language codes are in our database.
 *
 * Only the database's own entries count: names that every object inherits
 * (such as "constructor" or "toString") are not reported as known.
 *
 * @param {string} languageCode language code
 * @return {boolean}
 */
function isKnown( languageCode ) {
	var languages = languageData.languages;

	// A plain truthiness test would also match inherited properties, so the
	// key must be an own property and still hold a truthy entry.
	return Object.prototype.hasOwnProperty.call( languages, languageCode ) &&
		!!languages[ languageCode ];
}
|
||||
|
||||
/**
 * Tell whether a language code merely points at another language.
 * A redirect is stored as a single-element entry holding the target code.
 * @param {string} language Language code
 * @return {string|boolean} Target language code for a redirect, otherwise false
 */
function isRedirect( language ) {
	var entry;

	if ( !isKnown( language ) ) {
		return false;
	}

	entry = languageData.languages[ language ];
	return entry.length === 1 ? entry[ 0 ] : false;
}
|
||||
|
||||
/**
 * Give access to the complete language table.
 * @return {Object} The "languages" section of the language data, keyed by language code
 */
function getLanguages() {
	var allLanguages = languageData.languages;

	return allLanguages;
}
|
||||
|
||||
/**
 * Look up the script a language is written in, following redirects.
 * @param {string} language Language code
 * @return {string} Script code; 'Zyyy' (undetermined) when the language is not known
 */
function getScript( language ) {
	var redirectTarget = isRedirect( language );

	if ( redirectTarget ) {
		return getScript( redirectTarget );
	}

	// 'Zyyy' stands for an undetermined script.
	return isKnown( language ) ? languageData.languages[ language ][ 0 ] : 'Zyyy';
}
|
||||
|
||||
/**
 * Look up the regions in which a language is spoken, following redirects.
 * @param {string} language Language code
 * @return {string[]|string} Region codes, or the string 'UNKNOWN' when the
 *  language is not known or has no regions stored
 */
function getRegions( language ) {
	var regions,
		redirectTarget = isRedirect( language );

	if ( redirectTarget ) {
		return getRegions( redirectTarget );
	}

	if ( isKnown( language ) ) {
		regions = languageData.languages[ language ][ 1 ];
		if ( regions ) {
			return regions;
		}
	}

	return 'UNKNOWN';
}
|
||||
|
||||
/**
 * Look up the name of a language in that language itself, following redirects.
 * @param {string} language Language code
 * @return {string} The autonym, or the language code that was passed in when
 *  the language is not known or has no autonym stored
 */
function getAutonym( language ) {
	var autonym,
		redirectTarget = isRedirect( language );

	if ( redirectTarget ) {
		return getAutonym( redirectTarget );
	}

	if ( isKnown( language ) ) {
		autonym = languageData.languages[ language ][ 2 ];
		if ( autonym ) {
			return autonym;
		}
	}

	return language;
}
|
||||
|
||||
/**
 * Collect the autonym of every language that is not a redirect.
 * @return {Object} Autonyms keyed by language code
 */
function getAutonyms() {
	var code,
		result = {};

	for ( code in languageData.languages ) {
		if ( !isRedirect( code ) ) {
			result[ code ] = getAutonym( code );
		}
	}

	return result;
}
|
||||
|
||||
/**
 * List the languages written in any of the given scripts. Redirects are skipped.
 * @param {string[]} scripts Script codes to look for
 * @return {string[]} Codes of the matching languages
 */
function getLanguagesInScripts( scripts ) {
	var code, script, idx,
		matches = [];

	for ( code in languageData.languages ) {
		if ( !isRedirect( code ) ) {
			script = getScript( code );
			for ( idx = 0; idx < scripts.length; idx++ ) {
				if ( scripts[ idx ] === script ) {
					matches.push( code );
					// Each language is reported once, however often its script is listed.
					break;
				}
			}
		}
	}

	return matches;
}
|
||||
|
||||
/**
 * List the languages written in one script.
 * Convenience wrapper around getLanguagesInScripts for a single script code.
 * @param {string} script Script code
 * @return {string[]} Codes of the matching languages
 */
function getLanguagesInScript( script ) {
	var singleScript = [ script ];

	return getLanguagesInScripts( singleScript );
}
|
||||
|
||||
/**
 * Find the script group that lists the given script.
 * @param {string} script Script code
 * @return {string} Name of the first group containing the script, or 'Other'
 *  when no group contains it
 */
function getGroupOfScript( script ) {
	var groupName, members;

	for ( groupName in languageData.scriptgroups ) {
		members = languageData.scriptgroups[ groupName ];
		if ( members.includes( script ) ) {
			return groupName;
		}
	}

	return 'Other';
}
|
||||
|
||||
/**
 * Find the script group of the script a language is written in.
 * @param {string} language Language code
 * @return {string} script group name
 */
function getScriptGroupOfLanguage( language ) {
	var script = getScript( language );

	return getGroupOfScript( script );
}
|
||||
|
||||
/**
 * Group the given languages by the script group they belong to.
 * The group of a redirect is that of its target, but the code that was
 * passed in is the one placed in the result.
 * @param {string[]} languages Array of language codes to group
 * @return {Object} Arrays of language codes keyed by script group name
 */
function getLanguagesByScriptGroup( languages ) {
	var idx, code, group,
		grouped = {};

	for ( idx = 0; idx < languages.length; idx++ ) {
		code = languages[ idx ];
		group = getScriptGroupOfLanguage( isRedirect( code ) || code );
		grouped[ group ] = grouped[ group ] || [];
		grouped[ group ].push( code );
	}

	return grouped;
}
|
||||
|
||||
/**
 * Collect the languages spoken in any of the given regions and group them
 * by script group. Redirects are skipped.
 * @param {string[]} regions array of region codes
 * @return {Object} Arrays of language codes keyed by script group name
 */
function getLanguagesByScriptGroupInRegions( regions ) {
	var code, idx, group, spokenIn,
		result = {};

	for ( code in languageData.languages ) {
		if ( isRedirect( code ) ) {
			continue;
		}

		spokenIn = getRegions( code );
		for ( idx = 0; idx < regions.length; idx++ ) {
			if ( !spokenIn.includes( regions[ idx ] ) ) {
				continue;
			}

			group = getScriptGroupOfLanguage( code );
			if ( result[ group ] === undefined ) {
				result[ group ] = [];
			}
			result[ group ].push( code );
			// Stop at the first matching region so the language is listed once.
			break;
		}
	}

	return result;
}
|
||||
|
||||
/**
 * Collect the languages spoken in one region, grouped by script group.
 * Convenience wrapper around getLanguagesByScriptGroupInRegions.
 * @param {string} region Region code
 * @return {Object} Arrays of language codes keyed by script group name
 */
function getLanguagesByScriptGroupInRegion( region ) {
	var singleRegion = [ region ];

	return getLanguagesByScriptGroupInRegions( singleRegion );
}
|
||||
|
||||
/**
 * Order the given languages by script group: the groups are taken in sorted
 * order of their names and, within a group, languages keep the order in which
 * they were passed.
 * @param {string[]} languages Array of language codes to sort
 * @return {string[]} Array of language codes
 */
function sortByScriptGroup( languages ) {
	var grouped = getLanguagesByScriptGroup( languages );

	return Object.keys( grouped ).sort().reduce( function ( sorted, group ) {
		return sorted.concat( grouped[ group ] );
	}, [] );
}
|
||||
|
||||
/**
 * A callback for sorting languages by autonym.
 * Can be used as an argument to a sort function.
 * The autonyms are compared case-insensitively; a language without an autonym
 * is compared by its code.
 * @param {string} a Language code
 * @param {string} b Language code
 * @return {number} Negative if a sorts first, positive if b sorts first,
 *  0 if both autonyms are equal
 */
function sortByAutonym( a, b ) {
	var autonymA = ( getAutonym( a ) || a ).toLowerCase(),
		autonymB = ( getAutonym( b ) || b ).toLowerCase();

	if ( autonymA < autonymB ) {
		return -1;
	}
	if ( autonymA > autonymB ) {
		return 1;
	}

	// Equal autonyms must compare as equal in both directions; a comparator
	// that never returns 0 is inconsistent and makes the sort order unreliable.
	return 0;
}
|
||||
|
||||
/**
 * Tell whether a language is written right-to-left, i.e. whether its script
 * is one of the scripts listed as right-to-left in the language data.
 * @param {string} language Language code
 * @return {boolean}
 */
function isRtl( language ) {
	var script = getScript( language );

	return languageData.rtlscripts.includes( script );
}
|
||||
|
||||
/**
 * Give the writing direction of a language.
 * @param {string} language Language code
 * @return {string} 'rtl' for a right-to-left language, otherwise 'ltr'
 */
function getDir( language ) {
	if ( isRtl( language ) ) {
		return 'rtl';
	}

	return 'ltr';
}
|
||||
|
||||
/**
 * Look up the languages recorded for a territory.
 * @param {string} territory Territory code
 * @return {string[]|undefined} list of language codes; undefined when the
 *  territory has no entry in the data
 */
function getLanguagesInTerritory( territory ) {
	var territories = languageData.territories;

	return territories[ territory ];
}
|
||||
|
||||
/**
 * Register a language at run time, replacing any entry already stored
 * under the same code.
 * With a target option the language becomes a redirect to that target;
 * otherwise the script, regions and autonym options make up the entry.
 *
 * @param {string} code New language code.
 * @param {Object} options Language properties.
 */
function addLanguage( code, options ) {
	var entry = options.target ?
		[ options.target ] :
		[ options.script, options.regions, options.autonym ];

	languageData.languages[ code ] = entry;
}
|
||||
|
||||
module.exports = {
|
||||
addLanguage,
|
||||
getAutonym,
|
||||
getAutonyms,
|
||||
getDir,
|
||||
getGroupOfScript,
|
||||
getLanguages,
|
||||
getLanguagesByScriptGroup,
|
||||
getLanguagesByScriptGroupInRegion,
|
||||
getLanguagesByScriptGroupInRegions,
|
||||
getLanguagesInScript,
|
||||
getLanguagesInScripts,
|
||||
getLanguagesInTerritory,
|
||||
getRegions,
|
||||
getScript,
|
||||
getScriptGroupOfLanguage,
|
||||
isKnown,
|
||||
isRedirect,
|
||||
isRtl,
|
||||
sortByScriptGroup,
|
||||
sortByAutonym
|
||||
};
|
||||
1046
src/util/spyc.php
Normal file
1046
src/util/spyc.php
Normal file
File diff suppressed because it is too large
Load Diff
111
src/util/ulsdata2json.php
Normal file
111
src/util/ulsdata2json.php
Normal file
@@ -0,0 +1,111 @@
|
||||
<?php
|
||||
/**
|
||||
* Script to create the language data in JSON format for ULS.
|
||||
*
|
||||
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
||||
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
||||
* contributors. See CREDITS for a list.
|
||||
*
|
||||
* @file
|
||||
* @ingroup Extensions
|
||||
* @license GPL-2.0-or-later
|
||||
*/
|
||||
|
||||
include __DIR__ . '/spyc.php';
|
||||
|
||||
// Reports a fatal problem on STDERR and stops with a non-zero exit status, so
// that a failed run is visible to whatever invoked the script.
$fail = function ( $message ) {
	fwrite( STDERR, $message );
	exit( 1 );
};

print "Reading langdb.yaml...\n";
$yamlLangdb = file_get_contents( __DIR__ . '/../../data/langdb.yaml' );
if ( $yamlLangdb === false ) {
	$fail( "Failed to read langdb.yaml.\n" );
}
$parsedLangdb = spyc_load( $yamlLangdb );

$supplementalDataFilename = 'supplementalData.xml';
$supplementalDataUrl =
	// phpcs:ignore Generic.Files.LineLength
	"https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/supplementalData.xml";

$supplementalDataFile = fopen( $supplementalDataFilename, 'w' );
if ( $supplementalDataFile === false ) {
	$fail( "Unable to open $supplementalDataFilename for writing.\n" );
}

$curl = curl_init( $supplementalDataUrl );
curl_setopt( $curl, CURLOPT_FILE, $supplementalDataFile );
curl_setopt( $curl, CURLOPT_HEADER, 0 );
// Treat HTTP error responses as a failed download instead of saving the
// error page as if it were the CLDR data.
curl_setopt( $curl, CURLOPT_FAILONERROR, true );

print "Trying to download $supplementalDataUrl...\n";
$curlSuccess = curl_exec( $curl );
curl_close( $curl );
fclose( $supplementalDataFile );

if ( !$curlSuccess ) {
	$fail( "Failed to download CLDR data from $supplementalDataUrl.\n" );
}
print "Downloaded $supplementalDataFilename, trying to parse...\n";

$supplementalData = simplexml_load_file( $supplementalDataFilename );

if ( !( $supplementalData instanceof SimpleXMLElement ) ) {
	$fail( "Attempt to load CLDR data from $supplementalDataFilename failed.\n" );
}

print "CLDR supplemental data parsed successfully, reading territories info...\n";
$parsedLangdb['territories'] = [];

// First pass: collect, per territory, the normalised codes of all languages
// that CLDR lists for it.
foreach ( $supplementalData->territoryInfo->territory as $territoryRecord ) {
	$territoryAttributes = $territoryRecord->attributes();
	$territoryCodeAttr = $territoryAttributes['type'];
	$territoryCode = (string)$territoryCodeAttr[0];
	$parsedLangdb['territories'][$territoryCode] = [];

	foreach ( $territoryRecord->languagePopulation as $languageRecord ) {
		$languageAttributes = $languageRecord->attributes();
		$languageCodeAttr = $languageAttributes['type'];
		// Lower case is a convention for language codes in ULS.
		// '_' is used in CLDR for compound codes and it's replaced with '-' here.
		$normalisedCode = strtr( strtolower( (string)$languageCodeAttr[0] ), '_', '-' );

		$parsedLangdb['territories'][$territoryCode][] = $normalisedCode;

		// In case of codes with variants, also add the base because ULS might consider
		// them as separate languages, e.g. zh, zh-hant and zh-hans.
		if ( strpos( $normalisedCode, '-' ) !== false ) {
			$parts = explode( '-', $normalisedCode );
			$parsedLangdb['territories'][$territoryCode][] = $parts[0];
		}
	}
}

// Second pass: drop languages missing from langdb, replace redirects with
// their targets, and tidy up the resulting lists.
foreach ( $parsedLangdb['territories'] as $territoryCode => $languages ) {
	foreach ( $languages as $index => $language ) {
		if ( !isset( $parsedLangdb['languages'][$language] ) ) {
			echo "Unknown language $language for territory $territoryCode\n";
			unset( $parsedLangdb['territories'][$territoryCode][$index] );
			continue;
		}

		// A single-element entry is a redirect; list its target instead.
		$data = $parsedLangdb['languages'][$language];
		if ( count( $data ) === 1 ) {
			echo "Redirect for language $language to {$data[0]} territory $territoryCode\n";
			$parsedLangdb['territories'][$territoryCode][$index] = $data[0];
		}
	}

	// Clean-up to save space
	if ( count( $parsedLangdb['territories'][$territoryCode] ) === 0 ) {
		unset( $parsedLangdb['territories'][$territoryCode] );
		continue;
	}

	// Remove duplicates we might have created
	$parsedLangdb['territories'][$territoryCode] =
		array_unique( $parsedLangdb['territories'][$territoryCode] );

	// We need to renumber or json conversion thinks these are objects
	$parsedLangdb['territories'][$territoryCode] =
		array_values( $parsedLangdb['territories'][$territoryCode] );
}

print "Writing JSON langdb...\n";
// Pretty-printed for making diff review easier.
$jsonVerbose = json_encode( $parsedLangdb, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE );
if ( $jsonVerbose === false ) {
	$fail( "Failed to encode the language data as JSON.\n" );
}

// Write next to langdb.yaml, independently of the directory the script is
// started from.
$outputPath = __DIR__ . '/../../data/language-data.json';
if ( file_put_contents( $outputPath, $jsonVerbose ) === false ) {
	$fail( "Failed to write $outputPath.\n" );
}

print "Done.\n";
|
||||
Reference in New Issue
Block a user