Update language name search index
I noticed some language names are not searchable. I made it so that autonyms from language-data are added to the search index. Without this, languages not present in Names.php or in the CLDR extension are not searchable via the API except by language code. Change-Id: I51a9e2eb15fb40963e6edbf1db76133d84de7291
This commit is contained in:
@@ -36,12 +36,26 @@ class LanguageNameIndexer extends Maintenance {
|
||||
// Avoid local configuration leaking to this script
|
||||
$wgExtraLanguageNames = [];
|
||||
|
||||
$languages = Language::fetchLanguageNames( null, 'all' );
|
||||
$languageNames = [];
|
||||
// Add languages from language-data
|
||||
$ulsLanguages = $this->getLanguageData()[ 'languages' ];
|
||||
foreach ( $ulsLanguages as $languageCode => $languageEntry ) {
|
||||
// Redirects have only one item, so they have no autonym at index 2
|
||||
if ( isset( $languageEntry[ 2 ] ) ) {
|
||||
$languageNames[ 'autonyms' ][ $languageCode ] = $languageEntry[ 2 ];
|
||||
}
|
||||
}
|
||||
|
||||
// Languages and their names in different languages from Names.php and the cldr extension
|
||||
// This comes after $ulsLanguages so that for example the als/gsw mixup is using the code
|
||||
// used in the Wikimedia world.
|
||||
$mwLanguages = Language::fetchLanguageNames( null, 'all' );
|
||||
foreach ( array_keys( $mwLanguages ) as $languageCode ) {
|
||||
$languageNames[ $languageCode ] = LanguageNames::getNames( $languageCode, 0, 2 );
|
||||
}
|
||||
|
||||
$buckets = [];
|
||||
foreach ( $languages as $sourceLanguage => $autonym ) {
|
||||
$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
|
||||
|
||||
foreach ( $languageNames as $translations ) {
|
||||
foreach ( $translations as $targetLanguage => $translation ) {
|
||||
// Remove directionality markers used in Names.php: users are not
|
||||
// going to type these.
|
||||
@@ -128,6 +142,15 @@ class LanguageNameIndexer extends Maintenance {
|
||||
$this->generateFile( $buckets );
|
||||
}
|
||||
|
||||
/**
 * Load the language data shipped in the jquery.uls data file.
 *
 * The data file is JavaScript, not JSON: the text assigned to `$.uls.data`
 * is cut out with a regular expression and then decoded as JSON.
 *
 * @return array Decoded data as nested associative arrays
 * @throws \RuntimeException If the file cannot be read, the `$.uls.data`
 *  assignment cannot be located, or the extracted text does not decode to an array
 */
private function getLanguageData() {
	$file = __DIR__ . '/../lib/jquery.uls/src/jquery.uls.data.js';

	// Fail loudly instead of continuing with an empty string: a missing file
	// would otherwise silently produce an index without any of this data.
	$contents = is_readable( $file ) ? file_get_contents( $file ) : false;
	if ( $contents === false ) {
		throw new \RuntimeException( "Unable to read language data file: $file" );
	}

	// Capture everything between the `$.uls.data = ` assignment and the
	// closing `} ( jQuery )` of the wrapper. preg_match returns 0 when there
	// is no match and false on a PCRE error; both are treated as failures.
	$found = preg_match( '/.*\$\.uls\.data = (.*?)} \( jQuery \)/s', $contents, $matches );
	if ( $found !== 1 ) {
		throw new \RuntimeException( "Unable to locate the \$.uls.data assignment in $file" );
	}

	$data = json_decode( $matches[ 1 ], true );
	if ( !is_array( $data ) ) {
		throw new \RuntimeException(
			"Unable to decode language data from $file: " . json_last_error_msg()
		);
	}

	return $data;
}
|
||||
|
||||
private function generateFile( array $buckets ) {
|
||||
$template = <<<PHP
|
||||
<?php
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -103,10 +103,12 @@ class LanguageSearchTest extends PHPUnit\Framework\TestCase {
|
||||
'zh' => 'chinese',
|
||||
'zh-cn' => 'chinese (china)',
|
||||
'zh-hk' => 'chinese (hong kong)',
|
||||
'zh-mo' => 'chinese (macau)',
|
||||
'zh-my' => 'chinese (malaysia)',
|
||||
'zh-min-nan' => 'chinese (min nan)',
|
||||
'zh-sg' => 'chinese (singapore)',
|
||||
'zh-tw' => 'chinese (taiwan)',
|
||||
'zh-hans' => 'chinese simplified',
|
||||
'zh-hans' => 'chinese simplificate',
|
||||
'zh-hant' => 'chinese traditional',
|
||||
'zh-classical' => 'chinese — classical chinese',
|
||||
'gan' => 'chinese — gan chinese',
|
||||
|
||||
Reference in New Issue
Block a user