Update language search index

The changes are mostly automatic and add languages
that were recently added to language-data
or updated in the CLDR.

I went over the whole long diff and couldn't find
any issues that would affect the usage of the ULS
search box.

Tests are updated to reflect the current names,
but here, too, the language finding functionality
is not supposed to be affected.

Two kinds of tests were affected:
* The name of Hindi (Latin) in Malayalam: I removed
  the 'hi-latn' entry from the expected results
  because Hindi (Latin) is not used in MediaWiki
  anyway, so it doesn't matter.
* Some names of Chinese varieties. I updated
  the tests and also added search aliases for the
  languages whose names were changed ('cdo' and
  'zh-min-nan'), so that searching for "chinese"
  would still find them (so it's good that we had
  those tests!).

Change-Id: I22344dadb0b01d7704ab7d76271ab27077daedb3
This commit is contained in:
Amir E. Aharoni
2024-07-11 19:39:44 -04:00
committed by Abijeet
parent dc9fd2e2ce
commit bd0cccacdf
3 changed files with 2417 additions and 550 deletions

View File

@@ -101,6 +101,8 @@ class LanguageNameIndexer extends Maintenance {
$specialLanguages = [ $specialLanguages = [
// Catalan, sometimes searched as "Valencià" // Catalan, sometimes searched as "Valencià"
'ca' => [ 'valencia' ], 'ca' => [ 'valencia' ],
// Compatibility with the old name and other Chinese varieties
'cdo' => [ 'chinese min dong' ],
// Spanish, the transliteration of the autonym is often used for searching // Spanish, the transliteration of the autonym is often used for searching
'es' => [ 'castellano' ], 'es' => [ 'castellano' ],
// Armenian, the transliteration of the autonym is often used for searching // Armenian, the transliteration of the autonym is often used for searching
@@ -117,6 +119,8 @@ class LanguageNameIndexer extends Maintenance {
// are not mapped to any English name // are not mapped to any English name
'zh-hans' => [ 'chinese simplified' ], 'zh-hans' => [ 'chinese simplified' ],
'zh-hant' => [ 'chinese traditional' ], 'zh-hant' => [ 'chinese traditional' ],
// Compatibility with the old name and other Chinese varieties
'zh-min-nan' => [ 'chinese min nan' ],
]; ];
foreach ( $specialLanguages as $targetLanguage => $translations ) { foreach ( $specialLanguages as $targetLanguage => $translations ) {

File diff suppressed because it is too large Load Diff

View File

@@ -40,7 +40,6 @@ class LanguageSearchTest extends PHPUnit\Framework\TestCase {
return [ return [
[ 'ഹിന്ദി', [ [ 'ഹിന്ദി', [
'hi' => 'ഹിന്ദി', 'hi' => 'ഹിന്ദി',
'hi-latn' => 'ഹിന്ദി (ലാറ്റിൻ)'
] ]
], ],
[ 'മല', [ [ 'മല', [
@@ -106,29 +105,25 @@ class LanguageSearchTest extends PHPUnit\Framework\TestCase {
] ]
], ],
[ 'chinese', [ [ 'chinese', [
'zh' => 'chinese', // Presence of CLDR extension affects the results
'zh' => class_exists( LanguageNames::class ) ? 'chinese' : 'chines',
'zh-cn' => 'chinese (china)', 'zh-cn' => 'chinese (china)',
'zh-hk' => 'chinese (hong kong)', 'zh-hk' => 'chinese (hong kong)',
'zh-mo' => 'chinese (macau)', 'zh-mo' => 'chinese (macau)',
'zh-my' => 'chinese (malaysia)', 'zh-my' => 'chinese (malaysia)',
'zh-min-nan' => 'chinese (min nan)',
'zh-sg' => 'chinese (singapore)', 'zh-sg' => 'chinese (singapore)',
'zh-tw' => 'chinese (taiwan)', 'zh-tw' => 'chinese (taiwan)',
'cdo' => 'chinese min dong',
'zh-min-nan' => 'chinese min nan',
'zh-hans' => 'chinese simplificate', 'zh-hans' => 'chinese simplificate',
'zh-hant' => 'chinese traditional', 'zh-hant' => 'chinese traditional',
'zh-classical' => 'chinese — classical chinese',
'gan' => 'chinese — gan chinese',
'hak' => 'chinese — hakka chinese', 'hak' => 'chinese — hakka chinese',
'gan' => 'chinese — isi-gan chinese',
'nan' => 'chinese — isi-min nan chinese', 'nan' => 'chinese — isi-min nan chinese',
'wuu' => 'chinese — isi-wu chinese', 'wuu' => 'chinese — isi-wu chinese',
'hsn' => 'chinese — isi-xiang chinese', 'hsn' => 'chinese — isi-xiang chinese',
'lzh' => 'chinese — literary chinese', 'zh-classical' => 'chinese — literary chinese',
'zh' => 'chinese', 'lzh' => 'chinesesch — klassescht chinesesch',
'zh-min-nan' => 'chinese (min nan)',
'cdo' => 'chinese — min dong chinese',
'cdo-hani' => 'chinese — min dong chinese (hanji)',
'wuu-hans' => 'chinese — wu chinese (simplified)',
'wuu-hant' => 'chinese — wu chinese (traditional)'
] ]
], ],
[ 'finnisj', [ [ 'finnisj', [