Add special language names to facilitate searching

This adds several custom search names for languages whose CLDR names are not conveniently searchable.

The addition of Punjabi addresses Bug T178070.

The addition of Chinese addresses Bug T73891.

Variant spellings for Georgian and Catalan (Valencian)
are added because these are the languages most frequently
searched for but not found in the ULS search box.

Bug: T73891
Bug: T178070
Change-Id: Ifbb08b560e454643d246379c19f725bde61917e9
This commit is contained in:
Amire80
2017-10-24 17:08:29 +05:30
parent 18c09bc6d3
commit 101532cfa6
3 changed files with 55 additions and 4 deletions

View File

@@ -36,6 +36,7 @@ class LanguageNameIndexer extends Maintenance {
$buckets = [];
foreach ( $languages as $sourceLanguage => $autonym ) {
$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
foreach ( $translations as $targetLanguage => $translation ) {
$translation = mb_strtolower( $translation );
// Remove directionality markers used in Names.php: users are not
@@ -46,6 +47,29 @@ class LanguageNameIndexer extends Maintenance {
}
}
// Some languages don't have a conveniently searchable name in CLDR.
// For example, the name of Western Punjabi doesn't start with
// the string "punjabi" in any language, so it cannot be found
// by people who search in English.
// To resolve this, some languages are added here locally.
$specialLanguages = [
// Catalan, sometimes searched as "Valencià"
'ca' => 'valencia',
// Georgian, the transliteration of the autonym is often used for searching
'ka' => 'kartuli',
// Western Punjabi, doesn't start with the word "Punjabi" in any language
'pnb' => 'punjabi western',
// Simplified and Traditional Chinese, because zh-hans and zh-hant
// are not mapped to any English name
'zh-hans' => 'chinese simplified',
'zh-hant' => 'chinese traditional',
];
foreach ( $specialLanguages as $targetLanguage => $translation ) {
$bucket = LanguageNameSearch::getIndex( $translation );
$buckets[$bucket][$translation] = $targetLanguage;
}
$lengths = array_values( array_map( 'count', $buckets ) );
$count = count( $buckets );
$min = min( $lengths );

View File

@@ -2923,6 +2923,8 @@ class LanguageNameSearchData {
'chex' => 'cs',
'cheva' => 'ny',
'chukot' => 'chk',
'chinese simplified' => 'zh-hans',
'chinese traditional' => 'zh-hant',
],
100 => [
'dansk' => 'da',
@@ -2933,6 +2935,7 @@ class LanguageNameSearchData {
'dorerin naoero' => 'na',
'diné bizaad' => 'nv',
'deitsch' => 'pdc',
'davvisámegiella' => 'se',
'deens' => 'da',
'duits' => 'de',
'divehi' => 'dv',
@@ -3172,7 +3175,6 @@ class LanguageNameSearchData {
'divehigiella' => 'dv',
'dzongkhagiella' => 'dz',
'dárogiella' => 'no',
'davvisámegiella' => 'se',
'durkagiella' => 'tr',
'dovdameahttun giella' => 'und',
'divehijski' => 'dv',
@@ -8464,6 +8466,7 @@ class LanguageNameSearchData {
'kreol (nigeriya)' => 'pcm',
'ký hiệu blissymbols' => 'zbl',
'không có nội dung ngôn ngữ' => 'zxx',
'kartuli' => 'ka',
],
108 => [
'la .lojban.' => 'jbo',
@@ -12634,6 +12637,7 @@ class LanguageNameSearchData {
'portugal (braziliya)' => 'pt-br',
'portugal (yevropa)' => 'pt-pt',
'portugänapük' => 'pt',
'punjabi western' => 'pnb',
],
113 => [
'qafár af' => 'aa',
@@ -13051,7 +13055,6 @@ class LanguageNameSearchData {
'sicilianu' => 'scn',
'scots' => 'sco',
'sassaresu' => 'sdc',
'sámegiella' => 'se',
'sängö' => 'sg',
'srpskohrvatski / српскохрватски' => 'sh',
'simple english' => 'simple',
@@ -14838,7 +14841,6 @@ class LanguageNameSearchData {
'tojikī' => 'tg-latn',
'türkmençe' => 'tk',
'tagalog' => 'tl',
'toki pona' => 'tokipona',
'tok pisin' => 'tpi',
'türkçe' => 'tr',
'tatarça' => 'tt-latn',
@@ -17079,6 +17081,7 @@ class LanguageNameSearchData {
'valis' => 'wae',
'volamo' => 'wal',
'valbiri' => 'wbp',
'valencia' => 'ca',
],
119 => [
'west-vlams' => 'vls',
@@ -36749,7 +36752,6 @@ class LanguageNameSearchData {
'タリシュ語' => 'tly',
'ツワナ語' => 'tn',
'トンガ語' => 'to',
'トキポナ' => 'tokipona',
'トク・ピシン語' => 'tpi',
'トルコ語' => 'tr',
'トゥロヨ語' => 'tru',
@@ -36788,6 +36790,7 @@ class LanguageNameSearchData {
'シャウィーア語(アラビア文字)' => 'shy-arab',
'シャウィーア語(ラテン文字)' => 'shy-latn',
'シャウィーア語(ティフナグ文字)' => 'shy-tfng',
'トキポナ' => 'tokipona',
'アチョリ語' => 'ach',
'アダングメ語' => 'ada',
'アヴェスタ語' => 'ae',

View File

@@ -58,6 +58,30 @@ class LanguageSearchTest extends PHPUnit_Framework_TestCase {
'ml' => 'മലയാളം',
]
],
[ 'punja', [
'pa' => 'punjaabi sennii',
'pnb' => 'punjabi western',
]
],
[ 'kartuli', [
'ka' => 'kartuli',
]
],
[ 'valencia', [
'ca' => 'valencia',
]
],
[ 'chinese', [
'zh-hans' => 'chinese simplified',
'zh-hant' => 'chinese traditional',
'zh' => 'chinesesch',
'zh-cn' => 'chinese (china)',
'zh-hk' => 'chinese (hong kong)',
'zh-min-nan' => 'chinese (min nan)',
'zh-sg' => 'chinese (singapore)',
'zh-tw' => 'chinese (taiwan)'
]
],
[ 'finish', [
'fi' => 'finnish'
]