diff --git a/data/LanguageNameIndexer.php b/data/LanguageNameIndexer.php index a4b67ec7..37659509 100644 --- a/data/LanguageNameIndexer.php +++ b/data/LanguageNameIndexer.php @@ -36,6 +36,7 @@ class LanguageNameIndexer extends Maintenance { $buckets = []; foreach ( $languages as $sourceLanguage => $autonym ) { $translations = LanguageNames::getNames( $sourceLanguage, 0, 2 ); + foreach ( $translations as $targetLanguage => $translation ) { $translation = mb_strtolower( $translation ); // Remove directionality markers used in Names.php: users are not @@ -46,6 +47,29 @@ class LanguageNameIndexer extends Maintenance { } } + // Some languages don't have a conveniently searchable name in CLDR. + // For example, the name of Western Punjabi doesn't start with + // the string "punjabi" in any language, so it cannot be found + // by people who search in English. + // To resolve this, some languages are added here locally. + $specialLanguages = [ + // Catalan, sometimes searched as "Valencià" + 'ca' => 'valencia', + // Georgian, the transliteration of the autonym is often used for searching + 'ka' => 'kartuli', + // Western Punjabi, doesn't start with the word "Punjabi" in any language + 'pnb' => 'punjabi western', + // Simplified and Traditional Chinese, because zh-hans and zh-hant + // are not mapped to any English name + 'zh-hans' => 'chinese simplified', + 'zh-hant' => 'chinese traditional', + ]; + + foreach ( $specialLanguages as $targetLanguage => $translation ) { + $bucket = LanguageNameSearch::getIndex( $translation ); + $buckets[$bucket][$translation] = $targetLanguage; + } + $lengths = array_values( array_map( 'count', $buckets ) ); $count = count( $buckets ); $min = min( $lengths ); diff --git a/data/LanguageNameSearchData.php b/data/LanguageNameSearchData.php index 44b7b005..cf34b4eb 100644 --- a/data/LanguageNameSearchData.php +++ b/data/LanguageNameSearchData.php @@ -2923,6 +2923,8 @@ class LanguageNameSearchData { 'chex' => 'cs', 'cheva' => 'ny', 'chukot' => 'chk', + 'chinese simplified' => 'zh-hans', + 'chinese traditional' => 'zh-hant', ], 100 => [ 'dansk' => 'da', @@ -2933,6 +2935,7 @@ class LanguageNameSearchData { 'dorerin naoero' => 'na', 'diné bizaad' => 'nv', 'deitsch' => 'pdc', + 'davvisámegiella' => 'se', 'deens' => 'da', 'duits' => 'de', 'divehi' => 'dv', @@ -3172,7 +3175,6 @@ class LanguageNameSearchData { 'divehigiella' => 'dv', 'dzongkhagiella' => 'dz', 'dárogiella' => 'no', - 'davvisámegiella' => 'se', 'durkagiella' => 'tr', 'dovdameahttun giella' => 'und', 'divehijski' => 'dv', @@ -8464,6 +8466,7 @@ class LanguageNameSearchData { 'kreol (nigeriya)' => 'pcm', 'ký hiệu blissymbols' => 'zbl', 'không có nội dung ngôn ngữ' => 'zxx', + 'kartuli' => 'ka', ], 108 => [ 'la .lojban.' => 'jbo', @@ -12634,6 +12637,7 @@ class LanguageNameSearchData { 'portugal (braziliya)' => 'pt-br', 'portugal (yevropa)' => 'pt-pt', 'portugänapük' => 'pt', + 'punjabi western' => 'pnb', ], 113 => [ 'qafár af' => 'aa', @@ -13051,7 +13055,6 @@ class LanguageNameSearchData { 'sicilianu' => 'scn', 'scots' => 'sco', 'sassaresu' => 'sdc', - 'sámegiella' => 'se', 'sängö' => 'sg', 'srpskohrvatski / српскохрватски' => 'sh', 'simple english' => 'simple', @@ -14838,7 +14841,6 @@ class LanguageNameSearchData { 'tojikī' => 'tg-latn', 'türkmençe' => 'tk', 'tagalog' => 'tl', - 'toki pona' => 'tokipona', 'tok pisin' => 'tpi', 'türkçe' => 'tr', 'tatarça' => 'tt-latn', @@ -17079,6 +17081,7 @@ class LanguageNameSearchData { 'valis' => 'wae', 'volamo' => 'wal', 'valbiri' => 'wbp', + 'valencia' => 'ca', ], 119 => [ 'west-vlams' => 'vls', @@ -36749,7 +36752,6 @@ class LanguageNameSearchData { 'タリシュ語' => 'tly', 'ツワナ語' => 'tn', 'トンガ語' => 'to', - 'トキポナ' => 'tokipona', 'トク・ピシン語' => 'tpi', 'トルコ語' => 'tr', 'トゥロヨ語' => 'tru', @@ -36788,6 +36790,7 @@ class LanguageNameSearchData { 'シャウィーア語(アラビア文字)' => 'shy-arab', 'シャウィーア語(ラテン文字)' => 'shy-latn', 'シャウィーア語(ティフナグ文字)' => 'shy-tfng', + 'トキポナ' => 'tokipona', 'アチョリ語' => 'ach', 'アダングメ語' => 'ada', 'アヴェスタ語' => 'ae', diff --git a/tests/phpunit/LanguageSearchTest.php b/tests/phpunit/LanguageSearchTest.php index 13e07834..bd993f4d 100644 --- a/tests/phpunit/LanguageSearchTest.php +++ b/tests/phpunit/LanguageSearchTest.php @@ -58,6 +58,30 @@ class LanguageSearchTest extends PHPUnit_Framework_TestCase { 'ml' => 'മലയാളം', ] ], + [ 'punja', [ + 'pa' => 'punjaabi sennii', + 'pnb' => 'punjabi western', + ] + ], + [ 'kartuli', [ + 'ka' => 'kartuli', + ] + ], + [ 'valencia', [ + 'ca' => 'valencia', + ] + ], + [ 'chinese', [ + 'zh-hans' => 'chinese simplified', + 'zh-hant' => 'chinese traditional', + 'zh' => 'chinesesch', + 'zh-cn' => 'chinese (china)', + 'zh-hk' => 'chinese (hong kong)', + 'zh-min-nan' => 'chinese (min nan)', + 'zh-sg' => 'chinese (singapore)', + 'zh-tw' => 'chinese (taiwan)' + ] + ], [ 'finish', [ 'fi' => 'finnish' ]