Merge "Add special language names to facilitate searching"

This commit is contained in:
jenkins-bot
2017-10-25 09:25:55 +00:00
committed by Gerrit Code Review
3 changed files with 55 additions and 4 deletions

View File

@@ -36,6 +36,7 @@ class LanguageNameIndexer extends Maintenance {
$buckets = [];
foreach ( $languages as $sourceLanguage => $autonym ) {
$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
foreach ( $translations as $targetLanguage => $translation ) {
$translation = mb_strtolower( $translation );
// Remove directionality markers used in Names.php: users are not
@@ -46,6 +47,29 @@ class LanguageNameIndexer extends Maintenance {
}
}
// Some languages don't have a conveniently searchable name in CLDR.
// For example, the name of Western Punjabi doesn't start with
// the string "punjabi" in any language, so people searching in
// English cannot find it by typing "punjabi".
// To work around this, extra search names for such languages are
// added here by hand.
$specialLanguages = [
// Catalan, sometimes searched as "Valencià"
'ca' => 'valencia',
// Georgian, the transliteration of the autonym is often used for searching
'ka' => 'kartuli',
// Western Punjabi, doesn't start with the word "Punjabi" in any language
'pnb' => 'punjabi western',
// Simplified and Traditional Chinese, because zh-hans and zh-hant
// are not mapped to any English name
'zh-hans' => 'chinese simplified',
'zh-hant' => 'chinese traditional',
];
foreach ( $specialLanguages as $targetLanguage => $translation ) {
$bucket = LanguageNameSearch::getIndex( $translation );
$buckets[$bucket][$translation] = $targetLanguage;
}
$lengths = array_values( array_map( 'count', $buckets ) );
$count = count( $buckets );
$min = min( $lengths );

View File

@@ -2923,6 +2923,8 @@ class LanguageNameSearchData {
'chex' => 'cs',
'cheva' => 'ny',
'chukot' => 'chk',
'chinese simplified' => 'zh-hans',
'chinese traditional' => 'zh-hant',
],
100 => [
'dansk' => 'da',
@@ -2933,6 +2935,7 @@ class LanguageNameSearchData {
'dorerin naoero' => 'na',
'diné bizaad' => 'nv',
'deitsch' => 'pdc',
'davvisámegiella' => 'se',
'deens' => 'da',
'duits' => 'de',
'divehi' => 'dv',
@@ -3172,7 +3175,6 @@ class LanguageNameSearchData {
'divehigiella' => 'dv',
'dzongkhagiella' => 'dz',
'dárogiella' => 'no',
'davvisámegiella' => 'se',
'durkagiella' => 'tr',
'dovdameahttun giella' => 'und',
'divehijski' => 'dv',
@@ -8464,6 +8466,7 @@ class LanguageNameSearchData {
'kreol (nigeriya)' => 'pcm',
'ký hiệu blissymbols' => 'zbl',
'không có nội dung ngôn ngữ' => 'zxx',
'kartuli' => 'ka',
],
108 => [
'la .lojban.' => 'jbo',
@@ -12634,6 +12637,7 @@ class LanguageNameSearchData {
'portugal (braziliya)' => 'pt-br',
'portugal (yevropa)' => 'pt-pt',
'portugänapük' => 'pt',
'punjabi western' => 'pnb',
],
113 => [
'qafár af' => 'aa',
@@ -13051,7 +13055,6 @@ class LanguageNameSearchData {
'sicilianu' => 'scn',
'scots' => 'sco',
'sassaresu' => 'sdc',
'sámegiella' => 'se',
'sängö' => 'sg',
'srpskohrvatski / српскохрватски' => 'sh',
'simple english' => 'simple',
@@ -14838,7 +14841,6 @@ class LanguageNameSearchData {
'tojikī' => 'tg-latn',
'türkmençe' => 'tk',
'tagalog' => 'tl',
'toki pona' => 'tokipona',
'tok pisin' => 'tpi',
'türkçe' => 'tr',
'tatarça' => 'tt-latn',
@@ -17079,6 +17081,7 @@ class LanguageNameSearchData {
'valis' => 'wae',
'volamo' => 'wal',
'valbiri' => 'wbp',
'valencia' => 'ca',
],
119 => [
'west-vlams' => 'vls',
@@ -36749,7 +36752,6 @@ class LanguageNameSearchData {
'タリシュ語' => 'tly',
'ツワナ語' => 'tn',
'トンガ語' => 'to',
'トキポナ' => 'tokipona',
'トク・ピシン語' => 'tpi',
'トルコ語' => 'tr',
'トゥロヨ語' => 'tru',
@@ -36788,6 +36790,7 @@ class LanguageNameSearchData {
'シャウィーア語(アラビア文字)' => 'shy-arab',
'シャウィーア語(ラテン文字)' => 'shy-latn',
'シャウィーア語(ティフナグ文字)' => 'shy-tfng',
'トキポナ' => 'tokipona',
'アチョリ語' => 'ach',
'アダングメ語' => 'ada',
'アヴェスタ語' => 'ae',

View File

@@ -58,6 +58,30 @@ class LanguageSearchTest extends PHPUnit_Framework_TestCase {
'ml' => 'മലയാളം',
]
],
[ 'punja', [
'pa' => 'punjaabi sennii',
'pnb' => 'punjabi western',
]
],
[ 'kartuli', [
'ka' => 'kartuli',
]
],
[ 'valencia', [
'ca' => 'valencia',
]
],
[ 'chinese', [
'zh-hans' => 'chinese simplified',
'zh-hant' => 'chinese traditional',
'zh' => 'chinesesch',
'zh-cn' => 'chinese (china)',
'zh-hk' => 'chinese (hong kong)',
'zh-min-nan' => 'chinese (min nan)',
'zh-sg' => 'chinese (singapore)',
'zh-tw' => 'chinese (taiwan)'
]
],
[ 'finish', [
'fi' => 'finnish'
]