Merge "Add special language names to facilitate searching"
This commit is contained in:
@@ -36,6 +36,7 @@ class LanguageNameIndexer extends Maintenance {
|
||||
$buckets = [];
|
||||
foreach ( $languages as $sourceLanguage => $autonym ) {
|
||||
$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
|
||||
|
||||
foreach ( $translations as $targetLanguage => $translation ) {
|
||||
$translation = mb_strtolower( $translation );
|
||||
// Remove directionality markers used in Names.php: users are not
|
||||
@@ -46,6 +47,29 @@ class LanguageNameIndexer extends Maintenance {
|
||||
}
|
||||
}
|
||||
|
||||
// Some languages don't have a conveniently searchable name in CLDR.
|
||||
// For example, the name of Western Punjabi doesn't start with
|
||||
// the string "punjabi" in any language, so it cannot be found
|
||||
// by people who search in English.
|
||||
// To resolve this, some languages are added here locally.
|
||||
$specialLanguages = [
|
||||
// Catalan, sometimes searched as "Valencià"
|
||||
'ca' => 'valencia',
|
||||
// Georgian: the transliteration of the autonym is often used for searching
|
||||
'ka' => 'kartuli',
|
||||
// Western Punjabi: its name doesn't start with the word "Punjabi" in any language
|
||||
'pnb' => 'punjabi western',
|
||||
// Simplified and Traditional Chinese, because zh-hans and zh-hant
|
||||
// are not mapped to any English name
|
||||
'zh-hans' => 'chinese simplified',
|
||||
'zh-hant' => 'chinese traditional',
|
||||
];
|
||||
|
||||
foreach ( $specialLanguages as $targetLanguage => $translation ) {
|
||||
$bucket = LanguageNameSearch::getIndex( $translation );
|
||||
$buckets[$bucket][$translation] = $targetLanguage;
|
||||
}
|
||||
|
||||
$lengths = array_values( array_map( 'count', $buckets ) );
|
||||
$count = count( $buckets );
|
||||
$min = min( $lengths );
|
||||
|
||||
@@ -2923,6 +2923,8 @@ class LanguageNameSearchData {
|
||||
'chex' => 'cs',
|
||||
'cheva' => 'ny',
|
||||
'chukot' => 'chk',
|
||||
'chinese simplified' => 'zh-hans',
|
||||
'chinese traditional' => 'zh-hant',
|
||||
],
|
||||
100 => [
|
||||
'dansk' => 'da',
|
||||
@@ -2933,6 +2935,7 @@ class LanguageNameSearchData {
|
||||
'dorerin naoero' => 'na',
|
||||
'diné bizaad' => 'nv',
|
||||
'deitsch' => 'pdc',
|
||||
'davvisámegiella' => 'se',
|
||||
'deens' => 'da',
|
||||
'duits' => 'de',
|
||||
'divehi' => 'dv',
|
||||
@@ -3172,7 +3175,6 @@ class LanguageNameSearchData {
|
||||
'divehigiella' => 'dv',
|
||||
'dzongkhagiella' => 'dz',
|
||||
'dárogiella' => 'no',
|
||||
'davvisámegiella' => 'se',
|
||||
'durkagiella' => 'tr',
|
||||
'dovdameahttun giella' => 'und',
|
||||
'divehijski' => 'dv',
|
||||
@@ -8464,6 +8466,7 @@ class LanguageNameSearchData {
|
||||
'kreol (nigeriya)' => 'pcm',
|
||||
'ký hiệu blissymbols' => 'zbl',
|
||||
'không có nội dung ngôn ngữ' => 'zxx',
|
||||
'kartuli' => 'ka',
|
||||
],
|
||||
108 => [
|
||||
'la .lojban.' => 'jbo',
|
||||
@@ -12634,6 +12637,7 @@ class LanguageNameSearchData {
|
||||
'portugal (braziliya)' => 'pt-br',
|
||||
'portugal (yevropa)' => 'pt-pt',
|
||||
'portugänapük' => 'pt',
|
||||
'punjabi western' => 'pnb',
|
||||
],
|
||||
113 => [
|
||||
'qafár af' => 'aa',
|
||||
@@ -13051,7 +13055,6 @@ class LanguageNameSearchData {
|
||||
'sicilianu' => 'scn',
|
||||
'scots' => 'sco',
|
||||
'sassaresu' => 'sdc',
|
||||
'sámegiella' => 'se',
|
||||
'sängö' => 'sg',
|
||||
'srpskohrvatski / српскохрватски' => 'sh',
|
||||
'simple english' => 'simple',
|
||||
@@ -14838,7 +14841,6 @@ class LanguageNameSearchData {
|
||||
'tojikī' => 'tg-latn',
|
||||
'türkmençe' => 'tk',
|
||||
'tagalog' => 'tl',
|
||||
'toki pona' => 'tokipona',
|
||||
'tok pisin' => 'tpi',
|
||||
'türkçe' => 'tr',
|
||||
'tatarça' => 'tt-latn',
|
||||
@@ -17079,6 +17081,7 @@ class LanguageNameSearchData {
|
||||
'valis' => 'wae',
|
||||
'volamo' => 'wal',
|
||||
'valbiri' => 'wbp',
|
||||
'valencia' => 'ca',
|
||||
],
|
||||
119 => [
|
||||
'west-vlams' => 'vls',
|
||||
@@ -36749,7 +36752,6 @@ class LanguageNameSearchData {
|
||||
'タリシュ語' => 'tly',
|
||||
'ツワナ語' => 'tn',
|
||||
'トンガ語' => 'to',
|
||||
'トキポナ' => 'tokipona',
|
||||
'トク・ピシン語' => 'tpi',
|
||||
'トルコ語' => 'tr',
|
||||
'トゥロヨ語' => 'tru',
|
||||
@@ -36788,6 +36790,7 @@ class LanguageNameSearchData {
|
||||
'シャウィーア語(アラビア文字)' => 'shy-arab',
|
||||
'シャウィーア語(ラテン文字)' => 'shy-latn',
|
||||
'シャウィーア語(ティフナグ文字)' => 'shy-tfng',
|
||||
'トキポナ' => 'tokipona',
|
||||
'アチョリ語' => 'ach',
|
||||
'アダングメ語' => 'ada',
|
||||
'アヴェスタ語' => 'ae',
|
||||
|
||||
@@ -58,6 +58,30 @@ class LanguageSearchTest extends PHPUnit_Framework_TestCase {
|
||||
'ml' => 'മലയാളം',
|
||||
]
|
||||
],
|
||||
[ 'punja', [
|
||||
'pa' => 'punjaabi sennii',
|
||||
'pnb' => 'punjabi western',
|
||||
]
|
||||
],
|
||||
[ 'kartuli', [
|
||||
'ka' => 'kartuli',
|
||||
]
|
||||
],
|
||||
[ 'valencia', [
|
||||
'ca' => 'valencia',
|
||||
]
|
||||
],
|
||||
[ 'chinese', [
|
||||
'zh-hans' => 'chinese simplified',
|
||||
'zh-hant' => 'chinese traditional',
|
||||
'zh' => 'chinesesch',
|
||||
'zh-cn' => 'chinese (china)',
|
||||
'zh-hk' => 'chinese (hong kong)',
|
||||
'zh-min-nan' => 'chinese (min nan)',
|
||||
'zh-sg' => 'chinese (singapore)',
|
||||
'zh-tw' => 'chinese (taiwan)'
|
||||
]
|
||||
],
|
||||
[ 'finish', [
|
||||
'fi' => 'finnish'
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user