Merge "Add special language names to facilitate searching"
This commit is contained in:
@@ -36,6 +36,7 @@ class LanguageNameIndexer extends Maintenance {
|
|||||||
$buckets = [];
|
$buckets = [];
|
||||||
foreach ( $languages as $sourceLanguage => $autonym ) {
|
foreach ( $languages as $sourceLanguage => $autonym ) {
|
||||||
$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
|
$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
|
||||||
|
|
||||||
foreach ( $translations as $targetLanguage => $translation ) {
|
foreach ( $translations as $targetLanguage => $translation ) {
|
||||||
$translation = mb_strtolower( $translation );
|
$translation = mb_strtolower( $translation );
|
||||||
// Remove directionality markers used in Names.php: users are not
|
// Remove directionality markers used in Names.php: users are not
|
||||||
@@ -46,6 +47,29 @@ class LanguageNameIndexer extends Maintenance {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Some languages don't have a conveniently searchable name in CLDR.
|
||||||
|
// For example, the name of Western Punjabi doesn't start with
|
||||||
|
// the string "punjabi" in any language, so it cannot be found
|
||||||
|
// by people who search in English.
|
||||||
|
// To resolve this, some languages are added here locally.
|
||||||
|
$specialLanguages = [
|
||||||
|
// Catalan, sometimes searched as "Valencià"
|
||||||
|
'ca' => 'valencia',
|
||||||
|
// Georgian, the transliteration of the autonym is often used for searching
|
||||||
|
'ka' => 'kartuli',
|
||||||
|
// Western Punjabi, doesn't start with the word "Punjabi" in any language
|
||||||
|
'pnb' => 'punjabi western',
|
||||||
|
// Simplified and Traditional Chinese, because zh-hans and zh-hant
|
||||||
|
// are not mapped to any English name
|
||||||
|
'zh-hans' => 'chinese simplified',
|
||||||
|
'zh-hant' => 'chinese traditional',
|
||||||
|
];
|
||||||
|
|
||||||
|
foreach ( $specialLanguages as $targetLanguage => $translation ) {
|
||||||
|
$bucket = LanguageNameSearch::getIndex( $translation );
|
||||||
|
$buckets[$bucket][$translation] = $targetLanguage;
|
||||||
|
}
|
||||||
|
|
||||||
$lengths = array_values( array_map( 'count', $buckets ) );
|
$lengths = array_values( array_map( 'count', $buckets ) );
|
||||||
$count = count( $buckets );
|
$count = count( $buckets );
|
||||||
$min = min( $lengths );
|
$min = min( $lengths );
|
||||||
|
|||||||
@@ -2923,6 +2923,8 @@ class LanguageNameSearchData {
|
|||||||
'chex' => 'cs',
|
'chex' => 'cs',
|
||||||
'cheva' => 'ny',
|
'cheva' => 'ny',
|
||||||
'chukot' => 'chk',
|
'chukot' => 'chk',
|
||||||
|
'chinese simplified' => 'zh-hans',
|
||||||
|
'chinese traditional' => 'zh-hant',
|
||||||
],
|
],
|
||||||
100 => [
|
100 => [
|
||||||
'dansk' => 'da',
|
'dansk' => 'da',
|
||||||
@@ -2933,6 +2935,7 @@ class LanguageNameSearchData {
|
|||||||
'dorerin naoero' => 'na',
|
'dorerin naoero' => 'na',
|
||||||
'diné bizaad' => 'nv',
|
'diné bizaad' => 'nv',
|
||||||
'deitsch' => 'pdc',
|
'deitsch' => 'pdc',
|
||||||
|
'davvisámegiella' => 'se',
|
||||||
'deens' => 'da',
|
'deens' => 'da',
|
||||||
'duits' => 'de',
|
'duits' => 'de',
|
||||||
'divehi' => 'dv',
|
'divehi' => 'dv',
|
||||||
@@ -3172,7 +3175,6 @@ class LanguageNameSearchData {
|
|||||||
'divehigiella' => 'dv',
|
'divehigiella' => 'dv',
|
||||||
'dzongkhagiella' => 'dz',
|
'dzongkhagiella' => 'dz',
|
||||||
'dárogiella' => 'no',
|
'dárogiella' => 'no',
|
||||||
'davvisámegiella' => 'se',
|
|
||||||
'durkagiella' => 'tr',
|
'durkagiella' => 'tr',
|
||||||
'dovdameahttun giella' => 'und',
|
'dovdameahttun giella' => 'und',
|
||||||
'divehijski' => 'dv',
|
'divehijski' => 'dv',
|
||||||
@@ -8464,6 +8466,7 @@ class LanguageNameSearchData {
|
|||||||
'kreol (nigeriya)' => 'pcm',
|
'kreol (nigeriya)' => 'pcm',
|
||||||
'ký hiệu blissymbols' => 'zbl',
|
'ký hiệu blissymbols' => 'zbl',
|
||||||
'không có nội dung ngôn ngữ' => 'zxx',
|
'không có nội dung ngôn ngữ' => 'zxx',
|
||||||
|
'kartuli' => 'ka',
|
||||||
],
|
],
|
||||||
108 => [
|
108 => [
|
||||||
'la .lojban.' => 'jbo',
|
'la .lojban.' => 'jbo',
|
||||||
@@ -12634,6 +12637,7 @@ class LanguageNameSearchData {
|
|||||||
'portugal (braziliya)' => 'pt-br',
|
'portugal (braziliya)' => 'pt-br',
|
||||||
'portugal (yevropa)' => 'pt-pt',
|
'portugal (yevropa)' => 'pt-pt',
|
||||||
'portugänapük' => 'pt',
|
'portugänapük' => 'pt',
|
||||||
|
'punjabi western' => 'pnb',
|
||||||
],
|
],
|
||||||
113 => [
|
113 => [
|
||||||
'qafár af' => 'aa',
|
'qafár af' => 'aa',
|
||||||
@@ -13051,7 +13055,6 @@ class LanguageNameSearchData {
|
|||||||
'sicilianu' => 'scn',
|
'sicilianu' => 'scn',
|
||||||
'scots' => 'sco',
|
'scots' => 'sco',
|
||||||
'sassaresu' => 'sdc',
|
'sassaresu' => 'sdc',
|
||||||
'sámegiella' => 'se',
|
|
||||||
'sängö' => 'sg',
|
'sängö' => 'sg',
|
||||||
'srpskohrvatski / српскохрватски' => 'sh',
|
'srpskohrvatski / српскохрватски' => 'sh',
|
||||||
'simple english' => 'simple',
|
'simple english' => 'simple',
|
||||||
@@ -14838,7 +14841,6 @@ class LanguageNameSearchData {
|
|||||||
'tojikī' => 'tg-latn',
|
'tojikī' => 'tg-latn',
|
||||||
'türkmençe' => 'tk',
|
'türkmençe' => 'tk',
|
||||||
'tagalog' => 'tl',
|
'tagalog' => 'tl',
|
||||||
'toki pona' => 'tokipona',
|
|
||||||
'tok pisin' => 'tpi',
|
'tok pisin' => 'tpi',
|
||||||
'türkçe' => 'tr',
|
'türkçe' => 'tr',
|
||||||
'tatarça' => 'tt-latn',
|
'tatarça' => 'tt-latn',
|
||||||
@@ -17079,6 +17081,7 @@ class LanguageNameSearchData {
|
|||||||
'valis' => 'wae',
|
'valis' => 'wae',
|
||||||
'volamo' => 'wal',
|
'volamo' => 'wal',
|
||||||
'valbiri' => 'wbp',
|
'valbiri' => 'wbp',
|
||||||
|
'valencia' => 'ca',
|
||||||
],
|
],
|
||||||
119 => [
|
119 => [
|
||||||
'west-vlams' => 'vls',
|
'west-vlams' => 'vls',
|
||||||
@@ -36749,7 +36752,6 @@ class LanguageNameSearchData {
|
|||||||
'タリシュ語' => 'tly',
|
'タリシュ語' => 'tly',
|
||||||
'ツワナ語' => 'tn',
|
'ツワナ語' => 'tn',
|
||||||
'トンガ語' => 'to',
|
'トンガ語' => 'to',
|
||||||
'トキポナ' => 'tokipona',
|
|
||||||
'トク・ピシン語' => 'tpi',
|
'トク・ピシン語' => 'tpi',
|
||||||
'トルコ語' => 'tr',
|
'トルコ語' => 'tr',
|
||||||
'トゥロヨ語' => 'tru',
|
'トゥロヨ語' => 'tru',
|
||||||
@@ -36788,6 +36790,7 @@ class LanguageNameSearchData {
|
|||||||
'シャウィーア語(アラビア文字)' => 'shy-arab',
|
'シャウィーア語(アラビア文字)' => 'shy-arab',
|
||||||
'シャウィーア語(ラテン文字)' => 'shy-latn',
|
'シャウィーア語(ラテン文字)' => 'shy-latn',
|
||||||
'シャウィーア語(ティフナグ文字)' => 'shy-tfng',
|
'シャウィーア語(ティフナグ文字)' => 'shy-tfng',
|
||||||
|
'トキポナ' => 'tokipona',
|
||||||
'アチョリ語' => 'ach',
|
'アチョリ語' => 'ach',
|
||||||
'アダングメ語' => 'ada',
|
'アダングメ語' => 'ada',
|
||||||
'アヴェスタ語' => 'ae',
|
'アヴェスタ語' => 'ae',
|
||||||
|
|||||||
@@ -58,6 +58,30 @@ class LanguageSearchTest extends PHPUnit_Framework_TestCase {
|
|||||||
'ml' => 'മലയാളം',
|
'ml' => 'മലയാളം',
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
|
[ 'punja', [
|
||||||
|
'pa' => 'punjaabi sennii',
|
||||||
|
'pnb' => 'punjabi western',
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[ 'kartuli', [
|
||||||
|
'ka' => 'kartuli',
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[ 'valencia', [
|
||||||
|
'ca' => 'valencia',
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[ 'chinese', [
|
||||||
|
'zh-hans' => 'chinese simplified',
|
||||||
|
'zh-hant' => 'chinese traditional',
|
||||||
|
'zh' => 'chinesesch',
|
||||||
|
'zh-cn' => 'chinese (china)',
|
||||||
|
'zh-hk' => 'chinese (hong kong)',
|
||||||
|
'zh-min-nan' => 'chinese (min nan)',
|
||||||
|
'zh-sg' => 'chinese (singapore)',
|
||||||
|
'zh-tw' => 'chinese (taiwan)'
|
||||||
|
]
|
||||||
|
],
|
||||||
[ 'finish', [
|
[ 'finish', [
|
||||||
'fi' => 'finnish'
|
'fi' => 'finnish'
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user