Perform search on every word of language name
See e.g. T132021. This favours coverage over quality. Change-Id: I3fc8fb1702802bc002c3d7e2941563840914f325
This commit is contained in:
@@ -43,12 +43,27 @@ class LanguageNameIndexer extends Maintenance {
|
||||
$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
|
||||
|
||||
foreach ( $translations as $targetLanguage => $translation ) {
|
||||
$translation = mb_strtolower( $translation );
|
||||
// Remove directionality markers used in Names.php: users are not
|
||||
// going to type these.
|
||||
$translation = str_replace( "\xE2\x80\x8E", '', $translation );
|
||||
$bucket = LanguageNameSearch::getIndex( $translation );
|
||||
$buckets[$bucket][$translation] = $targetLanguage;
|
||||
$translation = mb_strtolower( $translation );
|
||||
$translation = trim( $translation );
|
||||
|
||||
// Clean up "gjermanishte zvicerane (dialekti i alpeve)" to "gjermanishte zvicerane".
|
||||
// The original name is still shown, but this avoids creating entries such as
|
||||
// "(dialekti" or "alpeve)".
|
||||
$basicForm = preg_replace( '/\(.+\)$/', '', $translation );
|
||||
$words = preg_split( '/[\s]+/u', $basicForm, -1, PREG_SPLIT_NO_EMPTY );
|
||||
|
||||
foreach ( $words as $index => $word ) {
|
||||
$bucket = LanguageNameSearch::getIndex( $word );
|
||||
|
||||
$display = $translation;
|
||||
if ( $index > 0 && count( $words ) > 1 ) {
|
||||
$display = "$word <$translation>";
|
||||
}
|
||||
$buckets[$bucket][$display] = $targetLanguage;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,11 +19,7 @@
|
||||
*/
|
||||
class LanguageNameSearch {
|
||||
public static function search( $searchKey, $typos = 0 ) {
|
||||
// Use code's mb_strtolower compatibility code for MW < 1.27
|
||||
$language = Language::factory( 'en' );
|
||||
|
||||
// @todo: Shouldn't this be Unicode-aware?
|
||||
$searchKey = $language->lc( $searchKey );
|
||||
$searchKey = mb_strtolower( $searchKey );
|
||||
$index = self::getIndex( $searchKey );
|
||||
|
||||
if ( !isset( LanguageNameSearchData::$buckets[$index] ) ) {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -59,7 +59,7 @@ class LanguageSearchTest extends PHPUnit_Framework_TestCase {
|
||||
]
|
||||
],
|
||||
[ 'punja', [
|
||||
'pa' => 'punjaabi sennii',
|
||||
'pa' => 'punjabi <èdè punjabi>',
|
||||
'pnb' => 'punjabi western',
|
||||
]
|
||||
],
|
||||
@@ -79,7 +79,15 @@ class LanguageSearchTest extends PHPUnit_Framework_TestCase {
|
||||
'zh-hk' => 'chinese (hong kong)',
|
||||
'zh-min-nan' => 'chinese (min nan)',
|
||||
'zh-sg' => 'chinese (singapore)',
|
||||
'zh-tw' => 'chinese (taiwan)'
|
||||
'zh-tw' => 'chinese (taiwan)',
|
||||
'cdo' => 'chinese <min dong chinese>',
|
||||
'gan' => 'chinese <isi-gan chinese>',
|
||||
'hak' => 'chinese <isi-hakka chinese>',
|
||||
'lzh' => 'chinesesch <klassescht chinesesch>',
|
||||
'nan' => 'chinese <isi-min nan chinese>',
|
||||
'wuu' => 'chinese <isi-wu chinese>',
|
||||
'zh-classical' => 'chinese <classical chinese>',
|
||||
'hsn' => 'chinese <isi-xiang chinese>',
|
||||
]
|
||||
],
|
||||
[ 'finish', [
|
||||
|
||||
Reference in New Issue
Block a user