Perform search on every word of language name
See e.g. T132021. This favours coverage over quality. Change-Id: I3fc8fb1702802bc002c3d7e2941563840914f325
This commit is contained in:
@@ -43,12 +43,27 @@ class LanguageNameIndexer extends Maintenance {
|
|||||||
$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
|
$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
|
||||||
|
|
||||||
foreach ( $translations as $targetLanguage => $translation ) {
|
foreach ( $translations as $targetLanguage => $translation ) {
|
||||||
$translation = mb_strtolower( $translation );
|
|
||||||
// Remove directionality markers used in Names.php: users are not
|
// Remove directionality markers used in Names.php: users are not
|
||||||
// going to type these.
|
// going to type these.
|
||||||
$translation = str_replace( "\xE2\x80\x8E", '', $translation );
|
$translation = str_replace( "\xE2\x80\x8E", '', $translation );
|
||||||
$bucket = LanguageNameSearch::getIndex( $translation );
|
$translation = mb_strtolower( $translation );
|
||||||
$buckets[$bucket][$translation] = $targetLanguage;
|
$translation = trim( $translation );
|
||||||
|
|
||||||
|
// Clean up "gjermanishte zvicerane (dialekti i alpeve)" to "gjermanishte zvicerane".
|
||||||
|
// The original name is still shown, but this avoids creating entries such as
|
||||||
|
// "(dialekti" or "alpeve)".
|
||||||
|
$basicForm = preg_replace( '/\(.+\)$/', '', $translation );
|
||||||
|
$words = preg_split( '/[\s]+/u', $basicForm, -1, PREG_SPLIT_NO_EMPTY );
|
||||||
|
|
||||||
|
foreach ( $words as $index => $word ) {
|
||||||
|
$bucket = LanguageNameSearch::getIndex( $word );
|
||||||
|
|
||||||
|
$display = $translation;
|
||||||
|
if ( $index > 0 && count( $words ) > 1 ) {
|
||||||
|
$display = "$word <$translation>";
|
||||||
|
}
|
||||||
|
$buckets[$bucket][$display] = $targetLanguage;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -19,11 +19,7 @@
|
|||||||
*/
|
*/
|
||||||
class LanguageNameSearch {
|
class LanguageNameSearch {
|
||||||
public static function search( $searchKey, $typos = 0 ) {
|
public static function search( $searchKey, $typos = 0 ) {
|
||||||
// Use code's mb_strtolower compatibility code for MW < 1.27
|
$searchKey = mb_strtolower( $searchKey );
|
||||||
$language = Language::factory( 'en' );
|
|
||||||
|
|
||||||
// @todo: Shouldn't this be unicode aware?
|
|
||||||
$searchKey = $language->lc( $searchKey );
|
|
||||||
$index = self::getIndex( $searchKey );
|
$index = self::getIndex( $searchKey );
|
||||||
|
|
||||||
if ( !isset( LanguageNameSearchData::$buckets[$index] ) ) {
|
if ( !isset( LanguageNameSearchData::$buckets[$index] ) ) {
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -59,7 +59,7 @@ class LanguageSearchTest extends PHPUnit_Framework_TestCase {
|
|||||||
]
|
]
|
||||||
],
|
],
|
||||||
[ 'punja', [
|
[ 'punja', [
|
||||||
'pa' => 'punjaabi sennii',
|
'pa' => 'punjabi <èdè punjabi>',
|
||||||
'pnb' => 'punjabi western',
|
'pnb' => 'punjabi western',
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
@@ -79,7 +79,15 @@ class LanguageSearchTest extends PHPUnit_Framework_TestCase {
|
|||||||
'zh-hk' => 'chinese (hong kong)',
|
'zh-hk' => 'chinese (hong kong)',
|
||||||
'zh-min-nan' => 'chinese (min nan)',
|
'zh-min-nan' => 'chinese (min nan)',
|
||||||
'zh-sg' => 'chinese (singapore)',
|
'zh-sg' => 'chinese (singapore)',
|
||||||
'zh-tw' => 'chinese (taiwan)'
|
'zh-tw' => 'chinese (taiwan)',
|
||||||
|
'cdo' => 'chinese <min dong chinese>',
|
||||||
|
'gan' => 'chinese <isi-gan chinese>',
|
||||||
|
'hak' => 'chinese <isi-hakka chinese>',
|
||||||
|
'lzh' => 'chinesesch <klassescht chinesesch>',
|
||||||
|
'nan' => 'chinese <isi-min nan chinese>',
|
||||||
|
'wuu' => 'chinese <isi-wu chinese>',
|
||||||
|
'zh-classical' => 'chinese <classical chinese>',
|
||||||
|
'hsn' => 'chinese <isi-xiang chinese>',
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
[ 'finish', [
|
[ 'finish', [
|
||||||
|
|||||||
Reference in New Issue
Block a user