Perform search on every word of language name

See e.g. T132021. This favours coverage over quality.

Change-Id: I3fc8fb1702802bc002c3d7e2941563840914f325
This commit is contained in:
Niklas Laxström
2017-10-31 16:59:06 +01:00
committed by Amire80
parent a9dc4a0f1a
commit a353c5ab65
4 changed files with 8824 additions and 12 deletions

View File

@@ -43,12 +43,27 @@ class LanguageNameIndexer extends Maintenance {
$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
foreach ( $translations as $targetLanguage => $translation ) {
$translation = mb_strtolower( $translation );
// Remove directionality markers used in Names.php: users are not
// going to type these.
$translation = str_replace( "\xE2\x80\x8E", '', $translation );
$bucket = LanguageNameSearch::getIndex( $translation );
$buckets[$bucket][$translation] = $targetLanguage;
$translation = mb_strtolower( $translation );
$translation = trim( $translation );
// Clean up "gjermanishte zvicerane (dialekti i alpeve)" to "gjermanishte zvicerane".
// The original name is still shown, but this avoids creating entries such as
// "(dialekti" or "alpeve)".
$basicForm = preg_replace( '/\(.+\)$/', '', $translation );
$words = preg_split( '/[\s]+/u', $basicForm, -1, PREG_SPLIT_NO_EMPTY );
foreach ( $words as $index => $word ) {
$bucket = LanguageNameSearch::getIndex( $word );
$display = $translation;
if ( $index > 0 && count( $words ) > 1 ) {
$display = "$word <$translation>";
}
$buckets[$bucket][$display] = $targetLanguage;
}
}
}