UniversalLanguageSelector: Fix BCP 47 language code detection

Some browsers send language codes in formats like "zh-Hant-TW", which the
 current detection cannot handle correctly.

Bug: T337665
Change-Id: I0db11a20e219d4cf66832aeee67d59bf3ec41a62
This commit is contained in:
Winston Sung
2023-05-29 07:31:53 +00:00
committed by jenkins-bot
parent 9aaf214c71
commit a24e94265e

View File

@@ -25,6 +25,7 @@ use ExtensionRegistry;
use Html; use Html;
use IBufferingStatsdDataFactory; use IBufferingStatsdDataFactory;
use IContextSource; use IContextSource;
use LanguageCode;
use MediaWiki\Babel\Babel; use MediaWiki\Babel\Babel;
use MediaWiki\Extension\BetaFeatures\BetaFeatures; use MediaWiki\Extension\BetaFeatures\BetaFeatures;
use MediaWiki\Hook\BeforePageDisplayHook; use MediaWiki\Hook\BeforePageDisplayHook;
@@ -254,11 +255,11 @@ class Hooks implements
} }
// The element id will be 'pt-uls' // The element id will be 'pt-uls'
$langCode = $context->getLanguage()->getCode(); $mwLangCode = $context->getLanguage()->getCode();
return [ return [
'uls' => [ 'uls' => [
'text' => $this->languageNameUtils->getLanguageName( $langCode ), 'text' => $this->languageNameUtils->getLanguageName( $mwLangCode ),
'href' => '#', 'href' => '#',
// Skin meta data to allow skin (e.g. Vector) to add icons // Skin meta data to allow skin (e.g. Vector) to add icons
'icon' => 'wikimedia-language', 'icon' => 'wikimedia-language',
@@ -271,28 +272,53 @@ class Hooks implements
} }
/** /**
* @param float[] $preferred * @param float[] $preferred Mapping of
* @return string * 'Preferred languages by lowercased BCP 47 language codes' => 'weight'
* @return string MediaWiki internal language code or empty string if there's no matched
* language code
*/ */
protected function getDefaultLanguage( array $preferred ) { protected function getDefaultLanguage( array $preferred ) {
/** @var array supported List of Supported languages by MediaWiki internal language codes */
$supported = $this->languageNameUtils $supported = $this->languageNameUtils
->getLanguageNames( LanguageNameUtils::AUTONYMS, LanguageNameUtils::SUPPORTED ); ->getLanguageNames( LanguageNameUtils::AUTONYMS, LanguageNameUtils::SUPPORTED );
// look for a language that is acceptable to the client // Convert BCP 47 language code to MediaWiki internal language code and
// look for a MediaWiki internal language code that is acceptable to the client
// and known to the wiki. // and known to the wiki.
foreach ( $preferred as $code => $weight ) { // @begin Note: Remove this when minimum supported version is 1.40
if ( isset( $supported[$code] ) ) { if ( method_exists( LanguageCode::class, 'bcp47ToInternal' ) ) {
return $code; // @end
foreach ( $preferred as $bcp47LangCode => $weight ) {
$mwLangCode = LanguageCode::bcp47ToInternal( $bcp47LangCode );
if ( isset( $supported[$mwLangCode] ) ) {
return $mwLangCode;
} }
} }
// @begin Note: Remove this when minimum supported version is 1.40
} else {
static $invertedLookup = [];
foreach ( LanguageCode::getNonstandardLanguageCodeMapping() as $internal => $bcp47 ) {
$invertedLookup[strtolower( $bcp47 )] = $internal;
}
foreach ( $preferred as $bcp47LangCode => $weight ) {
$mwLangCode = $invertedLookup[$bcp47LangCode] ?? $bcp47LangCode;
if ( isset( $supported[$mwLangCode] ) ) {
return $mwLangCode;
}
}
}
// @end
// Some browsers might only send codes like de-de. // Some browsers might:
// Try with bare code. // - Send codes like 'zh-hant-tw':
foreach ( $preferred as $code => $weight ) { // FIXME: Try 'zh-tw', 'zh-hant', 'zh' respectively
$parts = explode( '-', $code, 2 ); // - Only send codes like 'de-de':
$code = $parts[0]; // Try with bare code 'de'
if ( isset( $supported[$code] ) ) { foreach ( $preferred as $bcp47LangCode => $weight ) {
return $code; $parts = explode( '-', $bcp47LangCode, 2 );
$mwLangCode = $parts[0];
if ( isset( $supported[$mwLangCode] ) ) {
return $mwLangCode;
} }
} }