From a24e94265e54e33e4ca9de2992112ac09747d3e3 Mon Sep 17 00:00:00 2001 From: Winston Sung Date: Mon, 29 May 2023 07:31:53 +0000 Subject: [PATCH] UniversalLanguageSelector: Fix BCP 47 language code detection Some browsers use formats like "zh-Hant-TW", which the current detection cannot handle correctly. Bug: T337665 Change-Id: I0db11a20e219d4cf66832aeee67d59bf3ec41a62 --- includes/Hooks.php | 56 +++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/includes/Hooks.php b/includes/Hooks.php index 1617155f..3e696087 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -25,6 +25,7 @@ use ExtensionRegistry; use Html; use IBufferingStatsdDataFactory; use IContextSource; +use LanguageCode; use MediaWiki\Babel\Babel; use MediaWiki\Extension\BetaFeatures\BetaFeatures; use MediaWiki\Hook\BeforePageDisplayHook; @@ -254,11 +255,11 @@ class Hooks implements } // The element id will be 'pt-uls' - $langCode = $context->getLanguage()->getCode(); + $mwLangCode = $context->getLanguage()->getCode(); return [ 'uls' => [ - 'text' => $this->languageNameUtils->getLanguageName( $langCode ), + 'text' => $this->languageNameUtils->getLanguageName( $mwLangCode ), 'href' => '#', // Skin meta data to allow skin (e.g. 
Vector) to add icons 'icon' => 'wikimedia-language', @@ -271,28 +272,53 @@ class Hooks implements } /** - * @param float[] $preferred - * @return string + * @param float[] $preferred Mapping of + * 'Preferred languages by lowercased BCP 47 language codes' => 'weight' + * @return string MediaWiki internal language code or empty string if there's no matched + * language code */ protected function getDefaultLanguage( array $preferred ) { + /** @var array $supported List of supported languages by MediaWiki internal language codes */ $supported = $this->languageNameUtils ->getLanguageNames( LanguageNameUtils::AUTONYMS, LanguageNameUtils::SUPPORTED ); - // look for a language that is acceptable to the client + // Convert BCP 47 language code to MediaWiki internal language code and + // look for a MediaWiki internal language code that is acceptable to the client // and known to the wiki. - foreach ( $preferred as $code => $weight ) { - if ( isset( $supported[$code] ) ) { - return $code; + // @begin Note: Remove this when minimum supported version is 1.40 + if ( method_exists( LanguageCode::class, 'bcp47ToInternal' ) ) { + // @end + foreach ( $preferred as $bcp47LangCode => $weight ) { + $mwLangCode = LanguageCode::bcp47ToInternal( $bcp47LangCode ); + if ( isset( $supported[$mwLangCode] ) ) { + return $mwLangCode; + } + } + // @begin Note: Remove this when minimum supported version is 1.40 + } else { + static $invertedLookup = []; + foreach ( LanguageCode::getNonstandardLanguageCodeMapping() as $internal => $bcp47 ) { + $invertedLookup[strtolower( $bcp47 )] = $internal; + } + foreach ( $preferred as $bcp47LangCode => $weight ) { + $mwLangCode = $invertedLookup[$bcp47LangCode] ?? $bcp47LangCode; + if ( isset( $supported[$mwLangCode] ) ) { + return $mwLangCode; + } } } + // @end - // Some browsers might only send codes like de-de. - // Try with bare code. 
- foreach ( $preferred as $code => $weight ) { - $parts = explode( '-', $code, 2 ); - $code = $parts[0]; - if ( isset( $supported[$code] ) ) { - return $code; + // Some browsers might: + // - Send codes like 'zh-hant-tw': + // FIXME: Try 'zh-tw', 'zh-hant', 'zh' respectively + // - Only send codes like 'de-de': + // Try with bare code 'de' + foreach ( $preferred as $bcp47LangCode => $weight ) { + $parts = explode( '-', $bcp47LangCode, 2 ); + $mwLangCode = $parts[0]; + if ( isset( $supported[$mwLangCode] ) ) { + return $mwLangCode; } }