addDescription( 'Script to create language names index.' );
	}

	/**
	 * Build the language name search index and hand it to generateFile().
	 *
	 * For every language code returned by Language::fetchLanguageNames(), the
	 * names returned by LanguageNames::getNames() are lowercased, stripped of
	 * left-to-right marks and stored as "name => language code" inside the
	 * bucket chosen by LanguageNameSearch::getIndex(). Bucket size statistics
	 * are printed before the data file is generated.
	 */
	public function execute() {
		$languages = Language::fetchLanguageNames( null, 'all' );

		$buckets = [];
		// Only the language codes are needed here; the autonyms are not used.
		foreach ( array_keys( $languages ) as $sourceLanguage ) {
			$translations = LanguageNames::getNames( $sourceLanguage, 0, 2 );
			foreach ( $translations as $targetLanguage => $translation ) {
				$translation = mb_strtolower( $translation );
				// Remove directionality markers used in Names.php: users are not
				// going to type these.
				$translation = str_replace( "\xE2\x80\x8E", '', $translation );
				$bucket = LanguageNameSearch::getIndex( $translation );
				$buckets[$bucket][$translation] = $targetLanguage;
			}
		}

		$count = count( $buckets );
		$this->output( "Bucket stats:\n - $count buckets\n" );

		// min(), max() and the average are undefined for an empty list, so the
		// per-bucket statistics are only printed when there is at least one bucket.
		if ( $count > 0 ) {
			$lengths = array_values( array_map( 'count', $buckets ) );
			// The median is only meaningful on ordered data.
			sort( $lengths );

			$min = $lengths[0];
			$max = $lengths[$count - 1];

			// Integer index of the (upper) middle element of the 0-based list.
			$middle = (int)( $count / 2 );
			if ( $count % 2 === 1 ) {
				$median = $lengths[$middle];
			} else {
				// Even number of buckets: average the two middle sizes.
				$median = ( $lengths[$middle - 1] + $lengths[$middle] ) / 2;
			}

			$avg = array_sum( $lengths ) / $count;

			$this->output( " - smallest has $min entries\n" );
			$this->output( " - largest has $max entries\n - median size is $median entries\n" );
			$this->output( " - average size is $avg entries\n" );
		}

		$this->generateFile( $buckets );
	}

	/**
	 * Write the generated index to LanguageNameSearchData.php next to this script.
	 *
	 * The '___' placeholder in $template is replaced with the formatted $data and
	 * the result is written with file_put_contents().
	 *
	 * @param array $buckets Index buckets as built by execute()
	 */
	private function generateFile( array $buckets ) {
		// NOTE(review): the next statement is incomplete as it appears here: the
		// heredoc assigned to $template and the code that first assigns $data are
		// not visible. It is reproduced unchanged; restore it from the complete
		// file before relying on this method.
		$template = <<s $data = preg_replace( '/(=>)\s+(\[)/m', '\1 \2', $data );
		// Convert spaces to tabs. Since we are not top-level need more tabs.
		// NOTE(review): both patterns below read identically here; the number of
		// leading spaces in each may have been collapsed. Confirm before use.
		$data = preg_replace( '/^ /m', "\t\t\t", $data );
		$data = preg_replace( '/^ /m', "\t\t", $data );
		$template = str_replace( '___', $data, $template );

		file_put_contents( __DIR__ . '/LanguageNameSearchData.php', $template );
	}
}

$maintClass = 'LanguageNameIndexer';
require_once RUN_MAINTENANCE_IF_MAIN;