Cross-language language name search
Implementation of the "Also Written As" language name search algorithm. See http://etherpad.wikimedia.org/l10n-uls-language-search Change-Id: Iff84408c531b650a44d031b63d5c823737cceafc
This commit is contained in:
59
data/LanguageNameIndexer.php
Normal file
59
data/LanguageNameIndexer.php
Normal file
@@ -0,0 +1,59 @@
|
||||
<?php
|
||||
/**
|
||||
* Script to create language names index.
|
||||
*
|
||||
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
||||
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
||||
* contributors. See CREDITS for a list.
|
||||
*
|
||||
* UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
|
||||
* have to do anything special to choose one license or the other and you don't
|
||||
* have to notify anyone which license you are using. You are free to use
|
||||
* UniversalLanguageSelector in commercial projects as long as the copyright
|
||||
* header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
|
||||
*
|
||||
* @file
|
||||
* @ingroup Extensions
|
||||
* @licence GNU General Public Licence 2.0 or later
|
||||
* @licence MIT License
|
||||
*/
|
||||
|
||||
// Resolve $IP: take MW_INSTALL_PATH from the environment when it is set,
// otherwise fall back to the directory three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$dir = __DIR__;
	$IP = "$dir/../../..";
}

// Load the Maintenance base class that the script below extends.
require_once "$IP/maintenance/Maintenance.php";
|
||||
/**
 * Maintenance script that builds the language name search index.
 *
 * Collects lower-cased language names per language code, groups them into
 * buckets keyed by LanguageNameSearch::getIndex(), and writes the serialized
 * bucket map to 'langnames.ser'.
 */
class LanguageNameIndexer extends Maintenance {
	/**
	 * Sets the script description.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Script to create language names index." );
	}

	/**
	 * Builds the bucketed name => language code index and writes it to
	 * 'langnames.ser' (path is relative to the current working directory).
	 */
	public function execute() {
		$languages = Language::fetchLanguageNames( null, 'all' );
		$all = array();
		$buckets = array();

		foreach ( $languages as $sourceCode => $nativeName ) {
			// Store names as list VALUES. The second loop reads values, so a
			// name stored as a key (with value true) would be indexed as the
			// boolean true instead of the name itself.
			$all[$sourceCode][] = mb_strtolower( $nativeName, 'UTF-8' );

			// Presumably the names of languages as written in $sourceCode,
			// keyed by the code of the language being named - confirm against
			// LanguageNames::getNames().
			$translations = LanguageNames::getNames( $sourceCode, 0, 2 );

			// Distinct variable names so the outer loop's key and value are
			// not overwritten by the inner loop.
			foreach ( $translations as $targetCode => $translatedName ) {
				// mb_strtolower() lower-cases non-ASCII letters as well, which
				// the byte-oriented strtolower() does not.
				// NOTE(review): the lookup side has to normalise search keys
				// the same way - confirm in LanguageNameSearch.
				$all[$targetCode][] = mb_strtolower( $translatedName, 'UTF-8' );
			}
		}

		foreach ( $all as $code => $names ) {
			foreach ( $names as $name ) {
				$bucket = LanguageNameSearch::getIndex( $name );
				// If the same name occurs for several codes within a bucket,
				// the last one written wins.
				$buckets[$bucket][$name] = $code;
			}
		}

		$this->output( "Total buckets: " . count( $buckets ) . "\n" );

		// file_put_contents() returns false on failure; report it rather
		// than finishing silently without an index file.
		if ( file_put_contents( 'langnames.ser', serialize( $buckets ) ) === false ) {
			$this->error( "Could not write langnames.ser" );
		}
	}
}
|
||||
|
||||
// Name of the maintenance class defined in this file; the file named by
// RUN_MAINTENANCE_IF_MAIN is loaded afterwards.
$maintClass = 'LanguageNameIndexer';

require_once RUN_MAINTENANCE_IF_MAIN;
|
||||
Reference in New Issue
Block a user