Cross-language language name search
Implementation of the "Also Written As" language name search algorithm. See http://etherpad.wikimedia.org/l10n-uls-language-search Change-Id: Iff84408c531b650a44d031b63d5c823737cceafc
This commit is contained in:
@@ -22,6 +22,10 @@ if ( !defined( 'MEDIAWIKI' ) ) {
|
|||||||
echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" );
|
echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" );
|
||||||
die( -1 );
|
die( -1 );
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Version number used in extension credits and in other places where needed.
|
||||||
|
*/
|
||||||
|
define( 'ULS_VERSION', '2012-07-20' );
|
||||||
|
|
||||||
$wgExtensionCredits['other'][] = array(
|
$wgExtensionCredits['other'][] = array(
|
||||||
'path' => __FILE__,
|
'path' => __FILE__,
|
||||||
@@ -48,11 +52,14 @@ $wgExtensionMessagesFiles['UniversalLanguageSelector'] = "$dir/UniversalLanguage
|
|||||||
|
|
||||||
// Register auto load for the page class
|
// Register auto load for the page class
|
||||||
$wgAutoloadClasses['UniversalLanguageSelectorHooks'] = "$dir/UniversalLanguageSelector.hooks.php";
|
$wgAutoloadClasses['UniversalLanguageSelectorHooks'] = "$dir/UniversalLanguageSelector.hooks.php";
|
||||||
|
$wgAutoloadClasses['ApiLanguageSearch'] = "$dir/api/ApiLanguageSearch.php";
|
||||||
|
$wgAutoloadClasses['LanguageNameSearch'] = "$dir/data/LanguageNameSearch.php";
|
||||||
|
|
||||||
$wgHooks['BeforePageDisplay'][] = 'UniversalLanguageSelectorHooks::addModules';
|
$wgHooks['BeforePageDisplay'][] = 'UniversalLanguageSelectorHooks::addModules';
|
||||||
$wgHooks['PersonalUrls'][] = 'UniversalLanguageSelectorHooks::addTrigger';
|
$wgHooks['PersonalUrls'][] = 'UniversalLanguageSelectorHooks::addTrigger';
|
||||||
$wgHooks['SkinAfterContent'][] = 'UniversalLanguageSelectorHooks::addTemplate';
|
$wgHooks['SkinAfterContent'][] = 'UniversalLanguageSelectorHooks::addTemplate';
|
||||||
$wgHooks['ResourceLoaderTestModules'][] = 'UniversalLanguageSelectorHooks::addTestModules';
|
$wgHooks['ResourceLoaderTestModules'][] = 'UniversalLanguageSelectorHooks::addTestModules';
|
||||||
|
$wgAPIModules['languagesearch'] = 'ApiLanguageSearch';
|
||||||
|
|
||||||
$wgResourceModules['ext.uls.init'] = array(
|
$wgResourceModules['ext.uls.init'] = array(
|
||||||
'scripts' => 'resources/ext.uls.init.js',
|
'scripts' => 'resources/ext.uls.init.js',
|
||||||
|
|||||||
66
api/ApiLanguageSearch.php
Normal file
66
api/ApiLanguageSearch.php
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* Language name search API
|
||||||
|
*
|
||||||
|
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
||||||
|
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
||||||
|
* contributors. See CREDITS for a list.
|
||||||
|
*
|
||||||
|
* UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
|
||||||
|
* have to do anything special to choose one license or the other and you don't
|
||||||
|
* have to notify anyone which license you are using. You are free to use
|
||||||
|
* UniversalLanguageSelector in commercial projects as long as the copyright
|
||||||
|
* header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
|
||||||
|
*
|
||||||
|
* @file
|
||||||
|
* @ingroup Extensions
|
||||||
|
* @licence GNU General Public Licence 2.0 or later
|
||||||
|
* @licence MIT License
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @ingroup API
|
||||||
|
*/
|
||||||
|
class ApiLanguageSearch extends ApiBase {

	/**
	 * Ask the main API module for the printer registered under the name 'json'.
	 *
	 * @return mixed Whatever createPrinterByName( 'json' ) returns
	 */
	public function getCustomPrinter() {
		$main = $this->getMain();
		return $main->createPrinterByName( 'json' );
	}

	/**
	 * Run the language name search for the 'search' request parameter and
	 * store the matches in the result under this module's name.
	 */
	public function execute() {
		$request = $this->extractRequestParams();
		$matches = LanguageNameSearch::search( $request['search'] );
		$this->getResult()->addValue( null, $this->getModuleName(), $matches );
	}

	/**
	 * @return array Parameter definitions; 'search' is flagged as required
	 */
	public function getAllowedParams() {
		$searchParam = array(
			ApiBase::PARAM_REQUIRED => true
		);

		return array(
			'search' => $searchParam,
		);
	}

	/**
	 * @return array Human readable description for each parameter
	 */
	public function getParamDescription() {
		$descriptions = array();
		$descriptions['search'] = 'Search string';

		return $descriptions;
	}

	/**
	 * @return string One-line description of the module
	 */
	public function getDescription() {
		return 'Search for language names in any script';
	}

	/**
	 * @return array Example request URLs
	 */
	public function getExamples() {
		$examples = array();
		$examples[] = 'api.php?action=languagesearch&search=Te';
		$examples[] = 'api.php?action=languagesearch&search=ഫി';

		return $examples;
	}

	/**
	 * @return string Class name followed by the extension version constant
	 */
	public function getVersion() {
		$class = __CLASS__;

		return $class . ': ' . ULS_VERSION;
	}

}
|
||||||
59
data/LanguageNameIndexer.php
Normal file
59
data/LanguageNameIndexer.php
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* Script to create language names index.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
||||||
|
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
||||||
|
* contributors. See CREDITS for a list.
|
||||||
|
*
|
||||||
|
* UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
|
||||||
|
* have to do anything special to choose one license or the other and you don't
|
||||||
|
* have to notify anyone which license you are using. You are free to use
|
||||||
|
* UniversalLanguageSelector in commercial projects as long as the copyright
|
||||||
|
* header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
|
||||||
|
*
|
||||||
|
* @file
|
||||||
|
* @ingroup Extensions
|
||||||
|
* @licence GNU General Public Licence 2.0 or later
|
||||||
|
* @licence MIT License
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Standard boilerplate to define $IP
|
||||||
|
if ( getenv( 'MW_INSTALL_PATH' ) !== false ) {
|
||||||
|
$IP = getenv( 'MW_INSTALL_PATH' );
|
||||||
|
} else {
|
||||||
|
$dir = __DIR__;
|
||||||
|
$IP = "$dir/../../..";
|
||||||
|
}
|
||||||
|
require_once ( "$IP/maintenance/Maintenance.php" );
|
||||||
|
class LanguageNameIndexer extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Script to create language names index." );
	}

	/**
	 * Build the bucketed language name index and serialize it to
	 * langnames.ser in this script's directory, which is the path
	 * LanguageNameSearch loads the index from.
	 *
	 * The written structure is
	 * array( bucket => array( lower-cased name => language code ) ),
	 * where the bucket comes from LanguageNameSearch::getIndex().
	 */
	public function execute() {
		$languages = Language::fetchLanguageNames( null, 'all' );
		$all = array();
		$buckets = array();

		foreach ( $languages as $code => $name ) {
			// Store the name as a VALUE. The bucket loop below reads values, so
			// keeping it as a key (with value true) would index boolean true
			// instead of the name.
			$all[$code][] = strtolower( $name );

			// NOTE(review): presumably the names of other languages as written
			// in language $code, keyed by language code - confirm against the
			// LanguageNames class.
			$langnames = LanguageNames::getNames( $code, 0, 2 );
			// Separate variable names so the outer $code/$name are not shadowed.
			foreach ( $langnames as $translatedCode => $translatedName ) {
				$all[$translatedCode][] = strtolower( $translatedName );
			}
		}

		foreach ( $all as $code => $names ) {
			foreach ( $names as $name ) {
				$bucket = LanguageNameSearch::getIndex( $name );
				if ( $bucket === null ) {
					// Empty or undecodable name: there is nothing to look it up by.
					continue;
				}
				// A name shared by several codes keeps the last code seen.
				$buckets[$bucket][$name] = $code;
			}
		}

		$this->output( "Total buckets: " . count( $buckets ) . "\n" );

		// Write next to this script rather than into the current working
		// directory, so the result does not depend on where the script is
		// started from.
		$target = __DIR__ . '/langnames.ser';
		if ( file_put_contents( $target, serialize( $buckets ) ) === false ) {
			$this->error( "Unable to write $target" );
		}
	}
}
|
||||||
|
|
||||||
|
$maintClass = 'LanguageNameIndexer';
|
||||||
|
require_once( RUN_MAINTENANCE_IF_MAIN );
|
||||||
80
data/LanguageNameSearch.php
Normal file
80
data/LanguageNameSearch.php
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* Cross-Language Language name search
|
||||||
|
*
|
||||||
|
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
||||||
|
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
||||||
|
* contributors. See CREDITS for a list.
|
||||||
|
*
|
||||||
|
* UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
|
||||||
|
* have to do anything special to choose one license or the other and you don't
|
||||||
|
* have to notify anyone which license you are using. You are free to use
|
||||||
|
* UniversalLanguageSelector in commercial projects as long as the copyright
|
||||||
|
* header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
|
||||||
|
*
|
||||||
|
* @file
|
||||||
|
* @ingroup Extensions
|
||||||
|
* @licence GNU General Public Licence 2.0 or later
|
||||||
|
* @licence MIT License
|
||||||
|
*/
|
||||||
|
class LanguageNameSearch {
	/**
	 * Lazily loaded index:
	 * array( bucket => array( lower-cased name => language code ) ).
	 * Null until init() has run.
	 */
	public static $languagenames;

	/**
	 * Load the serialized index from langnames.ser in this directory.
	 *
	 * Declared static because search() calls it as self::init().
	 * A missing, unreadable or corrupt file results in an empty index, so
	 * searches return no matches instead of operating on false.
	 */
	public static function init() {
		$file = __DIR__ . '/langnames.ser';
		$raw = is_readable( $file ) ? file_get_contents( $file ) : false;
		$index = ( $raw === false ) ? false : unserialize( $raw );
		self::$languagenames = is_array( $index ) ? $index : array();
	}

	/**
	 * Prefix search over the language name index.
	 *
	 * @param $searchKey string Beginning of a language name, in any script
	 * @return array Matches as array( language code => matching name )
	 */
	public static function search( $searchKey ) {
		$results = array();

		// The indexer stores names passed through strtolower(); normalise the
		// key the same way so that e.g. "Te" can match "telugu".
		$searchKey = strtolower( (string)$searchKey );
		if ( $searchKey === '' ) {
			return $results;
		}

		if ( self::$languagenames === null ) {
			self::init();
		}

		$index = self::getIndex( $searchKey );
		if ( $index === null || !isset( self::$languagenames[$index] ) ) {
			// Undecodable key, or no name starts with this letter.
			return $results;
		}

		foreach ( self::$languagenames[$index] as $name => $code ) {
			// Prefix search
			if ( strpos( (string)$name, $searchKey ) === 0 ) {
				$results[$code] = $name;
			}
		}

		return $results;
	}

	/**
	 * Get the bucket number for a name: the value returned by getCodepoint()
	 * for it, modulo 1000.
	 *
	 * @param $name string
	 * @return integer|null Bucket number, or null when getCodepoint() could
	 *  not produce a value (empty or truncated input)
	 */
	public static function getIndex( $name ) {
		$codepoint = self::getCodepoint( $name );
		if ( $codepoint === null ) {
			return null;
		}

		// Values below 1000 are unchanged by the modulo.
		return $codepoint % 1000;
	}

	/**
	 * Get the code point of first letter of string
	 *
	 * NOTE: only 1, 2 and 3 byte UTF-8 sequences are decoded properly. A lead
	 * byte of 240 or more (4 byte sequence) is treated like a 3 byte lead, so
	 * the value returned for such input is not the real code point. This is
	 * kept as is on purpose: bucket numbers in an already generated
	 * langnames.ser depend on it.
	 *
	 * @param $str string UTF-8 encoded string
	 * @return integer|null Code point of first letter of string, or null if
	 *  the string ends before a complete sequence was read
	 */
	public static function getCodepoint( $str ) {
		$bytes = array();
		$expected = 1;
		$length = strlen( $str );

		for ( $i = 0; $i < $length; $i++ ) {
			$byte = ord( $str[$i] );
			if ( $byte < 128 ) {
				return $byte;
			}

			// Values of 128 and above belong to a multi-byte sequence.
			if ( count( $bytes ) === 0 ) {
				// Lead bytes below 224 start a 2 byte sequence; everything
				// from 224 upwards is handled as a 3 byte sequence.
				$expected = ( $byte < 224 ) ? 2 : 3;
			}
			$bytes[] = $byte;

			if ( count( $bytes ) === $expected ) {
				// Refer http://en.wikipedia.org/wiki/UTF-8#Description
				if ( $expected === 3 ) {
					return ( ( $bytes[0] % 16 ) * 4096 )
						+ ( ( $bytes[1] % 64 ) * 64 )
						+ ( $bytes[2] % 64 );
				}

				return ( ( $bytes[0] % 32 ) * 64 ) + ( $bytes[1] % 64 );
			}
		}

		return null;
	}
}
|
||||||
@@ -96,7 +96,8 @@
|
|||||||
|
|
||||||
that.$languageFilter.languagefilter( {
|
that.$languageFilter.languagefilter( {
|
||||||
$target: $lcd, //$( 'ul.uls-language-filter-result' ),
|
$target: $lcd, //$( 'ul.uls-language-filter-result' ),
|
||||||
languages: that.languages
|
languages: that.languages,
|
||||||
|
searchAPI: that.options.searchAPI
|
||||||
} );
|
} );
|
||||||
|
|
||||||
// Create region selectors, one per region
|
// Create region selectors, one per region
|
||||||
@@ -174,7 +175,8 @@
|
|||||||
|
|
||||||
$.fn.uls.defaults = {
|
$.fn.uls.defaults = {
|
||||||
menu: '.uls-menu',
|
menu: '.uls-menu',
|
||||||
onSelect: null // Callback function to be called when a language is selected
|
onSelect: null, // Callback function to be called when a language is selected
|
||||||
|
searchAPI: null // Language search API
|
||||||
};
|
};
|
||||||
|
|
||||||
$.fn.uls.Constructor = ULS;
|
$.fn.uls.Constructor = ULS;
|
||||||
|
|||||||
@@ -26,7 +26,8 @@
|
|||||||
setlang : language
|
setlang : language
|
||||||
} );
|
} );
|
||||||
window.location.href = uri.toString();
|
window.location.href = uri.toString();
|
||||||
}
|
},
|
||||||
|
searchAPI: mw.util.wikiScript( 'api' ) + "?action=languagesearch"
|
||||||
} );
|
} );
|
||||||
} );
|
} );
|
||||||
} )( jQuery );
|
} )( jQuery );
|
||||||
|
|||||||
@@ -59,14 +59,27 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Also do a search to search API
|
||||||
|
if( this.options.searchAPI && query){
|
||||||
|
this.searchAPI( query );
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
render: function( langCode ) {
|
searchAPI: function( query ) {
|
||||||
|
var that = this;
|
||||||
|
$.get( that.options.searchAPI, { search: query }, function( result ) {
|
||||||
|
$.each( result['languagesearch'], function( code, name ) {
|
||||||
|
that.render( code, name );
|
||||||
|
} );
|
||||||
|
} );
|
||||||
|
},
|
||||||
|
|
||||||
|
render: function( langCode, languageName ) {
|
||||||
var $target = this.options.$target;
|
var $target = this.options.$target;
|
||||||
if ( !$target ) {
|
if ( !$target ) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$target.append( langCode );
|
$target.append( langCode, null, languageName );
|
||||||
},
|
},
|
||||||
|
|
||||||
escapeRegex: function( value ) {
|
escapeRegex: function( value ) {
|
||||||
@@ -110,7 +123,8 @@
|
|||||||
$.fn.languagefilter.defaults = {
|
$.fn.languagefilter.defaults = {
|
||||||
$target: null, // Where to append the results
|
$target: null, // Where to append the results
|
||||||
languages: null, // Languages as code:name format. Default values come from data.languages.
|
languages: null, // Languages as code:name format. Default values come from data.languages.
|
||||||
clickhandler: null
|
clickhandler: null,
|
||||||
|
searchAPI: null
|
||||||
};
|
};
|
||||||
|
|
||||||
$.fn.languagefilter.Constructor = LanguageFilter;
|
$.fn.languagefilter.Constructor = LanguageFilter;
|
||||||
|
|||||||
@@ -31,27 +31,39 @@
|
|||||||
LanguageCategoryDisplay.prototype = {
|
LanguageCategoryDisplay.prototype = {
|
||||||
constructor: LanguageCategoryDisplay,
|
constructor: LanguageCategoryDisplay,
|
||||||
|
|
||||||
append: function( langCode, regionCode ) {
|
append: function( langCode, regionCode, languageName ) {
|
||||||
var that = this;
|
var that = this;
|
||||||
this.addToRegion( langCode, regionCode );
|
this.addToRegion( langCode, regionCode, languageName );
|
||||||
|
},
|
||||||
|
/**
|
||||||
|
* Check whether a language code is already displayed or not.
|
||||||
|
* @param langCode
|
||||||
|
* @return boolean
|
||||||
|
*/
|
||||||
|
exists: function( langCode ) {
|
||||||
|
return this.$element.find( 'li' ).filter(function() {
|
||||||
|
return $(this).data('code') === langCode;
|
||||||
|
} ).length > 0;
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add the language to a region.
|
* Add the language to a region.
|
||||||
* If the region parameter is given , add to that region alone
|
* If the region parameter is given , add to that region alone
|
||||||
* Otherwise to all regions that this language belongs.
|
* Otherwise to all regions that this language belongs.
|
||||||
* @param langCode
|
* @param langCode
|
||||||
* @param region
|
* @param region Optional region
|
||||||
|
* @param languageName Optional languageName
|
||||||
*/
|
*/
|
||||||
addToRegion: function( langCode, region ) {
|
addToRegion: function( langCode, region, languageName) {
|
||||||
var that = this,
|
var that = this;
|
||||||
language = that.options.languages[langCode];
|
if ( that.exists( langCode ) ) {
|
||||||
|
return;
|
||||||
var langName = $.uls.data.autonym( langCode )
|
}
|
||||||
|
var language = $.uls.data.languages[langCode],
|
||||||
|
langName = languageName
|
||||||
|
|| $.uls.data.autonym( langCode )
|
||||||
|| that.options.languages[langCode]
|
|| that.options.languages[langCode]
|
||||||
|| langCode;
|
|| langCode,
|
||||||
|
regions = [];
|
||||||
var regions = [];
|
|
||||||
if ( region ) {
|
if ( region ) {
|
||||||
regions.push( region );
|
regions.push( region );
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
59
tests/phpunit/LanguageSearchTest.php
Normal file
59
tests/phpunit/LanguageSearchTest.php
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* PHPUnit tests for UniversalLanguageSelector extension.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
||||||
|
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
||||||
|
* contributors. See CREDITS for a list.
|
||||||
|
*
|
||||||
|
* UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
|
||||||
|
* have to do anything special to choose one license or the other and you don't
|
||||||
|
* have to notify anyone which license you are using. You are free to use
|
||||||
|
* UniversalLanguageSelector in commercial projects as long as the copyright
|
||||||
|
* header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
|
||||||
|
*
|
||||||
|
* @file
|
||||||
|
* @ingroup Extensions
|
||||||
|
* @licence GNU General Public Licence 2.0 or later
|
||||||
|
* @licence MIT License
|
||||||
|
*/
|
||||||
|
require_once( __DIR__ . '/../../data/LanguageNameSearch.php' );
|
||||||
|
class LanguageSearchTest extends PHPUnit_Framework_TestCase {

	/**
	 * Each search key must produce exactly the expected code => name map.
	 *
	 * @dataProvider searchDataProvider
	 */
	public function testSearch( $searchKey, $result ) {
		$actual = LanguageNameSearch::search( $searchKey );
		$this->assertEquals( $result, $actual );
	}

	/**
	 * @return array List of array( search key, expected matches ) pairs
	 */
	public function searchDataProvider() {
		$cases = array();

		// Full name, Malayalam script
		$cases[] = array( 'ഹിന്ദി', array(
			'hi' => 'ഹിന്ദി',
		) );

		// Prefix with several matches
		$cases[] = array( 'മല', array(
			'ml' => 'മലയാളം',
			'mg' => 'മലഗാസി',
			'ms' => 'മലയ',
		) );

		// Prefix, Greek script
		$cases[] = array( 'Φινλαν', array(
			'fi' => 'Φινλανδικά',
		) );

		// No match at all
		$cases[] = array( 'blah', array() );

		// Arabic script, name that is also a prefix of other names
		$cases[] = array( 'الفرنسية', array(
			'fr' => 'الفرنسية',
			'fr-ca' => 'الفرنسية الكندية',
			'fr-ch' => 'الفرنسية السويسرية',
			'frm' => 'الفرنسية الوسطى',
			'fro' => 'الفرنسية القديمة',
		) );

		return $cases;
	}
}
|
||||||
Reference in New Issue
Block a user