Cross-language language name search
Implementation of Also Written As language name search algorithm. See http://etherpad.wikimedia.org/l10n-uls-language-search Change-Id: Iff84408c531b650a44d031b63d5c823737cceafc
This commit is contained in:
@@ -22,6 +22,10 @@ if ( !defined( 'MEDIAWIKI' ) ) {
|
||||
echo( "This file is an extension to the MediaWiki software and cannot be used standalone.\n" );
|
||||
die( -1 );
|
||||
}
|
||||
/**
|
||||
* Version number used in extension credits and in other places where needed.
|
||||
*/
|
||||
define( 'ULS_VERSION', '2012-07-20' );
|
||||
|
||||
$wgExtensionCredits['other'][] = array(
|
||||
'path' => __FILE__,
|
||||
@@ -48,11 +52,14 @@ $wgExtensionMessagesFiles['UniversalLanguageSelector'] = "$dir/UniversalLanguage
|
||||
|
||||
// Register auto load for the page class
|
||||
$wgAutoloadClasses['UniversalLanguageSelectorHooks'] = "$dir/UniversalLanguageSelector.hooks.php";
|
||||
$wgAutoloadClasses['ApiLanguageSearch'] = "$dir/api/ApiLanguageSearch.php";
|
||||
$wgAutoloadClasses['LanguageNameSearch'] = "$dir/data/LanguageNameSearch.php";
|
||||
|
||||
$wgHooks['BeforePageDisplay'][] = 'UniversalLanguageSelectorHooks::addModules';
|
||||
$wgHooks['PersonalUrls'][] = 'UniversalLanguageSelectorHooks::addTrigger';
|
||||
$wgHooks['SkinAfterContent'][] = 'UniversalLanguageSelectorHooks::addTemplate';
|
||||
$wgHooks['ResourceLoaderTestModules'][] = 'UniversalLanguageSelectorHooks::addTestModules';
|
||||
$wgAPIModules['languagesearch'] = 'ApiLanguageSearch';
|
||||
|
||||
$wgResourceModules['ext.uls.init'] = array(
|
||||
'scripts' => 'resources/ext.uls.init.js',
|
||||
|
||||
66
api/ApiLanguageSearch.php
Normal file
66
api/ApiLanguageSearch.php
Normal file
@@ -0,0 +1,66 @@
|
||||
<?php
|
||||
/**
|
||||
* Language name search API
|
||||
*
|
||||
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
||||
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
||||
* contributors. See CREDITS for a list.
|
||||
*
|
||||
* UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
|
||||
* have to do anything special to choose one license or the other and you don't
|
||||
* have to notify anyone which license you are using. You are free to use
|
||||
* UniversalLanguageSelector in commercial projects as long as the copyright
|
||||
* header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
|
||||
*
|
||||
* @file
|
||||
* @ingroup Extensions
|
||||
* @licence GNU General Public Licence 2.0 or later
|
||||
* @licence MIT License
|
||||
*/
|
||||
|
||||
/**
|
||||
* @ingroup API
|
||||
*/
|
||||
class ApiLanguageSearch extends ApiBase {

	/**
	 * Output printer for this module.
	 *
	 * @return mixed The printer the main module creates for the name 'json'
	 */
	public function getCustomPrinter() {
		$main = $this->getMain();

		return $main->createPrinterByName( 'json' );
	}

	/**
	 * Run the language name search for the 'search' request parameter and
	 * store the matches in the result under this module's name.
	 */
	public function execute() {
		$request = $this->extractRequestParams();
		$matches = LanguageNameSearch::search( $request['search'] );

		$this->getResult()->addValue( null, $this->getModuleName(), $matches );
	}

	/**
	 * Parameters accepted by this module: a single, mandatory 'search'.
	 *
	 * @return array
	 */
	public function getAllowedParams() {
		$searchSettings = array(
			ApiBase::PARAM_REQUIRED => true
		);

		return array(
			'search' => $searchSettings,
		);
	}

	/**
	 * Human readable description of each parameter.
	 *
	 * @return array
	 */
	public function getParamDescription() {
		$descriptions = array();
		$descriptions['search'] = 'Search string';

		return $descriptions;
	}

	/**
	 * Human readable description of the module.
	 *
	 * @return string
	 */
	public function getDescription() {
		return 'Search for language names in any script';
	}

	/**
	 * Example request URLs for this module.
	 *
	 * @return array
	 */
	public function getExamples() {
		$examples = array();
		$examples[] = 'api.php?action=languagesearch&search=Te';
		$examples[] = 'api.php?action=languagesearch&search=ഫി';

		return $examples;
	}

	/**
	 * Version string: the class name followed by the extension version.
	 *
	 * @return string
	 */
	public function getVersion() {
		return sprintf( '%s: %s', __CLASS__, ULS_VERSION );
	}

}
|
||||
59
data/LanguageNameIndexer.php
Normal file
59
data/LanguageNameIndexer.php
Normal file
@@ -0,0 +1,59 @@
|
||||
<?php
|
||||
/**
|
||||
* Script to create language names index.
|
||||
*
|
||||
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
||||
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
||||
* contributors. See CREDITS for a list.
|
||||
*
|
||||
* UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
|
||||
* have to do anything special to choose one license or the other and you don't
|
||||
* have to notify anyone which license you are using. You are free to use
|
||||
* UniversalLanguageSelector in commercial projects as long as the copyright
|
||||
* header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
|
||||
*
|
||||
* @file
|
||||
* @ingroup Extensions
|
||||
* @licence GNU General Public Licence 2.0 or later
|
||||
* @licence MIT License
|
||||
*/
|
||||
|
||||
// Standard boilerplate to define $IP: take it from the MW_INSTALL_PATH
// environment variable when that is set, otherwise use the directory three
// levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$dir = __DIR__;
	$IP = "$dir/../../..";
}

require_once ( "$IP/maintenance/Maintenance.php" );
|
||||
class LanguageNameIndexer extends Maintenance {

	public function __construct() {
		parent::__construct();
		$this->addDescription( "Script to create language names index." );
	}

	/**
	 * Build the language name search index and write it next to this script.
	 *
	 * The index maps a bucket number (LanguageNameSearch::getIndex() of the
	 * name) to an array of lowercased language name => language code. It is
	 * serialized to data/langnames.ser, the file LanguageNameSearch loads.
	 */
	public function execute() {
		$languages = Language::fetchLanguageNames( null, 'all' );

		// Language code => set of lowercased names, kept as array keys so that
		// duplicate names for the same language collapse into one entry.
		$all = array();
		foreach ( $languages as $code => $name ) {
			$all[$code][strtolower( $name )] = true;

			// NOTE(review): the result is treated as language code => name
			// pairs, presumably the names of other languages as written in
			// $code - confirm against LanguageNames::getNames().
			$translations = LanguageNames::getNames( $code, 0, 2 );
			// Distinct variable names: the inner loop must not overwrite the
			// outer loop's $code and $name.
			foreach ( $translations as $targetCode => $translatedName ) {
				$all[$targetCode][strtolower( $translatedName )] = true;
			}
		}

		$buckets = array();
		foreach ( $all as $code => $names ) {
			foreach ( array_keys( $names ) as $name ) {
				// PHP turns numeric string keys into integers; restore the string.
				$name = (string)$name;
				$bucket = LanguageNameSearch::getIndex( $name );
				$buckets[$bucket][$name] = $code;
			}
		}

		$this->output( "Total buckets: " . count( $buckets ) . "\n" );

		// Write to this directory rather than the current working directory,
		// because LanguageNameSearch reads __DIR__ . '/langnames.ser'.
		$file = __DIR__ . '/langnames.ser';
		if ( file_put_contents( $file, serialize( $buckets ) ) === false ) {
			$this->error( "Unable to write language name index to $file" );
		}
	}
}
|
||||
|
||||
$maintClass = 'LanguageNameIndexer';
|
||||
require_once( RUN_MAINTENANCE_IF_MAIN );
|
||||
80
data/LanguageNameSearch.php
Normal file
80
data/LanguageNameSearch.php
Normal file
@@ -0,0 +1,80 @@
|
||||
<?php
|
||||
/**
|
||||
* Cross-Language Language name search
|
||||
*
|
||||
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
||||
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
||||
* contributors. See CREDITS for a list.
|
||||
*
|
||||
* UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
|
||||
* have to do anything special to choose one license or the other and you don't
|
||||
* have to notify anyone which license you are using. You are free to use
|
||||
* UniversalLanguageSelector in commercial projects as long as the copyright
|
||||
* header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
|
||||
*
|
||||
* @file
|
||||
* @ingroup Extensions
|
||||
* @licence GNU General Public Licence 2.0 or later
|
||||
* @licence MIT License
|
||||
*/
|
||||
class LanguageNameSearch {

	/**
	 * Search index, loaded on first use by init().
	 * Format: bucket number => array( lowercased language name => language code ).
	 * Null until the index has been loaded.
	 */
	public static $languagenames;

	/**
	 * Load the serialized index from langnames.ser in this directory.
	 *
	 * Static because search() calls it as self::init(). A missing, unreadable
	 * or corrupt index file results in an empty index instead of warnings.
	 */
	public static function init() {
		$file = __DIR__ . '/langnames.ser';
		$index = is_readable( $file ) ? unserialize( file_get_contents( $file ) ) : false;
		self::$languagenames = is_array( $index ) ? $index : array();
	}

	/**
	 * Prefix search for language names in any script.
	 *
	 * The search key is lowercased with strtolower() before matching, the same
	 * function LanguageNameIndexer applies to the names it stores.
	 *
	 * @param $searchKey string Beginning of a language name
	 * @return array Language code => matching language name; empty when
	 *   nothing matches or the search key is empty
	 */
	public static function search( $searchKey ) {
		$results = array();
		$searchKey = strtolower( (string)$searchKey );
		if ( $searchKey === '' ) {
			return $results;
		}

		if ( self::$languagenames === null ) {
			self::init();
		}

		$index = self::getIndex( $searchKey );
		if ( !isset( self::$languagenames[$index] ) ) {
			// No indexed name starts with a character from this bucket.
			return $results;
		}

		foreach ( self::$languagenames[$index] as $name => $code ) {
			// Prefix search. Numeric names come back as integer keys, hence the cast.
			if ( strpos( (string)$name, $searchKey ) === 0 ) {
				$results[$code] = $name;
			}
		}

		return $results;
	}

	/**
	 * Get the number of the bucket a name is stored in: the code point of
	 * its first character modulo 1000.
	 *
	 * @param $name string
	 * @return integer Bucket number between 0 and 999
	 */
	public static function getIndex( $name ) {
		return self::getCodepoint( $name ) % 1000;
	}

	/**
	 * Get the code point of first letter of string
	 *
	 * Decodes the leading UTF-8 sequence, which may be 1 to 4 bytes long.
	 * Refer http://en.wikipedia.org/wiki/UTF-8#Description
	 *
	 * Because getIndex() depends on this value, langnames.ser has to be
	 * regenerated with LanguageNameIndexer whenever the decoding changes.
	 *
	 * @param $str string UTF-8 encoded string
	 * @return integer Code point of first letter of string, or 0 when the
	 *   string is empty or ends in the middle of the first character
	 */
	public static function getCodepoint( $str ) {
		$str = (string)$str;
		$length = strlen( $str );
		if ( $length === 0 ) {
			return 0;
		}

		$lead = ord( $str[0] );
		if ( $lead < 128 ) {
			// Single byte (ASCII) character.
			return $lead;
		}

		// The lead byte tells how long the sequence is and carries the
		// highest bits of the code point.
		if ( $lead < 224 ) {
			$size = 2;
			$codepoint = $lead % 32;
		} elseif ( $lead < 240 ) {
			$size = 3;
			$codepoint = $lead % 16;
		} else {
			$size = 4;
			$codepoint = $lead % 8;
		}

		if ( $length < $size ) {
			return 0;
		}

		// Every continuation byte contributes its lower six bits.
		for ( $i = 1; $i < $size; $i++ ) {
			$codepoint = $codepoint * 64 + ( ord( $str[$i] ) % 64 );
		}

		return $codepoint;
	}
}
|
||||
@@ -96,7 +96,8 @@
|
||||
|
||||
that.$languageFilter.languagefilter( {
|
||||
$target: $lcd, //$( 'ul.uls-language-filter-result' ),
|
||||
languages: that.languages
|
||||
languages: that.languages,
|
||||
searchAPI: that.options.searchAPI
|
||||
} );
|
||||
|
||||
// Create region selectors, one per region
|
||||
@@ -174,7 +175,8 @@
|
||||
|
||||
$.fn.uls.defaults = {
|
||||
menu: '.uls-menu',
|
||||
onSelect: null // Callback function to be called when a language is selected
|
||||
onSelect: null, // Callback function to be called when a language is selected
|
||||
searchAPI: null // Language search API
|
||||
};
|
||||
|
||||
$.fn.uls.Constructor = ULS;
|
||||
|
||||
@@ -26,7 +26,8 @@
|
||||
setlang : language
|
||||
} );
|
||||
window.location.href = uri.toString();
|
||||
}
|
||||
},
|
||||
searchAPI: mw.util.wikiScript( 'api' ) + "?action=languagesearch"
|
||||
} );
|
||||
} );
|
||||
} )( jQuery );
|
||||
|
||||
@@ -59,14 +59,27 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
// Also do a search to search API
|
||||
if( this.options.searchAPI && query){
|
||||
this.searchAPI( query );
|
||||
}
|
||||
},
|
||||
|
||||
render: function( langCode ) {
|
||||
searchAPI: function( query ) {
|
||||
var that = this;
|
||||
$.get( that.options.searchAPI, { search: query }, function( result ) {
|
||||
$.each( result['languagesearch'], function( code, name ) {
|
||||
that.render( code, name );
|
||||
} );
|
||||
} );
|
||||
},
|
||||
|
||||
render: function( langCode, languageName ) {
|
||||
var $target = this.options.$target;
|
||||
if ( !$target ) {
|
||||
return;
|
||||
}
|
||||
$target.append( langCode );
|
||||
$target.append( langCode, null, languageName );
|
||||
},
|
||||
|
||||
escapeRegex: function( value ) {
|
||||
@@ -110,7 +123,8 @@
|
||||
$.fn.languagefilter.defaults = {
|
||||
$target: null, // Where to append the results
|
||||
languages: null, // Languages as code:name format. Default values come from data.languages.
|
||||
clickhandler: null
|
||||
clickhandler: null,
|
||||
searchAPI: null
|
||||
};
|
||||
|
||||
$.fn.languagefilter.Constructor = LanguageFilter;
|
||||
|
||||
@@ -31,27 +31,39 @@
|
||||
LanguageCategoryDisplay.prototype = {
|
||||
constructor: LanguageCategoryDisplay,
|
||||
|
||||
append: function( langCode, regionCode ) {
|
||||
append: function( langCode, regionCode, languageName ) {
|
||||
var that = this;
|
||||
this.addToRegion( langCode, regionCode );
|
||||
this.addToRegion( langCode, regionCode, languageName );
|
||||
},
|
||||
/**
|
||||
* Check whether a language code is already displayed or not.
|
||||
* @param langCode
|
||||
* @return boolean
|
||||
*/
|
||||
exists: function( langCode ) {
|
||||
return this.$element.find( 'li' ).filter(function() {
|
||||
return $(this).data('code') === langCode;
|
||||
} ).length > 0;
|
||||
},
|
||||
|
||||
/**
|
||||
* Add the language to a region.
|
||||
* If the region parameter is given , add to that region alone
|
||||
* Otherwise to all regions that this language belongs.
|
||||
* @param langCode
|
||||
* @param region
|
||||
* @param region Optional region
|
||||
* @param languageName Optional languageName
|
||||
*/
|
||||
addToRegion: function( langCode, region ) {
|
||||
var that = this,
|
||||
language = that.options.languages[langCode];
|
||||
|
||||
var langName = $.uls.data.autonym( langCode )
|
||||
addToRegion: function( langCode, region, languageName) {
|
||||
var that = this;
|
||||
if ( that.exists( langCode ) ) {
|
||||
return;
|
||||
}
|
||||
var language = $.uls.data.languages[langCode],
|
||||
langName = languageName
|
||||
|| $.uls.data.autonym( langCode )
|
||||
|| that.options.languages[langCode]
|
||||
|| langCode;
|
||||
|
||||
var regions = [];
|
||||
|| langCode,
|
||||
regions = [];
|
||||
if ( region ) {
|
||||
regions.push( region );
|
||||
} else {
|
||||
|
||||
59
tests/phpunit/LanguageSearchTest.php
Normal file
59
tests/phpunit/LanguageSearchTest.php
Normal file
@@ -0,0 +1,59 @@
|
||||
<?php
|
||||
/**
|
||||
* PHPUnit tests for UniversalLanguageSelector extension.
|
||||
*
|
||||
* Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
|
||||
* Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
|
||||
* contributors. See CREDITS for a list.
|
||||
*
|
||||
* UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
|
||||
* have to do anything special to choose one license or the other and you don't
|
||||
* have to notify anyone which license you are using. You are free to use
|
||||
* UniversalLanguageSelector in commercial projects as long as the copyright
|
||||
* header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
|
||||
*
|
||||
* @file
|
||||
* @ingroup Extensions
|
||||
* @licence GNU General Public Licence 2.0 or later
|
||||
* @licence MIT License
|
||||
*/
|
||||
require_once( __DIR__ . '/../../data/LanguageNameSearch.php' );
|
||||
class LanguageSearchTest extends PHPUnit_Framework_TestCase {

	/**
	 * Check that a search key yields exactly the expected matches.
	 *
	 * @dataProvider searchDataProvider
	 */
	public function testSearch( $searchKey, $result ) {
		$actual = LanguageNameSearch::search( $searchKey );

		$this->assertEquals( $result, $actual );
	}

	/**
	 * Data sets for testSearch.
	 *
	 * @return array List of array( search key, expected code => name matches )
	 */
	public function searchDataProvider() {
		$cases = array();

		// Full name in Malayalam script
		$cases[] = array( "ഹിന്ദി", array(
			'hi' => 'ഹിന്ദി'
		) );

		// Prefix in Malayalam script with several matches
		$cases[] = array( "മല", array(
			'ml' => "മലയാളം",
			'mg' => 'മലഗാസി',
			'ms' => 'മലയ',
		) );

		// Prefix in Greek script
		$cases[] = array( "Φινλαν", array(
			'fi' => 'Φινλανδικά',
		) );

		// A key with no expected matches
		$cases[] = array( "blah", array() );

		// Name in Arabic script that is also the beginning of longer names
		$cases[] = array( "الفرنسية", array(
			'fr' => 'الفرنسية',
			'fr-ca' => 'الفرنسية الكندية',
			'fr-ch' => 'الفرنسية السويسرية',
			'frm' => 'الفرنسية الوسطى',
			'fro' => 'الفرنسية القديمة',
		) );

		return $cases;
	}
}
|
||||
Reference in New Issue
Block a user