Created script groups, introduce $.uls.data

Gave script groups clear names and sorted them.
Documentation in comments.
Add tests to check that no script has been left behind.
Corrected Lath to Latn (thanks to testing).

Rename langdb.js to ext.uls.data.js

Add ResourceLoader module ext.uls.data

Change-Id: I91fafa94ffd1eaf2d12c954fe3a71064276533f9
This commit is contained in:
Amir E. Aharoni
2012-06-27 11:04:24 +03:00
committed by Santhosh Thottingal
parent b9ee6595d3
commit 03df48a609
9 changed files with 111 additions and 15 deletions

4
data/ext.uls.data.js Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -196,7 +196,9 @@ languages:
kw: [Latn, [EU]]
ky: [Cyrl, [AS]]
la: [Latn, [EU]]
# Most identified with Turkey, Bulgaria, Greece, Spain and Israel, but also spoken in Latin America and elsewhere. Wikipedia is mostly in Latn, but also in Hebr. (comparable to az.)
# Most identified with Turkey, Bulgaria, Greece, Spain and Israel,
# but also spoken in Latin America and elsewhere.
# Wikipedia is mostly in Latn, but also in Hebr. (Comparable to az.)
lad: [Latn, [ME, EU, LA]]
lb: [Latn, [EU]]
lbe: [Cyrl, [EU]]
@@ -207,7 +209,7 @@ languages:
lij: [Latn, [EU]]
liv: [Latn, [EU]]
lmo: [Latn, [EU]]
ln: [Lath, [AF]]
ln: [Latn, [AF]]
lo: [Laoo, [AS]]
loz: [Latn, [AF]]
lt: [Latn, [EU]]
@@ -406,11 +408,39 @@ languages:
zh-yue: [Hans, [AS]]
zu: [Latn, [AF]]
# http://unicode.org/iso15924/iso15924-codes.html
# The codes are taken from http://unicode.org/iso15924/iso15924-codes.html .
#
# The classification is roughly based on http://www.unicode.org/charts/
# with some practical corrections.
scriptgroups:
Latn: 1
Cyrl: 2
Arab: 3
# Other is reserved
# Large groups, one script in each
Cyrillic: [Cyrl]
Arabic: [Arab]
# It's probably different enough from Latin and Cyrillic, but user testing
# may prove otherwise.
Greek: [Grek]
# Couldn't find a better place for Goth.
Latin: [Latn, Goth]
WestCaucasian: [Armn, Geor]
# Maybe it can be unified with Arabic.
# Maybe Thaana can be moved here from SouthAsian.
# Maybe it can be unified with African.
MiddleEastern: [Hebr, Syrc]
African: [Ethi, Tfng]
# India, Nepal, Bangladesh, Sri Lanka, Bhutan, Maldives.
#
# Thaana (Thaa, the script of Maldives) is here, even though it's RTL,
# because it's closer geographically to India. Maybe it should be moved
# to MiddleEastern or to Arabic, if that would be easier for users.
#
# Tibetan (Tibt) is here, even though it's classified as "Central Asian" by
# Unicode, because linguistically and geographically it's closely related to
# the Brahmic family.
SouthAsian: [Beng, Deva, Gujr, Guru, Knda, Mlym, Orya, Sinh, Taml, Telu, Tibt, Thaa]
SouthEastAsian: [Bugi, Java, Khmr, Laoo, Mymr, Thai]
CJK: [Hans, Hant, Kore, Jpan, Yiii]
NativeAmerican: [Cher, Cans]
regiongroups:
# north-america

View File

@@ -1,8 +1,13 @@
<?php
/**
* Script to create the language data in json format for ULS
*
* Flow: read langdb.yaml, parse it with spyc_load(), JSON-encode the parsed
* structure, wrap the JSON in JavaScript source and write it to disk.
*
* NOTE(review): spyc.php is included relative to __DIR__, but langdb.yaml and
* the output files are named without a directory, so the script presumably has
* to be run from the directory that contains them -- confirm.
*/
include __DIR__ . '/spyc.php';
// Raw YAML text of the language database.
$data = file_get_contents( 'langdb.yaml' );
// spyc_load() comes from the spyc.php include above; presumably it returns
// the YAML document as nested PHP arrays -- verify against spyc.php.
$parsed = spyc_load( $data );
$json = json_encode( $parsed );
// NOTE(review): the next two statements emit the data as a global
// (window.langdb) into langdb.js. The commit message says langdb.js was
// renamed to ext.uls.data.js, so these look like the pre-change lines of a
// diff shown without +/- markers -- confirm before relying on both outputs.
$js = "window.langdb = $json;";
file_put_contents( 'langdb.js', $js );
// Build a jQuery-wrapped module: an immediately-invoked function that receives
// jQuery as `$`, creates $.uls and stores the encoded data on $.uls.data.
// Inside these double-quoted strings only $json is interpolated by PHP; the
// bare `$` characters are not followed by an identifier, so they are emitted
// literally into the generated JavaScript.
$js = "( function ( $ ) {\n"
."\t$.uls = {};\n"
."\t$.uls.data = $json;\n"
."} )( jQuery );\n";
file_put_contents( 'ext.uls.data.js', $js );