From 9ed4cff14082847c63346948ee69ce627144128f Mon Sep 17 00:00:00 2001 From: Santhosh Thottingal Date: Tue, 1 Aug 2017 13:49:03 +0530 Subject: [PATCH] first commit --- .eslintrc.json | 18 + GPL-LICENSE | 342 +++ MIT-LICENSE | 22 + README.md | 10 + data/langdb.yaml | 631 +++++ data/spyc.php | 1046 ++++++++ data/ulsdata2json.php | 118 + index.js | 261 ++ language-data.json | 5282 +++++++++++++++++++++++++++++++++++++++++ package.json | 41 + test/index.js | 137 ++ 11 files changed, 7908 insertions(+) create mode 100644 .eslintrc.json create mode 100644 GPL-LICENSE create mode 100644 MIT-LICENSE create mode 100644 README.md create mode 100644 data/langdb.yaml create mode 100644 data/spyc.php create mode 100644 data/ulsdata2json.php create mode 100644 index.js create mode 100644 language-data.json create mode 100644 package.json create mode 100644 test/index.js diff --git a/.eslintrc.json b/.eslintrc.json new file mode 100644 index 0000000..12375f6 --- /dev/null +++ b/.eslintrc.json @@ -0,0 +1,18 @@ +{ + "extends": "wikimedia", + "env": { + "es6": true, + "browser": true, + "jquery": true, + "qunit": true, + "node": true, + "mocha": true + }, + "globals": { + "require": false + }, + "rules": { + "dot-notation": 0, + "wrap-iife": 0 + } +} diff --git a/GPL-LICENSE b/GPL-LICENSE new file mode 100644 index 0000000..019694a --- /dev/null +++ b/GPL-LICENSE @@ -0,0 +1,342 @@ +== GNU GENERAL PUBLIC LICENSE == + +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. + +=== Preamble === + +The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. 
This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + +When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + +To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + +We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + +Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + +Finally, any free program is threatened constantly by software +patents. 
We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and +modification follow. + +== TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION == + +'''0.''' This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + +'''1.''' You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. 
+ +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + +'''2.''' You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + '''a)''' You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + '''b)''' You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + '''c)''' If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + +'''3.''' You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + '''a)''' Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + '''b)''' Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + '''c)''' Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) 
+ +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + +'''4.''' You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + +'''5.''' You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. 
+ +'''6.''' Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + +'''7.''' If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + +'''8.''' If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + +'''9.''' The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + +'''10.''' If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. 
Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + +=== NO WARRANTY === + +'''11.''' BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + +'''12.''' IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + '''END OF TERMS AND CONDITIONS''' + +== How to Apply These Terms to Your New Programs == + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. 
It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. 
Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/MIT-LICENSE b/MIT-LICENSE new file mode 100644 index 0000000..8a9a70a --- /dev/null +++ b/MIT-LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris, +Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other +contributors. See CREDITS for a list. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..5e39186 --- /dev/null +++ b/README.md @@ -0,0 +1,10 @@ +CLDR-based language data and utilities +====================================== + +The language data, with the following details, is populated from the current version of [CLDR supplemental data](http://unicode.org/repos/cldr/trunk/common/supplemental/supplementalData.xml): +1. The script in which a language is written +2. The script code +3. The language code +4. The regions in which the language is spoken +5. The autonym - language name written in its own script +6. The directionality of the text diff --git a/data/langdb.yaml b/data/langdb.yaml new file mode 100644 index 0000000..e9326fc --- /dev/null +++ b/data/langdb.yaml @@ -0,0 +1,631 @@ +languages: + aa: [Latn, [AF], Qafár af] + ab: [Cyrl, [EU], Аҧсшәа] + abs: [Latn, [AS], Bahasa Ambon] + ace: [Latn, [AS, PA], Acèh] + acf: [Latn, [AM], kwéyòl] + ady: [Cyrl, [EU, ME], Адыгабзэ] + ady-cyrl: [ady] + ady-latn: [Latn, [EU, ME], Adygabze] + aeb: [aeb-arab] + aeb-arab: [Arab, [AF], تونسي] + aeb-latn: [Latn, [AF], Tûnsî] + af: [Latn, [AF], Afrikaans] + ahr: [Deva, [AS], अहिराणी] + ak: [Latn, [AF], Akan] + akz: [Latn, [AM], Albaamo innaaɬiilka] + aln: [Latn, [EU], Gegë] + am: [Ethi, [AF], አማርኛ] + an: [Latn, [EU], aragonés] + ang: [Latn, [EU], Ænglisc] + anp: [Deva, [AS], अङ्गिका] + ar: [Arab, [ME], العربية] + arc: [Syrc, [ME], ܐܪܡܝܐ] + arn: [Latn, [AM], mapudungun] + aro: [Latn, [AM], Araona] + arq: [Arab, [AF], جازايرية] + ary: [Latn, [ME], Maġribi] + arz: [Arab, [ME], مصرى] + as: [Beng, [AS], অসমীয়া] + ase: [Sgnw, [AM], American sign language] + ast: [Latn, [EU], asturianu] + atj: [Latn, [AM], atikamekw] + av: [Cyrl, [EU], авар] + avk: [Latn, [WW], Kotava] + ay: [Latn, [AM], Aymar aru] + # The Latin script is the default in the North Azerbaijani Wikipedia, + # which uses the macro code 'az'. Its own standard code is 'azj'. 
+ # The Arabic script is the default in the South Azerbaijani Wikipedia, + # which correctly uses the code 'azb'. + # CLDR uses az-latn and az-arab. + az: [az-latn] + az-arab: [Arab, [AS, ME], تۆرکجه] + az-latn: [Latn, [EU, ME], azərbaycanca] + az-cyrl: [Cyrl, [EU, ME], азәрбајҹанҹа] + azb: [az-arab] + azj: [az-latn] + ba: [Cyrl, [EU], башҡортса] + ban: [Bali, [AS], ᬩᬲᬩᬮᬶ] + bar: [Latn, [EU], Boarisch] + bat-smg: [sgs] + bbc-latn: [Latn, [AS], Batak Toba] + # FIXME - autonym in the Batak script is unknown + bbc-batk: [Batk, [AS], Batak Toba] + bbc: [Latn, [AS], Batak Toba] + bcc: [Arab, [AS, ME], جهلسری بلوچی] + bcl: [Latn, [AS], Bikol Central] + be-tarask: [Cyrl, [EU], беларуская (тарашкевіца)] + be-x-old: [be-tarask] + be: [Cyrl, [EU], беларуская] + bew: [Latn, [AS], Bahasa Betawi] + bfa: [Latn, [AF], Bari] + bft: [Arab, [AS], بلتی] + bfq: [Taml, [AS], படகா] + bg: [Cyrl, [EU], български] + bgn: [Arab, [AS, ME], روچ کپتین بلوچی] + # FIXME - currently says Bhojpuri, but it's a macrolanguage. + bh: [bho] + bho: [Deva, [AS], भोजपुरी] + bi: [Latn, [PA], Bislama] + bjn: [Latn, [AS], Bahasa Banjar] + bm: [Latn, [AF], bamanankan] + bn: [Beng, [AS], বাংলা] + bo: [Tibt, [AS], བོད་ཡིག] + bpy: [Beng, [AS], বিষ্ণুপ্রিয়া মণিপুরী] + bqi: [Arab, [AS, ME], بختیاری] + br: [Latn, [EU], brezhoneg] + brh: [Latn, [ME, AS], Bráhuí] + brx: [Deva, [AS], बड़ो] + bs: [Latn, [EU], bosanski] + bto: [Latn, [AS], Iriga Bicolano] + bug: [Bugi, [AS], ᨅᨔ ᨕᨘᨁᨗ] + bxr: [Cyrl, [AS], буряад] + ca: [Latn, [EU], català] + cbk-zam: [Latn, [AS], Chavacano de Zamboanga] + cdo: [Latn, [AS], Mìng-dĕ̤ng-ngṳ̄] + ce: [Cyrl, [EU], нохчийн] + ceb: [Latn, [AS], Cebuano] + ch: [Latn, [PA], Chamoru] + # FIXME chm is actually a macro language that includes + # mhr and mrj, but CLDR Territory-Language information + # uses chm instead of mhr, so for practical reasons + # it should redirect there. 
+ # A better fix would be this: + # https://phabricator.wikimedia.org/T136164 + chm: [mhr] + cho: [Latn, [AM], Choctaw] + chr: [Cher, [AM], ᏣᎳᎩ] + chy: [Latn, [AM], Tsetsêhestâhese] + ciw: [Latn, [AM], Ojibwemowin] + cjy: [cjy-hant] + cjy-hans: [Hans, [AS], 晋语(简化字)] + cjy-hant: [Hant, [AS], 晉語] + ckb: [Arab, [ME], کوردیی ناوەندی] + cnh: [Latn, [AS], Lai holh] + co: [Latn, [EU], corsu] + cps: [Latn, [AS], Capiceño] + cr: [Cans, [AM], ᓀᐦᐃᔭᐍᐏᐣ] + cr-cans: [cr] + cr-latn: [Latn, [AM], Nēhiyawēwin] + crh: [Latn, [EU], qırımtatarca] + crh-cyrl: [Cyrl, [EU], къырымтатарджа] + crh-latn: [crh] + cs: [Latn, [EU], čeština] + csb: [Latn, [EU], kaszëbsczi] + # FIXME: which script to prefer?.. + cu: [Cyrl, [EU], словѣньскъ / ⰔⰎⰑⰂⰡⰐⰠⰔⰍⰟ] + cv: [Cyrl, [EU], Чӑвашла] + cy: [Latn, [EU], Cymraeg] + da: [Latn, [EU], dansk] + de-at: [Latn, [EU], Österreichisches Deutsch] + de-ch: [Latn, [EU], Schweizer Hochdeutsch] + de-formal: [Latn, [EU], Deutsch (Sie-Form)] + de: [Latn, [EU], Deutsch] + din: [Latn, [AF], Thuɔŋjäŋ] + diq: [Latn, [EU, AS], Zazaki] + dsb: [Latn, [EU], dolnoserbski] + dtp: [Latn, [AS], Dusun Bundu-liwan] + dty: [Deva, [AS], डोटेली] + dv: [Thaa, [AS], ދިވެހިބަސް] + dz: [Tibt, [AS], ཇོང་ཁ] + ee: [Latn, [AF], eʋegbe] + egl: [Latn, [EU], Emiliàn] + el: [Grek, [EU], Ελληνικά] + eml: [Latn, [EU], emiliàn e rumagnòl] + en-ca: [Latn, [AM], Canadian English] + en-gb: [Latn, [EU, AS, PA], British English] + en: [Latn, [EU, AM, AF, ME, AS, PA, WW], English] + eo: [Latn, [WW], Esperanto] + es-419: [Latn, [AM], español de America Latina] + # world? + es-formal: [Latn, [EU, AM, AF, WW], español (formal)] + # world? 
+ es: [Latn, [EU, AM, AF, WW], español] + esu: [Latn, [AM], "Yup'ik"] + et: [Latn, [EU], eesti] + eu: [Latn, [EU], euskara] + ext: [Latn, [EU], estremeñu] + fa: [Arab, [AS, ME], فارسی] + fax: [Latn, [EU], Fala] + ff: [Latn, [AF], Fulfulde] + fi: [Latn, [EU], suomi] + fil: [tl] + fit: [Latn, [EU], meänkieli] + fiu-vro: [vro] + fj: [Latn, [PA], Na Vosa Vakaviti] + fo: [Latn, [EU], føroyskt] + fr: [Latn, [EU, AM, WW], français] + frc: [Latn, [AM], français cadien] + frp: [Latn, [EU], arpetan] + frr: [Latn, [EU], Nordfriisk] + fur: [Latn, [EU], furlan] + fy: [Latn, [EU], Frysk] + ga: [Latn, [EU], Gaeilge] + gag: [Latn, [EU], Gagauz] + gah: [Latn, [AS], Alekano] + gan-hans: [Hans, [AS], 赣语(简体)] + gan-hant: [gan] + gan: [Hant, [AS], 贛語] + gbz: [Latn, [AS], Dari-e Mazdeyasnā] + gcf: [Latn, [AM], Guadeloupean Creole French] + gd: [Latn, [EU], Gàidhlig] + gl: [Latn, [EU], galego] + glk: [Arab, [AS, ME], گیلکی] + gn: [Latn, [AM], "Avañe'ẽ"] + gom: [gom-deva] + gom-deva: [Deva, [AS], गोवा कोंकणी] + gom-latn: [Latn, [AS], Gova Konknni] + got: [Goth, [EU], 𐌲𐌿𐍄𐌹𐍃𐌺] + grc: [Grek, [EU], Ἀρχαία ἑλληνικὴ] + gsw: [Latn, [EU], Alemannisch] + gu: [Gujr, [AS], ગુજરાતી] + guc: [Latn, [AM], Wayúu] + gur: [Latn, [AF], Gurenɛ] + gv: [Latn, [EU], Gaelg] + # CLDR uses ha-latn and ha-arab. Latin is more common and is used in Wikipedia. 
+ ha-arab: [Arab, [AF], هَوُسَ] + ha-latn: [Latn, [AF], Hausa] + ha: [ha-latn] + hak: [Latn, [AS], Hak-kâ-fa] + haw: [Latn, [AM, PA], Hawai`i] + he: [Hebr, [ME], עברית] + hi: [Deva, [AS], हिन्दी] + hif: [Latn, [PA, AS], Fiji Hindi] + hif-deva: [Deva, [AS], फ़ीजी हिन्दी] + hif-latn: [hif] + hil: [Latn, [AS], Ilonggo] + hne: [Deva, [AS], छत्तीसगढ़ी] + ho: [Latn, [PA], Hiri Motu] + hr: [Latn, [EU], hrvatski] + hrx: [Latn, [AM], Hunsrik] + hsb: [Latn, [EU], hornjoserbsce] + hsn: [Hans, [AS], 湘语] + ht: [Latn, [AM], Kreyòl ayisyen] + hu-formal: [Latn, [EU], Magyar (magázó)] + hu: [Latn, [EU], magyar] + hy: [Armn, [EU, ME], Հայերեն] + hz: [Latn, [AF], Otsiherero] + ia: [Latn, [WW], interlingua] + id: [Latn, [AS], Bahasa Indonesia] + ie: [Latn, [WW], Interlingue] + ig: [Latn, [AF], Igbo] + ii: [Yiii, [AS], ꆇꉙ] + ik: [Latn, [AM], Iñupiak] + ike-cans: [Cans, [AM], ᐃᓄᒃᑎᑐᑦ] + ike-latn: [Latn, [AM], inuktitut] + ilo: [Latn, [AS], Ilokano] + inh: [Cyrl, [EU], ГӀалгӀай] + io: [Latn, [WW], Ido] + is: [Latn, [EU], íslenska] + it: [Latn, [EU], italiano] + # For variants ike-* is used + iu: [Cans, [AM], ᐃᓄᒃᑎᑐᑦ] + ja: [Jpan, [AS], 日本語] + jam: [Latn, [AM], Patois] + jbo: [Latn, [WW], lojban] + jdt: [jdt-cyrl] + jdt-cyrl: [Cyrl, [EU, AS], жугьури] + jut: [Latn, [EU], jysk] + jv: [Latn, [AS, PA], Basa Jawa] + # For support in webfonts. 
+ jv-java: [Java, [AS, PA], ꦧꦱꦗꦮ] + ka: [Geor, [EU], ქართული] + kaa: [Latn, [AS], Qaraqalpaqsha] + # Can also be Tfng, but the Wikipedia is mostly Latn + kab: [Latn, [AF, EU], Taqbaylit] + kac: [Latn, [AS], Jinghpaw] + kbd-cyrl: [kbd] + kbd-latn: [Latn, [EU], Qabardjajəbza] + kbd: [Cyrl, [EU, ME], Адыгэбзэ] + kbp: [Latn, [AF], Kabɩyɛ] + kea: [Latn, [AF], Kabuverdianu] + kg: [Latn, [AF], Kongo] + kgp: [Latn, [AM], Kaingáng] + khw: [Arab, [ME, AS], کھوار] + ki: [Latn, [AF], Gĩkũyũ] + kiu: [Latn, [EU, ME], Kırmancki] + kj: [Latn, [AF], Kwanyama] + kjh: [Cyrl, [AS], хакас] + kk: [kk-cyrl] + kk-arab: [Arab, [EU, AS], قازاقشا (تٶتە)] + kk-cn: [kk-arab] + kk-cyrl: [Cyrl, [EU, AS], қазақша] + kk-kz: [kk-cyrl] + kk-latn: [Latn, [EU, AS, ME], qazaqşa] + kk-tr: [kk-latn] + kl: [Latn, [AM, EU], kalaallisut] + km: [Khmr, [AS], ភាសាខ្មែរ] + kn: [Knda, [AS], ಕನ್ನಡ] + knn: [Deva, [AS], महाराष्ट्रीय कोंकणी] + # Here Hang may be even more appropriate, because kp has more resistance to Han + ko-kp: [Kore, [AS], 한국어 (조선)] + # Kore is an alias for Hangul+Han. Maybe Hang is more appropriate? + ko: [Kore, [AS], 한국어] + koi: [Cyrl, [EU], Перем Коми] + kr: [Latn, [AF], Kanuri] + krc: [Cyrl, [EU], къарачай-малкъар] + kri: [Latn, [AF], Krio] + krj: [Latn, [ME, EU], Kinaray-a] + krl: [Latn, [EU], Karjala] + ks-arab: [Arab, [AS], کٲشُر] + ks-deva: [Deva, [AS], कॉशुर] + ks: [Arab, [AS], کٲشُر] + ksf: [Latn, [AF], Bafia] + ksh: [Latn, [EU], Ripoarisch] + # CLDR uses ku-latn and ku-arab. Latin is more common and is used in Wikipedia. 
+ ku: [ku-latn] + ku-arab: [Arab, [EU, ME], كوردي] + ku-latn: [Latn, [EU, ME], Kurdî] + kv: [Cyrl, [EU], коми] + kw: [Latn, [EU], kernowek] + ky: [Cyrl, [AS], Кыргызча] + la: [Latn, [EU], Latina] + lad: [lad-latn] + lad-latn: [Latn, [ME, EU, AM], Ladino] + lad-hebr: [Hebr, [ME, EU, AM], לאדינו] + lb: [Latn, [EU], Lëtzebuergesch] + lbe: [Cyrl, [EU], лакку] + lez: [Cyrl, [EU], лезги] + lfn: [Latn, [WW], Lingua Franca Nova] + lg: [Latn, [AF], Luganda] + li: [Latn, [EU], Limburgs] + lij: [Latn, [EU], Ligure] + liv: [Latn, [EU], Līvõ kēļ] + lki: [Arab, [AS, ME], لەکی‎] + lkt: [Latn, [AM], Lakȟótiyapi] + lld: [Latn, [EU], Ladin] + lmo: [Latn, [EU], lumbaart] + ln: [Latn, [AF], lingála] + lo: [Laoo, [AS], ລາວ] + loz: [Latn, [AF], Silozi] + lt: [Latn, [EU], lietuvių] + lrc: [Arab, [AS, ME], لۊری شومالی] + ltg: [Latn, [EU], latgaļu] + lud: [Latn, [EU], lüüdi] + lus: [Latn, [AS], Mizo ţawng] + lut: [Latn, [AM], dxʷləšucid] + luz: [Arab, [ME], لئری دوٙمینی] + lv: [Latn, [EU], latviešu] + lzh: [Hant, [AS], 文言] + # Also Geor, but the incubator is in Latn + lzz: [Latn, [EU, ME], Lazuri] + mai: [Deva, [AS], मैथिली] + map-bms: [Latn, [AS], Basa Banyumasan] + mdf: [Cyrl, [EU], мокшень] + mfe: [Latn, [AM], Morisyen] + mg: [Latn, [AF], Malagasy] + mh: [Latn, [PA], Ebon] + mhr: [Cyrl, [EU], олык марий] + mi: [Latn, [PA], Māori] + mic: [Latn, [AM], "Mi'kmaq"] + min: [Latn, [AS], Baso Minangkabau] + miq: [Latn, [AM], Mískitu] + mk: [Cyrl, [EU], македонски] + ml: [Mlym, [AS, ME], മലയാളം] + # Hmm, can also have Mong some day in some way + mn: [Cyrl, [AS], монгол] + mnc: [Mong, [AS], ᠮᠠᠨᠵᡠ ᡤᡳᠰᡠᠨ] + mni: [Beng, [AS], মেইতেই লোন্] + mnw: [Mymr, [AS], ဘာသာ မန်] + mo: [Cyrl, [EU], молдовеняскэ] + mr: [Deva, [AS, ME], मराठी] + mrj: [Cyrl, [EU], кырык мары] + ms: [Latn, [AS], Bahasa Melayu] + mt: [Latn, [EU], Malti] + mui: [Latn, [AS], Musi] + mus: [Latn, [AM], Mvskoke] + mwl: [Latn, [EU], Mirandés] + mwv: [Latn, [AS], Behase Mentawei] + mww: [mww-latn] + mww-latn: [Latn, [AS], Hmoob Dawb] + my: 
[Mymr, [AS], မြန်မာဘာသာ] + myv: [Cyrl, [EU], эрзянь] + mzn: [Arab, [ME, AS], مازِرونی] + na: [Latn, [PA], Dorerin Naoero] + nah: [Latn, [AM], Nāhuatl] + nan: [Latn, [AS], Bân-lâm-gú] + nap: [Latn, [EU], Napulitano] + nb: [Latn, [EU], norsk (bokmål)] + nds-nl: [Latn, [EU], Nedersaksisch] + nds: [Latn, [EU], Plattdüütsch] + ne: [Deva, [AS], नेपाली] + new: [Deva, [AS], नेपाल भाषा] + ng: [Latn, [AF], Oshiwambo] + niu: [Latn, [PA], ko e vagahau Niuē] + njo: [Latn, [AS], Ao] + nl-informal: [Latn, [EU, AM], Nederlands (informeel)] + nl: [Latn, [EU, AM], Nederlands] + nn: [Latn, [EU], norsk (nynorsk)] + # There's also nb for Bokmål and nn for Nynorsk + "no": [Latn, [EU], norsk] + nov: [Latn, [WW], Novial] + nqo: [Nkoo, [AF], ߒߞߏ] + nrm: [Latn, [EU], Nouormand] + nso: [Latn, [AF], Sesotho sa Leboa] + nv: [Latn, [AM], Diné bizaad] + ny: [Latn, [AF], Chi-Chewa] + oc: [Latn, [EU], occitan] + olo: [Latn, [AS, EU], Livvin-Karjalan] + om: [Latn, [AF], Oromoo] + or: [Orya, [AS], ଓଡ଼ିଆ] + os: [Cyrl, [EU], Ирон] + # Bug: 60815 + ota: [Arab, [AS, EU], لسان عثمانى] + pa: [pa-guru] + pa-guru: [Guru, [AS], ਪੰਜਾਬੀ] + pag: [Latn, [AS], Pangasinan] + pam: [Latn, [AS], Kapampangan] + pap: [Latn, [AM], Papiamentu] + pbb: [Latn, [AM], Nasa Yuwe] + pcd: [Latn, [EU], Picard] + pdc: [Latn, [EU, AM], Deitsch] + pdt: [Latn, [EU, AM], Plautdietsch] + pfl: [Latn, [EU], Pälzisch] + pi: [Deva, [AS], पालि] + pih: [Latn, [PA], Norfuk / Pitkern] + pis: [Latn, [PA], Pijin] + pko: [Latn, [AF], Pökoot] + pl: [Latn, [EU], polski] + pms: [Latn, [EU], Piemontèis] + pnb: [Arab, [AS, ME], پنجابی] + pnt: [Grek, [EU], Ποντιακά] + ppl: [Latn, [AM], Nawat] + prg: [Latn, [EU], Prūsiskan] + prs: [Arab, [AS, ME], دری] + ps: [Arab, [AS, ME], پښتو] + pt-br: [Latn, [AM], português do Brasil] + pt: [Latn, [EU, AM, AS, PA, AF, WW], português] + qu: [Latn, [AM], Runa Simi] + qug: [Latn, [AM], Runa shimi] + rap: [Latn, [AM], arero rapa nui] + rcf: [Latn, [AF], Kreol Réyoné] + rgn: [Latn, [EU], Rumagnôl] + rif: [Latn, [AF], 
Tarifit] + rki: [Mymr, [AS], ရခိုင်] + rm: [Latn, [EU], rumantsch] + # Also known as Fíntika Rómma + rmf: [Latn, [EU], kaalengo tšimb] + rmy: [Latn, [EU], Romani] + rn: [Latn, [AF], Kirundi] + ro: [Latn, [EU], română] + roa-rup: [rup] + roa-tara: [Latn, [EU], tarandíne] + rtm: [Latn, [PA], Faeag Rotuma] + # world? + ru: [Cyrl, [EU, AS, ME], русский] + rue: [Cyrl, [EU], русиньскый] + rup: [Latn, [EU], armãneashti] + ruq: [Cyrl, [EU], Влахесте] + ruq-cyrl: [ruq] + # FIXME: broken autonym + ruq-grek: [Grek, [EU], Megleno-Romanian (Greek script)] + ruq-latn: [Latn, [EU], Vlăheşte] + rut: [Cyrl, [EU], мыхаӀбишды] + rw: [Latn, [AF], Kinyarwanda] + # Bug: 60815 + rwr: [Deva, [AS], मारवाड़ी] + ryu: [Kana, [AS], ʔucināguci] + sa: [Deva, [AS], संस्कृतम्] + sah: [Cyrl, [EU, AS], саха тыла] + # Currently Latn, potentially Olck + sat: [Latn, [AS], Santali] + saz: [Saur, [AS], ꢱꣃꢬꢵꢯ꣄ꢡ꣄ꢬꢵ] + sc: [Latn, [EU], sardu] + scn: [Latn, [EU], sicilianu] + sco: [Latn, [EU], Scots] + sd: [Arab, [AS], سنڌي] + sdc: [Latn, [EU], Sassaresu] + sdh: [Arab, [ME], کوردی خوارگ] + se: [Latn, [EU], sámegiella] + ses: [Latn, [AF], Koyraboro Senni] + sei: [Latn, [AM], Cmique Itom] + sg: [Latn, [AF], Sängö] + sgs: [Latn, [EU], žemaitėška] + sh: [Latn, [EU], srpskohrvatski] + shi-latn: [Latn, [AF], Tašlḥiyt] + shi-tfng: [Tfng, [AF], ⵜⴰⵛⵍⵃⵉⵜ] + shi: [shi-latn] + shn: [Mymr, [AS], လိၵ်ႈတႆး] + si: [Sinh, [AS], සිංහල] + simple: [Latn, [WW], Simple English] + sk: [Latn, [EU], slovenčina] + sl: [Latn, [EU], slovenščina] + sli: [Latn, [EU], Schläsch] + slr: [Latn, [AS], Salırça] + sly: [Latn, [AS], Bahasa Selayar] + syc: [Syrc, [ME], ܣܘܪܝܝܐ] + sm: [Latn, [PA], Gagana Samoa] + sma: [Latn, [EU], åarjelsaemien] + smj: [Latn, [EU], julevsámegiella] + smn: [Latn, [EU], anarâškielâ] + sms: [Latn, [EU], sää´mǩiõll] + sn: [Latn, [AF], chiShona] + so: [Latn, [AF], Soomaaliga] + son: [Latn, [AF], soŋay] + sq: [Latn, [EU], shqip] + sr: [sr-cyrl] + sr-ec: [sr-cyrl] + sr-cyrl: [Cyrl, [EU], српски] + sr-el: [sr-latn] + 
sr-latn: [Latn, [EU], srpski] + srn: [Latn, [AM, EU], Sranantongo] + ss: [Latn, [AF], SiSwati] + st: [Latn, [AF], Sesotho] + stq: [Latn, [EU], Seeltersk] + su: [Latn, [AS], Basa Sunda] + sv: [Latn, [EU], svenska] + sw: [Latn, [AF], Kiswahili] + swb: [Latn, [AF], Shikomoro] + sxu: [Latn, [EU], Säggssch] + szl: [Latn, [EU], ślůnski] + ta: [Taml, [AS], தமிழ்] + tcy: [Knda, [AS], ತುಳು] + te: [Telu, [AS], తెలుగు] + tet: [Latn, [AS, PA], tetun] + tg-cyrl: [Cyrl, [AS], тоҷикӣ] + tg-latn: [Latn, [AS], tojikī] + tg: [Cyrl, [AS], тоҷикӣ] + th: [Thai, [AS], ไทย] + ti: [Ethi, [AF], ትግርኛ] + tk: [Latn, [AS], Türkmençe] + tkr: [Cyrl, [AS], ЦӀаьхна миз] + tl: [Latn, [AS], Tagalog] + # A very complicated case. Names.php is Cyrl. In TWN they argue about Cyrl, Latn, and Arab. I can't find reliable external sources. --Amir + tly: [Cyrl, [EU, AS, ME], толышә зывон] + tn: [Latn, [AF], Setswana] + to: [Latn, [PA], lea faka-Tonga] + tokipona: [Latn, [WW], Toki Pona] + tpi: [Latn, [PA, AS], Tok Pisin] + tr: [Latn, [EU, ME], Türkçe] + trp: [Latn, [AS], Kokborok (Tripuri)] + tru: [Latn, [AS], Ṫuroyo] + ts: [Latn, [AF], Xitsonga] + tsd: [Grek, [EU], Τσακωνικά] + tt: [Cyrl, [EU], татарча] + tt-cyrl: [tt] + tt-latn: [Latn, [EU], tatarça] + ttt: [Cyrl, [AS], Tati] + tum: [Latn, [AF], chiTumbuka] + tw: [Latn, [AF], Twi] + twd: [Latn, [EU], Tweants] + ty: [Latn, [PA], reo tahiti] + tyv: [Cyrl, [AS], тыва дыл] + tzl: [Latn, [WW], Talossan] + tzm: [Tfng, [AF], ⵜⴰⵎⴰⵣⵉⵖⵜ] + udm: [Cyrl, [EU], удмурт] + # CLDR uses ug-arab, ug-latn and ug-cyrl. + # Arabic seems to have the largest number of users and is the main script in the Wikipedia. 
+ ug: [ug-arab] + ug-arab: [Arab, [AS], ئۇيغۇرچە] + ug-latn: [Latn, [AS], uyghurche] + ug-cyrl: [Cyrl, [AS], уйғурчә] + uk: [Cyrl, [EU], українська] + ur: [Arab, [AS, ME], اردو] + uz: [Latn, [AS], oʻzbekcha] + ve: [Latn, [AF], Tshivenda] + vec: [Latn, [EU], vèneto] + vep: [Latn, [EU], vepsän kel’] + vi: [Latn, [AS], Tiếng Việt] + vls: [Latn, [EU], West-Vlams] + vmf: [Latn, [EU], Mainfränkisch] + vo: [Latn, [WW], Volapük] + vot: [Latn, [EU], Vaďďa] + vro: [Latn, [EU], Võro] + wa: [Latn, [EU], walon] + war: [Latn, [AS], Winaray] + wls: [Latn, [PA], "Faka'uvea"] + wo: [Latn, [AF], Wolof] + wuu: [Hans, [AS], 吴语] + xal: [Cyrl, [EU], хальмг] + xh: [Latn, [AF], isiXhosa] + xmf: [Geor, [EU], მარგალური] + ydd: [Hebr, [AS, EU], Eastern Yiddish] + yi: [Hebr, [ME, EU, AM], ייִדיש] + yo: [Latn, [AF], Yorùbá] + yrk: [Cyrl, [AS], Ненэцяʼ вада] + yrl: [Latn, [AM], "ñe'engatú"] + yua: [Latn, [AM], "Maaya T'aan"] + yue: [Hant, [AS], 粵語] + za: [Latn, [AS], Vahcuengh] + zea: [Latn, [EU], Zeêuws] + zgh: [Tfng, [AF], ⵜⴰⵎⴰⵣⵉⵖⵜ] + # world? (may apply to many varieties of Chinese) + zh: [Hans, [AS], 中文] + zh-classical: [Hant, [AS], 文言] + zh-cn: [Hans, [AS], 中文(中国大陆)] + zh-hans: [Hans, [AS], 中文(简体)] + zh-hant: [Hant, [AS], 中文(繁體)] + zh-hk: [Hant, [AS], 中文(香港)] + zh-min-nan: [nan] + zh-mo: [Hant, [AS], 中文(澳門)] + zh-my: [Hans, [AS], 中文(马来西亚)] + zh-sg: [Hans, [AS], 中文(新加坡)] + zh-tw: [Hant, [AS], 中文(台灣)] + zh-yue: [yue] + zu: [Latn, [AF], isiZulu] + + # All the supported scripts, grouped logically. + # + # The codes are taken from http://unicode.org/iso15924/iso15924-codes.html . + # + # The classification is roughly based on http://www.unicode.org/charts/ + # with some practical corrections. + # + # The order of the groups affects display. Pau Giner suggested the order; + # the rationale of the order is to distance the largest groups from + # one another to improve discoverability. + # + # The group name "Other" is reserved. 
+scriptgroups: + # It's hard to find a better place for Goth except the Latin group. + Latin: [Latn, Goth] + # Greek is probably different enough from Latin and Cyrillic, but user testing + # may prove otherwise. + Greek: [Grek] + WestCaucasian: [Armn, Geor] + Arabic: [Arab] + # Maybe MiddleEastern can be unified with Arabic. + # Maybe Thaana can be moved here from SouthAsian. + # Maybe it can be unified with African. + MiddleEastern: [Hebr, Syrc] + African: [Ethi, Nkoo, Tfng] + # India, Nepal, Bangladesh, Sri Lanka, Bhutan, Maldives. + # + # Thaana (Thaa, the script of the Maldives) is here, even though it's RTL, + # because it's closer geographically to India. Maybe it should be moved + # to MiddleEastern or to Arabic, if that would be easier for users. + # + # Tibetan (Tibt) is here, even though it's classified as "Central Asian" by + # Unicode, because linguistically and geographically it's closely related to + # the Brahmic family. + SouthAsian: [Beng, Deva, Gujr, Guru, Knda, Mlym, Orya, Saur, Sinh, Taml, Telu, Tibt, Thaa] + Cyrillic: [Cyrl] + CJK: [Hans, Hant, Kana, Kore, Jpan, Yiii] + SouthEastAsian: [Bali, Batk, Bugi, Java, Khmr, Laoo, Mymr, Thai] + Mongolian: [Mong] + SignWriting: [Sgnw] + NativeAmerican: [Cher, Cans] + # Undetermined script + Special: [Zyyy] + +rtlscripts: + [Arab, Hebr, Syrc, Nkoo, Thaa] + + # The numbers are also used in HTML id attributes +regiongroups: + # Worldwide, international + WW: 1 + # Special languages. To be shown near Worldwide when relevant. 
+ SP: 1 + # America + AM: 2 + # Europe + EU: 3 + # Middle East + ME: 3 + # Africa + AF: 3 + # Asia + AS: 4 + # Pacific + PA: 4 diff --git a/data/spyc.php b/data/spyc.php new file mode 100644 index 0000000..47d5877 --- /dev/null +++ b/data/spyc.php @@ -0,0 +1,1046 @@ + + * @author Chris Wanstrath + * @link http://code.google.com/p/spyc/ + * @copyright Copyright 2005-2006 Chris Wanstrath, 2006-2011 Vlad Andersen + * @license http://www.opensource.org/licenses/mit-license.php MIT License + * @package Spyc + */ + +if (!function_exists('spyc_load')) { + /** + * Parses YAML to array. + * @param string $string YAML string. + * @return array + */ + function spyc_load ($string) { + return Spyc::YAMLLoadString($string); + } +} + +if (!function_exists('spyc_load_file')) { + /** + * Parses YAML to array. + * @param string $file Path to YAML file. + * @return array + */ + function spyc_load_file ($file) { + return Spyc::YAMLLoad($file); + } +} + +/** + * The Simple PHP YAML Class. + * + * This class can be used to read a YAML file and convert its contents + * into a PHP array. It currently supports a very limited subset of + * the YAML spec. + * + * Usage: + * + * $Spyc = new Spyc; + * $array = $Spyc->load($file); + * + * or: + * + * $array = Spyc::YAMLLoad($file); + * + * or: + * + * $array = spyc_load_file($file); + * + * @package Spyc + */ +class Spyc { + + // SETTINGS + + const REMPTY = "\0\0\0\0\0"; + + /** + * Setting this to true will force YAMLDump to enclose any string value in + * quotes. False by default. + * + * @var bool + */ + public $setting_dump_force_quotes = false; + + /** + * Setting this to true will force YAMLLoad to use the syck_load function when + * possible. False by default. 
+ * @var bool + */ + public $setting_use_syck_is_possible = false; + + + + /**#@+ + * @access private + * @var mixed + */ + private $_dumpIndent; + private $_dumpWordWrap; + private $_containsGroupAnchor = false; + private $_containsGroupAlias = false; + private $path; + private $result; + private $LiteralPlaceHolder = '___YAML_Literal_Block___'; + private $SavedGroups = array(); + private $indent; + /** + * Path modifier that should be applied after adding current element. + * @var array + */ + private $delayedPath = array(); + + /**#@+ + * @access public + * @var mixed + */ + public $_nodeId; + +/** + * Load a valid YAML string to Spyc. + * @param string $input + * @return array + */ + public function load ($input) { + return $this->__loadString($input); + } + + /** + * Load a valid YAML file to Spyc. + * @param string $file + * @return array + */ + public function loadFile ($file) { + return $this->__load($file); + } + + /** + * Load YAML into a PHP array statically + * + * The load method, when supplied with a YAML stream (string or file), + * will do its best to convert YAML in a file into a PHP array. Pretty + * simple. + * Usage: + * + * $array = Spyc::YAMLLoad('lucky.yaml'); + * print_r($array); + * + * @access public + * @return array + * @param string $input Path of YAML file or string containing YAML + */ + public static function YAMLLoad($input) { + $Spyc = new Spyc; + return $Spyc->__load($input); + } + + /** + * Load a string of YAML into a PHP array statically + * + * The load method, when supplied with a YAML string, will do its best + * to convert YAML in a string into a PHP array. Pretty simple. + * + * Note: use this function if you don't want files from the file system + * loaded and processed as YAML. This is of interest to people concerned + * about security whose input is from a string. 
+ * + * Usage: + * + * $array = Spyc::YAMLLoadString("---\n0: hello world\n"); + * print_r($array); + * + * @access public + * @return array + * @param string $input String containing YAML + */ + public static function YAMLLoadString($input) { + $Spyc = new Spyc; + return $Spyc->__loadString($input); + } + + /** + * Dump YAML from PHP array statically + * + * The dump method, when supplied with an array, will do its best + * to convert the array into friendly YAML. Pretty simple. Feel free to + * save the returned string as nothing.yaml and pass it around. + * + * Oh, and you can decide how big the indent is and what the wordwrap + * for folding is. Pretty cool -- just pass in 'false' for either if + * you want to use the default. + * + * Indent's default is 2 spaces, wordwrap's default is 40 characters. And + * you can turn off wordwrap by passing in 0. + * + * @access public + * @return string + * @param array $array PHP array + * @param int $indent Pass in false to use the default, which is 2 + * @param int $wordwrap Pass in 0 for no wordwrap, false for default (40) + */ + public static function YAMLDump($array,$indent = false,$wordwrap = false) { + $spyc = new Spyc; + return $spyc->dump($array,$indent,$wordwrap); + } + + + /** + * Dump PHP array to YAML + * + * The dump method, when supplied with an array, will do its best + * to convert the array into friendly YAML. Pretty simple. Feel free to + * save the returned string as tasteful.yaml and pass it around. + * + * Oh, and you can decide how big the indent is and what the wordwrap + * for folding is. Pretty cool -- just pass in 'false' for either if + * you want to use the default. + * + * Indent's default is 2 spaces, wordwrap's default is 40 characters. And + * you can turn off wordwrap by passing in 0. 
+ * + * @access public + * @return string + * @param array $array PHP array + * @param int $indent Pass in false to use the default, which is 2 + * @param int $wordwrap Pass in 0 for no wordwrap, false for default (40) + */ + public function dump($array,$indent = false,$wordwrap = false) { + // Dumps to some very clean YAML. We'll have to add some more features + // and options soon. And better support for folding. + + // New features and options. + if ($indent === false or !is_numeric($indent)) { + $this->_dumpIndent = 2; + } else { + $this->_dumpIndent = $indent; + } + + if ($wordwrap === false or !is_numeric($wordwrap)) { + $this->_dumpWordWrap = 40; + } else { + $this->_dumpWordWrap = $wordwrap; + } + + // New YAML document + $string = "---\n"; + + // Start at the base of the array and move through it. + if ($array) { + $array = (array)$array; + $previous_key = -1; + foreach ($array as $key => $value) { + if (!isset($first_key)) $first_key = $key; + $string .= $this->_yamlize($key,$value,0,$previous_key, $first_key, $array); + $previous_key = $key; + } + } + return $string; + } + + /** + * Attempts to convert a key / value array item to YAML + * @access private + * @return string + * @param $key The name of the key + * @param $value The value of the item + * @param $indent The indent of the current node + */ + private function _yamlize($key,$value,$indent, $previous_key = -1, $first_key = 0, $source_array = null) { + if (is_array($value)) { + if (empty ($value)) + return $this->_dumpNode($key, array(), $indent, $previous_key, $first_key, $source_array); + // It has children. What to do? + // Make it the right kind of item + $string = $this->_dumpNode($key, self::REMPTY, $indent, $previous_key, $first_key, $source_array); + // Add the indent + $indent += $this->_dumpIndent; + // Yamlize the array + $string .= $this->_yamlizeArray($value,$indent); + } elseif (!is_array($value)) { + // It doesn't have children. Yip. 
+ $string = $this->_dumpNode($key, $value, $indent, $previous_key, $first_key, $source_array); + } + return $string; + } + + /** + * Attempts to convert an array to YAML + * @access private + * @return string + * @param $array The array you want to convert + * @param $indent The indent of the current level + */ + private function _yamlizeArray($array,$indent) { + if (is_array($array)) { + $string = ''; + $previous_key = -1; + foreach ($array as $key => $value) { + if (!isset($first_key)) $first_key = $key; + $string .= $this->_yamlize($key, $value, $indent, $previous_key, $first_key, $array); + $previous_key = $key; + } + return $string; + } else { + return false; + } + } + + /** + * Returns YAML from a key and a value + * @access private + * @return string + * @param $key The name of the key + * @param $value The value of the item + * @param $indent The indent of the current node + */ + private function _dumpNode($key, $value, $indent, $previous_key = -1, $first_key = 0, $source_array = null) { + // do some folding here, for blocks + if (is_string ($value) && ((strpos($value,"\n") !== false || strpos($value,": ") !== false || strpos($value,"- ") !== false || + strpos($value,"*") !== false || strpos($value,"#") !== false || strpos($value,"<") !== false || strpos($value,">") !== false || strpos ($value, ' ') !== false || + strpos($value,"[") !== false || strpos($value,"]") !== false || strpos($value,"{") !== false || strpos($value,"}") !== false) || strpos($value,"&") !== false || strpos($value, "'") !== false || strpos($value, "!") === 0 || + substr ($value, -1, 1) == ':') + ) { + $value = $this->_doLiteralBlock($value,$indent); + } else { + $value = $this->_doFolding($value,$indent); + } + + if ($value === array()) $value = '[ ]'; + if (in_array ($value, array ('true', 'TRUE', 'false', 'FALSE', 'y', 'Y', 'n', 'N', 'null', 'NULL'), true)) { + $value = $this->_doLiteralBlock($value,$indent); + } + if (trim ($value) != $value) + $value = 
$this->_doLiteralBlock($value,$indent); + + if (is_bool($value)) { + $value = ($value) ? "true" : "false"; + } + + if ($value === null) $value = 'null'; + if ($value === "'" . self::REMPTY . "'") $value = null; + + $spaces = str_repeat(' ',$indent); + + //if (is_int($key) && $key - 1 == $previous_key && $first_key===0) { + if (is_array ($source_array) && array_keys($source_array) === range(0, count($source_array) - 1)) { + // It's a sequence + $string = $spaces.'- '.$value."\n"; + } else { + // if ($first_key===0) throw new Exception('Keys are all screwy. The first one was zero, now it\'s "'. $key .'"'); + // It's mapped + if (strpos($key, ":") !== false || strpos($key, "#") !== false) { $key = '"' . $key . '"'; } + $string = rtrim ($spaces.$key.': '.$value)."\n"; + } + return $string; + } + + /** + * Creates a literal block for dumping + * @access private + * @return string + * @param $value + * @param $indent int The value of the indent + */ + private function _doLiteralBlock($value,$indent) { + if ($value === "\n") return '\n'; + if (strpos($value, "\n") === false && strpos($value, "'") === false) { + return sprintf ("'%s'", $value); + } + if (strpos($value, "\n") === false && strpos($value, '"') === false) { + return sprintf ('"%s"', $value); + } + $exploded = explode("\n",$value); + $newValue = '|'; + $indent += $this->_dumpIndent; + $spaces = str_repeat(' ',$indent); + foreach ($exploded as $line) { + $newValue .= "\n" . $spaces . 
($line); + } + return $newValue; + } + + /** + * Folds a string of text, if necessary + * @access private + * @return string + * @param $value The string you wish to fold + */ + private function _doFolding($value,$indent) { + // Don't do anything if wordwrap is set to 0 + + if ($this->_dumpWordWrap !== 0 && is_string ($value) && strlen($value) > $this->_dumpWordWrap) { + $indent += $this->_dumpIndent; + $indent = str_repeat(' ',$indent); + $wrapped = wordwrap($value,$this->_dumpWordWrap,"\n$indent"); + $value = ">\n".$indent.$wrapped; + } else { + if ($this->setting_dump_force_quotes && is_string ($value) && $value !== self::REMPTY) + $value = '"' . $value . '"'; + } + + + return $value; + } + +// LOADING FUNCTIONS + + private function __load($input) { + $Source = $this->loadFromSource($input); + return $this->loadWithSource($Source); + } + + private function __loadString($input) { + $Source = $this->loadFromString($input); + return $this->loadWithSource($Source); + } + + /** + * Parses an array of YAML source lines into a PHP array. + * + * Returns an empty array for empty input. When setting_use_syck_is_possible + * is set and syck_load() exists, the joined source is handed to syck_load() instead. + * @access private + * @param array $Source Lines of the YAML document + * @return array + */ + private function loadWithSource($Source) { + if (empty ($Source)) return array(); + if ($this->setting_use_syck_is_possible && function_exists ('syck_load')) { + $array = syck_load (implode ('', $Source)); + return is_array($array) ? $array : array(); + } + + $this->path = array(); + $this->result = array(); + + $cnt = count($Source); + for ($i = 0; $i < $cnt; $i++) { + $line = $Source[$i]; + + $this->indent = strlen($line) - strlen(ltrim($line)); + $tempPath = $this->getParentPathByIndent($this->indent); + $line = self::stripIndent($line, $this->indent); + if (self::isComment($line)) continue; + if (self::isEmpty($line)) continue; + $this->path = $tempPath; + + $literalBlockStyle = self::startsLiteralBlock($line); + if ($literalBlockStyle) { + $line = rtrim ($line, $literalBlockStyle . 
" \n"); + $literalBlock = ''; + $line .= $this->LiteralPlaceHolder; + // A block indicator on the last source line has no following line to measure; use indent 0. + $literal_block_indent = isset($Source[$i+1]) ? strlen($Source[$i+1]) - strlen(ltrim($Source[$i+1])) : 0; + while (++$i < $cnt && $this->literalBlockContinues($Source[$i], $this->indent)) { + $literalBlock = $this->addLiteralLine($literalBlock, $Source[$i], $literalBlockStyle, $literal_block_indent); + } + $i--; + } + + while (++$i < $cnt && self::greedilyNeedNextLine($line)) { + $line = rtrim ($line, " \n\t\r") . ' ' . ltrim ($Source[$i], " \t"); + } + $i--; + + + + if (strpos ($line, '#')) { + if (strpos ($line, '"') === false && strpos ($line, "'") === false) + $line = preg_replace('/\s+#(.+)$/','',$line); + } + + $lineArray = $this->_parseLine($line); + + if ($literalBlockStyle) + $lineArray = $this->revertLiteralPlaceHolder ($lineArray, $literalBlock); + + $this->addArray($lineArray, $this->indent); + + foreach ($this->delayedPath as $indent => $delayedPath) + $this->path[$indent] = $delayedPath; + + $this->delayedPath = array(); + + } + return $this->result; + } + + private function loadFromSource ($input) { + if (!empty($input) && strpos($input, "\n") === false && file_exists($input)) + return file($input); + + return $this->loadFromString($input); + } + + private function loadFromString ($input) { + $lines = explode("\n",$input); + foreach ($lines as $k => $_) { + $lines[$k] = rtrim ($_, "\r"); + } + return $lines; + } + + /** + * Parses YAML code and returns an array for a node + * @access private + * @return array + * @param string $line A line from the YAML file + */ + private function _parseLine($line) { + if (!$line) return array(); + $line = trim($line); + if (!$line) return array(); + + $array = array(); + + $group = $this->nodeContainsGroup($line); + if ($group) { + $this->addGroup($line, $group); + $line = $this->stripGroup ($line, $group); + } + + if ($this->startsMappedSequence($line)) + return $this->returnMappedSequence($line); + + if ($this->startsMappedValue($line)) + return 
$this->returnMappedValue($line); + + if ($this->isArrayElement($line)) + return $this->returnArrayElement($line); + + if ($this->isPlainArray($line)) + return $this->returnPlainArray($line); + + + return $this->returnKeyValuePair($line); + + } + + /** + * Finds the type of the passed value, returns the value as the new type. + * @access private + * @param string $value + * @return mixed + */ + private function _toType($value) { + if ($value === '') return null; + $first_character = $value[0]; + $last_character = substr($value, -1, 1); + + $is_quoted = false; + do { + if (!$value) break; + if ($first_character != '"' && $first_character != "'") break; + if ($last_character != '"' && $last_character != "'") break; + $is_quoted = true; + } while (0); + + if ($is_quoted) + return strtr(substr ($value, 1, -1), array ('\\"' => '"', '\'\'' => '\'', '\\\'' => '\'')); + + if (strpos($value, ' #') !== false && !$is_quoted) + $value = preg_replace('/\s+#(.+)$/','',$value); + + if (!$is_quoted) $value = str_replace('\n', "\n", $value); + + if ($first_character == '[' && $last_character == ']') { + // Take out strings sequences and mappings + $innerValue = trim(substr ($value, 1, -1)); + if ($innerValue === '') return array(); + $explode = $this->_inlineEscape($innerValue); + // Propagate value array + $value = array(); + foreach ($explode as $v) { + $value[] = $this->_toType($v); + } + return $value; + } + + if (strpos($value,': ')!==false && $first_character != '{') { + $array = explode(': ',$value); + $key = trim($array[0]); + array_shift($array); + $value = trim(implode(': ',$array)); + $value = $this->_toType($value); + return array($key => $value); + } + + if ($first_character == '{' && $last_character == '}') { + $innerValue = trim(substr ($value, 1, -1)); + if ($innerValue === '') return array(); + // Inline Mapping + // Take out strings sequences and mappings + $explode = $this->_inlineEscape($innerValue); + // Propagate value array + $array = array(); + foreach 
($explode as $v) { + $SubArr = $this->_toType($v); + if (empty($SubArr)) continue; + if (is_array ($SubArr)) { + $array[key($SubArr)] = $SubArr[key($SubArr)]; continue; + } + $array[] = $SubArr; + } + return $array; + } + + if ($value == 'null' || $value == 'NULL' || $value == 'Null' || $value == '' || $value == '~') { + return null; + } + + if ( is_numeric($value) && preg_match ('/^(-|)[1-9]+[0-9]*$/', $value) ){ + $intvalue = (int)$value; + if ($intvalue != PHP_INT_MAX) + $value = $intvalue; + return $value; + } + + if (in_array($value, + array('true', 'on', '+', 'yes', 'y', 'True', 'TRUE', 'On', 'ON', 'YES', 'Yes', 'Y'))) { + return true; + } + + if (in_array(strtolower($value), + array('false', 'off', '-', 'no', 'n'))) { + return false; + } + + if (is_numeric($value)) { + if ($value === '0') return 0; + if (rtrim ($value, 0) === $value) + $value = (float)$value; + return $value; + } + + return $value; + } + + /** + * Used in inlines to check for more inlines or quoted strings + * @access private + * @return array + */ + private function _inlineEscape($inline) { + // There's gotta be a cleaner way to do this... + // While pure sequences seem to be nesting just fine, + // pure mappings and mappings with sequences inside can't go very + // deep. This needs to be fixed. + + $seqs = array(); + $maps = array(); + $saved_strings = array(); + + // Check for strings + $regex = '/(?:(")|(?:\'))((?(1)[^"]+|[^\']+))(?(1)"|\')/'; + if (preg_match_all($regex,$inline,$strings)) { + $saved_strings = $strings[0]; + $inline = preg_replace($regex,'YAMLString',$inline); + } + unset($regex); + + $i = 0; + do { + + // Check for sequences + while (preg_match('/\[([^{}\[\]]+)\]/U',$inline,$matchseqs)) { + $seqs[] = $matchseqs[0]; + $inline = preg_replace('/\[([^{}\[\]]+)\]/U', ('YAMLSeq' . (count($seqs) - 1) . 
's'), $inline, 1); + } + + // Check for mappings + while (preg_match('/{([^\[\]{}]+)}/U',$inline,$matchmaps)) { + $maps[] = $matchmaps[0]; + $inline = preg_replace('/{([^\[\]{}]+)}/U', ('YAMLMap' . (count($maps) - 1) . 's'), $inline, 1); + } + + if ($i++ >= 10) break; + + } while (strpos ($inline, '[') !== false || strpos ($inline, '{') !== false); + + $explode = explode(', ',$inline); + $stringi = 0; $i = 0; + + while (1) { + + // Re-add the sequences + if (!empty($seqs)) { + foreach ($explode as $key => $value) { + if (strpos($value,'YAMLSeq') !== false) { + foreach ($seqs as $seqk => $seq) { + $explode[$key] = str_replace(('YAMLSeq'.$seqk.'s'),$seq,$value); + $value = $explode[$key]; + } + } + } + } + + // Re-add the mappings + if (!empty($maps)) { + foreach ($explode as $key => $value) { + if (strpos($value,'YAMLMap') !== false) { + foreach ($maps as $mapk => $map) { + $explode[$key] = str_replace(('YAMLMap'.$mapk.'s'), $map, $value); + $value = $explode[$key]; + } + } + } + } + + + // Re-add the strings + if (!empty($saved_strings)) { + foreach ($explode as $key => $value) { + while (strpos($value,'YAMLString') !== false) { + $explode[$key] = preg_replace('/YAMLString/',$saved_strings[$stringi],$value, 1); + unset($saved_strings[$stringi]); + ++$stringi; + $value = $explode[$key]; + } + } + } + + $finished = true; + foreach ($explode as $key => $value) { + if (strpos($value,'YAMLSeq') !== false) { + $finished = false; break; + } + if (strpos($value,'YAMLMap') !== false) { + $finished = false; break; + } + if (strpos($value,'YAMLString') !== false) { + $finished = false; break; + } + } + if ($finished) break; + + $i++; + if ($i > 10) + break; // Prevent infinite loops. 
+ } + + return $explode; + } + + private function literalBlockContinues ($line, $lineIndent) { + if (!trim($line)) return true; + if (strlen($line) - strlen(ltrim($line)) > $lineIndent) return true; + return false; + } + + /** + * Returns the part of the parsed result stored under the path saved for an alias. + * + * An alias with no saved group prints the "Bad group name" message and yields null. + * @access private + * @param string $alias + * @return mixed + */ + private function referenceContentsByAlias ($alias) { + // Return null explicitly for an unknown alias instead of reading a never-assigned variable. + if (!isset($this->SavedGroups[$alias])) { echo "Bad group name: $alias."; return null; } + $groupPath = $this->SavedGroups[$alias]; + $value = $this->result; + foreach ($groupPath as $k) { + $value = $value[$k]; + } + return $value; + } + + private function addArrayInline ($array, $indent) { + $CommonGroupPath = $this->path; + if (empty ($array)) return false; + + foreach ($array as $k => $_) { + $this->addArray(array($k => $_), $indent); + $this->path = $CommonGroupPath; + } + return true; + } + + private function addArray ($incoming_data, $incoming_indent) { + + // print_r ($incoming_data); + + if (count ($incoming_data) > 1) + return $this->addArrayInline ($incoming_data, $incoming_indent); + + $key = key ($incoming_data); + $value = isset($incoming_data[$key]) ? $incoming_data[$key] : null; + if ($key === '__!YAMLZero') $key = '0'; + + if ($incoming_indent == 0 && !$this->_containsGroupAlias && !$this->_containsGroupAnchor) { // Shortcut for root-level values. + if ($key || $key === '' || $key === '0') { + $this->result[$key] = $value; + } else { + $this->result[] = $value; end ($this->result); $key = key ($this->result); + } + $this->path[$incoming_indent] = $key; + return; + } + + + + $history = array(); + // Unfolding inner array tree. + $history[] = $_arr = $this->result; + foreach ($this->path as $k) { + $history[] = $_arr = $_arr[$k]; + } + + if ($this->_containsGroupAlias) { + $value = $this->referenceContentsByAlias($this->_containsGroupAlias); + $this->_containsGroupAlias = false; + } + + + // Adding string or numeric key to the innermost level or $this->arr. 
+ if (is_string($key) && $key == '<<') { + if (!is_array ($_arr)) { $_arr = array (); } + + $_arr = array_merge ($_arr, $value); + } elseif ($key || $key === '' || $key === '0') { + if (!is_array ($_arr)) + $_arr = array ($key=>$value); + else + $_arr[$key] = $value; + } else { + if (!is_array ($_arr)) { $_arr = array ($value); $key = 0; } + else { $_arr[] = $value; end ($_arr); $key = key ($_arr); } + } + + $reverse_path = array_reverse($this->path); + $reverse_history = array_reverse ($history); + $reverse_history[0] = $_arr; + $cnt = count($reverse_history) - 1; + for ($i = 0; $i < $cnt; $i++) { + $reverse_history[$i+1][$reverse_path[$i]] = $reverse_history[$i]; + } + $this->result = $reverse_history[$cnt]; + + $this->path[$incoming_indent] = $key; + + if ($this->_containsGroupAnchor) { + $this->SavedGroups[$this->_containsGroupAnchor] = $this->path; + if (is_array ($value)) { + $k = key ($value); + if (!is_int ($k)) { + $this->SavedGroups[$this->_containsGroupAnchor][$incoming_indent + 2] = $k; + } + } + $this->_containsGroupAnchor = false; + } + + } + + private static function startsLiteralBlock ($line) { + $lastChar = substr (trim($line), -1); + if ($lastChar != '>' && $lastChar != '|') return false; + if ($lastChar == '|') return $lastChar; + // HTML tags should not be counted as literal blocks. + if (preg_match ('#<.*?>$#', $line)) return false; + return $lastChar; + } + + private static function greedilyNeedNextLine($line) { + $line = trim ($line); + if (!strlen($line)) return false; + if (substr ($line, -1, 1) == ']') return false; + if ($line[0] == '[') return true; + if (preg_match ('#^[^:]+?:\s*\[#', $line)) return true; + return false; + } + + private function addLiteralLine ($literalBlock, $line, $literalBlockStyle, $indent = -1) { + $line = self::stripIndent($line, $indent); + if ($literalBlockStyle !== '|') { + $line = self::stripIndent($line); + } + $line = rtrim ($line, "\r\n\t ") . 
"\n"; + if ($literalBlockStyle == '|') { + return $literalBlock . $line; + } + if (strlen($line) == 0) + return rtrim($literalBlock, ' ') . "\n"; + if ($line == "\n" && $literalBlockStyle == '>') { + return rtrim ($literalBlock, " \t") . "\n"; + } + if ($line != "\n") + $line = trim ($line, "\r\n ") . " "; + return $literalBlock . $line; + } + + function revertLiteralPlaceHolder ($lineArray, $literalBlock) { + foreach ($lineArray as $k => $_) { + if (is_array($_)) + $lineArray[$k] = $this->revertLiteralPlaceHolder ($_, $literalBlock); + elseif (substr($_, -1 * strlen ($this->LiteralPlaceHolder)) == $this->LiteralPlaceHolder) + $lineArray[$k] = rtrim ($literalBlock, " \r\n"); + } + return $lineArray; + } + + private static function stripIndent ($line, $indent = -1) { + if ($indent == -1) $indent = strlen($line) - strlen(ltrim($line)); + return substr ($line, $indent); + } + + private function getParentPathByIndent ($indent) { + if ($indent == 0) return array(); + $linePath = $this->path; + do { + end($linePath); $lastIndentInParentPath = key($linePath); + if ($indent <= $lastIndentInParentPath) array_pop ($linePath); + } while ($indent <= $lastIndentInParentPath); + return $linePath; + } + + + private function clearBiggerPathValues ($indent) { + + + if ($indent == 0) $this->path = array(); + if (empty ($this->path)) return true; + + foreach ($this->path as $k => $_) { + if ($k > $indent) unset ($this->path[$k]); + } + + return true; + } + + + private static function isComment ($line) { + if (!$line) return false; + if ($line[0] == '#') return true; + if (trim($line, " \r\n\t") == '---') return true; + return false; + } + + private static function isEmpty ($line) { + return (trim ($line) === ''); + } + + + private function isArrayElement ($line) { + if (!$line) return false; + if ($line[0] != '-') return false; + if (strlen ($line) > 3) + if (substr($line,0,3) == '---') return false; + + return true; + } + + private function isHashElement ($line) { + return 
strpos($line, ':');
+  }
+
+  /**
+   * A line is a literal when it is neither an array element nor a hash
+   * element.
+   */
+  private function isLiteral ($line) {
+    return !$this->isArrayElement($line) && !$this->isHashElement($line);
+  }
+
+
+  /**
+   * Strips surrounding quote characters from a string value.
+   *
+   * Falsy and non-string values are returned untouched. When the value starts
+   * with a single or double quote, every leading and trailing occurrence of
+   * that quote character is trimmed.
+   */
+  private static function unquote ($value) {
+    if (!$value || !is_string($value)) return $value;
+    $quote = $value[0];
+    if ($quote == '\'' || $quote == '"') return trim ($value, $quote);
+    return $value;
+  }
+
+  /**
+   * True for a line that starts with '-' and ends with ':'.
+   */
+  private function startsMappedSequence ($line) {
+    if ($line[0] != '-') return false;
+    return substr ($line, -1, 1) == ':';
+  }
+
+  /**
+   * Builds the structure for a `- key:` line: a list holding one map whose
+   * single key points at an empty array. The key position is recorded in
+   * $this->delayedPath.
+   */
+  private function returnMappedSequence ($line) {
+    $key = self::unquote(trim(substr($line,1,-1)));
+    $this->delayedPath = array(strpos ($line, $key) + $this->indent => $key);
+    return array(array($key => array()));
+  }
+
+  /**
+   * Builds the structure for a `key:` line: the key mapped to an empty string.
+   */
+  private function returnMappedValue ($line) {
+    $key = self::unquote (trim(substr($line,0,-1)));
+    return array($key => '');
+  }
+
+  /**
+   * True for a line that ends with ':'.
+   */
+  private function startsMappedValue ($line) {
+    $lastChar = substr ($line, -1, 1);
+    return $lastChar == ':';
+  }
+
+  /**
+   * True for a line that starts with '[' and ends with ']'.
+   */
+  private function isPlainArray ($line) {
+    if ($line[0] != '[') return false;
+    return substr ($line, -1, 1) == ']';
+  }
+
+  /**
+   * Converts a `[...]` line through _toType().
+   */
+  private function returnPlainArray ($line) {
+    $parsed = $this->_toType($line);
+    return $parsed;
+  }
+
+  private function returnKeyValuePair ($line) {
+    $array = array();
+    $key = '';
+    if (strpos ($line, ':')) {
+      // It's a key/value pair most likely
+      // If the key is in double quotes pull it out
+      if (($line[0] == '"' || $line[0] == "'") && preg_match('/^(["\'](.*)["\'](\s)*:)/',$line,$matches)) {
+        $value = trim(str_replace($matches[1],'',$line));
+        $key   = $matches[2];
+      } else {
+        // Do some guesswork as to the key and the value
+        $explode = explode(':',$line);
+        $key     = trim($explode[0]);
+        array_shift($explode);
+        $value   = trim(implode(':',$explode));
+      }
+      // Set the type of the value.
Int, string, etc
+      $value = $this->_toType($value);
+      // '0' is swapped for a placeholder here; addArray() maps it back.
+      if ($key === '0') $key = '__!YAMLZero';
+      $array[$key] = $value;
+    } else {
+      $array = array ($line);
+    }
+    return $array;
+
+  }
+
+
+  /**
+   * Builds the structure for a `- value` line.
+   *
+   * @param string $line Line starting with '-'.
+   * @return array One-element list holding the typed value, or a list holding
+   *               an empty array when the line is at most one character long.
+   */
+  private function returnArrayElement ($line) {
+     if (strlen($line) <= 1) return array(array()); // Weird %)
+     $array = array();
+     $value   = trim(substr($line,1));
+     $value   = $this->_toType($value);
+     $array[] = $value;
+     return $array;
+  }
+
+
+  /**
+   * Finds an anchor (&name) or alias (*name) reference in a line.
+   *
+   * A reference is recognised at the start of the line, at the end of the
+   * line, or as the value of a `<<:` merge key.
+   *
+   * @param string $line Line to inspect.
+   * @return string|false The reference including its '&' or '*' prefix, or
+   *                      false when the line contains none.
+   */
+  private function nodeContainsGroup ($line) {
+    // Was 'A-z', an ASCII range that also covers [ \ ] ^ and `, so a value
+    // such as "*ref]" was taken as an alias name.
+    $symbolsForReference = 'A-Za-z0-9_\-';
+    if (strpos($line, '&') === false && strpos($line, '*') === false) return false; // Please die fast ;-)
+    if ($line[0] == '&' && preg_match('/^(&['.$symbolsForReference.']+)/', $line, $matches)) return $matches[1];
+    if ($line[0] == '*' && preg_match('/^(\*['.$symbolsForReference.']+)/', $line, $matches)) return $matches[1];
+    if (preg_match('/(&['.$symbolsForReference.']+)$/', $line, $matches)) return $matches[1];
+    if (preg_match('/(\*['.$symbolsForReference.']+$)/', $line, $matches)) return $matches[1];
+    if (preg_match ('#^\s*<<\s*:\s*(\*[^\s]+).*$#', $line, $matches)) return $matches[1];
+    return false;
+
+  }
+
+  /**
+   * Records the pending anchor or alias name (without its prefix) so that
+   * addArray() can act on it.
+   *
+   * @param string $line  Unused.
+   * @param string $group Reference as returned by nodeContainsGroup().
+   */
+  private function addGroup ($line, $group) {
+    if ($group[0] == '&') $this->_containsGroupAnchor = substr ($group, 1);
+    if ($group[0] == '*') $this->_containsGroupAlias = substr ($group, 1);
+    //print_r ($this->path);
+  }
+
+  /**
+   * Removes every occurrence of the reference from the line and trims it.
+   */
+  private function stripGroup ($line, $group) {
+    $line = trim(str_replace($group, '', $line));
+    return $line;
+  }
+}
+
+// Enable use of Spyc from command line
+// The syntax is the following: php spyc.php spyc.yaml
+
+define ('SPYC_FROM_COMMAND_LINE', false);
+
+// Runs only when SPYC_FROM_COMMAND_LINE is true, a file argument is given
+// and the script was invoked as 'spyc.php'.
+do {
+  if (!SPYC_FROM_COMMAND_LINE) break;
+  if (empty ($_SERVER['argc']) || $_SERVER['argc'] < 2) break;
+  if (empty ($_SERVER['PHP_SELF']) || $_SERVER['PHP_SELF'] != 'spyc.php') break;
+  $file = $argv[1];
+  printf ("Spyc loading file: %s\n", $file);
+  print_r (spyc_load_file ($file));
+} while (0);
\ No newline at end of file
diff --git a/data/ulsdata2json.php
b/data/ulsdata2json.php new file mode 100644 index 0000000..576d586 --- /dev/null +++ b/data/ulsdata2json.php @@ -0,0 +1,118 @@ +territoryInfo->territory as $territoryRecord ) { + $territoryAtributes = $territoryRecord->attributes(); + $territoryCodeAttr = $territoryAtributes['type']; + $territoryCode = (string) $territoryCodeAttr[0]; + $parsedLangdb['territories'][$territoryCode] = array(); + + foreach ( $territoryRecord->languagePopulation as $languageRecord ) { + $languageAttributes = $languageRecord->attributes(); + $languageCodeAttr = $languageAttributes['type']; + // Lower case is a convention for language codes in ULS. + // '_' is used in CLDR for compound codes and it's replaced with '-' here. + + $normalisedCode = strtr( strtolower( (string) $languageCodeAttr[0] ), '_', '-' ); + + $parsedLangdb['territories'][$territoryCode][] = $normalisedCode; + + // In case of codes with variants, also add the base because ULS might consider + // them as separate languages, e.g. zh, zh-hant and zh-hans. 
+ if ( strpos( $normalisedCode, '-' ) !== false ) { + $parts = explode( '-', $normalisedCode ); + $parsedLangdb['territories'][$territoryCode][] = $parts[0]; + } + } +} + +foreach ( $parsedLangdb['territories'] as $territoryCode => $languages ) { + foreach ( $languages as $index => $language ) { + if ( !isset( $parsedLangdb['languages'][$language] ) ) { + echo "Unknown language $language for territory $territoryCode\n"; + unset( $parsedLangdb['territories'][$territoryCode][$index] ); + continue; + } + + $data = $parsedLangdb['languages'][$language]; + if ( count( $data ) === 1 ) { + echo "Redirect for language $language to {$data[0]} territory $territoryCode\n"; + $parsedLangdb['territories'][$territoryCode][$index] = $data[0]; + continue; + } + } + + // Clean-up to save space + if ( count( $parsedLangdb['territories'][$territoryCode] ) === 0 ) { + unset( $parsedLangdb['territories'][$territoryCode] ); + continue; + } + + // Remove duplicates we might have created + $parsedLangdb['territories'][$territoryCode] = + array_unique( $parsedLangdb['territories'][$territoryCode] ); + + + // We need to renumber or json conversion thinks these are objects + $parsedLangdb['territories'][$territoryCode] = + array_values( $parsedLangdb['territories'][$territoryCode] ); +} + +print "Writing JSON langdb...\n"; +$jsonVerbose = json_encode( $parsedLangdb, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE ); +// For making diff review easier. +file_put_contents( '../language-data.json', $jsonVerbose ); + +print "Done.\n"; diff --git a/index.js b/index.js new file mode 100644 index 0000000..cd5b9b1 --- /dev/null +++ b/index.js @@ -0,0 +1,261 @@ +var languageData = require( './language-data.json' ); + +/** + * Utility functions for querying language data. + */ + +/** + * Is this language a redirect to another language? 
+ * @param {string} language Language code + * @return {string} Target language code if it's a redirect or false if it's not + */ +function isRedirect( language ) { + return ( languageData.languages[ language ] !== undefined && languageData.languages[ language ].length === 1 ) ? languageData.languages[ language ][ 0 ] : false; +} + +/** + * Returns the script of the language. + * @param {string} language Language code + * @return {string} + */ +function getScript( language ) { + var target = isRedirect( language ); + if ( target ) { + return getScript( target ); + } + if ( !languageData.languages[ language ] ) { + // Undetermined + return 'Zyyy'; + } + return languageData.languages[ language ][ 0 ]; +} + +/** + * Returns the regions in which a language is spoken. + * @param {string} language Language code + * @return {string[]} 'UNKNOWN' + */ +function getRegions( language ) { + var target = isRedirect( language ); + if ( target ) { + return getRegions( target ); + } + return ( languageData.languages[ language ] && languageData.languages[ language ][ 1 ] ) || 'UNKNOWN'; +} + +/** + * Returns the autonym of the language. + * @param {string} language Language code + * @return {string} + */ +function getAutonym( language ) { + var target = isRedirect( language ); + if ( target ) { + return getAutonym( target ); + } + return ( languageData.languages[ language ] && languageData.languages[ language ][ 2 ] ) || language; +} + +/** + * Returns all language codes and corresponding autonyms + * @return {array} + */ +function getAutonyms() { + var language, + autonymsByCode = {}; + for ( language in languageData.languages ) { + if ( isRedirect( language ) ) { + continue; + } + autonymsByCode[ language ] = getAutonym( language ); + } + return autonymsByCode; +} + +/** + * Returns all languages written in the given scripts. 
+ * @param {string[]} scripts + * @return {string[]} languages codes + */ +function getLanguagesInScripts( scripts ) { + var language, i, + languagesInScripts = []; + for ( language in languageData.languages ) { + if ( isRedirect( language ) ) { + continue; + } + for ( i = 0; i < scripts.length; i++ ) { + if ( scripts[ i ] === getScript( language ) ) { + languagesInScripts.push( language ); + break; + } + } + } + return languagesInScripts; +} + +/** + * Returns all languages written in script. + * @param {string} script + * @return {string[]} array of strings (languages codes) + */ +function getLanguagesInScript( script ) { + return getLanguagesInScripts( [ script ] ); +} + +/** + * Returns the script group of a script or 'Other' if it doesn't + * belong to any group. + * @param {string} script Script code + * @return {string} script group name + */ +function getGroupOfScript( script ) { + var scriptGroup; + for ( scriptGroup in languageData.scriptgroups ) { + if ( languageData.scriptgroups[ scriptGroup ].includes( script ) ) { + return scriptGroup; + } + } + return 'Other'; +} + +/** + * Returns the script group of a language. + * @param {string} language Language code + * @return {string} script group name + */ +function getScriptGroupOfLanguage( language ) { + return getGroupOfScript( getScript( language ) ); +} + +/** + * Get the given list of languages grouped by script. 
+ * @param {string[]} languages Array of language codes + * @return {Object} Array of languages indexed by script codes + */ +function getLanguagesByScriptGroup( languages ) { + var languagesByScriptGroup = {}, + language, resolvedRedirect, langScriptGroup; + for ( language in languages ) { + resolvedRedirect = isRedirect( language ) || language; + langScriptGroup = getScriptGroupOfLanguage( resolvedRedirect ); + if ( !languagesByScriptGroup[ langScriptGroup ] ) { + languagesByScriptGroup[ langScriptGroup ] = []; + } + languagesByScriptGroup[ langScriptGroup ].push( language ); + } + return languagesByScriptGroup; +} + +/** + * Returns an associative array of languages in several regions, + * grouped by script group. + * @param {string[]} regions array of region codes + * @return {Object} + */ +function getLanguagesByScriptGroupInRegions( regions ) { + var language, i, scriptGroup, + languagesByScriptGroupInRegions = {}; + for ( language in languageData.languages ) { + if ( isRedirect( language ) ) { + continue; + } + for ( i = 0; i < regions.length; i++ ) { + if ( getRegions( language ).includes( regions[ i ] ) ) { + scriptGroup = getScriptGroupOfLanguage( language ); + if ( languagesByScriptGroupInRegions[ scriptGroup ] === undefined ) { + languagesByScriptGroupInRegions[ scriptGroup ] = []; + } + languagesByScriptGroupInRegions[ scriptGroup ].push( language ); + break; + } + } + } + return languagesByScriptGroupInRegions; +} + +/** + * Returns an associative array of languages in a region, + * grouped by script group. + * @param {string} region Region code + * @return {Object} + */ +function getLanguagesByScriptGroupInRegion( region ) { + return getLanguagesByScriptGroupInRegions( [ region ] ); +} + +/** + * A callback for sorting languages by autonym. + * Can be used as an argument to a sort function. 
+ * @param {string} a Language code + * @param {string} b Language code + * @return {number} + */ +function sortByAutonym( a, b ) { + var autonymA = getAutonym( a ) || a, + autonymB = getAutonym( b ) || b; + return ( autonymA.toLowerCase() < autonymB.toLowerCase() ) ? -1 : 1; +} + +/** + * Check if a language is right-to-left. + * @param {string} language Language code + * @return {boolean} + */ +function isRtl( language ) { + return languageData.rtlscripts.includes( getScript( language ) ); +} + +/** + * Return the direction of the language + * @param {string} language Language code + * @return {string} + */ +function getDir( language ) { + return isRtl( language ) ? 'rtl' : 'ltr'; +} + +/** + * Returns the languages spoken in a territory. + * @param {string} territory Territory code + * @return {string[]} list of language codes + */ +function getLanguagesInTerritory( territory ) { + return languageData.territories[ territory ]; +} + +/** + * Adds a language in run time and sets its options as provided. + * If the target option is provided, the language is defined as a redirect. + * Other possible options are script, regions and autonym. + * + * @param {string} code New language code. + * @param {Object} options Language properties. 
+ */ +function addLanguage( code, options ) { + if ( options.target ) { + languageData.languages[ code ] = [ options.target ]; + } else { + languageData.languages[ code ] = [ options.script, options.regions, options.autonym ]; + } +} + +module.exports = { + addLanguage, + getAutonym, + getAutonyms, + getDir, + getGroupOfScript, + getLanguagesByScriptGroup, + getLanguagesByScriptGroupInRegion, + getLanguagesByScriptGroupInRegions, + getLanguagesInScript, + getLanguagesInScripts, + getLanguagesInTerritory, + getRegions, + getScript, + getScriptGroupOfLanguage, + isRedirect, + isRtl, + sortByAutonym +}; diff --git a/language-data.json b/language-data.json new file mode 100644 index 0000000..94936fe --- /dev/null +++ b/language-data.json @@ -0,0 +1,5282 @@ +{ + "languages": { + "aa": [ + "Latn", + [ + "AF" + ], + "Qafár af" + ], + "ab": [ + "Cyrl", + [ + "EU" + ], + "Аҧсшәа" + ], + "abs": [ + "Latn", + [ + "AS" + ], + "Bahasa Ambon" + ], + "ace": [ + "Latn", + [ + "AS", + "PA" + ], + "Acèh" + ], + "acf": [ + "Latn", + [ + "AM" + ], + "kwéyòl" + ], + "ady": [ + "Cyrl", + [ + "EU", + "ME" + ], + "Адыгабзэ" + ], + "ady-cyrl": [ + "ady" + ], + "ady-latn": [ + "Latn", + [ + "EU", + "ME" + ], + "Adygabze" + ], + "aeb": [ + "aeb-arab" + ], + "aeb-arab": [ + "Arab", + [ + "AF" + ], + "تونسي" + ], + "aeb-latn": [ + "Latn", + [ + "AF" + ], + "Tûnsî" + ], + "af": [ + "Latn", + [ + "AF" + ], + "Afrikaans" + ], + "ahr": [ + "Deva", + [ + "AS" + ], + "अहिराणी" + ], + "ak": [ + "Latn", + [ + "AF" + ], + "Akan" + ], + "akz": [ + "Latn", + [ + "AM" + ], + "Albaamo innaaɬiilka" + ], + "aln": [ + "Latn", + [ + "EU" + ], + "Gegë" + ], + "am": [ + "Ethi", + [ + "AF" + ], + "አማርኛ" + ], + "an": [ + "Latn", + [ + "EU" + ], + "aragonés" + ], + "ang": [ + "Latn", + [ + "EU" + ], + "Ænglisc" + ], + "anp": [ + "Deva", + [ + "AS" + ], + "अङ्गिका" + ], + "ar": [ + "Arab", + [ + "ME" + ], + "العربية" + ], + "arc": [ + "Syrc", + [ + "ME" + ], + "ܐܪܡܝܐ" + ], + "arn": [ + "Latn", + [ + "AM" + ], + 
"mapudungun" + ], + "aro": [ + "Latn", + [ + "AM" + ], + "Araona" + ], + "arq": [ + "Arab", + [ + "AF" + ], + "جازايرية" + ], + "ary": [ + "Latn", + [ + "ME" + ], + "Maġribi" + ], + "arz": [ + "Arab", + [ + "ME" + ], + "مصرى" + ], + "as": [ + "Beng", + [ + "AS" + ], + "অসমীয়া" + ], + "ase": [ + "Sgnw", + [ + "AM" + ], + "American sign language" + ], + "ast": [ + "Latn", + [ + "EU" + ], + "asturianu" + ], + "atj": [ + "Latn", + [ + "AM" + ], + "atikamekw" + ], + "av": [ + "Cyrl", + [ + "EU" + ], + "авар" + ], + "avk": [ + "Latn", + [ + "WW" + ], + "Kotava" + ], + "ay": [ + "Latn", + [ + "AM" + ], + "Aymar aru" + ], + "az": [ + "az-latn" + ], + "az-arab": [ + "Arab", + [ + "AS", + "ME" + ], + "تۆرکجه" + ], + "az-latn": [ + "Latn", + [ + "EU", + "ME" + ], + "azərbaycanca" + ], + "az-cyrl": [ + "Cyrl", + [ + "EU", + "ME" + ], + "азәрбајҹанҹа" + ], + "azb": [ + "az-arab" + ], + "azj": [ + "az-latn" + ], + "ba": [ + "Cyrl", + [ + "EU" + ], + "башҡортса" + ], + "ban": [ + "Bali", + [ + "AS" + ], + "ᬩᬲᬩᬮᬶ" + ], + "bar": [ + "Latn", + [ + "EU" + ], + "Boarisch" + ], + "bat-smg": [ + "sgs" + ], + "bbc-latn": [ + "Latn", + [ + "AS" + ], + "Batak Toba" + ], + "bbc-batk": [ + "Batk", + [ + "AS" + ], + "Batak Toba" + ], + "bbc": [ + "Latn", + [ + "AS" + ], + "Batak Toba" + ], + "bcc": [ + "Arab", + [ + "AS", + "ME" + ], + "جهلسری بلوچی" + ], + "bcl": [ + "Latn", + [ + "AS" + ], + "Bikol Central" + ], + "be-tarask": [ + "Cyrl", + [ + "EU" + ], + "беларуская (тарашкевіца)" + ], + "be-x-old": [ + "be-tarask" + ], + "be": [ + "Cyrl", + [ + "EU" + ], + "беларуская" + ], + "bew": [ + "Latn", + [ + "AS" + ], + "Bahasa Betawi" + ], + "bfa": [ + "Latn", + [ + "AF" + ], + "Bari" + ], + "bft": [ + "Arab", + [ + "AS" + ], + "بلتی" + ], + "bfq": [ + "Taml", + [ + "AS" + ], + "படகா" + ], + "bg": [ + "Cyrl", + [ + "EU" + ], + "български" + ], + "bgn": [ + "Arab", + [ + "AS", + "ME" + ], + "روچ کپتین بلوچی" + ], + "bh": [ + "bho" + ], + "bho": [ + "Deva", + [ + "AS" + ], + "भोजपुरी" + ], + 
"bi": [ + "Latn", + [ + "PA" + ], + "Bislama" + ], + "bjn": [ + "Latn", + [ + "AS" + ], + "Bahasa Banjar" + ], + "bm": [ + "Latn", + [ + "AF" + ], + "bamanankan" + ], + "bn": [ + "Beng", + [ + "AS" + ], + "বাংলা" + ], + "bo": [ + "Tibt", + [ + "AS" + ], + "བོད་ཡིག" + ], + "bpy": [ + "Beng", + [ + "AS" + ], + "বিষ্ণুপ্রিয়া মণিপুরী" + ], + "bqi": [ + "Arab", + [ + "AS", + "ME" + ], + "بختیاری" + ], + "br": [ + "Latn", + [ + "EU" + ], + "brezhoneg" + ], + "brh": [ + "Latn", + [ + "ME", + "AS" + ], + "Bráhuí" + ], + "brx": [ + "Deva", + [ + "AS" + ], + "बड़ो" + ], + "bs": [ + "Latn", + [ + "EU" + ], + "bosanski" + ], + "bto": [ + "Latn", + [ + "AS" + ], + "Iriga Bicolano" + ], + "bug": [ + "Bugi", + [ + "AS" + ], + "ᨅᨔ ᨕᨘᨁᨗ" + ], + "bxr": [ + "Cyrl", + [ + "AS" + ], + "буряад" + ], + "ca": [ + "Latn", + [ + "EU" + ], + "català" + ], + "cbk-zam": [ + "Latn", + [ + "AS" + ], + "Chavacano de Zamboanga" + ], + "cdo": [ + "Latn", + [ + "AS" + ], + "Mìng-dĕ̤ng-ngṳ̄" + ], + "ce": [ + "Cyrl", + [ + "EU" + ], + "нохчийн" + ], + "ceb": [ + "Latn", + [ + "AS" + ], + "Cebuano" + ], + "ch": [ + "Latn", + [ + "PA" + ], + "Chamoru" + ], + "chm": [ + "mhr" + ], + "cho": [ + "Latn", + [ + "AM" + ], + "Choctaw" + ], + "chr": [ + "Cher", + [ + "AM" + ], + "ᏣᎳᎩ" + ], + "chy": [ + "Latn", + [ + "AM" + ], + "Tsetsêhestâhese" + ], + "ciw": [ + "Latn", + [ + "AM" + ], + "Ojibwemowin" + ], + "cjy": [ + "cjy-hant" + ], + "cjy-hans": [ + "Hans", + [ + "AS" + ], + "晋语(简化字)" + ], + "cjy-hant": [ + "Hant", + [ + "AS" + ], + "晉語" + ], + "ckb": [ + "Arab", + [ + "ME" + ], + "کوردیی ناوەندی" + ], + "cnh": [ + "Latn", + [ + "AS" + ], + "Lai holh" + ], + "co": [ + "Latn", + [ + "EU" + ], + "corsu" + ], + "cps": [ + "Latn", + [ + "AS" + ], + "Capiceño" + ], + "cr": [ + "Cans", + [ + "AM" + ], + "ᓀᐦᐃᔭᐍᐏᐣ" + ], + "cr-cans": [ + "cr" + ], + "cr-latn": [ + "Latn", + [ + "AM" + ], + "Nēhiyawēwin" + ], + "crh": [ + "Latn", + [ + "EU" + ], + "qırımtatarca" + ], + "crh-cyrl": [ + "Cyrl", + [ + "EU" + ], + 
"къырымтатарджа" + ], + "crh-latn": [ + "crh" + ], + "cs": [ + "Latn", + [ + "EU" + ], + "čeština" + ], + "csb": [ + "Latn", + [ + "EU" + ], + "kaszëbsczi" + ], + "cu": [ + "Cyrl", + [ + "EU" + ], + "словѣньскъ \/ ⰔⰎⰑⰂⰡⰐⰠⰔⰍⰟ" + ], + "cv": [ + "Cyrl", + [ + "EU" + ], + "Чӑвашла" + ], + "cy": [ + "Latn", + [ + "EU" + ], + "Cymraeg" + ], + "da": [ + "Latn", + [ + "EU" + ], + "dansk" + ], + "de-at": [ + "Latn", + [ + "EU" + ], + "Österreichisches Deutsch" + ], + "de-ch": [ + "Latn", + [ + "EU" + ], + "Schweizer Hochdeutsch" + ], + "de-formal": [ + "Latn", + [ + "EU" + ], + "Deutsch (Sie-Form)" + ], + "de": [ + "Latn", + [ + "EU" + ], + "Deutsch" + ], + "din": [ + "Latn", + [ + "AF" + ], + "Thuɔŋjäŋ" + ], + "diq": [ + "Latn", + [ + "EU", + "AS" + ], + "Zazaki" + ], + "dsb": [ + "Latn", + [ + "EU" + ], + "dolnoserbski" + ], + "dtp": [ + "Latn", + [ + "AS" + ], + "Dusun Bundu-liwan" + ], + "dty": [ + "Deva", + [ + "AS" + ], + "डोटेली" + ], + "dv": [ + "Thaa", + [ + "AS" + ], + "ދިވެހިބަސް" + ], + "dz": [ + "Tibt", + [ + "AS" + ], + "ཇོང་ཁ" + ], + "ee": [ + "Latn", + [ + "AF" + ], + "eʋegbe" + ], + "egl": [ + "Latn", + [ + "EU" + ], + "Emiliàn" + ], + "el": [ + "Grek", + [ + "EU" + ], + "Ελληνικά" + ], + "eml": [ + "Latn", + [ + "EU" + ], + "emiliàn e rumagnòl" + ], + "en-ca": [ + "Latn", + [ + "AM" + ], + "Canadian English" + ], + "en-gb": [ + "Latn", + [ + "EU", + "AS", + "PA" + ], + "British English" + ], + "en": [ + "Latn", + [ + "EU", + "AM", + "AF", + "ME", + "AS", + "PA", + "WW" + ], + "English" + ], + "eo": [ + "Latn", + [ + "WW" + ], + "Esperanto" + ], + "es-419": [ + "Latn", + [ + "AM" + ], + "español de America Latina" + ], + "es-formal": [ + "Latn", + [ + "EU", + "AM", + "AF", + "WW" + ], + "español (formal)" + ], + "es": [ + "Latn", + [ + "EU", + "AM", + "AF", + "WW" + ], + "español" + ], + "esu": [ + "Latn", + [ + "AM" + ], + "Yup'ik" + ], + "et": [ + "Latn", + [ + "EU" + ], + "eesti" + ], + "eu": [ + "Latn", + [ + "EU" + ], + "euskara" + ], + "ext": [ + 
"Latn", + [ + "EU" + ], + "estremeñu" + ], + "fa": [ + "Arab", + [ + "AS", + "ME" + ], + "فارسی" + ], + "fax": [ + "Latn", + [ + "EU" + ], + "Fala" + ], + "ff": [ + "Latn", + [ + "AF" + ], + "Fulfulde" + ], + "fi": [ + "Latn", + [ + "EU" + ], + "suomi" + ], + "fil": [ + "tl" + ], + "fit": [ + "Latn", + [ + "EU" + ], + "meänkieli" + ], + "fiu-vro": [ + "vro" + ], + "fj": [ + "Latn", + [ + "PA" + ], + "Na Vosa Vakaviti" + ], + "fo": [ + "Latn", + [ + "EU" + ], + "føroyskt" + ], + "fr": [ + "Latn", + [ + "EU", + "AM", + "WW" + ], + "français" + ], + "frc": [ + "Latn", + [ + "AM" + ], + "français cadien" + ], + "frp": [ + "Latn", + [ + "EU" + ], + "arpetan" + ], + "frr": [ + "Latn", + [ + "EU" + ], + "Nordfriisk" + ], + "fur": [ + "Latn", + [ + "EU" + ], + "furlan" + ], + "fy": [ + "Latn", + [ + "EU" + ], + "Frysk" + ], + "ga": [ + "Latn", + [ + "EU" + ], + "Gaeilge" + ], + "gag": [ + "Latn", + [ + "EU" + ], + "Gagauz" + ], + "gah": [ + "Latn", + [ + "AS" + ], + "Alekano" + ], + "gan-hans": [ + "Hans", + [ + "AS" + ], + "赣语(简体)" + ], + "gan-hant": [ + "gan" + ], + "gan": [ + "Hant", + [ + "AS" + ], + "贛語" + ], + "gbz": [ + "Latn", + [ + "AS" + ], + "Dari-e Mazdeyasnā" + ], + "gcf": [ + "Latn", + [ + "AM" + ], + "Guadeloupean Creole French" + ], + "gd": [ + "Latn", + [ + "EU" + ], + "Gàidhlig" + ], + "gl": [ + "Latn", + [ + "EU" + ], + "galego" + ], + "glk": [ + "Arab", + [ + "AS", + "ME" + ], + "گیلکی" + ], + "gn": [ + "Latn", + [ + "AM" + ], + "Avañe'ẽ" + ], + "gom": [ + "gom-deva" + ], + "gom-deva": [ + "Deva", + [ + "AS" + ], + "गोवा कोंकणी" + ], + "gom-latn": [ + "Latn", + [ + "AS" + ], + "Gova Konknni" + ], + "got": [ + "Goth", + [ + "EU" + ], + "𐌲𐌿𐍄𐌹𐍃𐌺" + ], + "grc": [ + "Grek", + [ + "EU" + ], + "Ἀρχαία ἑλληνικὴ" + ], + "gsw": [ + "Latn", + [ + "EU" + ], + "Alemannisch" + ], + "gu": [ + "Gujr", + [ + "AS" + ], + "ગુજરાતી" + ], + "guc": [ + "Latn", + [ + "AM" + ], + "Wayúu" + ], + "gur": [ + "Latn", + [ + "AF" + ], + "Gurenɛ" + ], + "gv": [ + "Latn", + [ + "EU" + 
], + "Gaelg" + ], + "ha-arab": [ + "Arab", + [ + "AF" + ], + "هَوُسَ" + ], + "ha-latn": [ + "Latn", + [ + "AF" + ], + "Hausa" + ], + "ha": [ + "ha-latn" + ], + "hak": [ + "Latn", + [ + "AS" + ], + "Hak-kâ-fa" + ], + "haw": [ + "Latn", + [ + "AM", + "PA" + ], + "Hawai`i" + ], + "he": [ + "Hebr", + [ + "ME" + ], + "עברית" + ], + "hi": [ + "Deva", + [ + "AS" + ], + "हिन्दी" + ], + "hif": [ + "Latn", + [ + "PA", + "AS" + ], + "Fiji Hindi" + ], + "hif-deva": [ + "Deva", + [ + "AS" + ], + "फ़ीजी हिन्दी" + ], + "hif-latn": [ + "hif" + ], + "hil": [ + "Latn", + [ + "AS" + ], + "Ilonggo" + ], + "hne": [ + "Deva", + [ + "AS" + ], + "छत्तीसगढ़ी" + ], + "ho": [ + "Latn", + [ + "PA" + ], + "Hiri Motu" + ], + "hr": [ + "Latn", + [ + "EU" + ], + "hrvatski" + ], + "hrx": [ + "Latn", + [ + "AM" + ], + "Hunsrik" + ], + "hsb": [ + "Latn", + [ + "EU" + ], + "hornjoserbsce" + ], + "hsn": [ + "Hans", + [ + "AS" + ], + "湘语" + ], + "ht": [ + "Latn", + [ + "AM" + ], + "Kreyòl ayisyen" + ], + "hu-formal": [ + "Latn", + [ + "EU" + ], + "Magyar (magázó)" + ], + "hu": [ + "Latn", + [ + "EU" + ], + "magyar" + ], + "hy": [ + "Armn", + [ + "EU", + "ME" + ], + "Հայերեն" + ], + "hz": [ + "Latn", + [ + "AF" + ], + "Otsiherero" + ], + "ia": [ + "Latn", + [ + "WW" + ], + "interlingua" + ], + "id": [ + "Latn", + [ + "AS" + ], + "Bahasa Indonesia" + ], + "ie": [ + "Latn", + [ + "WW" + ], + "Interlingue" + ], + "ig": [ + "Latn", + [ + "AF" + ], + "Igbo" + ], + "ii": [ + "Yiii", + [ + "AS" + ], + "ꆇꉙ" + ], + "ik": [ + "Latn", + [ + "AM" + ], + "Iñupiak" + ], + "ike-cans": [ + "Cans", + [ + "AM" + ], + "ᐃᓄᒃᑎᑐᑦ" + ], + "ike-latn": [ + "Latn", + [ + "AM" + ], + "inuktitut" + ], + "ilo": [ + "Latn", + [ + "AS" + ], + "Ilokano" + ], + "inh": [ + "Cyrl", + [ + "EU" + ], + "ГӀалгӀай" + ], + "io": [ + "Latn", + [ + "WW" + ], + "Ido" + ], + "is": [ + "Latn", + [ + "EU" + ], + "íslenska" + ], + "it": [ + "Latn", + [ + "EU" + ], + "italiano" + ], + "iu": [ + "Cans", + [ + "AM" + ], + "ᐃᓄᒃᑎᑐᑦ" + ], + "ja": [ + 
"Jpan", + [ + "AS" + ], + "日本語" + ], + "jam": [ + "Latn", + [ + "AM" + ], + "Patois" + ], + "jbo": [ + "Latn", + [ + "WW" + ], + "lojban" + ], + "jdt": [ + "jdt-cyrl" + ], + "jdt-cyrl": [ + "Cyrl", + [ + "EU", + "AS" + ], + "жугьури" + ], + "jut": [ + "Latn", + [ + "EU" + ], + "jysk" + ], + "jv": [ + "Latn", + [ + "AS", + "PA" + ], + "Basa Jawa" + ], + "jv-java": [ + "Java", + [ + "AS", + "PA" + ], + "ꦧꦱꦗꦮ" + ], + "ka": [ + "Geor", + [ + "EU" + ], + "ქართული" + ], + "kaa": [ + "Latn", + [ + "AS" + ], + "Qaraqalpaqsha" + ], + "kab": [ + "Latn", + [ + "AF", + "EU" + ], + "Taqbaylit" + ], + "kac": [ + "Latn", + [ + "AS" + ], + "Jinghpaw" + ], + "kbd-cyrl": [ + "kbd" + ], + "kbd-latn": [ + "Latn", + [ + "EU" + ], + "Qabardjajəbza" + ], + "kbd": [ + "Cyrl", + [ + "EU", + "ME" + ], + "Адыгэбзэ" + ], + "kbp": [ + "Latn", + [ + "AF" + ], + "Kabɩyɛ" + ], + "kea": [ + "Latn", + [ + "AF" + ], + "Kabuverdianu" + ], + "kg": [ + "Latn", + [ + "AF" + ], + "Kongo" + ], + "kgp": [ + "Latn", + [ + "AM" + ], + "Kaingáng" + ], + "khw": [ + "Arab", + [ + "ME", + "AS" + ], + "کھوار" + ], + "ki": [ + "Latn", + [ + "AF" + ], + "Gĩkũyũ" + ], + "kiu": [ + "Latn", + [ + "EU", + "ME" + ], + "Kırmancki" + ], + "kj": [ + "Latn", + [ + "AF" + ], + "Kwanyama" + ], + "kjh": [ + "Cyrl", + [ + "AS" + ], + "хакас" + ], + "kk": [ + "kk-cyrl" + ], + "kk-arab": [ + "Arab", + [ + "EU", + "AS" + ], + "قازاقشا (تٶتە)" + ], + "kk-cn": [ + "kk-arab" + ], + "kk-cyrl": [ + "Cyrl", + [ + "EU", + "AS" + ], + "қазақша" + ], + "kk-kz": [ + "kk-cyrl" + ], + "kk-latn": [ + "Latn", + [ + "EU", + "AS", + "ME" + ], + "qazaqşa" + ], + "kk-tr": [ + "kk-latn" + ], + "kl": [ + "Latn", + [ + "AM", + "EU" + ], + "kalaallisut" + ], + "km": [ + "Khmr", + [ + "AS" + ], + "ភាសាខ្មែរ" + ], + "kn": [ + "Knda", + [ + "AS" + ], + "ಕನ್ನಡ" + ], + "knn": [ + "Deva", + [ + "AS" + ], + "महाराष्ट्रीय कोंकणी" + ], + "ko-kp": [ + "Kore", + [ + "AS" + ], + "한국어 (조선)" + ], + "ko": [ + "Kore", + [ + "AS" + ], + "한국어" + ], + "koi": [ + "Cyrl", 
+ [ + "EU" + ], + "Перем Коми" + ], + "kr": [ + "Latn", + [ + "AF" + ], + "Kanuri" + ], + "krc": [ + "Cyrl", + [ + "EU" + ], + "къарачай-малкъар" + ], + "kri": [ + "Latn", + [ + "AF" + ], + "Krio" + ], + "krj": [ + "Latn", + [ + "ME", + "EU" + ], + "Kinaray-a" + ], + "krl": [ + "Latn", + [ + "EU" + ], + "Karjala" + ], + "ks-arab": [ + "Arab", + [ + "AS" + ], + "کٲشُر" + ], + "ks-deva": [ + "Deva", + [ + "AS" + ], + "कॉशुर" + ], + "ks": [ + "Arab", + [ + "AS" + ], + "کٲشُر" + ], + "ksf": [ + "Latn", + [ + "AF" + ], + "Bafia" + ], + "ksh": [ + "Latn", + [ + "EU" + ], + "Ripoarisch" + ], + "ku": [ + "ku-latn" + ], + "ku-arab": [ + "Arab", + [ + "EU", + "ME" + ], + "كوردي" + ], + "ku-latn": [ + "Latn", + [ + "EU", + "ME" + ], + "Kurdî" + ], + "kv": [ + "Cyrl", + [ + "EU" + ], + "коми" + ], + "kw": [ + "Latn", + [ + "EU" + ], + "kernowek" + ], + "ky": [ + "Cyrl", + [ + "AS" + ], + "Кыргызча" + ], + "la": [ + "Latn", + [ + "EU" + ], + "Latina" + ], + "lad": [ + "lad-latn" + ], + "lad-latn": [ + "Latn", + [ + "ME", + "EU", + "AM" + ], + "Ladino" + ], + "lad-hebr": [ + "Hebr", + [ + "ME", + "EU", + "AM" + ], + "לאדינו" + ], + "lb": [ + "Latn", + [ + "EU" + ], + "Lëtzebuergesch" + ], + "lbe": [ + "Cyrl", + [ + "EU" + ], + "лакку" + ], + "lez": [ + "Cyrl", + [ + "EU" + ], + "лезги" + ], + "lfn": [ + "Latn", + [ + "WW" + ], + "Lingua Franca Nova" + ], + "lg": [ + "Latn", + [ + "AF" + ], + "Luganda" + ], + "li": [ + "Latn", + [ + "EU" + ], + "Limburgs" + ], + "lij": [ + "Latn", + [ + "EU" + ], + "Ligure" + ], + "liv": [ + "Latn", + [ + "EU" + ], + "Līvõ kēļ" + ], + "lki": [ + "Arab", + [ + "AS", + "ME" + ], + "لەکی‎" + ], + "lkt": [ + "Latn", + [ + "AM" + ], + "Lakȟótiyapi" + ], + "lld": [ + "Latn", + [ + "EU" + ], + "Ladin" + ], + "lmo": [ + "Latn", + [ + "EU" + ], + "lumbaart" + ], + "ln": [ + "Latn", + [ + "AF" + ], + "lingála" + ], + "lo": [ + "Laoo", + [ + "AS" + ], + "ລາວ" + ], + "loz": [ + "Latn", + [ + "AF" + ], + "Silozi" + ], + "lt": [ + "Latn", + [ + "EU" + ], + 
"lietuvių" + ], + "lrc": [ + "Arab", + [ + "AS", + "ME" + ], + "لۊری شومالی" + ], + "ltg": [ + "Latn", + [ + "EU" + ], + "latgaļu" + ], + "lud": [ + "Latn", + [ + "EU" + ], + "lüüdi" + ], + "lus": [ + "Latn", + [ + "AS" + ], + "Mizo ţawng" + ], + "lut": [ + "Latn", + [ + "AM" + ], + "dxʷləšucid" + ], + "luz": [ + "Arab", + [ + "ME" + ], + "لئری دوٙمینی" + ], + "lv": [ + "Latn", + [ + "EU" + ], + "latviešu" + ], + "lzh": [ + "Hant", + [ + "AS" + ], + "文言" + ], + "lzz": [ + "Latn", + [ + "EU", + "ME" + ], + "Lazuri" + ], + "mai": [ + "Deva", + [ + "AS" + ], + "मैथिली" + ], + "map-bms": [ + "Latn", + [ + "AS" + ], + "Basa Banyumasan" + ], + "mdf": [ + "Cyrl", + [ + "EU" + ], + "мокшень" + ], + "mfe": [ + "Latn", + [ + "AM" + ], + "Morisyen" + ], + "mg": [ + "Latn", + [ + "AF" + ], + "Malagasy" + ], + "mh": [ + "Latn", + [ + "PA" + ], + "Ebon" + ], + "mhr": [ + "Cyrl", + [ + "EU" + ], + "олык марий" + ], + "mi": [ + "Latn", + [ + "PA" + ], + "Māori" + ], + "mic": [ + "Latn", + [ + "AM" + ], + "Mi'kmaq" + ], + "min": [ + "Latn", + [ + "AS" + ], + "Baso Minangkabau" + ], + "miq": [ + "Latn", + [ + "AM" + ], + "Mískitu" + ], + "mk": [ + "Cyrl", + [ + "EU" + ], + "македонски" + ], + "ml": [ + "Mlym", + [ + "AS", + "ME" + ], + "മലയാളം" + ], + "mn": [ + "Cyrl", + [ + "AS" + ], + "монгол" + ], + "mnc": [ + "Mong", + [ + "AS" + ], + "ᠮᠠᠨᠵᡠ ᡤᡳᠰᡠᠨ" + ], + "mni": [ + "Beng", + [ + "AS" + ], + "মেইতেই লোন্" + ], + "mnw": [ + "Mymr", + [ + "AS" + ], + "ဘာသာ မန်" + ], + "mo": [ + "Cyrl", + [ + "EU" + ], + "молдовеняскэ" + ], + "mr": [ + "Deva", + [ + "AS", + "ME" + ], + "मराठी" + ], + "mrj": [ + "Cyrl", + [ + "EU" + ], + "кырык мары" + ], + "ms": [ + "Latn", + [ + "AS" + ], + "Bahasa Melayu" + ], + "mt": [ + "Latn", + [ + "EU" + ], + "Malti" + ], + "mui": [ + "Latn", + [ + "AS" + ], + "Musi" + ], + "mus": [ + "Latn", + [ + "AM" + ], + "Mvskoke" + ], + "mwl": [ + "Latn", + [ + "EU" + ], + "Mirandés" + ], + "mwv": [ + "Latn", + [ + "AS" + ], + "Behase Mentawei" + ], + "mww": [ + 
"mww-latn" + ], + "mww-latn": [ + "Latn", + [ + "AS" + ], + "Hmoob Dawb" + ], + "my": [ + "Mymr", + [ + "AS" + ], + "မြန်မာဘာသာ" + ], + "myv": [ + "Cyrl", + [ + "EU" + ], + "эрзянь" + ], + "mzn": [ + "Arab", + [ + "ME", + "AS" + ], + "مازِرونی" + ], + "na": [ + "Latn", + [ + "PA" + ], + "Dorerin Naoero" + ], + "nah": [ + "Latn", + [ + "AM" + ], + "Nāhuatl" + ], + "nan": [ + "Latn", + [ + "AS" + ], + "Bân-lâm-gú" + ], + "nap": [ + "Latn", + [ + "EU" + ], + "Napulitano" + ], + "nb": [ + "Latn", + [ + "EU" + ], + "norsk (bokmål)" + ], + "nds-nl": [ + "Latn", + [ + "EU" + ], + "Nedersaksisch" + ], + "nds": [ + "Latn", + [ + "EU" + ], + "Plattdüütsch" + ], + "ne": [ + "Deva", + [ + "AS" + ], + "नेपाली" + ], + "new": [ + "Deva", + [ + "AS" + ], + "नेपाल भाषा" + ], + "ng": [ + "Latn", + [ + "AF" + ], + "Oshiwambo" + ], + "niu": [ + "Latn", + [ + "PA" + ], + "ko e vagahau Niuē" + ], + "njo": [ + "Latn", + [ + "AS" + ], + "Ao" + ], + "nl-informal": [ + "Latn", + [ + "EU", + "AM" + ], + "Nederlands (informeel)" + ], + "nl": [ + "Latn", + [ + "EU", + "AM" + ], + "Nederlands" + ], + "nn": [ + "Latn", + [ + "EU" + ], + "norsk (nynorsk)" + ], + "no": [ + "Latn", + [ + "EU" + ], + "norsk" + ], + "nov": [ + "Latn", + [ + "WW" + ], + "Novial" + ], + "nqo": [ + "Nkoo", + [ + "AF" + ], + "ߒߞߏ" + ], + "nrm": [ + "Latn", + [ + "EU" + ], + "Nouormand" + ], + "nso": [ + "Latn", + [ + "AF" + ], + "Sesotho sa Leboa" + ], + "nv": [ + "Latn", + [ + "AM" + ], + "Diné bizaad" + ], + "ny": [ + "Latn", + [ + "AF" + ], + "Chi-Chewa" + ], + "oc": [ + "Latn", + [ + "EU" + ], + "occitan" + ], + "olo": [ + "Latn", + [ + "AS", + "EU" + ], + "Livvin-Karjalan" + ], + "om": [ + "Latn", + [ + "AF" + ], + "Oromoo" + ], + "or": [ + "Orya", + [ + "AS" + ], + "ଓଡ଼ିଆ" + ], + "os": [ + "Cyrl", + [ + "EU" + ], + "Ирон" + ], + "ota": [ + "Arab", + [ + "AS", + "EU" + ], + "لسان عثمانى" + ], + "pa": [ + "pa-guru" + ], + "pa-guru": [ + "Guru", + [ + "AS" + ], + "ਪੰਜਾਬੀ" + ], + "pag": [ + "Latn", + [ + "AS" + ], + 
"Pangasinan" + ], + "pam": [ + "Latn", + [ + "AS" + ], + "Kapampangan" + ], + "pap": [ + "Latn", + [ + "AM" + ], + "Papiamentu" + ], + "pbb": [ + "Latn", + [ + "AM" + ], + "Nasa Yuwe" + ], + "pcd": [ + "Latn", + [ + "EU" + ], + "Picard" + ], + "pdc": [ + "Latn", + [ + "EU", + "AM" + ], + "Deitsch" + ], + "pdt": [ + "Latn", + [ + "EU", + "AM" + ], + "Plautdietsch" + ], + "pfl": [ + "Latn", + [ + "EU" + ], + "Pälzisch" + ], + "pi": [ + "Deva", + [ + "AS" + ], + "पालि" + ], + "pih": [ + "Latn", + [ + "PA" + ], + "Norfuk \/ Pitkern" + ], + "pis": [ + "Latn", + [ + "PA" + ], + "Pijin" + ], + "pko": [ + "Latn", + [ + "AF" + ], + "Pökoot" + ], + "pl": [ + "Latn", + [ + "EU" + ], + "polski" + ], + "pms": [ + "Latn", + [ + "EU" + ], + "Piemontèis" + ], + "pnb": [ + "Arab", + [ + "AS", + "ME" + ], + "پنجابی" + ], + "pnt": [ + "Grek", + [ + "EU" + ], + "Ποντιακά" + ], + "ppl": [ + "Latn", + [ + "AM" + ], + "Nawat" + ], + "prg": [ + "Latn", + [ + "EU" + ], + "Prūsiskan" + ], + "prs": [ + "Arab", + [ + "AS", + "ME" + ], + "دری" + ], + "ps": [ + "Arab", + [ + "AS", + "ME" + ], + "پښتو" + ], + "pt-br": [ + "Latn", + [ + "AM" + ], + "português do Brasil" + ], + "pt": [ + "Latn", + [ + "EU", + "AM", + "AS", + "PA", + "AF", + "WW" + ], + "português" + ], + "qu": [ + "Latn", + [ + "AM" + ], + "Runa Simi" + ], + "qug": [ + "Latn", + [ + "AM" + ], + "Runa shimi" + ], + "rap": [ + "Latn", + [ + "AM" + ], + "arero rapa nui" + ], + "rcf": [ + "Latn", + [ + "AF" + ], + "Kreol Réyoné" + ], + "rgn": [ + "Latn", + [ + "EU" + ], + "Rumagnôl" + ], + "rif": [ + "Latn", + [ + "AF" + ], + "Tarifit" + ], + "rki": [ + "Mymr", + [ + "AS" + ], + "ရခိုင်" + ], + "rm": [ + "Latn", + [ + "EU" + ], + "rumantsch" + ], + "rmf": [ + "Latn", + [ + "EU" + ], + "kaalengo tšimb" + ], + "rmy": [ + "Latn", + [ + "EU" + ], + "Romani" + ], + "rn": [ + "Latn", + [ + "AF" + ], + "Kirundi" + ], + "ro": [ + "Latn", + [ + "EU" + ], + "română" + ], + "roa-rup": [ + "rup" + ], + "roa-tara": [ + "Latn", + [ + "EU" + ], + 
"tarandíne" + ], + "rtm": [ + "Latn", + [ + "PA" + ], + "Faeag Rotuma" + ], + "ru": [ + "Cyrl", + [ + "EU", + "AS", + "ME" + ], + "русский" + ], + "rue": [ + "Cyrl", + [ + "EU" + ], + "русиньскый" + ], + "rup": [ + "Latn", + [ + "EU" + ], + "armãneashti" + ], + "ruq": [ + "Cyrl", + [ + "EU" + ], + "Влахесте" + ], + "ruq-cyrl": [ + "ruq" + ], + "ruq-grek": [ + "Grek", + [ + "EU" + ], + "Megleno-Romanian (Greek script)" + ], + "ruq-latn": [ + "Latn", + [ + "EU" + ], + "Vlăheşte" + ], + "rut": [ + "Cyrl", + [ + "EU" + ], + "мыхаӀбишды" + ], + "rw": [ + "Latn", + [ + "AF" + ], + "Kinyarwanda" + ], + "rwr": [ + "Deva", + [ + "AS" + ], + "मारवाड़ी" + ], + "ryu": [ + "Kana", + [ + "AS" + ], + "ʔucināguci" + ], + "sa": [ + "Deva", + [ + "AS" + ], + "संस्कृतम्" + ], + "sah": [ + "Cyrl", + [ + "EU", + "AS" + ], + "саха тыла" + ], + "sat": [ + "Latn", + [ + "AS" + ], + "Santali" + ], + "saz": [ + "Saur", + [ + "AS" + ], + "ꢱꣃꢬꢵꢯ꣄ꢡ꣄ꢬꢵ" + ], + "sc": [ + "Latn", + [ + "EU" + ], + "sardu" + ], + "scn": [ + "Latn", + [ + "EU" + ], + "sicilianu" + ], + "sco": [ + "Latn", + [ + "EU" + ], + "Scots" + ], + "sd": [ + "Arab", + [ + "AS" + ], + "سنڌي" + ], + "sdc": [ + "Latn", + [ + "EU" + ], + "Sassaresu" + ], + "sdh": [ + "Arab", + [ + "ME" + ], + "کوردی خوارگ" + ], + "se": [ + "Latn", + [ + "EU" + ], + "sámegiella" + ], + "ses": [ + "Latn", + [ + "AF" + ], + "Koyraboro Senni" + ], + "sei": [ + "Latn", + [ + "AM" + ], + "Cmique Itom" + ], + "sg": [ + "Latn", + [ + "AF" + ], + "Sängö" + ], + "sgs": [ + "Latn", + [ + "EU" + ], + "žemaitėška" + ], + "sh": [ + "Latn", + [ + "EU" + ], + "srpskohrvatski" + ], + "shi-latn": [ + "Latn", + [ + "AF" + ], + "Tašlḥiyt" + ], + "shi-tfng": [ + "Tfng", + [ + "AF" + ], + "ⵜⴰⵛⵍⵃⵉⵜ" + ], + "shi": [ + "shi-latn" + ], + "shn": [ + "Mymr", + [ + "AS" + ], + "လိၵ်ႈတႆး" + ], + "si": [ + "Sinh", + [ + "AS" + ], + "සිංහල" + ], + "simple": [ + "Latn", + [ + "WW" + ], + "Simple English" + ], + "sk": [ + "Latn", + [ + "EU" + ], + "slovenčina" + ], + "sl": [ + 
"Latn", + [ + "EU" + ], + "slovenščina" + ], + "sli": [ + "Latn", + [ + "EU" + ], + "Schläsch" + ], + "slr": [ + "Latn", + [ + "AS" + ], + "Salırça" + ], + "sly": [ + "Latn", + [ + "AS" + ], + "Bahasa Selayar" + ], + "syc": [ + "Syrc", + [ + "ME" + ], + "ܣܘܪܝܝܐ" + ], + "sm": [ + "Latn", + [ + "PA" + ], + "Gagana Samoa" + ], + "sma": [ + "Latn", + [ + "EU" + ], + "åarjelsaemien" + ], + "smj": [ + "Latn", + [ + "EU" + ], + "julevsámegiella" + ], + "smn": [ + "Latn", + [ + "EU" + ], + "anarâškielâ" + ], + "sms": [ + "Latn", + [ + "EU" + ], + "sää´mǩiõll" + ], + "sn": [ + "Latn", + [ + "AF" + ], + "chiShona" + ], + "so": [ + "Latn", + [ + "AF" + ], + "Soomaaliga" + ], + "son": [ + "Latn", + [ + "AF" + ], + "soŋay" + ], + "sq": [ + "Latn", + [ + "EU" + ], + "shqip" + ], + "sr": [ + "sr-cyrl" + ], + "sr-ec": [ + "sr-cyrl" + ], + "sr-cyrl": [ + "Cyrl", + [ + "EU" + ], + "српски" + ], + "sr-el": [ + "sr-latn" + ], + "sr-latn": [ + "Latn", + [ + "EU" + ], + "srpski" + ], + "srn": [ + "Latn", + [ + "AM", + "EU" + ], + "Sranantongo" + ], + "ss": [ + "Latn", + [ + "AF" + ], + "SiSwati" + ], + "st": [ + "Latn", + [ + "AF" + ], + "Sesotho" + ], + "stq": [ + "Latn", + [ + "EU" + ], + "Seeltersk" + ], + "su": [ + "Latn", + [ + "AS" + ], + "Basa Sunda" + ], + "sv": [ + "Latn", + [ + "EU" + ], + "svenska" + ], + "sw": [ + "Latn", + [ + "AF" + ], + "Kiswahili" + ], + "swb": [ + "Latn", + [ + "AF" + ], + "Shikomoro" + ], + "sxu": [ + "Latn", + [ + "EU" + ], + "Säggssch" + ], + "szl": [ + "Latn", + [ + "EU" + ], + "ślůnski" + ], + "ta": [ + "Taml", + [ + "AS" + ], + "தமிழ்" + ], + "tcy": [ + "Knda", + [ + "AS" + ], + "ತುಳು" + ], + "te": [ + "Telu", + [ + "AS" + ], + "తెలుగు" + ], + "tet": [ + "Latn", + [ + "AS", + "PA" + ], + "tetun" + ], + "tg-cyrl": [ + "Cyrl", + [ + "AS" + ], + "тоҷикӣ" + ], + "tg-latn": [ + "Latn", + [ + "AS" + ], + "tojikī" + ], + "tg": [ + "Cyrl", + [ + "AS" + ], + "тоҷикӣ" + ], + "th": [ + "Thai", + [ + "AS" + ], + "ไทย" + ], + "ti": [ + "Ethi", + [ + "AF" + ], 
+ "ትግርኛ" + ], + "tk": [ + "Latn", + [ + "AS" + ], + "Türkmençe" + ], + "tkr": [ + "Cyrl", + [ + "AS" + ], + "ЦӀаьхна миз" + ], + "tl": [ + "Latn", + [ + "AS" + ], + "Tagalog" + ], + "tly": [ + "Cyrl", + [ + "EU", + "AS", + "ME" + ], + "толышә зывон" + ], + "tn": [ + "Latn", + [ + "AF" + ], + "Setswana" + ], + "to": [ + "Latn", + [ + "PA" + ], + "lea faka-Tonga" + ], + "tokipona": [ + "Latn", + [ + "WW" + ], + "Toki Pona" + ], + "tpi": [ + "Latn", + [ + "PA", + "AS" + ], + "Tok Pisin" + ], + "tr": [ + "Latn", + [ + "EU", + "ME" + ], + "Türkçe" + ], + "trp": [ + "Latn", + [ + "AS" + ], + "Kokborok (Tripuri)" + ], + "tru": [ + "Latn", + [ + "AS" + ], + "Ṫuroyo" + ], + "ts": [ + "Latn", + [ + "AF" + ], + "Xitsonga" + ], + "tsd": [ + "Grek", + [ + "EU" + ], + "Τσακωνικά" + ], + "tt": [ + "Cyrl", + [ + "EU" + ], + "татарча" + ], + "tt-cyrl": [ + "tt" + ], + "tt-latn": [ + "Latn", + [ + "EU" + ], + "tatarça" + ], + "ttt": [ + "Cyrl", + [ + "AS" + ], + "Tati" + ], + "tum": [ + "Latn", + [ + "AF" + ], + "chiTumbuka" + ], + "tw": [ + "Latn", + [ + "AF" + ], + "Twi" + ], + "twd": [ + "Latn", + [ + "EU" + ], + "Tweants" + ], + "ty": [ + "Latn", + [ + "PA" + ], + "reo tahiti" + ], + "tyv": [ + "Cyrl", + [ + "AS" + ], + "тыва дыл" + ], + "tzl": [ + "Latn", + [ + "WW" + ], + "Talossan" + ], + "tzm": [ + "Tfng", + [ + "AF" + ], + "ⵜⴰⵎⴰⵣⵉⵖⵜ" + ], + "udm": [ + "Cyrl", + [ + "EU" + ], + "удмурт" + ], + "ug": [ + "ug-arab" + ], + "ug-arab": [ + "Arab", + [ + "AS" + ], + "ئۇيغۇرچە" + ], + "ug-latn": [ + "Latn", + [ + "AS" + ], + "uyghurche" + ], + "ug-cyrl": [ + "Cyrl", + [ + "AS" + ], + "уйғурчә" + ], + "uk": [ + "Cyrl", + [ + "EU" + ], + "українська" + ], + "ur": [ + "Arab", + [ + "AS", + "ME" + ], + "اردو" + ], + "uz": [ + "Latn", + [ + "AS" + ], + "oʻzbekcha" + ], + "ve": [ + "Latn", + [ + "AF" + ], + "Tshivenda" + ], + "vec": [ + "Latn", + [ + "EU" + ], + "vèneto" + ], + "vep": [ + "Latn", + [ + "EU" + ], + "vepsän kel’" + ], + "vi": [ + "Latn", + [ + "AS" + ], + "Tiếng Việt" + ], 
+ "vls": [ + "Latn", + [ + "EU" + ], + "West-Vlams" + ], + "vmf": [ + "Latn", + [ + "EU" + ], + "Mainfränkisch" + ], + "vo": [ + "Latn", + [ + "WW" + ], + "Volapük" + ], + "vot": [ + "Latn", + [ + "EU" + ], + "Vaďďa" + ], + "vro": [ + "Latn", + [ + "EU" + ], + "Võro" + ], + "wa": [ + "Latn", + [ + "EU" + ], + "walon" + ], + "war": [ + "Latn", + [ + "AS" + ], + "Winaray" + ], + "wls": [ + "Latn", + [ + "PA" + ], + "Faka'uvea" + ], + "wo": [ + "Latn", + [ + "AF" + ], + "Wolof" + ], + "wuu": [ + "Hans", + [ + "AS" + ], + "吴语" + ], + "xal": [ + "Cyrl", + [ + "EU" + ], + "хальмг" + ], + "xh": [ + "Latn", + [ + "AF" + ], + "isiXhosa" + ], + "xmf": [ + "Geor", + [ + "EU" + ], + "მარგალური" + ], + "ydd": [ + "Hebr", + [ + "AS", + "EU" + ], + "Eastern Yiddish" + ], + "yi": [ + "Hebr", + [ + "ME", + "EU", + "AM" + ], + "ייִדיש" + ], + "yo": [ + "Latn", + [ + "AF" + ], + "Yorùbá" + ], + "yrk": [ + "Cyrl", + [ + "AS" + ], + "Ненэцяʼ вада" + ], + "yrl": [ + "Latn", + [ + "AM" + ], + "ñe'engatú" + ], + "yua": [ + "Latn", + [ + "AM" + ], + "Maaya T'aan" + ], + "yue": [ + "Hant", + [ + "AS" + ], + "粵語" + ], + "za": [ + "Latn", + [ + "AS" + ], + "Vahcuengh" + ], + "zea": [ + "Latn", + [ + "EU" + ], + "Zeêuws" + ], + "zgh": [ + "Tfng", + [ + "AF" + ], + "ⵜⴰⵎⴰⵣⵉⵖⵜ" + ], + "zh": [ + "Hans", + [ + "AS" + ], + "中文" + ], + "zh-classical": [ + "Hant", + [ + "AS" + ], + "文言" + ], + "zh-cn": [ + "Hans", + [ + "AS" + ], + "中文(中国大陆)" + ], + "zh-hans": [ + "Hans", + [ + "AS" + ], + "中文(简体)" + ], + "zh-hant": [ + "Hant", + [ + "AS" + ], + "中文(繁體)" + ], + "zh-hk": [ + "Hant", + [ + "AS" + ], + "中文(香港)" + ], + "zh-min-nan": [ + "nan" + ], + "zh-mo": [ + "Hant", + [ + "AS" + ], + "中文(澳門)" + ], + "zh-my": [ + "Hans", + [ + "AS" + ], + "中文(马来西亚)" + ], + "zh-sg": [ + "Hans", + [ + "AS" + ], + "中文(新加坡)" + ], + "zh-tw": [ + "Hant", + [ + "AS" + ], + "中文(台灣)" + ], + "zh-yue": [ + "yue" + ], + "zu": [ + "Latn", + [ + "AF" + ], + "isiZulu" + ] + }, + "scriptgroups": { + "Latin": [ + "Latn", + "Goth" + ], 
+ "Greek": [ + "Grek" + ], + "WestCaucasian": [ + "Armn", + "Geor" + ], + "Arabic": [ + "Arab" + ], + "MiddleEastern": [ + "Hebr", + "Syrc" + ], + "African": [ + "Ethi", + "Nkoo", + "Tfng" + ], + "SouthAsian": [ + "Beng", + "Deva", + "Gujr", + "Guru", + "Knda", + "Mlym", + "Orya", + "Saur", + "Sinh", + "Taml", + "Telu", + "Tibt", + "Thaa" + ], + "Cyrillic": [ + "Cyrl" + ], + "CJK": [ + "Hans", + "Hant", + "Kana", + "Kore", + "Jpan", + "Yiii" + ], + "SouthEastAsian": [ + "Bali", + "Batk", + "Bugi", + "Java", + "Khmr", + "Laoo", + "Mymr", + "Thai" + ], + "Mongolian": [ + "Mong" + ], + "SignWriting": [ + "Sgnw" + ], + "NativeAmerican": [ + "Cher", + "Cans" + ], + "Special": [ + "Zyyy" + ] + }, + "rtlscripts": [ + "Arab", + "Hebr", + "Syrc", + "Nkoo", + "Thaa" + ], + "regiongroups": { + "WW": 1, + "SP": 1, + "AM": 2, + "EU": 3, + "ME": 3, + "AF": 3, + "AS": 4, + "PA": 4 + }, + "territories": { + "AC": [ + "en" + ], + "AD": [ + "ca", + "es", + "fr" + ], + "AE": [ + "ar", + "ml", + "ps", + "fa" + ], + "AF": [ + "fa", + "ps", + "uz", + "tk", + "bgn", + "ug-arab", + "kk-arab", + "kk-cyrl" + ], + "AG": [ + "en", + "pt" + ], + "AI": [ + "en" + ], + "AL": [ + "sq", + "el", + "mk" + ], + "AM": [ + "hy", + "ku-latn", + "az-latn" + ], + "AO": [ + "pt", + "ln" + ], + "AR": [ + "es", + "en", + "cy", + "gn" + ], + "AS": [ + "sm", + "en" + ], + "AT": [ + "de", + "bar", + "en", + "fr", + "it", + "hr", + "sl", + "hu" + ], + "AU": [ + "en", + "zh-hant", + "zh", + "it" + ], + "AW": [ + "nl", + "pap", + "en" + ], + "AX": [ + "sv" + ], + "AZ": [ + "az-latn", + "az-cyrl", + "tly", + "ku-latn", + "ttt", + "tkr" + ], + "BA": [ + "bs", + "en", + "hr", + "sr-cyrl", + "sr-latn" + ], + "BB": [ + "en" + ], + "BD": [ + "bn", + "en", + "my", + "mni" + ], + "BE": [ + "en", + "nl", + "fr", + "de", + "vls", + "wa" + ], + "BF": [ + "fr" + ], + "BG": [ + "bg", + "en", + "ru", + "tr", + "de" + ], + "BH": [ + "ar", + "ml" + ], + "BI": [ + "rn", + "fr", + "sw", + "en" + ], + "BJ": [ + "fr", + "yo" + ], + 
"BL": [ + "fr" + ], + "BM": [ + "en" + ], + "BN": [ + "ms", + "zh-hant", + "zh", + "en" + ], + "BO": [ + "es", + "qu", + "ay", + "gn", + "aro" + ], + "BQ": [ + "pap", + "nl" + ], + "BR": [ + "pt", + "en", + "de", + "it", + "ja", + "es", + "ko", + "kgp", + "yrl" + ], + "BS": [ + "en" + ], + "BT": [ + "dz", + "ne", + "en" + ], + "BW": [ + "en", + "tn", + "af" + ], + "BY": [ + "be", + "ru" + ], + "BZ": [ + "en", + "es" + ], + "CA": [ + "en", + "fr", + "it", + "de", + "pdt", + "cr", + "yi", + "iu", + "atj" + ], + "CC": [ + "ms", + "en" + ], + "CD": [ + "sw", + "fr", + "ln", + "kg", + "rw" + ], + "CF": [ + "fr", + "sg", + "ln" + ], + "CG": [ + "fr", + "ln" + ], + "CH": [ + "de", + "gsw", + "en", + "fr", + "it", + "lmo", + "pt", + "rm" + ], + "CI": [ + "fr" + ], + "CK": [ + "en" + ], + "CL": [ + "es", + "en", + "arn" + ], + "CM": [ + "fr", + "en", + "ff", + "ar", + "ksf", + "ha-arab", + "ha-latn" + ], + "CN": [ + "zh", + "wuu", + "yue", + "hsn", + "hak", + "nan", + "gan", + "ii", + "ug-arab", + "za", + "mn", + "bo", + "ko", + "kk-arab", + "kk-cyrl", + "ky", + "en", + "ru", + "vi", + "uz", + "lzh" + ], + "CO": [ + "es", + "guc" + ], + "CR": [ + "es" + ], + "CU": [ + "es" + ], + "CV": [ + "kea", + "pt" + ], + "CW": [ + "pap", + "nl", + "es" + ], + "CX": [ + "en" + ], + "CY": [ + "el", + "en", + "tr", + "fr", + "hy", + "ar" + ], + "CZ": [ + "cs", + "en", + "sk", + "de", + "pl" + ], + "DE": [ + "de", + "en", + "fr", + "bar", + "nds", + "nl", + "it", + "es", + "ru", + "vmf", + "tr", + "gsw", + "da", + "hr", + "ku-latn", + "el", + "ksh", + "pl", + "hsb", + "frr", + "dsb", + "stq", + "pfl" + ], + "DG": [ + "en" + ], + "DJ": [ + "aa", + "so", + "ar", + "fr" + ], + "DK": [ + "da", + "en", + "de", + "sv", + "fo", + "kl", + "jut" + ], + "DM": [ + "en" + ], + "DO": [ + "es", + "en" + ], + "DZ": [ + "arq", + "ar", + "fr", + "kab", + "en" + ], + "EA": [ + "es" + ], + "EC": [ + "es", + "qu", + "qug" + ], + "EE": [ + "et", + "ru", + "en", + "fi", + "vro" + ], + "EG": [ + "ar", + "arz", 
+ "en", + "el" + ], + "EH": [ + "ar" + ], + "ER": [ + "ti", + "en", + "ar", + "aa" + ], + "ES": [ + "es", + "en", + "ca", + "gl", + "eu", + "ast", + "ext" + ], + "ET": [ + "en", + "am", + "om", + "so", + "ti", + "aa" + ], + "FI": [ + "fi", + "en", + "sv", + "de", + "ru", + "et", + "rmf", + "se", + "smn", + "sms" + ], + "FJ": [ + "en", + "hi", + "hif", + "fj", + "rtm" + ], + "FK": [ + "en" + ], + "FM": [ + "en" + ], + "FO": [ + "fo" + ], + "FR": [ + "fr", + "en", + "es", + "de", + "oc", + "it", + "pt", + "pcd", + "gsw", + "br", + "co", + "ca", + "nl", + "eu", + "frp", + "ia" + ], + "GA": [ + "fr" + ], + "GB": [ + "en", + "fr", + "de", + "sco", + "pa-guru", + "cy", + "bn", + "zh-hant", + "zh", + "el", + "it", + "ks", + "gd", + "yi", + "ml", + "ga", + "kw" + ], + "GD": [ + "en" + ], + "GE": [ + "ka", + "xmf", + "ru", + "hy", + "ab", + "os", + "ku-latn" + ], + "GF": [ + "fr", + "zh-hant", + "zh" + ], + "GG": [ + "en" + ], + "GH": [ + "ak", + "en", + "ee", + "gur", + "ha-latn" + ], + "GI": [ + "en", + "es" + ], + "GL": [ + "kl", + "da" + ], + "GM": [ + "en" + ], + "GN": [ + "fr", + "ff", + "nqo" + ], + "GP": [ + "fr" + ], + "GQ": [ + "es", + "fr", + "pt" + ], + "GR": [ + "el", + "en", + "fr", + "de", + "pnt", + "mk", + "tr", + "bg", + "sq", + "tsd" + ], + "GT": [ + "es" + ], + "GU": [ + "en", + "ch" + ], + "GW": [ + "pt" + ], + "GY": [ + "en" + ], + "HK": [ + "zh-hant", + "zh", + "yue", + "en" + ], + "HN": [ + "es", + "en" + ], + "HR": [ + "hr", + "en", + "it" + ], + "HT": [ + "ht", + "fr" + ], + "HU": [ + "hu", + "en", + "de", + "fr", + "ro", + "hr", + "sk", + "sl" + ], + "IC": [ + "es" + ], + "ID": [ + "id", + "jv", + "su", + "ms", + "min", + "bew", + "ban", + "bug", + "bjn", + "ace", + "bbc", + "zh-hant", + "zh", + "sly", + "mwv" + ], + "IE": [ + "en", + "ga", + "fr" + ], + "IL": [ + "he", + "en", + "ar", + "ru", + "ro", + "yi", + "pl", + "lad-latn", + "hu", + "am", + "ti", + "ml" + ], + "IM": [ + "en", + "gv" + ], + "IN": [ + "hi", + "en", + "bn", + "te", + "mr", + 
"ta", + "ur", + "gu", + "kn", + "ml", + "or", + "pa-guru", + "bho", + "as", + "mai", + "hne", + "ne", + "sat", + "ks", + "gom-deva", + "sd", + "tcy", + "brx", + "mni", + "saz", + "bfq", + "njo", + "bo", + "bpy", + "bft", + "sa", + "dv", + "dz" + ], + "IO": [ + "en" + ], + "IQ": [ + "ar", + "en", + "ckb", + "az-arab", + "az-latn", + "fa", + "lrc" + ], + "IR": [ + "fa", + "az-arab", + "az-latn", + "mzn", + "glk", + "ckb", + "sdh", + "tk", + "lrc", + "ar", + "bqi", + "luz", + "lki", + "bgn", + "hy", + "ps", + "ka", + "gbz", + "kk-arab", + "kk-cyrl" + ], + "IS": [ + "is", + "da" + ], + "IT": [ + "it", + "en", + "fr", + "sc", + "de", + "vec", + "nap", + "lij", + "scn", + "sl", + "sdc", + "fur", + "egl", + "ca", + "el", + "lmo", + "pms", + "hr", + "rgn" + ], + "JE": [ + "en" + ], + "JM": [ + "en", + "jam" + ], + "JO": [ + "ar", + "en" + ], + "JP": [ + "ja", + "ryu", + "ko" + ], + "KE": [ + "sw", + "en", + "ki", + "so", + "pko", + "om", + "ar", + "pa-guru", + "gu" + ], + "KG": [ + "ky", + "ru" + ], + "KH": [ + "km" + ], + "KI": [ + "en" + ], + "KM": [ + "ar", + "fr" + ], + "KN": [ + "en" + ], + "KP": [ + "ko" + ], + "KR": [ + "ko" + ], + "KW": [ + "ar" + ], + "KY": [ + "en" + ], + "KZ": [ + "ru", + "kk-cyrl", + "en", + "de", + "ug-cyrl", + "ug-arab" + ], + "LA": [ + "lo" + ], + "LB": [ + "ar", + "en", + "hy", + "ku-arab", + "ku-latn", + "fr" + ], + "LC": [ + "en" + ], + "LI": [ + "de", + "gsw" + ], + "LK": [ + "si", + "ta", + "en" + ], + "LR": [ + "en" + ], + "LS": [ + "st", + "en", + "zu", + "ss", + "xh" + ], + "LT": [ + "lt", + "ru", + "en", + "de", + "sgs" + ], + "LU": [ + "fr", + "lb", + "de", + "en", + "pt" + ], + "LV": [ + "lv", + "en", + "ru", + "ltg" + ], + "LY": [ + "ar" + ], + "MA": [ + "ary", + "ar", + "zgh", + "fr", + "en", + "tzm", + "shi-latn", + "rif", + "es" + ], + "MC": [ + "fr" + ], + "MD": [ + "ro", + "uk", + "bg", + "gag", + "ru" + ], + "ME": [ + "sr-latn", + "sr-cyrl", + "sq" + ], + "MF": [ + "fr" + ], + "MG": [ + "mg", + "fr", + "en" + ], + "MH": [ + 
"en", + "mh" + ], + "MK": [ + "mk", + "sq", + "tr" + ], + "ML": [ + "bm", + "fr", + "ses", + "ar" + ], + "MM": [ + "my", + "shn", + "kac", + "mnw" + ], + "MN": [ + "mn", + "kk-arab", + "kk-cyrl", + "zh", + "ru", + "ug-cyrl", + "ug-arab" + ], + "MO": [ + "zh-hant", + "zh", + "pt", + "en" + ], + "MP": [ + "en", + "ch" + ], + "MQ": [ + "fr" + ], + "MR": [ + "ar", + "fr", + "ff", + "wo" + ], + "MS": [ + "en" + ], + "MT": [ + "mt", + "en", + "it", + "fr" + ], + "MU": [ + "mfe", + "en", + "bho", + "ur", + "fr", + "ta" + ], + "MV": [ + "dv" + ], + "MW": [ + "en", + "ny", + "tum", + "zu" + ], + "MX": [ + "es", + "en", + "yua", + "sei" + ], + "MY": [ + "ms", + "en", + "zh-hant", + "zh", + "ta", + "jv", + "dtp", + "ml", + "bug", + "bjn" + ], + "MZ": [ + "pt", + "ts", + "ny", + "sw", + "zu" + ], + "NA": [ + "af", + "kj", + "ng", + "hz", + "en", + "de", + "tn" + ], + "NC": [ + "fr" + ], + "NE": [ + "ha-latn", + "fr", + "ar" + ], + "NF": [ + "en" + ], + "NG": [ + "en", + "ha-latn", + "ig", + "yo", + "ha-arab", + "ar" + ], + "NI": [ + "es" + ], + "NL": [ + "nl", + "en", + "de", + "fr", + "nds", + "li", + "fy", + "id", + "zea", + "rif", + "tr" + ], + "NO": [ + "nb", + "nn", + "se" + ], + "NP": [ + "ne", + "mai", + "bho", + "new", + "en", + "dty", + "hi", + "bo", + "bn" + ], + "NR": [ + "en", + "na" + ], + "NU": [ + "en", + "niu" + ], + "NZ": [ + "en", + "mi" + ], + "OM": [ + "ar", + "fa" + ], + "PA": [ + "es", + "en", + "zh-hant", + "zh" + ], + "PE": [ + "es", + "qu", + "ay" + ], + "PF": [ + "fr", + "ty", + "zh-hant", + "zh" + ], + "PG": [ + "tpi", + "en", + "ho" + ], + "PH": [ + "en", + "tl", + "es", + "ceb", + "ilo", + "hil", + "war", + "pam", + "pag", + "zh-hant", + "zh", + "cps", + "krj", + "bto" + ], + "PK": [ + "ur", + "pa-guru", + "en", + "ps", + "sd", + "brh", + "fa", + "bgn", + "tg", + "bft", + "khw", + "ks" + ], + "PL": [ + "pl", + "en", + "de", + "ru", + "szl", + "be", + "uk", + "csb", + "sli", + "lt" + ], + "PM": [ + "fr", + "en" + ], + "PN": [ + "en" + ], + "PR": [ + 
"es", + "en" + ], + "PS": [ + "ar" + ], + "PT": [ + "pt", + "en", + "fr", + "es", + "gl" + ], + "PW": [ + "en" + ], + "PY": [ + "gn", + "es", + "de" + ], + "QA": [ + "ar", + "fa", + "ml" + ], + "RE": [ + "fr", + "rcf", + "ta" + ], + "RO": [ + "ro", + "en", + "fr", + "es", + "hu", + "de", + "tr", + "sr-latn", + "sr-cyrl", + "bg", + "el", + "pl" + ], + "RS": [ + "sr-cyrl", + "sr-latn", + "sq", + "hu", + "ro", + "hr", + "sk", + "uk" + ], + "RU": [ + "ru", + "tt", + "ba", + "cv", + "hy", + "ce", + "av", + "udm", + "mhr", + "sah", + "os", + "kbd", + "myv", + "mdf", + "kv", + "lez", + "krc", + "inh", + "tyv", + "az-cyrl", + "az-latn", + "ady", + "krl", + "lbe", + "koi", + "mrj", + "fi", + "sr-latn", + "sr-cyrl", + "vep", + "mn", + "vot", + "cu" + ], + "RW": [ + "rw", + "en", + "fr" + ], + "SA": [ + "ar" + ], + "SB": [ + "en" + ], + "SC": [ + "fr", + "en" + ], + "SD": [ + "ar", + "en", + "ha-arab", + "ha-latn" + ], + "SE": [ + "sv", + "en", + "fi", + "fit", + "se", + "yi", + "smj", + "sma", + "ia" + ], + "SG": [ + "en", + "zh", + "ms", + "ta", + "ml", + "pa-guru" + ], + "SH": [ + "en" + ], + "SI": [ + "sl", + "hr", + "en", + "de", + "hu", + "it" + ], + "SJ": [ + "nb", + "ru" + ], + "SK": [ + "sk", + "cs", + "en", + "de", + "hu", + "uk", + "pl" + ], + "SL": [ + "kri", + "en" + ], + "SM": [ + "it", + "eo" + ], + "SN": [ + "wo", + "fr", + "ff" + ], + "SO": [ + "so", + "ar", + "sw", + "om" + ], + "SR": [ + "nl", + "srn", + "zh-hant", + "zh" + ], + "SS": [ + "ar", + "en" + ], + "ST": [ + "pt" + ], + "SV": [ + "es" + ], + "SX": [ + "en", + "es", + "nl" + ], + "SY": [ + "ar", + "ku-latn", + "fr", + "hy" + ], + "SZ": [ + "en", + "ss", + "zu", + "ts" + ], + "TA": [ + "en" + ], + "TC": [ + "en" + ], + "TD": [ + "fr", + "ar" + ], + "TF": [ + "fr" + ], + "TG": [ + "fr", + "ee" + ], + "TH": [ + "th", + "en", + "zh-hant", + "zh", + "mnw", + "shn" + ], + "TJ": [ + "tg", + "ru", + "fa", + "ar" + ], + "TK": [ + "en" + ], + "TL": [ + "pt", + "tet" + ], + "TM": [ + "tk", + "ru", + "uz", + 
"ku-latn" + ], + "TN": [ + "aeb-arab", + "ar", + "fr" + ], + "TO": [ + "to", + "en" + ], + "TR": [ + "tr", + "en", + "ku-latn", + "kbd", + "az-latn", + "az-arab", + "ar", + "bg", + "ady", + "kiu", + "hy", + "ka", + "sr-latn", + "sr-cyrl", + "lzz", + "sq", + "ab", + "el", + "tru", + "uz", + "ky", + "kk-cyrl" + ], + "TT": [ + "en", + "es" + ], + "TV": [ + "en" + ], + "TW": [ + "zh-hant", + "zh" + ], + "TZ": [ + "sw", + "en" + ], + "UA": [ + "uk", + "ru", + "pl", + "yi", + "rue", + "be", + "crh", + "ro", + "bg", + "tr", + "hu", + "el" + ], + "UG": [ + "sw", + "lg", + "en", + "rw", + "hi" + ], + "UM": [ + "en" + ], + "US": [ + "en", + "es", + "zh-hant", + "zh", + "fr", + "de", + "tl", + "it", + "vi", + "ko", + "ru", + "nv", + "yi", + "pdc", + "haw", + "frc", + "chr", + "esu", + "cho", + "lkt", + "ik", + "mus" + ], + "UY": [ + "es" + ], + "UZ": [ + "uz", + "ru", + "kaa", + "tr" + ], + "VA": [ + "it", + "la" + ], + "VC": [ + "en" + ], + "VE": [ + "es" + ], + "VG": [ + "en" + ], + "VI": [ + "en" + ], + "VN": [ + "vi", + "zh-hant", + "zh" + ], + "VU": [ + "bi", + "en", + "fr" + ], + "WF": [ + "wls", + "fr" + ], + "WS": [ + "sm", + "en" + ], + "XK": [ + "sq", + "aln", + "sr-cyrl", + "sr-latn" + ], + "YE": [ + "ar", + "en" + ], + "YT": [ + "swb", + "fr", + "sw" + ], + "ZA": [ + "en", + "zu", + "xh", + "af", + "nso", + "tn", + "st", + "ts", + "ss", + "ve", + "hi", + "sw" + ], + "ZM": [ + "en", + "ny", + "loz" + ], + "ZW": [ + "sn", + "en", + "ny", + "ve", + "tn" + ] + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..59d618a --- /dev/null +++ b/package.json @@ -0,0 +1,41 @@ +{ + "name": "language-data", + "version": "0.1.0", + "description": "Language data and utilities", + "homepage": "https://github.com/wikimedia/language-data.uls", + "keywords": [ + "cldr", + "internationalization", + "localization", + "l10n" + ], + "author": { + "name": "Santhosh Thottingal", + "email": "santhosh.thottingal@gmail.com" + }, + "contributors": [ + "Amir 
Aharoni", + "Niklas Laxström", + "Santhosh Thottingal" + ], + "devDependencies": { + "assert": "1.4.1", + "eslint": "2.9.0", + "eslint-config-wikimedia": "0.3.0", + "mocha": "^2.5.3" + }, + "repository": { + "type": "git", + "url": "git://github.com/wikimedia/language-data.git" + }, + "bugs": { + "url" : "http://github.com/wikimedia/language-data/issues" + }, + "engine": { + "node": ">=0.10.x" + }, + "license": "(MIT OR GPL-2.0+)", + "scripts": { + "test": "mocha" + } +} diff --git a/test/index.js b/test/index.js new file mode 100644 index 0000000..7b4525b --- /dev/null +++ b/test/index.js @@ -0,0 +1,137 @@ +var languageData = require( '../index' ), + assert = require( 'assert' ); + +describe( 'languagedata', function () { + var orphanScripts, badRedirects, doubleRedirects, languagesWithoutAutonym; + /* + * Runs over all script codes mentioned in langdb and checks whether + * they belong to the 'Other' group. + */ + orphanScripts = function () { + var language, script, + result = []; + for ( language in languageData.languages ) { + script = languageData.getScript( language ); + if ( languageData.getGroupOfScript( script ) === 'Other' ) { + result.push( script ); + } + } + return result; + }; + /* + * Runs over all languages and checks that all redirects have a valid target. + */ + badRedirects = function () { + var language, target, + result = []; + for ( language in languageData.languages ) { + target = languageData.isRedirect( language ); + if ( target && !languageData.languages[ target ] ) { + result.push( language ); + } + } + return result; + }; + /* + * Runs over all languages and checks that all redirects point to a language. + * There's no reason to have double redirects. 
+ */ + doubleRedirects = function () { + var language, target, + result = []; + for ( language in languageData.languages ) { + target = languageData.isRedirect( language ); + if ( target && languageData.isRedirect( target ) ) { + result.push( language ); + } + } + return result; + }; + /* + * Runs over all script codes mentioned in langdb and checks whether + * they have something that looks like an autonym. + */ + languagesWithoutAutonym = function () { + var language, + result = []; + for ( language in languageData.languages ) { + if ( typeof languageData.getAutonym( language ) !== 'string' ) { + result.push( language ); + } + } + return result; + }; + it( 'autonyms', function () { + var autonyms; + // Add a language in run time. + // This is done early to make sure that it doesn't break other functions. + languageData.addLanguage( 'qqq', { + script: 'Latn', + regions: [ 'SP' ], + autonym: 'Language documentation' + } ); + assert.ok( languageData.getAutonym( 'qqq' ), 'Language documentation', 'Language qqq was added with the correct autonym' ); + autonyms = languageData.getAutonyms(); + assert.strictEqual( autonyms[ 'zu' ], 'isiZulu', 'Correct autonym is returned for Zulu using getAutonyms().' ); + assert.strictEqual( autonyms[ 'pa' ], undefined, 'Language "pa" is not listed in autonyms, because it is a redirect' ); + assert.strictEqual( autonyms[ 'pa-guru' ], 'ਪੰਜਾਬੀ', 'Language "pa-guru" has the correct autonym' ); + assert.deepEqual( languagesWithoutAutonym(), [], 'All languages have autonyms.' ); + assert.strictEqual( languageData.getAutonym( 'pa' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected using code pa.' ); + assert.strictEqual( languageData.getAutonym( 'pa-guru' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected using code pa-guru.' 
); + // autonyms: gn: avañe'ẽ, de: deutsch, hu: magyar, fi: suomi + assert.deepEqual( [ 'de', 'fi', 'gn', 'hu' ].sort( languageData.sortByAutonym ), [ + 'gn', 'de', 'hu', 'fi' + ], 'Languages are correctly sorted by autonym' ); + } ); + it( 'regions and groups', function () { + var languagesToGroup, groupedLanguages; + // This test assumes that we don't want any scripts to be in the 'Other' + // group. Actually, this may become wrong some day. + assert.deepEqual( orphanScripts(), [], 'All scripts belong to script groups.' ); + languagesToGroup = { + en: 'English', + 'fiu-vro': 'Võro', // Alias before target + ru: 'русский', + sr: 'српски', // Alias before target + 'sr-cyrl': 'српски', // Target before alias + 'sr-latn': 'srpski', // Target before alias + 'sr-el': 'srpski', // Alias after target + vro: 'Võro' // Target after alias + }; + groupedLanguages = { + Latin: [ 'en', 'fiu-vro', 'sr-latn', 'sr-el', 'vro' ], + Cyrillic: [ 'ru', 'sr', 'sr-cyrl' ] + }; + assert.deepEqual( languageData.getLanguagesByScriptGroup( languagesToGroup ), groupedLanguages, 'A custom list of languages is grouped correctly using getLanguagesByScriptGroup.' ); + assert.deepEqual( languageData.getRegions( 'lzz' ), [ + 'EU', 'ME' + ], 'Correct regions of the Laz language were selected' ); + assert.strictEqual( languageData.getRegions( 'no-such-language' ), 'UNKNOWN', 'The region of an invalid language is "UNKNOWN"' ); + assert.ok( languageData.getLanguagesInTerritory( 'RU' ).includes( 'sah' ), 'Sakha language is spoken in Russia' ); + } ); + it( 'scripts', function () { + // This test assumes that we don't want any scripts to be in the 'Other' + // group. Actually, this may become wrong some day. + assert.deepEqual( orphanScripts(), [], 'All scripts belong to script groups.' 
); + assert.deepEqual( languageData.getLanguagesInScript( 'Guru' ), [ 'pa-guru' ], '"pa-guru" is written in script Guru, and "pa" is skipped as a redirect' ); + assert.deepEqual( languageData.getLanguagesInScripts( [ 'Geor', 'Armn' ] ), [ 'hy', 'ka', 'xmf' ], 'languages in scripts Geor and Armn are selected correctly' ); + assert.deepEqual( languageData.getLanguagesInScript( 'Knda' ), [ + 'kn', 'tcy' + ], 'languages in script Knda are selected correctly' ); + assert.strictEqual( languageData.getGroupOfScript( 'Beng' ), 'SouthAsian', 'Bengali script belongs to the SouthAsian group.' ); + assert.strictEqual( languageData.getScriptGroupOfLanguage( 'iu' ), 'NativeAmerican', 'The script of the Inupiaq language belongs to the NativeAmerican group.' ); + } ); + it( 'redirects', function () { + assert.strictEqual( languageData.isRedirect( 'sr-ec' ), 'sr-cyrl', '"sr-ec" is a redirect to "sr-cyrl"' ); + assert.deepEqual( badRedirects(), [], 'All redirects have valid targets.' ); + assert.deepEqual( doubleRedirects(), [], 'There are no double redirects.' ); + assert.strictEqual( languageData.getScript( 'no-such-language' ), 'Zyyy', 'A script for an unknown language is Zyyy - undetermined' ); + assert.strictEqual( languageData.getScript( 'ii' ), 'Yiii', 'Correct script of the Yi language was selected' ); + } ); + it( 'directionality', function () { + assert.strictEqual( languageData.isRtl( 'te' ), false, 'Telugu language is not RTL' ); + assert.strictEqual( languageData.isRtl( 'dv' ), true, 'Divehi language is RTL' ); + assert.strictEqual( languageData.getDir( 'mzn' ), 'rtl', 'Mazandarani language is RTL' ); + assert.strictEqual( languageData.getDir( 'uk' ), 'ltr', 'Ukrainian language is LTR' ); + } ); +} );