territoryInfo->territory as $territoryRecord ) {
	// Each <territory> record carries its code in the "type" attribute.
	$territoryAtributes = $territoryRecord->attributes();
	$territoryCodeAttr = $territoryAtributes['type'];
	$territoryCode = (string) $territoryCodeAttr[0];
	$parsedLangdb['territories'][$territoryCode] = array();

	foreach ( $territoryRecord->languagePopulation as $languageRecord ) {
		$languageAttributes = $languageRecord->attributes();
		$languageCodeAttr = $languageAttributes['type'];
		// Lower case is a convention for language codes in ULS.
		// '_' is used in CLDR for compound codes and it's replaced with '-' here.
		$normalisedCode = strtr( strtolower( (string) $languageCodeAttr[0] ), '_', '-' );
		$parsedLangdb['territories'][$territoryCode][] = $normalisedCode;

		// In case of codes with variants, also add the base because ULS might consider
		// them as separate languages, e.g. zh, zh-hant and zh-hans.
		if ( strpos( $normalisedCode, '-' ) !== false ) {
			$parts = explode( '-', $normalisedCode );
			$parsedLangdb['territories'][$territoryCode][] = $parts[0];
		}
	}
}

// Second pass: reconcile every territory's language list with the known
// languages in $parsedLangdb['languages'].
foreach ( $parsedLangdb['territories'] as $territoryCode => $languages ) {
	foreach ( $languages as $index => $language ) {
		// Drop codes that have no entry in the language table.
		if ( !isset( $parsedLangdb['languages'][$language] ) ) {
			echo "Unknown language $language for territory $territoryCode\n";
			unset( $parsedLangdb['territories'][$territoryCode][$index] );
			continue;
		}

		// A language entry with exactly one element is treated as a redirect:
		// its only element replaces the original code in the territory list.
		$data = $parsedLangdb['languages'][$language];
		if ( count( $data ) === 1 ) {
			echo "Redirect for language $language to {$data[0]} territory $territoryCode\n";
			$parsedLangdb['territories'][$territoryCode][$index] = $data[0];
		}
	}

	// Clean-up to save space
	if ( count( $parsedLangdb['territories'][$territoryCode] ) === 0 ) {
		unset( $parsedLangdb['territories'][$territoryCode] );
		continue;
	}

	// Remove duplicates we might have created, then renumber the keys:
	// otherwise the JSON conversion thinks these are objects.
	$parsedLangdb['territories'][$territoryCode] = array_values(
		array_unique( $parsedLangdb['territories'][$territoryCode] )
	);
}

print "Writing JSON langdb...\n";
// Pretty-printed, unescaped output makes diff review easier.
$jsonVerbose = json_encode( $parsedLangdb, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE );
if ( $jsonVerbose === false ) {
	// json_encode() returns false on failure. Writing that value out would
	// replace the existing data file with empty content, so stop here.
	print "Error: could not encode langdb as JSON (json_last_error: " . json_last_error() . ").\n";
	exit( 1 );
}

if ( file_put_contents( '../language-data.json', $jsonVerbose ) === false ) {
	// Do not report success when nothing was written.
	print "Error: could not write ../language-data.json\n";
	exit( 1 );
}
print "Done.\n";