-- 模組:Language/data
-- Documentation for this module can be created at 模組:Language/data/doc
-- Shorthand for the Scribunto helper that turns Unicode code points into a string.
local U = mw.ustring.char

-- Combining diacritical marks, built once from their code points:
--   U+0300 grave, U+0301 acute, U+030B double acute, U+0303 tilde,
--   U+0304 macron, U+030F double grave, U+0311 inverted breve.
-- They are concatenated into a character class in the "sla-pro" replacements.
local grave, acute, double_acute = U(0x0300), U(0x0301), U(0x030B)
local tilde, macron = U(0x0303), U(0x0304)
local dgrave, invbreve = U(0x030F), U(0x0311)
--[[ Per-language data, keyed by language code.

Fields of an entry (all optional; several entries omit some of them):
  name            the "canonical name" used on Wiktionary
  Wikipedia_name  presumably the name used on Wikipedia where it differs
                  from `name` (only set for "grk-pro") -- TODO confirm
  article         title of the Wikipedia article, as a plain string
  scripts         list of ISO 15924 script codes
  type            "reconstructed" for the proto-languages
  direction       "rtl" where set (currently only Arabic)
  replacements    map from a pattern (character classes and captures are used)
                  to its replacement string; presumably applied by the
                  consuming module with a Unicode-aware gsub -- TODO confirm.
                  No order of application is implied by the table.
]]
local data = {
	["ang"] = {
		["name"] = "Old English",
		["article"] = "Old English",
		["scripts"] = { "Latn" },
		-- Remove macrons, acutes, and overdots
		["replacements"] = {
			["[ĀÁ]"] = "A",
			["[āá]"] = "a",
			["[ǢǼ]"] = "Æ",
			["[ǣǽ]"] = "æ",
			["Ċ"] = "C",
			["ċ"] = "c",
			["[ĒÉ]"] = "E",
			["[ēé]"] = "e",
			["Ġ"] = "G",
			["ġ"] = "g",
			["[ĪÍ]"] = "I",
			["[īí]"] = "i",
			["[ŌÓ]"] = "O",
			["[ōó]"] = "o",
			["[ŪÚ]"] = "U",
			["[ūú]"] = "u",
			["[ȲÝ]"] = "Y",
			["[ȳý]"] = "y",
		},
	},
	["ar"] = {
		["name"] = "Arabic",
		["article"] = "Arabic language",
		["scripts"] = { "Arab" },
		["direction"] = "rtl", -- Should be in the script data module.
		--[[ ālif with wasla is replaced by ālif;
			taṭwīl, fatḥatan, ḍammatan, kasratan,
			fatḥa, ḍamma, kasra,
			shadda, sukūn, and superscript (dagger) ālif are removed. ]]
		["replacements"] = {
			[U(0x0671)] = U(0x0627),
			["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
				..U(0x064E)..U(0x064F)..U(0x0650)
				..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
		},
	},
	["be"] = {
		["article"] = "Belarusian language",
		["scripts"] = { "Cyrl" },
		-- Combining acute accent is removed.
		["replacements"] = { [U(0x0301)] = "", }
	},
	["bn"] = {
		["name"] = "Bengali",
		["article"] = "Bengali language",
		["scripts"] = { "Beng" },
	},
	["cu"] = {
		["name"] = "Old Church Slavonic",
		["article"] = "Old Church Slavonic",
		["scripts"] = { "Cyrs" },
	},
	["de"] = {
		["name"] = "German",
		["article"] = "German language",
		["scripts"] = { "Latn" },
		-- Digraph-to-umlaut replacements; currently disabled (commented out).
		--[[
		["replacements"] = {
			["ae"] = "ä",
			["oe"] = "ö",
			["ue"] = "ü",
			["A[Ee]"] = "Ä",
			["O[Ee]"] = "Ö",
			["U[Ee]"] = "Ü",
		},
		]]
	},
	["en"] = {
		["name"] = "English",
		["article"] = "English language",
		["scripts"] = { "Latn" },
	},
	["es"] = {
		["name"] = "Spanish",
		["article"] = "Spanish language",
		["scripts"] = { "Latn" },
	},
	["fr"] = {
		["name"] = "French",
		["article"] = "French language",
		["scripts"] = { "Latn" },
	},
	["frm"] = {
		["name"] = "Middle French",
		["article"] = "Middle French",
		["scripts"] = { "Latn" },
	},
	["gem-pro"] = {
		["name"] = "Proto-Germanic",
		["article"] = "Proto-Germanic language",
		["scripts"] = { "Latn" },
		["type"] = "reconstructed",
		["replacements"] = {},
	},
	["grc"] = {
		["name"] = "Ancient Greek",
		["article"] = "Ancient Greek",
		["scripts"] = { "Grek" },
		["replacements"] = {
			-- Vowels with macrons or breves are replaced with plain letters.
			["[ᾱᾰ]"] = "α",
			["[ᾹᾸ]"] = "Α",
			["[ῑῐ]"] = "ι",
			["[ῙῘ]"] = "Ι",
			["[ῡῠ]"] = "υ",
			["[ῩῨ]"] = "Υ",
			-- Variant ("symbol") letterforms are replaced with the standard letters.
			["ϐ"] = "β",
			["ϵ"] = "ε",
			["ϑ"] = "θ",
			["ϰ"] = "κ",
			["ϱ"] = "ρ",
			["ϲ"] = "σ",
			["ϕ"] = "φ",
		},
	},
	["grk-pro"] = {
		["name"] = "Proto-Hellenic",
		["Wikipedia_name"] = "Proto-Greek",
		["article"] = "Proto-Greek language",
		["scripts"] = { "Latn" },
		["type"] = "reconstructed",
		["replacements"] = {},
	},
	["hi"] = {
		["name"] = "Hindi",
		["article"] = "Hindi",
		["scripts"] = { "Deva" },
	},
	["ine-pro"] = {
		["name"] = "Proto-Indo-European",
		["article"] = "Proto-Indo-European language",
		["scripts"] = { "Latn" },
		["type"] = "reconstructed",
		["replacements"] = {},
	},
	["ja"] = {
		["name"] = "Japanese",
		["article"] = "Japanese language",
		["scripts"] = { "Jpan" },
	},
	["la"] = {
		["name"] = "Latin",
		["article"] = "Latin",
		["scripts"] = { "Latn" },
		["replacements"] = {
			-- Vowels with macrons, breves, or diaereses are replaced with plain letters.
			["[ĀĂ]"] = "A",
			["[āă]"] = "a",
			["[ĒĔ]"] = "E",
			["[ēĕë]"] = "e",
			["[ĪĬÏ]"] = "I",
			["[īĭï]"] = "i",
			["[ŌŎ]"] = "O",
			["[ōŏ]"] = "o",
			["[ŪŬÜ]"] = "U",
			["[ūŭü]"] = "u",
			["Ȳ"] = "Y",
			["ȳ"] = "y"
		},
	},
	["mul"] = {
		["name"] = "Translingual",
		["article"] = "",
		-- NOTE(review): the empty script code is carried over unchanged;
		-- confirm that readers of `scripts` tolerate it.
		["scripts"] = { "" },
	},
	["orv"] = {
		["name"] = "Old East Slavic",
		["article"] = "Old East Slavic",
		["scripts"] = { "Cyrs" },
		["replacements"] = {
			-- Combining Cyrillic palatalization mark (U+0484) is removed.
			[U(0x484)] = "",
		},
	},
	["pt"] = {
		["name"] = "Portuguese",
		["article"] = "Portuguese language",
		["scripts"] = { "Latn" },
	},
	["pa"] = {
		["name"] = "Punjabi",
		["article"] = "Punjabi language",
		["scripts"] = { "Guru", "Arab", }
	},
	["ru"] = {
		["name"] = "Russian",
		["article"] = "Russian language",
		["scripts"] = { "Cyrl" },
		-- Combining acute accent is removed.
		["replacements"] = { [U(0x0301)] = "", }
	},
	["se"] = {
		["replacements"] = {
			-- An apostrophe between two identical consonants is dropped:
			-- %1 in the pattern is a back-reference to the captured consonant.
			["([đflmnŋrsšŧv])'%1"] = "%1%1",
		},
	},
	["sh"] = {
		["article"] = "Serbo-Croatian language",
		["scripts"] = { "Latn", "Cyrl" },
		["replacements"] = {
			-- Latin vowels and R carrying a double grave, grave, inverted breve,
			-- acute, macron, or tilde are replaced with plain letters.
			["[ȀÀȂÁĀÃ]"] = "A",
			["[ȁàȃáāã]"] = "a",
			["[ȄÈȆÉĒẼ]"] = "E",
			["[ȅèȇéēẽ]"] = "e",
			["[ȈÌȊÍĪĨ]"] = "I",
			["[ȉìȋíīĩ]"] = "i",
			["[ȌÒȎÓŌÕ]"] = "O",
			["[ȍòȏóōõ]"] = "o",
			["[ȐȒŔ]"] = "R",
			["[ȑȓŕ]"] = "r",
			["[ȔÙȖÚŪŨ]"] = "U",
			["[ȕùȗúūũ]"] = "u",
			-- Cyrillic vowels with a grave or macron are replaced with plain letters.
			["Ѐ"] = "Е",
			["ѐ"] = "е",
			["[ӢЍ]"] = "И",
			["[ӣѝ]"] = "и",
			["[Ӯ]"] = "У",
			["[ӯ]"] = "у"
		},
	},
	["sla-pro"] = {
		["name"] = "Proto-Slavic", -- also Common Slavic
		["type"] = "reconstructed",
		["scripts"] = { "Latn" },
		["replacements"] = {
			-- Precomposed accented vowels are replaced with plain letters.
			["[ÀÁÃĀȀȂ]"] = "A",
			["[àáãāȁȃ]"] = "a",
			["[ÈÉẼĒȄȆ]"] = "E",
			["[èéẽēȅȇ]"] = "e",
			["[ÌÍĨĪȈȊ]"] = "I",
			["[ìíĩīȉȋ]"] = "i",
			["[ÒÓÕŌȌȎŐ]"] = "O",
			["[òóõōȍȏő]"] = "o",
			["[ÙÚŨŪȔȖŰ]"] = "U",
			["[ùúũūȕȗű]"] = "u",
			["[ỲÝỸȲ]"] = "Y",
			["[ỳýỹȳ]"] = "y",
			-- O with ogonek and macron loses the macron but keeps the ogonek.
			["Ǭ"] = "Ǫ",
			["ǭ"] = "ǫ",
			-- Any remaining combining accent marks are removed.
			["[" .. grave .. acute .. double_acute .. tilde .. macron .. dgrave .. invbreve .. "]"] = "",
		},
	},
	["uk"] = {
		["article"] = "Ukrainian language",
		["scripts"] = { "Cyrl" },
		-- Combining acute accent is removed.
		["replacements"] = { [U(0x0301)] = "", }
	},
	["ur"] = {
		["name"] = "Urdu",
		["article"] = "Urdu",
		["scripts"] = { "Arab" },
	},
	["zh"] = {
		["name"] = "Chinese",
		["article"] = "Chinese language",
		["scripts"] = { "Hani" },
	},
	["xcl"] = {
		["name"] = "Old Armenian",
		["article"] = "Classical Armenian",
		["scripts"] = { "Armn" },
		["replacements"] = {
			-- Armenian question, exclamation, emphasis, and abbreviation marks are removed.
			["[՞՜՛՟]"] = "",
			-- The ligature is expanded to its two component letters.
			["և"] = "եւ",
		},
	},
}
--[[ Blank templates for new entries. Copy one into the `data` table above and
fill in the language code (the key), the canonical name, the article title,
and the ISO 15924 script code(s). The second form also carries a
"replacements" table for languages that need one.

[""] = {
	["name"] = "",
	["article"] = "",
	["scripts"] = { "" },
},
[""] = {
	["name"] = "",
	["article"] = "",
	["scripts"] = { "" },
	["replacements"] = {
	},
},
]]
-- The data table is the module's return value.
return data