-- Module:Wp/nod/Translit data
-- Documentation for this module may be created at Module:Wp/nod/Translit data/doc
-- Data to support transliteration of Northern Thai from the Lanna script
local data = {}

-- Letter class of each Lanna letter, keyed by the letter itself.
--   H = high (both Lanna & Siamese)
--   M = middle (both Lanna & Siamese)
--   L = low
--   R = high in Northern Thai, but letter is middle in Siamese and Lao
--   F = low in Northern Thai, but sound is written with middle letter in Siamese
data.class = {
    -- consonants
    ["ᨠ"] = "R", ["ᨡ"] = "H", ["ᨢ"] = "H", ["ᨣ"] = "F", ["ᨤ"] = "L", ["ᨥ"] = "L", ["ᨦ"] = "L",
    ["ᨧ"] = "R", ["ᨨ"] = "H", ["ᨩ"] = "F", ["ᨪ"] = "L", ["ᨫ"] = "L", ["ᨬ"] = "L",
    ["ᨭ"] = "R", ["ᨮ"] = "H", ["ᨯ"] = "M", ["ᨰ"] = "L", ["ᨱ"] = "L",
    ["ᨲ"] = "R", ["ᨳ"] = "H", ["ᨴ"] = "F", ["ᨵ"] = "L", ["ᨶ"] = "L",
    ["ᨷ"] = "M",
    ["ᨸ"] = "R", ["ᨹ"] = "H", ["ᨺ"] = "H", ["ᨻ"] = "F", ["ᨼ"] = "L", ["ᨽ"] = "L", ["ᨾ"] = "L",
    ["ᨿ"] = "L", ["ᩀ"] = "M", ["ᩁ"] = "L", ["ᩂ"] = "L", ["ᩃ"] = "L", ["ᩄ"] = "L", ["ᩅ"] = "L",
    ["ᩆ"] = "H", ["ᩇ"] = "H", ["ᩈ"] = "H", ["ᩉ"] = "H", ["ᩊ"] = "L", ["ᩋ"] = "M", ["ᩌ"] = "L",
    -- LAE and doubled SA
    ["ᩓ"] = "L", ["ᩔ"] = "H",
    -- independent vowels
    ["ᩍ"] = "M", ["ᩎ"] = "M", ["ᩏ"] = "M", ["ᩐ"] = "M", ["ᩑ"] = "M", ["ᩒ"] = "M",
}
-- Unicode-aware helpers from the Scribunto ustring library.
local gsub = mw.ustring.gsub
local u = mw.ustring.char
-- Marker code point U+FFFFE; data.tt2 maps it to the empty string.
data.disruptor = u(0xffffe)

-- Remove mark bearers, which are added for readability.
-- @param s  string that may contain the bearer letters ᨠ, ᨣ or ก
-- @return   s with every bearer letter deleted (exactly one value)
local function sc(s)
    -- gsub returns (string, count); the parentheses drop the count so it can
    -- never leak into an argument list or table constructor.
    return (gsub(s, "[ᨠᨣก]", ""))
end
-- Basic transliteration: maps each Lanna-script character to its replacement
-- string (mostly Thai script; the first numeral set maps to ASCII digits).
-- Keeps SAKOT (it deliberately has no entry) as that is needed for vowel
-- rearrangement.
-- Several values put the marker "↶" before a vowel sign; presumably the
-- rearrangement step in the consuming module acts on it -- TODO confirm.
data.tt2 = {
-- consonants
["ᨠ"] = "ก", ["ᨡ"] = "ข", ["ᨢ"] = "ฃ", ["ᨣ"] = "ค", ["ᨤ"] = "ฅ", ["ᨥ"] = "ฆ", ["ᨦ"] = "ง",
["ᨧ"] = "จ", ["ᨨ"] = "ฉ", ["ᨩ"] = "ช", ["ᨪ"] = "ซ", ["ᨫ"] = "ฌ", ["ᨬ"] = "ญ",
["ᨭ"] = "ฏ", ["ᨮ"] = "ฐ", ["ᨯ"] = "ด", ["ᨰ"] = "ฒ", ["ᨱ"] = "ณ",
["ᨲ"] = "ต", ["ᨳ"] = "ถ", ["ᨴ"] = "ท", ["ᨵ"] = "ธ", ["ᨶ"] = "น",
["ᨷ"] = "บ", ["ᨸ"] = "ป", ["ᨹ"] = "ผ", ["ᨺ"] = "ฝ", ["ᨻ"] = "พ", ["ᨼ"] = "ฟ", ["ᨽ"] = "ภ", ["ᨾ"] = "ม",
["ᨿ"] = "ย",
-- Rejected alternatives for ᩀ, kept for reference:
-- ["ᩀ"] = "ย̱", -- Renders badly with a vowel below
-- ["ᩀ"] = "อ"..u(0x200D).."ย", -- Needs a special font
-- Current choice: อ + U+200D + U+2060 + ย.
["ᩀ"] = "อ"..u(0x200D)..u(0x2060).."ย", -- Needs a special font
["ᩁ"] = "ร", ["ᩂ"] = "ฤ", ["ᩃ"] = "ล", ["ᩄ"] = "ฦ", ["ᩅ"] = "ว",
["ᩆ"] = "ศ", ["ᩇ"] = "ษ", ["ᩈ"] = "ส", ["ᩉ"] = "ห", ["ᩊ"] = "ฬ", ["ᩋ"] = "อ", ["ᩌ"] = "ฮ",
-- independent vowels
["ᩍ"] = "อิ", ["ᩎ"] = "อี", ["ᩏ"] = "อุ", ["ᩐ"] = "อู", ["ᩑ"] = "อ↶เ", ["ᩒ"] = "อ↶โ",
-- medials and miscellaneous
-- sc() strips the ᨠ bearer from its argument, so the values for ᩕ and ᩖ
-- begin with SAKOT followed by the Thai consonant.
["ᩓ"] = "ล↶แ", ["ᩔ"] = "สส", ["ᩕ"] = sc("ᨠ᩠ร"), ["ᩖ"] = sc("ᨠ᩠ล"), ["ᩗ"] = "งล", ["ᩘ"] = "ง",
["᪢"] = "สวัรค์",
-- dependent vowels and diacritics
-- ["᩠"] = "", -- defer - needed for rearrangement
["ᩡ"] = "ะ", ["ᩢ"] = "ั", ["ᩣ"] = "า", ["ᩤ"] = "า", -- ignore bindu
["ᩥ"] = "ิ", ["ᩦ"] = "ี", ["ᩧ"] = "ึ", ["ᩨ"] = "ื", ["ᩩ"] = "ุ", ["ᩪ"] = "ู",
["ᩫ"] = "", ["ᩬ"] = "อ", ["ᩭ"] = "อย",
["ᩮ"] = "↶เ", ["ᩯ"] = "↶แ", ["ᩰ"] = "↶โ", ["ᩱ"] = "↶ไ", ["ᩲ"] = "↶ใ",
["ᩳ"] = "↶เา", ["ᩴ"] = "ํ", ["᩵"] = "่", ["᩶"] = "้",
["᩺"] = "์", ["᩻"] = " ๆ ", ["᩼"] = "์", ["᩿"] = "ฺ",
-- numerals: the first set maps to ASCII digits, the second to Thai digits
["᪀"] = "0", ["᪁"] = "1", ["᪂"] = "2", ["᪃"] = "3", ["᪄"] = "4",
["᪅"] = "5", ["᪆"] = "6", ["᪇"] = "7", ["᪈"] = "8", ["᪉"] = "9",
["᪐"] = "๐", ["᪑"] = "๑", ["᪒"] = "๒", ["᪓"] = "๓", ["᪔"] = "๔",
["᪕"] = "๕", ["᪖"] = "๖", ["᪗"] = "๗", ["᪘"] = "๘", ["᪙"] = "๙",
-- punctuation marks
["ᪧ"] = "ๆ", ["᪨"] = "ฯ", ["᪩"] = "๚", ["᪪"] = "ฯ", ["᪫"] = "๚", ["᪬"] = "๛",
-- zero-width space (display it if it hides in a word)
-- [u(0x200B)] = "‼", -- Not appropriate for text!
-- The disruptor marker is deleted from the output.
[data.disruptor] = "",
}
-- data.tt3 begins as a shallow copy of data.tt2 ...
data.tt3 = {}
for lanna, thai in pairs(data.tt2) do
    data.tt3[lanna] = thai
end
-- ... and then replaces the entries where Siamese and Northern Thai differ.
for lanna, thai in pairs({
    ["ᨣ"] = "ก",
    ["ᨤ"] = "ค",
    ["ᨩ"] = "จ",
    ["ᨴ"] = "ต",
    ["ᨻ"] = "ป",
}) do
    data.tt3[lanna] = thai
end
-- Thai-script names for letters whose name is not simply their
-- transliteration. For Unicode codepoint names.
-- The six independent vowels all use the same prefix "อักขระ"; the entry for
-- ᩎ was previously misspelled "อักชระ" (ช in place of ข).
data.oddname = {
["ᩍ"] = "อักขระอิ", ["ᩎ"] = "อักขระอี", ["ᩏ"] = "อักขระอุ", ["ᩐ"] = "อักขระอู",
["ᩑ"] = "อักขระเอ", ["ᩒ"] = "อักขระโอ", ["ᩓ"] = "แล", ["ᩔ"] = "สะสองห้อง",
}
-- Transliterations of 'hard' words, keyed by the exact Lanna-script spelling
-- (code point order and invisible characters are significant).
-- Each value is a list of one or two strings. It will generally be better if
-- they are expressed as transliterations of alternative forms, i.e. given as a
-- Lanna-script respelling rather than as Thai text. First form is for
-- transliteration, second form is for 'transcription' where Thai reading rules
-- are (generally) used. When only one form is present, how the transcription
-- is derived is decided by the consuming code -- not visible in this module.
--
-- ko is the Thai rendering shared by the entries below that concatenate it.
-- Three candidate spellings exist; exactly one assignment is left active.
local ko -- 3 Ways to do it!
-- ko = "ก็"
ko = "ค็"
-- ko = "คํ่"
data.hard = {
["ᨡᩮᩬᩢ᩶ᩣ᩠ᨦ"] = {"ᨡᩮᩢ᩶ᩣᨡᩬᨦ"},
["ᨡᩬ᩶᩻ᨦ"] = {"ᨡᩮᩢ᩶ᩣᨩᩬᨦ"},
["ᨡ᩠ᨾ᩻ᩮᩁ"] = {"เขมร"},
["ᨡ᩠ᨾᩮᩁ"] = {"เขมร"},
["ᨣ᩠ᨳᩤ"] = {"ᨣᩤᨳᩣ"},
["ᨣᩴ"] = {ko, "ก็"},
["ᨣᩴ᩵"] = {ko, "ก็"},
["ᨣᩴ᩠ᨾᩣ"] = {ko.."มา"},
["ᨣ᩠ᨾᩦ"] = {ko.."มี"},
["ᨣᩴ᩠ᩅ᩵ᩣ"] = {ko.."ว่า"},
["ᨣᩴᩝ᩵"] = {ko.."บ่"},
["ᨣᩝᩴ᩵"] = {ko.."บ่"},
["ᨣᩬᩁ"] = {"คอน", "กอน"},
["ᨧᩪᩦ᩶"] = {"ᨧᩮᩢ᩶ᩣ"},
["ᨧᩢ"] = {"ᨧᩢ᩠ᨠ"},
["ᨧᩢ᩠ᨾᩣ"] = {"ᨧᩢ᩠ᨠᨾᩣ"},
["ᨧᩢ᩠ᨾᩦ"] = {"ᨧᩢ᩠ᨠᨾᩦ"},
["ᨧᩢ᩠ᩅ᩵ᩣ"] = {"ᨧᩢ᩠ᨠᩅ᩵ᩤ"},
["ᨧᩥ᩠᩵ᨦᩢ"] = {"ᨧᩥ᩠᩵ᨦᨧᩢ᩠ᨠ"},
["ᨧᩩ᩵͏ᩢᨦ"] = {"ᨧᩩ᩵ᨦᨧᩢ᩠ᨠ"}, -- NOTE(review): key appears to contain an invisible joiner (U+034F?) -- keep byte-exact
["ᨧᩩᩢ᩵ᨦ"] = {"ᨧᩩ᩵ᨦᨧᩢ᩠ᨠ"},
["ᨧᩮᩢ᩠᩶ᨡᩣ"] = {"ᨧᩮᩢ᩶ᩣᨡ᩶ᩣ"},
["ᨧᩮᩢ᩠ᨡ᩶ᩣ"] = {"ᨧᩮᩢ᩶ᩣᨡ᩶ᩣ"},
["ᨧ"] = {"ᨧᩡ"},
["ᨧᩁᩂ"] = {"จรือ"},
["ᨧᩣᩁᩂ"] = {"ᨧᩣᩁᩨ"},
["ᨩᩨ᩠᩵ᨶ᩠ᨾ"] = {"ᨩᩨ᩠᩵ᨶᨩᩫ᩠ᨾ"},
["ᨩᩨ᩠ᩅ᩵ᩣ"] = {"ᨩᩨ᩵ᩅ᩵ᩤ"},
["ᨩᩨ᩠᩵ᩅᩣ"] = {"ᨩᩨ᩵ᩅ᩵ᩤ"},
["ᨯ᩠ᨦ᩠ᨶᩦ᩶"] = {"ดั่งนี้"},
["ᨯ᩠ᩃᩦ"] = {"ᨯᩦᩉᩖᩦ"},
["ᨯᩦ᩠ᩃ"] = {"ᨯᩦᩉᩖᩦ"},
["ᨲᩬᩁ"] = {"ตอน", "ต๋อน"},
["ᨲᩦᩣ᩠ᨿ"] = {"ᨲᩦᨲᩣ᩠ᨿ"},
["ᨲᩮᩪᩦ᩵ᩣ᩠ᨿ"] = {"ᨲᩪᨲᩦᨲᩮᩢ᩵ᩣᨲᩣ᩠ᨿ"},
["ᨲᩯ᩠ᨶᩬ᩵"] = {"ᨲᩯ᩠ᨶᨲᩬᩴ᩵"},
["ᨲᩬ᩵ᩯ᩠ᨶ"] = {"ᨲᩬᩴ᩵ᨲᩯ᩠ᨶ"},
["ᨲᩬ᩵͏ᩯ᩠ᨶ"] = {"ᨲᩬᩴ᩵ᨲᩯ᩠ᨶ"}, -- NOTE(review): same as the previous key but apparently with an invisible joiner (U+034F?) -- keep byte-exact
["ᨲᩦ᩠ᨿᩁ"] = {"ᨲᩦᨲ᩠ᨿᨶ"},
["ᨳᩪᩢᨲᩬᨦ"] = {"ᨳᩪᨠᨲᩬ᩶ᨦ"},
["ᨴᩘ᩠ᩃᩣ᩠ᨿ"] = {"ᨴᩢ᩠ᨦᩉᩖᩣ᩠ᨿ"},
["ᨴᩢ᩠ᩃᩣ᩠ᨿ"] = {"ᨴᩢ᩠ᨦᩉᩖᩣ᩠ᨿ"},
["ᨴᩢ᩠ᩃᩣ"] = {"ᨴᩢ᩠ᨦᩉᩖᩣ᩠ᨿ"},
["ᨴᩢᩗᩣ"] = {"ᨴᩢ᩠ᨦᩉᩖᩣ᩠ᨿ"},
["ᨴᩯ᩠᩶ᩃ"] = {"ᨴᩯ᩶ᩃᩯ"},
["ᨴ᩠᩶ᩃᩯ"] = {"ᨴᩯ᩶ᩃᩯ"},
["ᨴᩱ᩠ᨿ"] = {"ไทย", "ไทย"},
["ᨷᩴ"] = {"บ่"},
["ᨷᩴ᩵"] = {"บ่"},
["ᨷ᩠ᨾᩦ"] = {"ᨷ᩵ᨾᩦ"},
["ᨷ᩠᩵ᨾᩦ"] = {"ᨷ᩵ᨾᩦ"},
["ᨷ᩠ᨯᩦ"] = {"ᨷ᩵ᨯᩦ"},
["ᨷᩕᩮᩢᩣᩅ᩺ᨪᩮᩬᩥᩁ᩺"] = {"เบราว์เซอร์"},
["ᨷᨠᨲᩥ"] = {"ᨸᨠᨲᩥ"},
["ᨷᨠ᩠ᨠᨲᩥ"] = {"ᨸᨠ᩠ᨠᨲᩥ"},
["ᨷᨠ᩠ᨠᨲ᩠ᨲᩥ"] = {"ᨸᨠ᩠ᨠᨲ᩠ᨲᩥ"},
["ᨷᨧ᩠ᨧᩩᨷ᩠ᨷᨶ᩠ᨶ"] = {"ปัจจุบบันนะ", "ปัจจุบันนะ"},
["ᨷᨧ᩠ᨧᩩᨷ᩠ᨷᨶ᩠ᨶ᩺"] = {"ปัจจุบบันน์", "ปัจจุบัน"},
["ᨷᨧ᩠ᨧᩩᨷᨶ᩠ᨶ"] = {"ปัจจุบันนะ"},
["ᨷᨧ᩠ᨧᩩᨷᨶ᩠ᨶ᩺"] = {"ปัจจุบันน์", "ปัจจุบัน"},
["ᨷᨬ᩠ᩉᩣ"] = {"ᨸᨬ᩠ᩉᩣ"},
["ᨷᨭᩥᨷᩢ᩠ᨲ"] = {"ᨸᨭᩥᨷᩢ᩠ᨲ"},
["ᨷᨭᩥᨷᨲ᩠ᨲᩥ"] = {"ᨸᨭᩥᨷᨲ᩠ᨲᩥ"},
["ᨷᩅᨲ᩠ᨲᩥ"] = {"ᨸᩅᨲ᩠ᨲᩥ"},
["ᨻᩱᩣ᩠ᨿᩉ᩠ᨶ᩶ᩣ"] = {"ᨻᩱᨻᩣ᩠ᨿᩉ᩠ᨶ᩶ᩣ"},
["ᨽᩣ᩠ᨷ"] = {"ᨽᩣ᩠ᨻ"},
["ᨽᩣᩈᩣ"] = {"ภาสา", "ภาษา"},
["ᨾᩢ᩠ᩅ᩵ᩣ"] = {"มักว่า"},
["ᨾᩦᩣ᩠ᨠ"] = {"มีมาก"},
["ᨾᩯ᩠ᨶ᩠ᩅ᩵ᩣ"] = {"แม่นว่า"},
["ᨾᩯ᩠᩵ᨶ᩠ᩅᩣ"] = {"แม่นว่า"},
["ᨾᩯ᩵ᩣ᩠ᨶ"] = {"ᨾᩯ᩵ᨾᩣ᩠ᨶ"},
["ᨿᩨ᩠ᨶᩣ᩠ᩅ"] = {"ᨿᩨ᩠ᨶᨿᩣ᩠ᩅ"},
["ᨿᩥ᩠ᨶᩣ᩠ᨠᨶᩢ᩠ᨠ"] = {"ᨿᩥ᩠ᨶᨿᩣ᩠ᨠᨶᩢ᩠ᨠ"},
["ᩁᩢ"] = {"ᩁᩢ᩠ᨠ"},
["ᩁᩦ"] = {"อันว่า"},
["ᩁᩂ"] = {"ᩁᩨ"},
["ᩁᩂᨡᩣ"] = {"ᩁᩨᨩᩣ"},
["ᩁᩂᨪᩣ"] = {"ᩁᩨᨪᩣ"},
["ᩁᩡᩌᩰᩫᨦ"] = {"ระโฮง"},
["ᩁᩪᨷ"] = {"ᩁᩪᨸ", "ฮูป"},
["ᩁᩪ᩠ᨷ"] = {"ᩁᩪᨸ", "ฮูป"},
["ᩁᩬᨦ"] = {"ᩌᩬᨦ"},
["ᩁᩬ᩶ᩁ"] = {"ᩁᩬ᩶ᩁ", "ฮ้อน"},
["ᩁᩢ᩠ᨷ"] = {"ᩁᩢ᩠ᨷ", "ᩌᩢᨷ"},
["ᩁᩰᩫ᩠ᨦ"] = {"ᩁᩰᩫ᩠ᨦ", "โฮง"},
["ᩃᩣᩴ"] = {"ลำ"},
["ᩃᩣᩴᨯᩢ᩠ᨷ"] = {"ลำᨯᩢ᩠ᨷ"},
["ᩃᩣᩴᨯᩢ᩠ᨷᨲ᩠ᩅᩫᩀᩪᨶᩥᨣᩰᩫ᩠ᨯ"] = {"ลำᨯᩢ᩠ᨷᨲ᩠ᩅᩫᩀᩪᨶᩥᨣᩰᩫ᩠ᨯ"}, -- No word breaks in this title!
["ᩓ᩠ᩅ"] = {"ᩓ᩠᩶ᩅ"},
["ᩓᩯ"] = {"ᩓ"},
["ᩅᩢ᩠ᨯᩤ"] = {"ᩅᩢ᩠ᨯᩅᩤ"},
["ᩈᩈᩈ"] = {"ᩈᩣ᩠ᨾ"},
["᪓᩠ᨴ"] = {"᪓ ᨴᩦ"},
["ᩈ᩠ᩅ᩠᩵ᨶᩣ"] = {"ᩈ᩠ᩅ᩵ᨶᩅ᩵ᩤ"},
["ᩈ᩠ᩅ᩠ᨶ᩵ᩣ"] = {"ᩈ᩠ᩅ᩵ᨶᩅ᩵ᩤ"},
["ᩈᩫ᩠ᨦ᩻ᩣ᩠ᩁ"] = {"ᩈᩫ᩠ᨦᩈᩣ᩠ᩁ"},
["ᩈᩫ᩠ᨦᩣ᩠ᩁ"] = {"ᩈᩫ᩠ᨦᩈᩣ᩠ᩁ"},
["ᩉᩨ᩶ᩣ᩠ᨿ"] = {"ᩉᩨ᩶ᩉᩣ᩠ᨿ"},
["ᩉᩨ᩵ᩁᨩᩓ"] = {"ᨾᩦᩉᩢ᩠᩶ᨶᨩᩡᩓ"},
["ᩌᩰᩫᨦ"] = {"โฮง"},
["ᩐᩣ"] = {"ᩋᩮᩢᩣ"},
["ᩐ᩵ᩣ"] = {"ᩋᩮᩢ᩵ᩣ"},
}
return data