Module:Wp/nod/Translit data

From Wikimedia Incubator

Documentation for this module may be created at Module:Wp/nod/Translit data/doc

-- Data to support transliteration of Northern Thai from the Lanna script

local data = {}

-- Letter class, keyed by Lanna letter.
--   H : high (both Lanna & Siamese)
--   M : middle (both Lanna & Siamese)
--   L : low
--   R : high in Northern Thai, but the letter is middle in Siamese and Lao
--   F : low in Northern Thai, but the sound is written with a middle letter in Siamese
data.class = {
	["ᨠ"] = "R", ["ᨡ"] = "H", ["ᨢ"] = "H", ["ᨣ"] = "F", ["ᨤ"] = "L", ["ᨥ"] = "L", ["ᨦ"] = "L",
	["ᨧ"] = "R", ["ᨨ"] = "H", ["ᨩ"] = "F", ["ᨪ"] = "L", ["ᨫ"] = "L", ["ᨬ"] = "L",
	["ᨭ"] = "R", ["ᨮ"] = "H", ["ᨯ"] = "M", ["ᨰ"] = "L", ["ᨱ"] = "L",
	["ᨲ"] = "R", ["ᨳ"] = "H", ["ᨴ"] = "F", ["ᨵ"] = "L", ["ᨶ"] = "L",
	["ᨷ"] = "M",
	["ᨸ"] = "R", ["ᨹ"] = "H", ["ᨺ"] = "H", ["ᨻ"] = "F", ["ᨼ"] = "L", ["ᨽ"] = "L", ["ᨾ"] = "L",
	["ᨿ"] = "L", ["ᩀ"] = "M", ["ᩁ"] = "L", ["ᩂ"] = "L", ["ᩃ"] = "L", ["ᩄ"] = "L", ["ᩅ"] = "L",
	["ᩆ"] = "H", ["ᩇ"] = "H", ["ᩈ"] = "H", ["ᩉ"] = "H", ["ᩊ"] = "L", ["ᩋ"] = "M", ["ᩌ"] = "L",
	-- ligature letters
	["ᩓ"] = "L", ["ᩔ"] = "H",
	-- independent vowels
	["ᩍ"] = "M", ["ᩎ"] = "M", ["ᩏ"] = "M", ["ᩐ"] = "M", ["ᩑ"] = "M", ["ᩒ"] = "M",
}
local gsub = mw.ustring.gsub
local u = mw.ustring.char
-- Code point U+FFFFE as a string; data.tt2 maps it to the empty string.
data.disruptor = u(0xffffe)

-- Remove mark bearers (ᨠ, ᨣ, ก), which are added for readability.
-- gsub yields both the new string and a substitution count; the outer
-- parentheses keep only the string, so the count cannot leak into a table
-- constructor or argument list when sc() is the last expression there.
local function sc(s)
	return (gsub(s, "[ᨠᨣก]", ""))
end

-- Basic transliteration.  Keeps SAKOT as that is needed for vowel rearrangement.
-- Maps one Lanna character (key) to its Thai-script replacement string (value).
-- Several values start with "↶"; NOTE(review): this looks like a marker for
-- material that the later rearrangement step moves in front of the preceding
-- consonant - confirm against the code that consumes this table.
data.tt2 = {
	-- consonants
	["ᨠ"] = "ก", ["ᨡ"] = "ข", ["ᨢ"] = "ฃ", ["ᨣ"] = "ค", ["ᨤ"] = "ฅ", ["ᨥ"] = "ฆ", ["ᨦ"] = "ง",
	["ᨧ"] = "จ", ["ᨨ"] = "ฉ", ["ᨩ"] = "ช", ["ᨪ"] = "ซ", ["ᨫ"] = "ฌ", ["ᨬ"] = "ญ",
	["ᨭ"] = "ฏ", ["ᨮ"] = "ฐ", ["ᨯ"] = "ด", ["ᨰ"] = "ฒ", ["ᨱ"] = "ณ",
	["ᨲ"] = "ต", ["ᨳ"] = "ถ", ["ᨴ"] = "ท", ["ᨵ"] = "ธ", ["ᨶ"] = "น",
	["ᨷ"] = "บ", ["ᨸ"] = "ป", ["ᨹ"] = "ผ", ["ᨺ"] = "ฝ", ["ᨻ"] = "พ", ["ᨼ"] = "ฟ", ["ᨽ"] = "ภ", ["ᨾ"] = "ม",
	["ᨿ"] = "ย",
	-- Three candidate renderings of ᩀ; only the last one is active.
--	["ᩀ"] = "ย̱", -- Renders badly with a vowel below
--	["ᩀ"] = "อ"..u(0x200D).."ย", -- Needs a special font
	-- Active form: อ + U+200D (zero width joiner) + U+2060 (word joiner) + ย.
	["ᩀ"] = "อ"..u(0x200D)..u(0x2060).."ย", -- Needs a special font
	["ᩁ"] = "ร", ["ᩂ"] = "ฤ", ["ᩃ"] = "ล", ["ᩄ"] = "ฦ", ["ᩅ"] = "ว",
	["ᩆ"] = "ศ", ["ᩇ"] = "ษ", ["ᩈ"] = "ส", ["ᩉ"] = "ห", ["ᩊ"] = "ฬ", ["ᩋ"] = "อ", ["ᩌ"] = "ฮ",
	-- independent vowels
	["ᩍ"] = "อิ", ["ᩎ"] = "อี", ["ᩏ"] = "อุ", ["ᩐ"] = "อู", ["ᩑ"] = "อ↶เ", ["ᩒ"] = "อ↶โ",
	-- medials and miscellaneous
	-- sc() strips the ᨠ mark bearer from its argument, so the stored value is
	-- just SAKOT followed by the Thai letter.
	["ᩓ"] = "ล↶แ", ["ᩔ"] = "สส", ["ᩕ"] = sc("ᨠ᩠ร"), ["ᩖ"] = sc("ᨠ᩠ล"), ["ᩗ"] = "งล", ["ᩘ"] = "ง",
	["᪢"] = "สวัรค์",
	-- dependent vowels and diacritics
	-- SAKOT itself deliberately has no active entry (see the commented line below).
--	["᩠"] = "", -- defer - needed for rearrangement
	["ᩡ"] = "ะ", ["ᩢ"] = "ั", ["ᩣ"] = "า", ["ᩤ"] = "า", -- ignore bindu
	["ᩥ"] = "ิ", ["ᩦ"] = "ี", ["ᩧ"] = "ึ", ["ᩨ"] = "ื", ["ᩩ"] = "ุ", ["ᩪ"] = "ู",
	["ᩫ"] = "", ["ᩬ"] = "อ", ["ᩭ"] = "อย",
	["ᩮ"] = "↶เ", ["ᩯ"] = "↶แ", ["ᩰ"] = "↶โ", ["ᩱ"] = "↶ไ", ["ᩲ"] = "↶ใ",
	["ᩳ"] = "↶เา", ["ᩴ"] = "ํ", ["᩵"] = "่", ["᩶"] = "้",
	["᩺"] = "์", ["᩻"] = " ๆ ", ["᩼"] = "์", ["᩿"] = "ฺ",
	-- numerals
	-- The first series maps to ASCII digits, the second to Thai digits.
	["᪀"] = "0", ["᪁"] = "1", ["᪂"] = "2", ["᪃"] = "3", ["᪄"] = "4",
	["᪅"] = "5", ["᪆"] = "6", ["᪇"] = "7", ["᪈"] = "8", ["᪉"] = "9",
	["᪐"] = "๐", ["᪑"] = "๑", ["᪒"] = "๒", ["᪓"] = "๓", ["᪔"] = "๔",
	["᪕"] = "๕", ["᪖"] = "๖", ["᪗"] = "๗", ["᪘"] = "๘", ["᪙"] = "๙",
	-- punctuation marks
	["ᪧ"] = "ๆ", ["᪨"] = "ฯ", ["᪩"] = "๚", ["᪪"] = "ฯ", ["᪫"] = "๚", ["᪬"] = "๛",
	-- zero-width space (display it if it hides in a word)
	-- [u(0x200B)] = "‼", -- Not appropriate for text!
	-- The disruptor character is dropped from the output.
	[data.disruptor] = "",
}
-- data.tt3 is a shallow copy of data.tt2 with a handful of letters remapped.
data.tt3 = {}
do
	local northern = data.tt3
	for lanna, thai in pairs(data.tt2) do
		northern[lanna] = thai
	end
	-- Overrides for difference between Siamese and Northern Thai
	local overrides = {
		["ᨣ"] = "ก",
		["ᨤ"] = "ค",
		["ᨩ"] = "จ",
		["ᨴ"] = "ต",
		["ᨻ"] = "ป",
	}
	for lanna, thai in pairs(overrides) do
		northern[lanna] = thai
	end
end

-- Special-case names, for use in Unicode codepoint names, of the independent
-- vowels and of the letters ᩓ and ᩔ.
data.oddname = {
    ["ᩍ"] = "อักขระอิ",    ["ᩎ"] = "อักขระอี",    ["ᩏ"] = "อักขระอุ",    ["ᩐ"] = "อักขระอู",
    ["ᩑ"] = "อักขระเอ",   ["ᩒ"] = "อักขระโอ",    ["ᩓ"] = "แล",      ["ᩔ"] = "สะสองห้อง",
    }

-- Transliterations of 'hard' words.  It will generally be better if they are
-- expressed as transliterations of alternative forms.  First form is for
-- transliteration, second form is for 'transcription' where Thai reading rules
-- are (generally) used.
--
-- Each key is a Lanna spelling; each value is a list of one or two strings.
-- A value string is either Thai text or another Lanna spelling (an
-- "alternative form"); entries with a single string give no separate
-- transcription form.
-- NOTE(review): how a missing second form and a Lanna-script value are
-- handled is decided by the code that reads this table - confirm there.
--
-- Many keys differ only in the order of combining marks, so edit them with a
-- tool that shows the individual code points.

-- ko is the shared rendering used by the ᨣᩴ entries below.  Three candidates
-- are listed; only the uncommented assignment takes effect.
local ko -- 3 Ways to do it!
-- ko = "ก็"
ko = "ค็"
-- ko = "คํ่"
data.hard = {
	["ᨡᩮᩬᩢ᩶ᩣ᩠ᨦ"] = {"ᨡᩮᩢ᩶ᩣᨡᩬᨦ"},
	["ᨡᩬ᩶᩻ᨦ"] = {"ᨡᩮᩢ᩶ᩣᨩᩬᨦ"},
	["ᨡ᩠ᨾ᩻ᩮᩁ"] = {"เขมร"},
	["ᨡ᩠ᨾᩮᩁ"] = {"เขมร"},
	["ᨣ᩠ᨳᩤ"] = {"ᨣᩤᨳᩣ"},
	["ᨣᩴ"] = {ko, "ก็"},
	["ᨣᩴ᩵"] = {ko, "ก็"},
	["ᨣᩴ᩠ᨾᩣ"] = {ko.."มา"},
	["ᨣ᩠ᨾᩦ"] = {ko.."มี"},
	["ᨣᩴ᩠ᩅ᩵ᩣ"] = {ko.."ว่า"},
	["ᨣᩴᩝ᩵"] = {ko.."บ่"},
	["ᨣᩝᩴ᩵"] = {ko.."บ่"},
	["ᨣᩬᩁ"] = {"คอน", "กอน"},

	["ᨧᩪᩦ᩶"] = {"ᨧᩮᩢ᩶ᩣ"},
	["ᨧᩢ"] = {"ᨧᩢ᩠ᨠ"},
	["ᨧᩢ᩠ᨾᩣ"] = {"ᨧᩢ᩠ᨠᨾᩣ"},

	["ᨧᩢ᩠ᨾᩦ"] = {"ᨧᩢ᩠ᨠᨾᩦ"},
	
	["ᨧᩢ᩠ᩅ᩵ᩣ"] = {"ᨧᩢ᩠ᨠᩅ᩵ᩤ"},
	
	["ᨧᩥ᩠᩵ᨦᩢ"] = {"ᨧᩥ᩠᩵ᨦᨧᩢ᩠ᨠ"},
	-- NOTE(review): the next key appears to contain an invisible U+034F
	-- (combining grapheme joiner) and otherwise resembles the key after it - confirm.
	["ᨧᩩ᩵͏ᩢᨦ"] = {"ᨧᩩ᩵ᨦᨧᩢ᩠ᨠ"},
	["ᨧᩩᩢ᩵ᨦ"] = {"ᨧᩩ᩵ᨦᨧᩢ᩠ᨠ"},
	["ᨧᩮᩢ᩠᩶ᨡᩣ"] = {"ᨧᩮᩢ᩶ᩣᨡ᩶ᩣ"},
	["ᨧᩮᩢ᩠ᨡ᩶ᩣ"] = {"ᨧᩮᩢ᩶ᩣᨡ᩶ᩣ"},
	["ᨧ"] = {"ᨧᩡ"},
	["ᨧᩁᩂ"] = {"จรือ"},
	["ᨧᩣᩁᩂ"] = {"ᨧᩣᩁᩨ"},
	["ᨩᩨ᩠᩵ᨶ᩠ᨾ"] = {"ᨩᩨ᩠᩵ᨶᨩᩫ᩠ᨾ"},

	["ᨩᩨ᩠ᩅ᩵ᩣ"] = {"ᨩᩨ᩵ᩅ᩵ᩤ"},

	["ᨩᩨ᩠᩵ᩅᩣ"] = {"ᨩᩨ᩵ᩅ᩵ᩤ"},

	["ᨯ᩠ᨦ᩠ᨶᩦ᩶"] = {"ดั่งนี้"},
	
	["ᨯ᩠ᩃᩦ"] = {"ᨯᩦᩉᩖᩦ"},
	["ᨯᩦ᩠ᩃ"] = {"ᨯᩦᩉᩖᩦ"},
	["ᨲᩬᩁ"] = {"ตอน", "ต๋อน"},
	["ᨲᩦᩣ᩠ᨿ"] = {"ᨲᩦᨲᩣ᩠ᨿ"},
	["ᨲᩮᩪᩦ᩵ᩣ᩠ᨿ"] = {"ᨲᩪᨲᩦᨲᩮᩢ᩵ᩣᨲᩣ᩠ᨿ"},
	["ᨲᩯ᩠ᨶᩬ᩵"] = {"ᨲᩯ᩠ᨶᨲᩬᩴ᩵"},
	["ᨲᩬ᩵ᩯ᩠ᨶ"] = {"ᨲᩬᩴ᩵ᨲᩯ᩠ᨶ"},
	-- NOTE(review): the next key appears to be the previous one plus an
	-- invisible U+034F (combining grapheme joiner) - confirm.
	["ᨲᩬ᩵͏ᩯ᩠ᨶ"] = {"ᨲᩬᩴ᩵ᨲᩯ᩠ᨶ"},
	["ᨲᩦ᩠ᨿᩁ"] = {"ᨲᩦᨲ᩠ᨿᨶ"},
	["ᨳᩪᩢᨲᩬᨦ"] = {"ᨳᩪᨠᨲᩬ᩶ᨦ"},
	["ᨴᩘ᩠ᩃᩣ᩠ᨿ"] = {"ᨴᩢ᩠ᨦᩉᩖᩣ᩠ᨿ"},
	["ᨴᩢ᩠ᩃᩣ᩠ᨿ"] = {"ᨴᩢ᩠ᨦᩉᩖᩣ᩠ᨿ"},
	["ᨴᩢ᩠ᩃᩣ"] = {"ᨴᩢ᩠ᨦᩉᩖᩣ᩠ᨿ"},
	["ᨴᩢᩗᩣ"] = {"ᨴᩢ᩠ᨦᩉᩖᩣ᩠ᨿ"},
	["ᨴᩯ᩠᩶ᩃ"] = {"ᨴᩯ᩶ᩃᩯ"},
	["ᨴ᩠᩶ᩃᩯ"] = {"ᨴᩯ᩶ᩃᩯ"},
	["ᨴᩱ᩠ᨿ"] = {"ไทย", "ไทย"},
	
	["ᨷᩴ"] = {"บ่"},
	["ᨷᩴ᩵"] = {"บ่"},
	["ᨷ᩠ᨾᩦ"] = {"ᨷ᩵ᨾᩦ"},
	["ᨷ᩠᩵ᨾᩦ"] = {"ᨷ᩵ᨾᩦ"},
	["ᨷ᩠ᨯᩦ"] = {"ᨷ᩵ᨯᩦ"},

	["ᨷᩕᩮᩢᩣᩅ᩺ᨪᩮᩬᩥᩁ᩺"] = {"เบราว์เซอร์"},

	-- Words written with ᨷ whose alternative form uses ᨸ (or Thai ป).
	["ᨷᨠᨲᩥ"] = {"ᨸᨠᨲᩥ"},
	["ᨷᨠ᩠ᨠᨲᩥ"] = {"ᨸᨠ᩠ᨠᨲᩥ"},
	["ᨷᨠ᩠ᨠᨲ᩠ᨲᩥ"] = {"ᨸᨠ᩠ᨠᨲ᩠ᨲᩥ"},
	["ᨷᨧ᩠ᨧᩩᨷ᩠ᨷᨶ᩠ᨶ"] = {"ปัจจุบบันนะ", "ปัจจุบันนะ"},
	["ᨷᨧ᩠ᨧᩩᨷ᩠ᨷᨶ᩠ᨶ᩺"] = {"ปัจจุบบันน์", "ปัจจุบัน"},
	["ᨷᨧ᩠ᨧᩩᨷᨶ᩠ᨶ"] = {"ปัจจุบันนะ"},
	["ᨷᨧ᩠ᨧᩩᨷᨶ᩠ᨶ᩺"] = {"ปัจจุบันน์", "ปัจจุบัน"},
	["ᨷᨬ᩠ᩉᩣ"] = {"ᨸᨬ᩠ᩉᩣ"},
	["ᨷᨭᩥᨷᩢ᩠ᨲ"] = {"ᨸᨭᩥᨷᩢ᩠ᨲ"},
	["ᨷᨭᩥᨷᨲ᩠ᨲᩥ"] = {"ᨸᨭᩥᨷᨲ᩠ᨲᩥ"},
	["ᨷᩅᨲ᩠ᨲᩥ"] = {"ᨸᩅᨲ᩠ᨲᩥ"},

	["ᨻᩱᩣ᩠ᨿᩉ᩠ᨶ᩶ᩣ"] = {"ᨻᩱᨻᩣ᩠ᨿᩉ᩠ᨶ᩶ᩣ"},

	["ᨽᩣ᩠ᨷ"] = {"ᨽᩣ᩠ᨻ"},
	["ᨽᩣᩈᩣ"] = {"ภาสา", "ภาษา"},

	["ᨾᩢ᩠ᩅ᩵ᩣ"] = {"มักว่า"},
	["ᨾᩦᩣ᩠ᨠ"] = {"มีมาก"},
	["ᨾᩯ᩠ᨶ᩠ᩅ᩵ᩣ"] = {"แม่นว่า"},
	["ᨾᩯ᩠᩵ᨶ᩠ᩅᩣ"] = {"แม่นว่า"},

	["ᨾᩯ᩵ᩣ᩠ᨶ"] = {"ᨾᩯ᩵ᨾᩣ᩠ᨶ"},

	["ᨿᩨ᩠ᨶᩣ᩠ᩅ"] = {"ᨿᩨ᩠ᨶᨿᩣ᩠ᩅ"},
	["ᨿᩥ᩠ᨶᩣ᩠ᨠᨶᩢ᩠ᨠ"] = {"ᨿᩥ᩠ᨶᨿᩣ᩠ᨠᨶᩢ᩠ᨠ"},
	["ᩁᩢ"] = {"ᩁᩢ᩠ᨠ"},
	["ᩁᩦ"] = {"อันว่า"},
	["ᩁᩂ"] = {"ᩁᩨ"},
	["ᩁᩂᨡᩣ"] = {"ᩁᩨᨩᩣ"},
	["ᩁᩂᨪᩣ"] = {"ᩁᩨᨪᩣ"},
	["ᩁᩡᩌᩰᩫᨦ"] = {"ระโฮง"},
	["ᩁᩪᨷ"] = {"ᩁᩪᨸ", "ฮูป"},
	["ᩁᩪ᩠ᨷ"] = {"ᩁᩪᨸ", "ฮูป"},
	["ᩁᩬᨦ"] = {"ᩌᩬᨦ"},
	["ᩁᩬ᩶ᩁ"] = {"ᩁᩬ᩶ᩁ", "ฮ้อน"},
	["ᩁᩢ᩠ᨷ"] = {"ᩁᩢ᩠ᨷ", "ᩌᩢᨷ"},
	["ᩁᩰᩫ᩠ᨦ"] = {"ᩁᩰᩫ᩠ᨦ", "โฮง"},

	["ᩃᩣᩴ"] = {"ลำ"},
	["ᩃᩣᩴᨯᩢ᩠ᨷ"] = {"ลำᨯᩢ᩠ᨷ"},
	["ᩃᩣᩴᨯᩢ᩠ᨷᨲ᩠ᩅᩫᩀᩪᨶᩥᨣᩰᩫ᩠ᨯ"] = {"ลำᨯᩢ᩠ᨷᨲ᩠ᩅᩫᩀᩪᨶᩥᨣᩰᩫ᩠ᨯ"}, -- No word breaks in this title!
	["ᩓ᩠ᩅ"] = {"ᩓ᩠᩶ᩅ"},
	["ᩓᩯ"] = {"ᩓ"},
	["ᩅᩢ᩠ᨯᩤ"] = {"ᩅᩢ᩠ᨯᩅᩤ"},
	["ᩈᩈᩈ"] = {"ᩈᩣ᩠ᨾ"},
	["᪓᩠ᨴ"] = {"᪓ ᨴᩦ"},
	["ᩈ᩠ᩅ᩠᩵ᨶᩣ"] = {"ᩈ᩠ᩅ᩵ᨶᩅ᩵ᩤ"},
	["ᩈ᩠ᩅ᩠ᨶ᩵ᩣ"] = {"ᩈ᩠ᩅ᩵ᨶᩅ᩵ᩤ"},
	
	["ᩈᩫ᩠ᨦ᩻ᩣ᩠ᩁ"] = {"ᩈᩫ᩠ᨦᩈᩣ᩠ᩁ"},
	["ᩈᩫ᩠ᨦᩣ᩠ᩁ"] = {"ᩈᩫ᩠ᨦᩈᩣ᩠ᩁ"},

	["ᩉᩨ᩶ᩣ᩠ᨿ"] = {"ᩉᩨ᩶ᩉᩣ᩠ᨿ"},
	["ᩉᩨ᩵ᩁᨩᩓ"] = {"ᨾᩦᩉᩢ᩠᩶ᨶᨩᩡᩓ"},
	["ᩌᩰᩫᨦ"] = {"โฮง"},
	["ᩐᩣ"] = {"ᩋᩮᩢᩣ"},
	["ᩐ᩵ᩣ"] = {"ᩋᩮᩢ᩵ᩣ"},
}

-- Hand the populated table (class, disruptor, tt2, tt3, oddname, hard) to whoever loads this module.
return data