-- Module:Wt/sco/script utilities
--
-- From Wikimedia Incubator
--
-- Documentation for this module may be created at Module:Wt/sco/script utilities/doc

local export = {}

local m_data = mw.loadData("Module:Wt/sco/script utilities/data")
--[=[
Other modules used:
	[[Module:Wt/sco/scripts]]
	[[Module:Wt/sco/languages]]
	[[Module:Wt/sco/parameters]]
	[[Module:Wt/sco/utilities]]
	[[Module:Wt/sco/debug]]
]=]

-- Report whether a script object stands for a Latin-based script.
-- A script counts as Latin when its code contains "Latn" or is exactly "Latinx".
-- @param sc  script object exposing a getCode() method
-- @return    a truthy value (the position where "Latn" was found, or true)
--            for Latin scripts, false otherwise
function export.is_Latin_script(sc)
	local code = sc:getCode()
	local latn_pos = code:find("Latn")
	if latn_pos then
		return latn_pos
	end
	return code == "Latinx"
end

-- Used by [[Template:lang]]
-- Template entry point: reads the parent frame's arguments and wraps the
-- given text in language/script markup via export.tag_text.
--   1      language code (may be omitted in the Template namespace, where "und" is used)
--   2      text to tag (may be empty; defaults to "")
--   sc     optional script code
--   face   optional script face
--   class  optional extra CSS class
-- Raises an error when the language code is missing outside the Template
-- namespace, or when the script code is not recognised.
function export.lang_t(frame)
	-- Keep the parameter spec local so it does not leak into the global table.
	local params = {
		[1] = {},
		[2] = { allow_empty = true, default = "" },
		["sc"] = {},
		["face"] = {},
		["class"] = {},
	}
	
	local args = require("Module:Wt/sco/parameters").process(frame:getParent().args, params)
	local NAMESPACE = mw.title.getCurrentTitle().nsText
	
	local lang = args[1] or (NAMESPACE == "Template" and "und") or error("Language code has not been specified. Please pass parameter 1 to the template.")
	lang = require("Module:Wt/sco/languages").getByCode(lang) or require("Module:Wt/sco/languages").err(lang, 1)
	
	local text = args[2]
	
	local sc = args["sc"]
	sc = (sc and (require("Module:Wt/sco/scripts").getByCode(sc) or error("The script code \"" .. sc .. "\" is not valid.")) or nil)
	
	local face = args["face"]
	-- The "class" parameter is declared above; read it so it actually reaches tag_text.
	local class = args["class"]
	
	return export.tag_text(text, lang, sc, face, class)
end

-- Record the tracking entry "script/<tracking>" when `text` contains a match
-- for `character`. `character` is handed to mw.ustring.find as a pattern;
-- nothing is tracked when it is nil or false.
local function trackChar(text, character, tracking)
	if not character then
		return
	end
	if not mw.ustring.find(text, character) then
		return
	end
	require("Module:Wt/sco/debug").track("script/" .. tracking)
end

-- Run language-specific tracking checks on `text`.
-- Does nothing unless both `lang` and `text` are given. `sc` is accepted but
-- not consulted by any of the checks below.
local function track(text, lang, sc)
	if not (lang and text) then
		return
	end
	
	local to_char = mw.ustring.char
	local code = lang:getCode()
	
	if code == "ang" then
		-- [[Special:WhatLinksHere/Template:tracking/script/ang/acute]]
		-- Decompose first so that precomposed letters expose the combining acute.
		trackChar(mw.ustring.toNFD(text), to_char(0x301), "ang/acute")
	elseif code == "el" or code == "grc" then
		--[=[
		[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-phi]]
		[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-theta]]
		[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-kappa]]
		[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-rho]]
		]=]
		trackChar(text, "ϑ", "Greek/wrong-theta")
		trackChar(text, "ϰ", "Greek/wrong-kappa")
		trackChar(text, "ϱ", "Greek/wrong-rho")
		trackChar(text, "ϕ", "Greek/wrong-phi")
	elseif code == "ru" then
		-- [[Special:WhatLinksHere/Template:tracking/script/Russian/grave-accent]]
		-- Decompose first so that precomposed letters expose the combining grave.
		trackChar(mw.ustring.toNFD(text), to_char(0x300), "Russian/grave-accent")
	elseif code == "bo" then
		-- [[Special:WhatLinksHere/Template:tracking/script/Tibetan/trailing-tsheg]]
		-- Checked at the very end of the text, and just before a closing "]]" at the end.
		trackChar(text, "་$", "Tibetan/trailing-tsheg")
		trackChar(text, "་%]%]$", "Tibetan/trailing-tsheg")
	end
end

-- Delimiters placed around the numeric index of a protected span. Control
-- characters are used so that a placeholder cannot be confused with ordinary
-- text such as "$1", nor run together with digits that follow it.
local PLACEHOLDER_OPEN = "\1"
local PLACEHOLDER_CLOSE = "\2"

-- Replace every run of spaces in `text` with "<br>", except inside the targets
-- of piped wikilinks and inside HTML tags, whose spaces must be left alone.
local function break_vertical_text(text)
	local escaped = {}
	
	-- Build a gsub callback that stores the span it receives and returns a
	-- numbered placeholder. `prefix` re-emits the part of the match that lies
	-- outside the capture (the "[[" of a wikilink).
	local function stash(prefix)
		return function(span)
			local index = #escaped + 1
			escaped[index] = span
			return prefix .. PLACEHOLDER_OPEN .. index .. PLACEHOLDER_CLOSE
		end
	end
	
	-- Protect the spans in a single pass each. Using a callback (rather than
	-- feeding the matched text back into gsub as a pattern) keeps characters
	-- such as "-", "." or "(" in a link target from being read as pattern magic.
	text = mw.ustring.gsub(text, "%[%[([^|]+|)", stash("[["))
	text = mw.ustring.gsub(text, "<[^>]+>", stash(""))
	
	text = mw.ustring.gsub(text, " +", "<br>")
	
	-- Unescape whatever was escaped. The index may have any number of digits.
	text = mw.ustring.gsub(
		text,
		PLACEHOLDER_OPEN .. "(%d+)" .. PLACEHOLDER_CLOSE,
		function(index)
			return escaped[tonumber(index)]
		end
	)
	
	return text
end

-- Wrap text in the appropriate HTML tags with language and script class.
--   text   the text to wrap
--   lang   language object (optional; adds a lang="" attribute and is passed to tracking)
--   sc     script object; when absent it is looked up with findBestScript(text, lang)
--   face   key into the "faces" table of the data module (nil selects the "nil" entry)
--   class  optional extra CSS class appended after the script and face classes
--   id     optional; when given, an id="" attribute is produced via make_id(lang, id)
-- Returns the HTML string. Raises an error when `face` has no entry in the data module.
function export.tag_text(text, lang, sc, face, class, id)
	if not sc then
		sc = require("Module:Wt/sco/scripts").findBestScript(text, lang)
	end
	
	track(text, lang, sc)
	
	-- Replace space characters with line breaks in text whose script runs
	-- top-to-bottom (e.g. Mongolian script).
	if sc and sc:getDirection() == "down" then
		text = break_vertical_text(text)
	end
	
	-- The "Imag" script ignores any requested face.
	if sc:getCode() == "Imag" then
		face = nil
	end
	
	-- Build the class attribute: script code first, then the face's classes,
	-- then the caller-supplied class if it is non-empty.
	local function class_attr(classes)
		table.insert(classes, 1, sc:getCode())
		if class and class ~= '' then
			table.insert(classes, class)
		end
		return 'class="' .. table.concat(classes, ' ') .. '"'
	end
	
	-- Build the full attribute list (id, class, lang) for the wrapper tag.
	local function tag_attr(...)
		local output = {}
		if id then
			table.insert(output, 'id="' .. require("Module:Wt/sco/utilities").make_id(lang, id) .. '"')
		end
		
		table.insert(output, class_attr({...}) )
		
		if lang then
			table.insert(output, 'lang="' .. lang:getCode() .. '"')
		end
		
		return table.concat(output, " ")
	end
	
	if face == "hypothetical" then
	-- [[Special:WhatLinksHere/Template:Wt/sco/tracking/script-utilities/face/hypothetical]]
		require("Module:Wt/sco/debug").track("script-utilities/face/hypothetical")
	end
	
	local data = m_data.faces[face or "nil"]
	
	-- Translations in right-to-left scripts are followed by a left-to-right mark.
	local post = ""
	if face == "translation" and sc:getDirection() == "rtl" then
		post = "&lrm;"
	end
	
	-- Add a script wrapper
	if data then
		return ( data.prefix or "" ) .. '<' .. data.tag .. ' ' .. tag_attr(data.class) .. '>' .. text .. '</' .. data.tag .. '>' .. post
	else
		-- tostring() keeps the message intact even when face is nil.
		error('Invalid script face "' .. tostring(face) .. '".')
	end
end

-- Wrap a transliteration in an HTML tag carrying language and class attributes.
--   translit    the transliterated text
--   lang        language code string, or a language object with a getCode() method
--   kind        key into the "translit" table of the data module (defaults to "default")
--   attributes  optional string of extra attributes appended to the opening tag
-- Returns the HTML string. Raises an error when `lang` is a table without
-- getCode, or when `kind` has no entry in the data module.
function export.tag_translit(translit, lang, kind, attributes)
	if type(lang) == "table" then
		lang = lang.getCode and lang:getCode()
			or error("Second argument to tag_translit should be a language code or language object.")
	end
	
	local data = m_data.translit[kind or "default"]
	if not data then
		error('Invalid transliteration kind "' .. tostring(kind) .. '".')
	end
	
	local opening_tag = {}
	
	table.insert(opening_tag, data.tag)
	if lang == "ja" then
		-- Japanese gets neither a lang attribute nor the Latn class.
		table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'tr"')
	else
		table.insert(opening_tag, 'lang="' .. lang .. '-Latn"')
		table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'tr Latn"')
	end
	
	if data.dir then
		table.insert(opening_tag, 'dir="' .. data.dir .. '"')
	end
	
	if attributes then
		table.insert(opening_tag, attributes)
	end
	
	return "<" .. table.concat(opening_tag, " ") .. ">" .. translit .. "</" .. data.tag .. ">"
end

-- Build a notice (plus a maintenance category outside the Template namespace)
-- asking for the native-script form of a word.
--   lang  language object; must provide getScripts()
--   sc    optional script object
-- Returns "" when no request is needed: the script in use is Latin, or the
-- script is unknown and every script listed for the language is Latin.
function export.request_script(lang, sc)
	local scripts = lang.getScripts and lang:getScripts()
	if not scripts then
		error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.')
	end
	
	-- Without an explicit script, a language with exactly one script uses that one.
	if not sc and #scripts == 1 then
		sc = scripts[1]
	end
	
	-- Defaults: ask for the "native" script and display the generic word "script".
	local cat_script, disp_script = "native", "script"
	
	if sc then
		-- A known Latin script never needs a request.
		if export.is_Latin_script(sc) then
			return ""
		end
		
		-- Name the script in the notice when it is not the language's first script.
		if sc:getCode() ~= scripts[1]:getCode() then
			disp_script = sc:getCanonicalName()
		end
		
		-- The category names the script only where "native" would be ambiguous:
		-- for lang=und, or when the language has more than one script.
		if lang:getCode() == "und" or scripts[2] then
			cat_script = sc:getCanonicalName()
		end
	else
		-- Script unknown: a request only makes sense if the language lists at
		-- least one non-Latin script.
		local all_latin = true
		for _, candidate in ipairs(scripts) do
			if not export.is_Latin_script(candidate) then
				all_latin = false
				break
			end
		end
		
		if all_latin then
			return ""
		end
	end
	
	local notice = "<small>[" .. disp_script .. " needed]</small>"
	
	-- Template pages show the notice but are not categorised.
	if mw.title.getCurrentTitle().nsText == "Template" then
		return notice
	end
	
	return notice .. "[[Category:Wt/sco/" .. lang:getCanonicalName() .. " terms needin " .. cat_script .. " script]]"
end

-- Template entry point for script requests. Reads the language code from
-- frame.args[1] and an optional script code from frame.args.sc (an empty
-- value is treated as absent), then returns the output of export.request_script.
-- Raises an error when the language code is missing or invalid, when the
-- script code is invalid, or when request_script reports that no request is needed.
function export.template_rfscript(frame)
	local args = frame.args
	
	local lang_code = args[1] or error("The first parameter (language code) has not been given")
	
	local sc_code = args["sc"]
	if sc_code == "" then
		sc_code = nil
	end
	
	local lang = require("Module:Wt/sco/languages").getByCode(lang_code)
	if not lang then
		error("The language code \"" .. lang_code .. "\" is not valid.")
	end
	
	local sc = nil
	if sc_code then
		sc = require("Module:Wt/sco/scripts").getByCode(sc_code)
		if not sc then
			error("The script code \"" .. sc_code .. "\" is not valid.")
		end
	end
	
	local notice = export.request_script(lang, sc)
	if notice == "" then
		error("This language is written in the Latin alphabet. It does not need a native script.")
	end
	
	return notice
end

-- Verify that every letter in `text` belongs to the script with code `scriptCode`.
--   text        the text to check
--   scriptCode  code of the script the letters must belong to
--   result      optional; when it is a string it is used as the error message
-- Returns nothing on success. Raises an error when the script code is unknown
-- or when letters outside the script remain.
function export.checkScript(text, scriptCode, result)
	local script = require("Module:Wt/sco/scripts").getByCode(scriptCode)
	if not script then
		error('The script code "' .. scriptCode .. '" is not recognized.')
	end
	
	-- Keep only letters, then drop every character that belongs to the script;
	-- anything left over is a letter from some other script.
	local letters = mw.ustring.gsub(text, "[%A]", "")
	local leftovers = mw.ustring.gsub(letters, "[" .. script:getCharacters() .. "]", "")
	
	if leftovers == "" then
		return
	end
	
	if type(result) == "string" then
		error(result)
	end
	
	-- Level 2 attributes the error to the caller of checkScript.
	error('The text "' .. text .. '" contains the letters "' .. leftovers .. '" that do not belong to the ' .. script:getCategoryName() .. '.', 2)
end

return export