248
edits
m (1 revision imported) |
m (1 revision imported) |
||
| (One intermediate revision by one other user not shown) | |||
| Line 1: | Line 1: | ||
local yesNo = require("Module:Yesno") | local yesNo = require("Module:Yesno") | ||
local Text = { serial = " | local Text = { serial = "2024-09-21", | ||
suite = "Text" } | suite = "Text" } | ||
--[=[ | --[=[ | ||
Text utilities | Text utilities | ||
]=] | ]=] | ||
local function fiatQuote( apply, alien, advance ) | local function fiatQuote( apply, alien, advance ) | ||
| Line 127: | Line 16: | ||
advance = tonumber(advance) or 0 | advance = tonumber(advance) or 0 | ||
local suite | local suite | ||
local data = mw.loadData('Module:Text/data') | |||
local QuoteLang = data.QuoteLang | |||
local QuoteType = data.QuoteType | |||
local slang = alien:match( "^(%l+)-" ) | local slang = alien:match( "^(%l+)-" ) | ||
suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"] | suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"] | ||
| Line 230: | Line 121: | ||
-- Returns: true, if CJK detected | -- Returns: true, if CJK detected | ||
s = s and tostring(s) or "" | s = s and tostring(s) or "" | ||
local patternCJK = mw.loadData('Module:Text/data').PatternCJK | |||
return mw.ustring.find( s, patternCJK ) ~= nil | return mw.ustring.find( s, patternCJK ) ~= nil | ||
end -- Text.containsCJK() | end -- Text.containsCJK() | ||
| Line 294: | Line 174: | ||
-- Returns: true, if valid for latin only | -- Returns: true, if valid for latin only | ||
s = s and tostring(s) or "" --- ensure input is always string | s = s and tostring(s) or "" --- ensure input is always string | ||
local PatternLatin = mw.loadData('Module:Text/data').PatternLatin | |||
return mw.ustring.match(s, PatternLatin) ~= nil | return mw.ustring.match(s, PatternLatin) ~= nil | ||
end -- Text.isLatinRange() | end -- Text.isLatinRange() | ||
| Line 309: | Line 189: | ||
return false | return false | ||
end | end | ||
local SeekQuote = mw.loadData('Module:Text/data').SeekQuote | |||
return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil | return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil | ||
end -- Text.isQuote() | end -- Text.isQuote() | ||
| Line 398: | Line 261: | ||
-- or basic greek or cyrillic or symbols etc. | -- or basic greek or cyrillic or symbols etc. | ||
local cleanup, decomposed | local cleanup, decomposed | ||
local PatternCombined = mw.loadData('Module:Text/data').PatternCombined | |||
decomposed = mw.ustring.toNFD( adjust and tostring(adjust) or "" ) | decomposed = mw.ustring.toNFD( adjust and tostring(adjust) or "" ) | ||
cleanup = mw.ustring.gsub( decomposed, PatternCombined, "" ) | cleanup = mw.ustring.gsub( decomposed, PatternCombined, "" ) | ||
| Line 420: | Line 276: | ||
-- Returns: true, if sentence terminated | -- Returns: true, if sentence terminated | ||
local r | local r | ||
local PatternTerminated = mw.loadData('Module:Text/data').PatternTerminated | |||
if mw.ustring.find( analyse, PatternTerminated ) then | if mw.ustring.find( analyse, PatternTerminated ) then | ||
r = true | r = true | ||
| Line 476: | Line 325: | ||
-- Returns: string with non-latin parts enclosed in <span> | -- Returns: string with non-latin parts enclosed in <span> | ||
local r | local r | ||
local data = mw.loadData('Module:Text/data') | |||
local PatternLatin = data.PatternLatin | |||
local RangesLatin = data.RangesLatin | |||
local NumLatinRanges = data.NumLatinRanges | |||
if mw.ustring.match( adjust, PatternLatin ) then | if mw.ustring.match( adjust, PatternLatin ) then | ||
-- latin only, horizontal dashes, quotes | -- latin only, horizontal dashes, quotes | ||
| Line 490: | Line 342: | ||
-- isLatin | -- isLatin | ||
local range | local range | ||
for i = 1, | -- NumLatinRanges has to be precomputed because # does not work from loadData | ||
for i = 1, NumLatinRanges do | |||
range = RangesLatin[ i ] | range = RangesLatin[ i ] | ||
if a >= range[ 1 ] and a <= range[ 2 ] then | if a >= range[ 1 ] and a <= range[ 2 ] then | ||
| Line 569: | Line 422: | ||
local r | local r | ||
if about == "quote" then | if about == "quote" then | ||
data = mw.loadData('Module:Text/data') | |||
r = { } | r = { } | ||
r.QuoteLang = QuoteLang | r.QuoteLang = data.QuoteLang | ||
r.QuoteType = QuoteType | r.QuoteType = data.QuoteType | ||
end | end | ||
return r | return r | ||
end -- Text.test() | end -- Text.test() | ||
-- Non Unicode-aware versions of mw.text.split and mw.text.gsplit,
-- based on [[phab:diffusion/ELUA/browse/master/includes/Engines/LuaCommon/lualib/mw.text.lua]]
-- These run up to 60 times faster than the Unicode-aware versions



Text.split = function ( text, pattern, plain )
    -- Split a string into a table of pieces (byte-oriented)
    -- Precondition:
    --     text     -- string to be split
    --     pattern  -- separator, handed unchanged to Text.gsplit()
    --     plain    -- handed unchanged to Text.gsplit()
    -- Returns: sequence table with every piece delivered by Text.gsplit(),
    --          in order of appearance
    local ret = {}
    for m in Text.gsplit( text, pattern, plain ) do
        ret[#ret+1] = m
    end
    return ret
end -- Text.split()
Text.gsplit = function ( text, pattern, plain )
    -- Iterate over the pieces of a string separated by a pattern
    -- Works on bytes (string.find / string.sub), not on Unicode characters
    -- Precondition:
    --     text     -- string to be split
    --     pattern  -- separator, used as 2nd argument of string.find()
    --     plain    -- used as 4th argument of string.find()
    -- Returns: iterator function; each call returns the next piece,
    --          and nothing once the text is exhausted
    local s, l = 1, string.len( text )
    return function ()
        if s then
            local e, n = string.find( text, pattern, s, plain )
            local ret
            if not e then
                -- no further separator: the remainder is the last piece
                ret = string.sub( text, s )
                s = nil
            elseif n < e then
                -- Empty separator!
                -- the match has zero length: deliver text up to and
                -- including position e, then step on or stop at the end
                ret = string.sub( text, s, e )
                if e < l then
                    s = e + 1
                else
                    s = nil
                end
            else
                -- deliver what precedes the separator; empty string if
                -- the separator starts right at the current position
                ret = e > s and string.sub( text, s, e - 1 ) or ''
                s = n + 1
            end
            return ret
        end
    end, nil, nil
end -- Text.gsplit()
-- Export | -- Export | ||
| Line 756: | Line 644: | ||
end | end | ||
function p.split(frame)
    -- Template access: split a string and return one single piece
    -- Precondition:
    --     frame.args.text    or frame.args[1]  -- string to split; default ''
    --     frame.args.pattern or frame.args[2]  -- separator; default ''
    --     frame.args.plain   or frame.args[3]  -- evaluated by yesNo(), then
    --                                             handed to Text.split()
    --     frame.args.index   or frame.args[4]  -- number of requested piece;
    --                                             default 1; negative values
    --                                             count from the end
    -- Returns: the requested piece, or nil if no piece has that index
    local text = frame.args.text or frame.args[1] or ''
    local pattern = frame.args.pattern or frame.args[2] or ''
    local plain = yesNo(frame.args.plain or frame.args[3])
    local index = tonumber(frame.args.index) or tonumber(frame.args[4]) or 1
    local a = Text.split(text, pattern, plain)
    -- map -1 to the last piece, -2 to the one before, and so on
    if index < 0 then index = #a + index + 1 end
    return a[index]
end
| Line 761: | Line 659: | ||
return Text.serial | return Text.serial | ||
end | end | ||