Module:Text: Difference between revisions

From Good Old TV Fan Wiki
m (1 revision imported)
GOTV>Hike395
(update date)
Line 1: Line 1:
local Text = { serial = "2017-11-01",
local yesNo = require("Module:Yesno")
local Text = { serial = "2022-07-21",
               suite  = "Text" }
               suite  = "Text" }
--[=[
--[=[
Line 17: Line 18:
local SeekQuote        = false
local SeekQuote        = false


local function initLatinData()
    if not RangesLatin then
        RangesLatin = { {    7,  687 },
                        { 7531, 7578 },
                        { 7680, 7935 },
                        { 8194, 8250 } }
    end
    if not PatternLatin then
        local range
        PatternLatin = "^["
        for i = 1, #RangesLatin do
            range = RangesLatin[ i ]
            PatternLatin = PatternLatin ..
                          mw.ustring.char( range[ 1 ], 45, range[ 2 ] )
        end    -- for i
        PatternLatin = PatternLatin .. "]*$"
    end
end


 
local function initQuoteData()
local function factoryQuote()
     -- Create quote definitions
     -- Create quote definitions
     QuoteLang = { af        = "bd",
     if not QuoteLang then
    QuoteLang =  
            { af        = "bd",
                   ar        = "la",
                   ar        = "la",
                   be        = "labd",
                   be        = "labd",
Line 76: Line 96:
                   ["zh-tw"] = "x300C",
                   ["zh-tw"] = "x300C",
                   ["zh-cn"] = "ld" }
                   ["zh-cn"] = "ld" }
     QuoteType = { bd    = { { 8222, 8220 },  { 8218, 8217 } },
     end
    if not QuoteType then
    QuoteType =  
            { bd    = { { 8222, 8220 },  { 8218, 8217 } },
                   bdla  = { { 8222, 8220 },  {  171,  187 } },
                   bdla  = { { 8222, 8220 },  {  171,  187 } },
                   bx    = { { 8222, 8221 },  { 8218, 8217 } },
                   bx    = { { 8222, 8221 },  { 8218, 8217 } },
Line 89: Line 112:
                   x300C = { { 0x300C, 0x300D },
                   x300C = { { 0x300C, 0x300D },
                             { 0x300E, 0x300F } } }
                             { 0x300E, 0x300F } } }
     return r
     end
end -- factoryQuote()
end -- initQuoteData()




Line 100: Line 123:
     --    alien    -- string, with language code
     --    alien    -- string, with language code
     --    advance  -- number, with level 1 or 2
     --    advance  -- number, with level 1 or 2
     local r = apply
     local r = apply and tostring(apply) or ""
    alien = alien or "en"
    advance = tonumber(advance) or 0
     local suite
     local suite
     if not QuoteLang then
     initQuoteData()
        factoryQuote()
     local slang = alien:match( "^(%l+)-" )
     end
    suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"]
    suite = QuoteLang[ alien ]
    if not suite then
        local slang = alien:match( "^(%l+)-" )
        if slang then
            suite = QuoteLang[ slang ]
        end
        if not suite then
            suite = QuoteLang[ "en" ]
        end
    end
     if suite then
     if suite then
         local quotes = QuoteType[ suite ]
         local quotes = QuoteType[ suite ]
Line 149: Line 164:
     --    accept  -- true, if no error messages to be appended
     --    accept  -- true, if no error messages to be appended
     -- Returns: string
     -- Returns: string
     local r
     local r = ""
     if type( apply ) == "table" then
     apply = type(apply) == "table" and apply or {}
        local bad  = { }
    again = math.floor(tonumber(again) or 1)
        local codes = { }
    if again < 1 then
        local s
    return ""
        for k, v in pairs( apply ) do
    end
            s = type( v )
    local bad  = { }
            if s == "number" then
    local codes = { }
                if v < 32 and v ~= 9 and v ~= 10 then
    for _, v in ipairs( apply ) do
                    v = tostring( v )
    local n = tonumber(v)
                else
    if not n or (n < 32 and n ~= 9 and n ~= 10) then
                    v = math.floor( v )
    table.insert(bad, tostring(v))
                    s = false
    else
                end
    table.insert(codes, math.floor(n))
            elseif s ~= "string" then
end
                v = tostring( v )
    end  
            end
    if #bad > 0 then
            if s then
    if not accept then
                table.insert( bad, v )
    r = tostring(  mw.html.create( "span" )
            else
                    :addClass( "error" )
                table.insert( codes, v )
                    :wikitext( "bad codepoints: " .. table.concat( bad, " " )) )
            end
    end
        end -- for k, v
    return r
        if #bad == 0 then
            if #codes > 0 then
                r = mw.ustring.char( unpack( codes ) )
                if again then
                    if type( again ) == "number" then
                        local n = math.floor( again )
                        if n > 1 then
                            r = r:rep( n )
                        elseif n < 1 then
                            r = ""
                        end
                    else
                        s = "bad repetitions: " .. tostring( again )
                    end
                end
            end
        else
            s = "bad codepoints: " .. table.concat( bad, " " )
        end
        if s  and  not accept then
            r = tostring(  mw.html.create( "span" )
                                  :addClass( "error" )
                                  :wikitext( s ) )
        end
     end
     end
     return r or ""
    if #codes > 0 then
    r = mw.ustring.char( unpack( codes ) )
    if again > 1 then
    r = r:rep(again)
    end
end
     return r
end -- Text.char()
end -- Text.char()


 
local function trimAndFormat(args, fmt)
local result = {}
if type(args) ~= 'table' then
args = {args}
end
for _, v in ipairs(args) do
v = mw.text.trim(tostring(v))
if v ~= "" then
table.insert(result,fmt and mw.ustring.format(fmt, v) or v)
end
end
return result
end


Text.concatParams = function ( args, apply, adapt )
Text.concatParams = function ( args, apply, adapt )
Line 210: Line 219:
     -- Returns: string
     -- Returns: string
     local collect = { }
     local collect = { }
     args = type(args) == 'table' and args or {} -- ensure args is table
     return table.concat(trimAndFormat(args,adapt), apply or "|")
    for k, v in pairs( args ) do
        if type( k ) == "number" then
            v = mw.text.trim( v )
            if v ~= "" then
                if adapt then
                    v = mw.ustring.format( adapt, v )
                end
                table.insert( collect, v )
            end
        end
    end -- for k, v
    return table.concat( collect, apply or "|" )
end -- Text.concatParams()
end -- Text.concatParams()






Text.containsCJK = function ( analyse )
Text.containsCJK = function ( s )
     -- Is any CJK code within?
     -- Is any CJK code within?
     -- Parameter:
     -- Parameter:
     --    analyse -- string
     --    s -- string
     -- Returns: true, if CJK detected
     -- Returns: true, if CJK detected
     analyse = analyse or ""
     s = s and tostring(s) or ""
     if not patternCJK then
     if not patternCJK then
         patternCJK = mw.ustring.char( 91,
         patternCJK = mw.ustring.char( 91,
Line 245: Line 242:
                                       93 )
                                       93 )
     end
     end
     if mw.ustring.find( analyse, patternCJK ) then
     return mw.ustring.find( s, patternCJK ) ~= nil
    return true
    end
    return false
end -- Text.containsCJK()
end -- Text.containsCJK()


Line 258: Line 252:
--    suffix = ending delimiter
--    suffix = ending delimiter
-- Returns: stripped string
-- Returns: stripped string
s = s and tostring(s) or ""
prefix = prefix and tostring(prefix) or ""
suffix = suffix and tostring(suffix) or ""
local prefixLen = mw.ustring.len(prefix)
local prefixLen = mw.ustring.len(prefix)
local suffixLen = mw.ustring.len(suffix)
local suffixLen = mw.ustring.len(suffix)
if prefixLen == 0 or suffixLen == 0 then
return s
end
local i = s:find(prefix, 1, true)
local i = s:find(prefix, 1, true)
local r = s
local r = s
Line 288: Line 288:
end -- Text.getPlain()
end -- Text.getPlain()


 
Text.isLatinRange = function (s)
 
Text.isLatinRange = function ( adjust )
     -- Are characters expected to be latin or symbols within latin texts?
     -- Are characters expected to be latin or symbols within latin texts?
     -- Precondition:
     -- Arguments:
     --     adjust -- string, or nil for initialization
     --  s = string to analyze
     -- Returns: true, if valid for latin only
     -- Returns: true, if valid for latin only
     local r
     s = s and tostring(s) or "" --- ensure input is always string
    if not RangesLatin then
     initLatinData()
        RangesLatin = { {    7,  687 },
     return mw.ustring.match(s, PatternLatin) ~= nil
                        { 7531, 7578 },
                        { 7680, 7935 },
                        { 8194, 8250 } }
    end
    if not PatternLatin then
        local range
        PatternLatin = "^["
        for i = 1, #RangesLatin do
            range = RangesLatin[ i ]
            PatternLatin = PatternLatin ..
                          mw.ustring.char( range[ 1 ], 45, range[ 2 ] )
        end    -- for i
        PatternLatin = PatternLatin .. "]*$"
     end
     if adjust then
        if mw.ustring.match( adjust, PatternLatin ) then
            r = true
        else
            r = false
        end
    end
    return r
end -- Text.isLatinRange()
end -- Text.isLatinRange()






Text.isQuote = function ( ask )
Text.isQuote = function ( s )
     -- Is this character any quotation mark?
     -- Is this character any quotation mark?
     -- Parameter:
     -- Parameter:
     --    ask  -- string, with single character
     --    s = single character to analyze
     -- Returns: true, if ask is quotation mark
     -- Returns: true, if s is quotation mark
     local r
     s = s and tostring(s) or ""
    if s == "" then
    return false
    end
     if not SeekQuote then
     if not SeekQuote then
         SeekQuote = mw.ustring.char(  34,      -- "
         SeekQuote = mw.ustring.char(  34,      -- "
Line 348: Line 327:
                                     0x300F )    -- CJK
                                     0x300F )    -- CJK
     end
     end
     if ask == "" then
     return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil
        r = false
    elseif mw.ustring.find( SeekQuote, ask, 1, true ) then
        r = true
    else
        r = false
    end
    return r
end -- Text.isQuote()
end -- Text.isQuote()


Line 366: Line 338:
     --    adapt  -- string (optional); format including "%s"
     --    adapt  -- string (optional); format including "%s"
     -- Returns: string
     -- Returns: string
     local collect = { }
     return mw.text.listToText(trimAndFormat(args, adapt))
    for k, v in pairs( args ) do
        if type( k ) == "number" then
            v = mw.text.trim( v )
            if v ~= "" then
                if adapt then
                    v = mw.ustring.format( adapt, v )
                end
                table.insert( collect, v )
            end
        end
    end -- for k, v
    return mw.text.listToText( collect )
end -- Text.listToText()
end -- Text.listToText()


Line 390: Line 350:
     --    advance  -- number, with level 1 or 2, or nil
     --    advance  -- number, with level 1 or 2, or nil
     -- Returns: quoted string
     -- Returns: quoted string
    apply = apply and tostring(apply) or ""
     local mode, slang
     local mode, slang
     if type( alien ) == "string" then
     if type( alien ) == "string" then
Line 417: Line 378:
     --    advance  -- number, with level 1 or 2, or nil
     --    advance  -- number, with level 1 or 2, or nil
     -- Returns: string; possibly quoted
     -- Returns: string; possibly quoted
     local r = mw.text.trim( apply )
     local r = mw.text.trim( apply and tostring(apply) or "" )
     local s = mw.ustring.sub( r, 1, 1 )
     local s = mw.ustring.sub( r, 1, 1 )
     if s ~= ""  and  not Text.isQuote( s, advance ) then
     if s ~= ""  and  not Text.isQuote( s, advance ) then
Line 445: Line 406:
                                           93 )
                                           93 )
     end
     end
     decomposed = mw.ustring.toNFD( adjust )
     decomposed = mw.ustring.toNFD( adjust and tostring(adjust) or "" )
     cleanup    = mw.ustring.gsub( decomposed, PatternCombined, "" )
     cleanup    = mw.ustring.gsub( decomposed, PatternCombined, "" )
     return mw.ustring.toNFC( cleanup )
     return mw.ustring.toNFC( cleanup )
Line 477: Line 438:




Text.ucfirstAll = function ( adjust )
Text.ucfirstAll = function ( adjust)
     -- Capitalize all words
     -- Capitalize all words
     -- Precondition:
     -- Arguments:
     --    adjust -- string
     --    adjust = string to adjust
     -- Returns: string with all first letters in upper case
     -- Returns: string with all first letters in upper case
     local r = " " .. adjust
    adjust = adjust and tostring(adjust) or ""
     local r = mw.text.decode(adjust,true)
     local i = 1
     local i = 1
     local c, j, m
     local c, j, m
     if adjust:find( "&" ) then
     m = (r ~= adjust)
        r = r:gsub( "&amp;",      "&#38;" )
    r = " "..r
            :gsub( "&lt;",      "&#60;" )
            :gsub( "&gt;",      "&#62;" )
            :gsub( "&nbsp;",    "&#160;" )
            :gsub( "&thinsp;", "&#8201;" )
            :gsub( "&zwnj;",  "&#8204;" )
            :gsub( "&zwj;",    "&#8205;" )
            :gsub( "&lrm;",    "&#8206;" )
            :gsub( "&rlm;",    "&#8207;" )
        m = true
    end
     while i do
     while i do
         i = mw.ustring.find( r, "%W%l", i )
         i = mw.ustring.find( r, "%W%l", i )
Line 511: Line 463:
     r = r:sub( 2 )
     r = r:sub( 2 )
     if m then
     if m then
        r = r:gsub(    "&#38;", "&amp;" )
    r = mw.text.encode(r)
            :gsub(    "&#60;", "&lt;" )
            :gsub(    "&#62;", "&gt;" )
            :gsub(    "&#160;", "&nbsp;" )
            :gsub(  "&#8201;", "&thinsp;" )
            :gsub(  "&#8204;", "&zwnj;" )
            :gsub(  "&#8205;", "&zwj;" )
            :gsub(  "&#8206;", "&lrm;" )
            :gsub(  "&#8207;", "&rlm;" )
            :gsub( "&#X(%x+);", "&#x%1;" )
     end
     end
     return r
     return r
end -- Text.ucfirstAll()
end -- Text.ucfirstAll()




Line 534: Line 476:
     -- Returns: string with non-latin parts enclosed in <span>
     -- Returns: string with non-latin parts enclosed in <span>
     local r
     local r
     Text.isLatinRange()
     initLatinData()
     if mw.ustring.match( adjust, PatternLatin ) then
     if mw.ustring.match( adjust, PatternLatin ) then
         -- latin only, horizontal dashes, quotes
         -- latin only, horizontal dashes, quotes
Line 622: Line 564:
     return r
     return r
end -- Text.uprightNonlatin()
end -- Text.uprightNonlatin()




Line 628: Line 569:
     local r
     local r
     if about == "quote" then
     if about == "quote" then
         factoryQuote()
         initQuoteData()
         r = { }
         r = { }
         r.QuoteLang = QuoteLang
         r.QuoteLang = QuoteLang
Line 640: Line 581:
-- Export
-- Export
local p = { }
local p = { }
for _, func in ipairs({'containsCJK','isLatinRange','isQuote','sentenceTerminated'}) do
p[func] = function (frame)
return Text[func]( frame.args[ 1 ] or "" ) and "1" or ""
end
end
for _, func in ipairs({'getPlain','removeDiacritics','ucfirstAll','uprightNonlatin'}) do
p[func] = function (frame)
return Text[func]( frame.args[ 1 ] or "" )
end
end


function p.char( frame )
function p.char( frame )
Line 650: Line 603:
     end
     end
     if story then
     if story then
         local items = mw.text.split( story, "%s+" )
         local items = mw.text.split( mw.text.trim(story), "%s+" )
         if #items > 0 then
         if #items > 0 then
             local j
             local j
             lenient  = ( params.errors == "0" )
             lenient  = (yesNo(params.errors) == false)
             codes    = { }
             codes    = { }
             multiple = tonumber( params[ "*" ] )
             multiple = tonumber( params[ "*" ] )
             for k, v in pairs( items ) do
             for _, v in ipairs( items ) do
                if v:sub( 1, 1 ) == "x" then
            j = tonumber((v:sub( 1, 1 ) == "x" and "0" or "") .. v)
                    j = tonumber( "0" .. v )
                 table.insert( codes,  j or v )
                 elseif v == "" then
             end  
                    v = false
                else
                    j = tonumber( v )
                end
                if v then
                    table.insert( codes,  j or v )
                end
             end -- for k, v
         end
         end
     end
     end
Line 689: Line 634:
                               frame.args.format )
                               frame.args.format )
end
end
function p.containsCJK( frame )
    return Text.containsCJK( frame.args[ 1 ] or "" ) and "1" or ""
end
function p.getPlain( frame )
    return Text.getPlain( frame.args[ 1 ] or "" )
end
function p.isLatinRange( frame )
    return Text.isLatinRange( frame.args[ 1 ] or "" ) and "1" or ""
end
function p.isQuote( frame )
    return Text.isQuote( frame.args[ 1 ] or "" ) and "1" or ""
end




Line 786: Line 714:
                               tonumber( frame.args[3] ) )
                               tonumber( frame.args[3] ) )
end
end
function p.removeDiacritics( frame )
    return Text.removeDiacritics( frame.args[ 1 ] or "" )
end
function p.sentenceTerminated( frame )
    return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or ""
end
function p.ucfirstAll( frame )
    return Text.ucfirstAll( frame.args[ 1 ] or "" )
end
function p.uprightNonlatin( frame )
    return Text.uprightNonlatin( frame.args[ 1 ] or "" )
end





Revision as of 12:43, 21 July 2022

Text – Module containing methods for the manipulation of text, wikimarkup and some HTML.

Functions for templates[edit]

All methods have an unnamed parameter containing the text.

The return value is an empty string if the parameter does not meet the conditions. When the condition is matched or some result is successfully found, strings of at least one character are returned.

char
Creates a string from a list of character codes.
1
Space-separated list of character codes
*
Number of repetitions of the list in parameter 1; (Default 1).
errors
0 – Silence errors
concatParams
Combine any number of elements into a list, like table.concat() in Lua.
From a template:
1
First element; missing and empty elements are ignored.
2 3 4 5 6 …
Further list elements
From Lua
args
table (sequence) of the elements
apply
Separator between elements; defaults to |
adapt
optional formatting, which will be applied to each element; must contain %s.
containsCJK
Returns whether the input string contains any CJK characters
  • Returns nothing if there are no CJK characters
removeDelimited
Remove all text between delimiters, including the delimiters themselves.
getPlain
Remove wikimarkup (except templates): comments, tags, bold, italic, nbsp
isLatinRange
Returns some content, unless the string contains a character that would not normally be found in Latin text.
  • Returns nothing if there is a non-Latin string.
isQuote
Returns some content if the parameter passed is a single character, and that character is a quote, such as '.
  • Returns nothing for multiple characters, or if the character passed is not a quote.
listToText
Formats list elements analogously to mw.text.listToText().
The elements are separated by a comma and a space; the word "and" appears between the last two elements.
Unnamed parameters become the list items.
Optional parameters for #invoke:
  • format – Every list element will first be formatted with this format string; see here for how to construct this string. The string must contain at least one %s sequence.
  • template=1 – List elements should be taken from the calling template.
Returns the resulting string.
quote
Wrap the string in quotes; quotes can be chosen for a specific language.
1
Input text (will be automatically trimmed); may be empty.
2
(optional) the ISO 639 language code for the quote marks; should be one of the supported languages Template:In lang
3
(optional) 2 for second level quotes. This means the single quote marks in a statement such as: Jack said, “Jill said ‘fish’ last Tuesday.”
quoteUnquoted
Wrap the string in quotes; quotes can be chosen for a specific language. Will not quote an empty string, and will not quote if there is a quote at the start or end of the (trimmed) string.
1
Input text (will be automatically trimmed); may be empty.
2
(optional) the ISO 639 language code for the quote marks; should be one of the supported languages Template:In lang
3
(optional) 2 for second level quotes. This means the single quote marks in a statement such as: Jack said, “Jill said ‘fish’ last Tuesday.”
removeDiacritics
Removes all diacritical marks from the input.
1
Input text
sentenceTerminated
Is this sentence terminated? Should work with CJK, and allows quotation marks to follow.
  • Returns nothing if the sentence is unterminated.
ucfirstAll
The first letter of every recognized word is converted to upper case. This contrasts with the parser function {{ucfirst:}}, which changes only the first character of the whole string passed.
A few common HTML entities are protected; as a side effect of the implementation, numerical entities passed in (e.g. &#38;) may be returned in their named form (e.g. &amp;).
uprightNonlatin
Takes a string. Italicized non-Latin characters are un-italicized, unless they are a single Greek letter.
zip
Combines a tuple of lists by convolution. This is easiest to explain by example: given two lists, list1 = "a b c" and list2 = "1 2 3", then
zip(list1, list2, sep = " ", isep = "-", osep = "/")
outputs
a-1/b-2/c-3
  • 1, 2, 3, … – Lists to be combined
  • sep – A separator (in Lua regex form) used to split the lists. If empty, the lists are split into individual characters.
  • sep1, sep2, sep3, … – Allows a different separator to be used for each list.
  • isep – Output separator; placed between elements which were at the same index in their lists.
  • osep – Output separator; placed between elements which had different original indices; i.e. between the groups joined with isep

Examples and test page[edit]

There are tests available Template:In lang to illustrate this in practice.

Use in another Lua module[edit]

All of the above functions can be called from other Lua modules. Use require(); the below code checks for errors loading it:

local lucky, Text = pcall( require, "Module:Text" )
if type( Text ) == "table" then
    Text = Text.Text()
else
    -- In the event of errors, Text is an error message.
    return "<span class=\"error\">" .. Text .. "</span>"
end

You may then call:

  • Text.char( apply, again, accept )
  • Text.concatParams( args, separator, format )
  • Text.containsCJK( s )
  • Text.removeDelimited( s, prefix, suffix )
  • Text.getPlain( s )
  • Text.isLatinRange( s )
  • Text.isQuote( c )
  • Text.listToText( table, format )
  • Text.quote( s, lang, mode )
  • Text.quoteUnquoted( s, lang, mode )
  • Text.removeDiacritics( s )
  • Text.sentenceTerminated( s )
  • Text.ucfirstAll( s )
  • Text.uprightNonlatin( s )
  • Text.zip(…)
  • Text.test( s )

Usage[edit]

This is a general library; use it anywhere.

Dependencies[edit]

Module:Yesno (loaded with require() when the module starts).

See also[edit]

Template:String-handling templates


local yesNo = require("Module:Yesno")
local Text = { serial = "2022-07-21",
               suite  = "Text" }
--[=[
Text utilities
]=]



-- local globals
-- Module-level caches shared by the functions below.  Every entry starts
-- out as false, meaning "not built yet".  initLatinData() fills
-- RangesLatin / PatternLatin and initQuoteData() fills QuoteLang /
-- QuoteType on first use; the other entries are presumably filled the
-- same way by the functions that need them -- TODO confirm.
-- NOTE(review): presumably the CJK character-class pattern -- confirm
local PatternCJK        = false
-- NOTE(review): presumably a pattern for combining marks -- confirm
local PatternCombined   = false
-- anchored pattern "^[...]*$" assembled from RangesLatin
local PatternLatin      = false
-- NOTE(review): presumably a sentence-terminator pattern -- confirm
local PatternTerminated = false
-- language code -> name of a quotation style (key into QuoteType)
local QuoteLang         = false
-- quotation style name -> codepoint pairs for level 1 and level 2 quotes
local QuoteType         = false
-- list of { first, last } codepoint pairs
local RangesLatin       = false
-- string made of all characters treated as quotation marks
local SeekQuote         = false

local function initLatinData()
    -- Lazily fill the module-level caches RangesLatin and PatternLatin.
    -- RangesLatin becomes a list of { first, last } codepoint pairs;
    -- PatternLatin becomes "^[" .. <first>-<last> for each pair .. "]*$".
    -- Entries that have already been built are left untouched.
    RangesLatin = RangesLatin or { {    7,  687 },
                                   { 7531, 7578 },
                                   { 7680, 7935 },
                                   { 8194, 8250 } }
    if PatternLatin then
        return
    end
    local pieces = { "^[" }
    for _, bounds in ipairs( RangesLatin ) do
        -- 45 is "-", which yields "first-last" inside the character class
        pieces[ #pieces + 1 ] = mw.ustring.char( bounds[ 1 ],
                                                 45,
                                                 bounds[ 2 ] )
    end
    pieces[ #pieces + 1 ] = "]*$"
    PatternLatin = table.concat( pieces )
end

local function initQuoteData()
    -- Lazily create the quote definitions in the module-level caches.
    --     QuoteLang  maps a lower-case language code to a style name
    --     QuoteType  maps a style name to
    --                { { open, close },    -- level 1 codepoints
    --                  { open, close },    -- level 2 codepoints
    --                  true }              -- optional third entry
    -- A table that already exists is kept as it is.
    QuoteLang = QuoteLang or {
        af        = "bd",
        ar        = "la",
        be        = "labd",
        bg        = "bd",
        ca        = "la",
        cs        = "bd",
        da        = "bd",
        de        = "bd",
        dsb       = "bd",
        et        = "bd",
        el        = "lald",
        en        = "ld",
        es        = "la",
        eu        = "la",
        -- fa     = "la",    (entry intentionally left disabled)
        fi        = "rd",
        fr        = "laSPC",
        ga        = "ld",
        he        = "ldla",
        hr        = "bd",
        hsb       = "bd",
        hu        = "bd",
        hy        = "labd",
        id        = "rd",
        is        = "bd",
        it        = "ld",
        ja        = "x300C",
        ka        = "bd",
        ko        = "ld",
        lt        = "bd",
        lv        = "bd",
        nl        = "ld",
        nn        = "la",
        no        = "la",
        pl        = "bdla",
        pt        = "lald",
        ro        = "bdla",
        ru        = "labd",
        sk        = "bd",
        sl        = "bd",
        sq        = "la",
        sr        = "bx",
        sv        = "rd",
        th        = "ld",
        tr        = "ld",
        uk        = "la",
        zh        = "ld",
        ["de-ch"] = "la",
        ["en-gb"] = "lsld",
        ["en-us"] = "ld",
        ["fr-ch"] = "la",
        ["it-ch"] = "la",
        ["pt-br"] = "ldla",
        ["zh-tw"] = "x300C",
        ["zh-cn"] = "ld",
    }
    QuoteType = QuoteType or {
        bd    = { { 8222, 8220 },  { 8218, 8217 } },
        bdla  = { { 8222, 8220 },  {  171,  187 } },
        bx    = { { 8222, 8221 },  { 8218, 8217 } },
        la    = { {  171,  187 },  { 8249, 8250 } },
        laSPC = { {  171,  187 },  { 8249, 8250 },  true },
        labd  = { {  171,  187 },  { 8222, 8220 } },
        lald  = { {  171,  187 },  { 8220, 8221 } },
        ld    = { { 8220, 8221 },  { 8216, 8217 } },
        ldla  = { { 8220, 8221 },  {  171,  187 } },
        lsld  = { { 8216, 8217 },  { 8220, 8221 } },
        rd    = { { 8221, 8221 },  { 8217, 8217 } },
        x300C = { { 0x300C, 0x300D },
                  { 0x300E, 0x300F } },
    }
end -- initQuoteData()



local function fiatQuote( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- text to be quoted; converted with tostring(),
    --                 nil or false is treated as ""
    --     alien    -- string, with language code as used for the keys
    --                 of QuoteLang (e.g. "de", "pt-br"); any value that
    --                 is not a string falls back to "en"
    --     advance  -- number, with level 1 or 2; any other value selects
    --                 no quote pair and the text is returned unquoted
    -- Returns: string; the text enclosed in the quotation marks of the
    --          requested language and level, or the plain text if no
    --          matching quote definition exists
    local r = apply and tostring( apply ) or ""
    if type( alien ) ~= "string" then
        -- also covers nil/false; avoids indexing a non-string below
        alien = "en"
    end
    advance = tonumber( advance ) or 0
    initQuoteData()
    -- "xx-yy" falls back to "xx", and finally to English
    local slang = alien:match( "^(%l+)-" )
    local suite = QuoteLang[ alien ]
                  or ( slang and QuoteLang[ slang ] )
                  or QuoteLang[ "en" ]
    if not suite then
        return r
    end
    local quotes = QuoteType[ suite ]
    if not quotes then
        mw.log( "fiatQuote() " .. suite )
        return r
    end
    -- a third entry in the style requests a no-break space inside
    local space = quotes[ 3 ] and "&#160;" or ""
    local pair  = quotes[ advance ]
    if pair then
        -- use the normalised text r, never the raw argument: the raw
        -- value may be nil or a non-string and would break format()
        r = mw.ustring.format( "%s%s%s%s%s",
                               mw.ustring.char( pair[ 1 ] ),
                               space,
                               r,
                               space,
                               mw.ustring.char( pair[ 2 ] ) )
    end
    return r
end -- fiatQuote()



Text.char = function ( apply, again, accept )
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil;
    --                anything that is not a table counts as empty
    --     again   -- number of repetitions, or nil (default 1);
    --                fractions are rounded down, values below 1 yield ""
    --     accept  -- true, if no error messages to be appended
    -- Returns: string
    --     the characters, repeated as requested; if any codepoint was
    --     rejected: an error <span> listing the rejected items, or ""
    --     if accept is set
    apply = type( apply ) == "table" and apply or { }
    again = math.floor( tonumber( again ) or 1 )
    if again < 1 then
        return ""
    end
    local bad   = { }
    local codes = { }
    for _, v in ipairs( apply ) do
        local n = tonumber( v )
        -- Reject: non-numbers, NaN, control characters other than
        -- TAB (9) and LF (10), and anything beyond the last Unicode
        -- codepoint -- mw.ustring.char() would raise a script error
        -- for the latter instead of letting us report it politely.
        if not n
           or n ~= n
           or n >= 0x110000
           or ( n < 32  and  n ~= 9  and  n ~= 10 ) then
            table.insert( bad, tostring( v ) )
        else
            table.insert( codes, math.floor( n ) )
        end
    end
    if #bad > 0 then
        if accept then
            return ""
        end
        return tostring( mw.html.create( "span" )
                                :addClass( "error" )
                                :wikitext( "bad codepoints: "
                                           .. table.concat( bad, " " ) ) )
    end
    if #codes == 0 then
        return ""
    end
    local r = mw.ustring.char( unpack( codes ) )
    if again > 1 then
        r = r:rep( again )
    end
    return r
end -- Text.char()

local function trimAndFormat(args, fmt)
    -- Trim list items, drop the empty ones and optionally format the rest
    -- Parameter:
    --     args  -- table with items under positive integer keys;
    --              any other value is treated as a one-item list
    --     fmt   -- string (optional); format including "%s"
    -- Returns: table (sequence) of strings, in ascending key order
    if type( args ) ~= "table" then
        args = { args }
    end
    -- Collect the positive integer keys first instead of using ipairs():
    -- ipairs() stops at the first gap, which silently dropped every
    -- element after a missing one (e.g. parameters 1 and 3 without 2).
    local keys = { }
    for k in pairs( args ) do
        if type( k ) == "number"  and  k >= 1  and  k == math.floor( k ) then
            keys[ #keys + 1 ] = k
        end
    end
    table.sort( keys )
    local result = { }
    for _, k in ipairs( keys ) do
        local v = mw.text.trim( tostring( args[ k ] ) )
        if v ~= "" then
            result[ #result + 1 ] = fmt and mw.ustring.format( fmt, v ) or v
        end
    end
    return result
end

Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string;
    --               items are trimmed and empty items are skipped
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s",
    --               applied to every remaining item
    -- Returns: string
    return table.concat( trimAndFormat( args, adapt ), apply or "|" )
end -- Text.concatParams()



Text.containsCJK = function ( s )
    -- Is any CJK code within?
    -- Parameter:
    --     s  -- string; other values are converted with tostring(),
    --           nil or false is treated as ""
    -- Returns: true, if CJK detected; false otherwise
    s = s and tostring( s ) or ""
    -- The pattern is cached in the module-level local PatternCJK.
    -- (A differently capitalised name here would create a global.)
    if not PatternCJK then
        -- 91 = "[", 45 = "-", 93 = "]": a character class of ranges
        PatternCJK = mw.ustring.char( 91,
                                        4352, 45,   4607,
                                       11904, 45,  42191,
                                       43072, 45,  43135,
                                       44032, 45,  55215,
                                       63744, 45,  64255,
                                       65072, 45,  65103,
                                       65381, 45,  65500,
                                      131072, 45, 196607,
                                      93 )
    end
    return mw.ustring.find( s, PatternCJK ) ~= nil
end -- Text.containsCJK()

Text.removeDelimited = function (s, prefix, suffix)
    -- Remove all text in s delimited by prefix and suffix (inclusive)
    -- Arguments:
    --    s = string to process
    --    prefix = initial delimiter
    --    suffix = ending delimiter
    -- All three are converted with tostring(); nil or false becomes "".
    -- Both delimiters are matched literally, never as Lua patterns.
    -- If a prefix has no matching suffix, everything from that prefix
    -- to the end of the string is removed.
    -- Returns: stripped string; s unchanged if either delimiter is empty
    s = s and tostring(s) or ""
    prefix = prefix and tostring(prefix) or ""
    suffix = suffix and tostring(suffix) or ""
    -- Byte lengths on purpose: find() and sub() below work on byte
    -- offsets, so codepoint counts would be wrong for non-ASCII
    -- delimiters.
    local prefixLen = #prefix
    local suffixLen = #suffix
    if prefixLen == 0 or suffixLen == 0 then
        return s
    end
    local r = s
    local i = r:find(prefix, 1, true)
    while i do
        -- plain search: a suffix such as "-->" contains the pattern
        -- magic character "-" and would otherwise match any ">"
        local j = r:find(suffix, i + prefixLen, true)
        if j then
            r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
        else
            r = r:sub(1, i - 1)
        end
        i = r:find(prefix, 1, true)
    end
    return r
end

Text.getPlain = function ( adjust )
    -- Strip wikisyntax from a string; templates are left alone
    --     Removed: HTML comments, tags starting with a lowercase
    --     letter, bold/italic apostrophes; "&nbsp;" becomes a space
    -- Parameter:
    --     adjust  -- string
    -- Returns: string
    local plain = Text.removeDelimited( adjust, "<!--", "-->" )
    -- each assignment keeps only the first result of gsub (the string)
    plain = plain:gsub( "(</?%l[^>]*>)", "" )
    plain = plain:gsub( "'''", "" )
    plain = plain:gsub( "''", "" )
    plain = plain:gsub( "&nbsp;", " " )
    return plain
end -- Text.getPlain()

Text.isLatinRange = function (s)
    -- Does the string consist only of characters expected in latin
    -- texts (letters or symbols), according to PatternLatin?
    -- Arguments:
    --  s = string to analyze; nil/false counts as empty string
    -- Returns: true, if valid for latin only
    local text = ""
    if s then
        text = tostring(s)
    end
    initLatinData()    -- makes sure PatternLatin has been built
    if mw.ustring.match(text, PatternLatin) == nil then
        return false
    end
    return true
end -- Text.isLatinRange()



Text.isQuote = function ( s )
    -- Is this character any quotation mark?
    -- Parameter:
    --     s = single character to analyze
    -- Returns: true, if s is quotation mark; false for empty input
    local probe = s and tostring(s) or ""
    if probe ~= "" then
        if not SeekQuote then
            -- built once: string holding every known quotation mark
            SeekQuote = mw.ustring.char(   34,       -- "
                                           39,       -- '
                                          171,       -- laquo
                                          187,       -- raquo
                                         8216,       -- lsquo
                                         8217,       -- rsquo
                                         8218,       -- sbquo
                                         8220,       -- ldquo
                                         8221,       -- rdquo
                                         8222,       -- bdquo
                                         8249,       -- lsaquo
                                         8250,       -- rsaquo
                                         0x300C,     -- CJK
                                         0x300D,     -- CJK
                                         0x300E,     -- CJK
                                         0x300F )    -- CJK
        end
        -- plain (non-pattern) lookup of probe within the list
        return mw.ustring.find( SeekQuote, probe, 1, true ) ~= nil
    end
    return false
end -- Text.isQuote()



Text.listToText = function ( args, adapt )
    -- Join list items the way mw.text.listToText() does, after
    -- trimming them and dropping empty ones
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local items = trimAndFormat( args, adapt )
    return mw.text.listToText( items )
end -- Text.listToText()



Text.quote = function ( apply, alien, advance )
    -- Put quotation marks around a text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local text = apply and tostring(apply) or ""
    local slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- No language given: take the pageLanguage field of the
        -- current title if set, else the content language code.
        -- TODO FIXME: Introduction expected 2017-04
        slang = mw.title.getCurrentTitle().pageLanguage
                or mw.language.getContentLanguage():getCode()
    end
    -- every level other than 2 is handled as level 1
    local mode = ( advance == 2 )  and  2  or  1
    return fiatQuote( mw.text.trim( text ), slang, mode )
end -- Text.quote()



Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    --     The text counts as already quoted if its first or its last
    --     character is a quotation mark (see Text.isQuote)
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    local r = mw.text.trim( apply and tostring(apply) or "" )
    if r ~= "" then
        local first = mw.ustring.sub( r, 1, 1 )
        -- ( r, -1, -1 ) is the last character; the former ( r, -1, 1 )
        -- was empty for every text longer than one character, so the
        -- trailing quotation mark was never detected
        local last  = mw.ustring.sub( r, -1, -1 )
        if not Text.isQuote( first )  and  not Text.isQuote( last ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()



Text.removeDiacritics = function ( adjust )
    -- Strip all diacritics
    --     The text is decomposed (NFD), characters within the
    --     combining ranges are deleted, then it is recomposed (NFC)
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    if not PatternCombined then
        -- 91 = "[", 45 = "-", 93 = "]": class of code point ranges
        PatternCombined = mw.ustring.char( 91,
                                            0x0300, 45, 0x036F,
                                            0x1AB0, 45, 0x1AFF,
                                            0x1DC0, 45, 0x1DFF,
                                            0xFE20, 45, 0xFE2F,
                                           93 )
    end
    local source = adjust and tostring(adjust) or ""
    -- local assignment keeps the string only, not the match count
    local stripped = mw.ustring.gsub( mw.ustring.toNFD( source ),
                                      PatternCombined,
                                      "" )
    return mw.ustring.toNFC( stripped )
end -- Text.removeDiacritics()



Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string; other values are converted by tostring(),
    --                 nil/false is treated as empty string
    -- Returns: true, if sentence terminated
    -- Coerce like the other functions of this module do, so that a
    -- missing argument yields false instead of raising an error
    analyse = analyse and tostring(analyse) or ""
    if not PatternTerminated then
        -- 91 = "[" opens the class of terminating characters; the
        -- second class lists what may still follow up to the end
        PatternTerminated = mw.ustring.char( 91,
                                             12290,
                                             65281,
                                             65294,
                                             65311 )
                            .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    return mw.ustring.find( analyse, PatternTerminated ) ~= nil
end -- Text.sentenceTerminated()



Text.ucfirstAll = function ( adjust)
    -- Turn the first letter of every word into upper case
    --     HTML entities are decoded before processing; if that changed
    --     the text, the result is passed through mw.text.encode()
    -- Arguments:
    --     adjust = string to adjust
    -- Returns: string with all first letters in upper case
    local source      = adjust and tostring(adjust) or ""
    local decoded     = mw.text.decode( source, true )
    local hadEntities = ( decoded ~= source )
    -- leading space: lets the very first letter match "%W%l" too
    local work = " " .. decoded
    local at   = mw.ustring.find( work, "%W%l", 1 )
    while at do
        local letterAt = at + 1
        local capital  = mw.ustring.upper( mw.ustring.sub( work,
                                                           letterAt,
                                                           letterAt ) )
        work = string.format( "%s%s%s",
                              mw.ustring.sub( work, 1, at ),
                              capital,
                              mw.ustring.sub( work, at + 2 ) )
        -- continue the search at the letter just replaced
        at = mw.ustring.find( work, "%W%l", letterAt )
    end -- while at
    work = work:sub( 2 )    -- drop the helper space again
    if hadEntities then
        work = mw.text.encode( work )
    end
    return work
end -- Text.ucfirstAll()


Text.uprightNonlatin = function ( adjust )
    -- Ensure non-italics for non-latin text parts
    --     One single greek letter might be granted
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with non-latin parts enclosed in <span>;
    --          adjust itself, if it matches PatternLatin entirely
    local r
    initLatinData()    -- provides RangesLatin and PatternLatin
    if mw.ustring.match( adjust, PatternLatin ) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        -- State of the scan over the characters of adjust:
        --     c  -- code point of the current character (reused for a
        --           one-character string in the backward scan below)
        --     j  -- index where the pending non-latin run starts,
        --           or false if no run is open
        --     k  -- index of the first character not yet copied to r
        --     m  -- false, or the index at which a latin character
        --           means that the run was one single letter out of
        --           code points 880..1023
        --     n  -- length of adjust in characters
        local c
        local j    = false
        local k    = 1
        local m    = false
        local n    = mw.ustring.len( adjust )
        local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
        local flat = function ( a )
                  -- isLatin
                  -- true if code point a lies within one of RangesLatin;
                  -- otherwise falls through and yields nil
                  local range
                  for i = 1, #RangesLatin do
                      range = RangesLatin[ i ]
                      if a >= range[ 1 ]  and  a <= range[ 2 ] then
                          return true
                      end
                  end    -- for i
              end -- flat()
        local focus = function ( a )
                  -- char is not ambivalent
                  -- Above 64: everything but 8192..8212 is decisive.
                  -- Up to 64: only '&' and '<' are decisive; all other
                  -- such code points do not open or close a run.
                  local r = ( a > 64 )
                  if r then
                      r = ( a < 8192  or  a > 8212 )
                  else
                      r = ( a == 38  or  a == 60 )    -- '&' '<'
                  end
                  return r
              end -- focus()
        local form = function ( a )
                -- Yields the result collected so far (outer r) followed
                -- by the plain text k..j-1 and the run j..a wrapped in
                -- the <span>; the caller assigns the returned value
                return string.format( span,
                                      r,
                                      mw.ustring.sub( adjust, k, j - 1 ),
                                      mw.ustring.sub( adjust, j, a ) )
              end -- form()
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint( adjust, i, i )
            if focus( c ) then
                if flat( c ) then
                    -- decisive latin character: closes an open run
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                -- run is dropped; k stays, so the letter
                                -- is copied later as plain text
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- Walk back over spaces and "(" directly
                            -- before i: they are kept outside of the
                            -- <span> (collected in s, run ends at nx)
                            local nx = i - 1
                            local s  = ""
                            for ix = nx, 1, -1 do
                                c = mw.ustring.sub( adjust, ix, ix )
                                if c == " "  or  c == "(" then
                                    nx = nx - 1
                                    s  = c .. s
                                else
                                    break -- for ix
                                end
                            end -- for ix
                            r = form( nx ) .. s
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    -- decisive non-latin character: opens a new run
                    j = i
                    if c >= 880  and  c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                -- ambivalent character directly after a single letter
                -- candidate: move the expected position along
                m = m + 1
            end
        end    -- for i
        -- End of text: wrap a run that is still open, unless m is set
        -- and not below n; otherwise append the rest from k unwrapped
        if j  and  ( not m  or  m < n ) then
            r = form( n )
        else
            r = r .. mw.ustring.sub( adjust, k )
        end
    end
    return r
end -- Text.uprightNonlatin()


Text.test = function ( about )
    -- Expose internal data for testing
    -- Parameter:
    --     about  -- string; only "quote" is known
    -- Returns: table with fields QuoteLang and QuoteType for "quote",
    --          else nil
    if about ~= "quote" then
        return nil
    end
    initQuoteData()    -- must run before the two tables are read
    return { QuoteLang = QuoteLang,
             QuoteType = QuoteType }
end -- Text.test()



-- Export
local p = { }

-- Boolean tests: parameter 1 goes to the Text function of the same
-- name; the answer is "1" for true and the empty string otherwise
for _, name in ipairs{ 'containsCJK',
                       'isLatinRange',
                       'isQuote',
                       'sentenceTerminated' } do
	p[name] = function (frame)
		if Text[name]( frame.args[ 1 ] or "" ) then
			return "1"
		end
		return ""
	end
end

-- String conversions: parameter 1 goes to the Text function of the
-- same name and its result is returned as is
for _, name in ipairs{ 'getPlain',
                       'removeDiacritics',
                       'ucfirstAll',
                       'uprightNonlatin' } do
	p[name] = function (frame)
		local input = frame.args[ 1 ] or ""
		return Text[name]( input )
	end
end

function p.char( frame )
    -- Template/#invoke entry for Text.char()
    --     The arguments of the parent frame are used if they have a
    --     parameter 1, otherwise those of the #invoke itself
    -- Parameter (template or #invoke):
    --     1       -- code points, separated by whitespace;
    --                a leading "x" marks a hexadecimal number
    --     *       -- number, forwarded as 2nd argument of Text.char()
    --     errors  -- evaluated by yesNo(); an explicit "no" makes the
    --                3rd argument of Text.char() true
    local params = frame:getParent().args
    local story  = params[ 1 ]
    if not story then
        params = frame.args
        story  = params[ 1 ]
    end
    local codes, lenient, multiple
    if story then
        local items = mw.text.split( mw.text.trim( story ), "%s+" )
        if #items > 0 then
            lenient  = ( yesNo( params.errors ) == false )
            codes    = { }
            multiple = tonumber( params[ "*" ] )
            for _, item in ipairs( items ) do
                local literal = item
                if item:sub( 1, 1 ) == "x" then
                    -- "x41" -> "0x41", understood by tonumber()
                    literal = "0" .. item
                end
                -- items that are no number are passed on unchanged
                codes[ #codes + 1 ] = tonumber( literal ) or item
            end
        end
    end
    return Text.char( codes, multiple, lenient )
end

function p.concatParams( frame )
    -- Template/#invoke entry for Text.concatParams()
    -- Parameter (#invoke):
    --     template   -- "1": take the list from the calling template
    --                   instead of the #invoke arguments
    --     separator  -- string (optional); separator
    --     format     -- string (optional); format including "%s"
    local config   = frame.args
    local template = config.template
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    local args = config
    if template then
        args = frame:getParent().args
    end
    return Text.concatParams( args, config.separator, config.format )
end


function p.listToFormat(frame)
    -- Fill a format string once per row of several parallel lists
    --     Row i is built by replacing the 1st "%s" of the format by
    --     item i of list 1, the next "%s" by item i of list 2 etc.;
    --     the rows are concatenated without separator
    -- Parameter (#invoke):
    --     1, 2, ...  -- lists; the items of each are separated by sep
    --     format     -- string with one "%s" per list; a missing
    --                   format is treated as empty string
    --     sep        -- item separator (default: ";"), handed to
    --                   mw.text.split() as pattern
    -- Returns: string
    local lists = {}
    local pformat = frame.args["format"] or ""
    local sep = frame.args["sep"] or ";"

    -- Parse parameters: collect the lists
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then lists[knum] = v end
    end

    -- Count the lists 1..n given without gap; "#" is not reliable
    -- on a table that might contain holes
    local listCount = 0
    while lists[listCount + 1] do
        listCount = listCount + 1
    end

    -- Split the lists
    local maxListLen = 0
    for i = 1, listCount do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
    end

    -- Generate the result string
    local rows = {}
    for i = 1, maxListLen do
        local row = pformat
        for j = 1, listCount do
            -- shorter lists contribute an empty string
            local item = lists[j][i] or ""
            -- Function as replacement: the item is inserted verbatim,
            -- a "%" inside is not read as capture reference
            row = mw.ustring.gsub(row, "%%s", function () return item end, 1)
        end
        rows[i] = row
    end

    return table.concat(rows)
end



function p.listToText( frame )
    -- Template/#invoke entry for Text.listToText()
    -- Parameter (#invoke):
    --     template  -- "1": take the list from the calling template
    --                  instead of the #invoke arguments
    --     format    -- string (optional); format including "%s"
    local config   = frame.args
    local template = config.template
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    local args = config
    if template then
        args = frame:getParent().args
    end
    return Text.listToText( args, config.format )
end



function p.quote( frame )
    -- #invoke entry for Text.quote()
    -- Parameter (#invoke):
    --     1  -- text
    --     2  -- language code (optional); blank counts as not given
    --     3  -- level, converted by tonumber()
    local slang = frame.args[2]
    if type( slang ) == "string" then
        local trimmed = mw.text.trim( slang )
        -- false, not "": Text.quote() then picks the language itself
        slang = ( trimmed ~= "" )  and  trimmed
    end
    local text = frame.args[ 1 ] or ""
    return Text.quote( text, slang, tonumber( frame.args[3] ) )
end



function p.quoteUnquoted( frame )
    -- #invoke entry for Text.quoteUnquoted()
    -- Parameter (#invoke):
    --     1  -- text
    --     2  -- language code (optional); blank counts as not given
    --     3  -- level, converted by tonumber()
    local slang = frame.args[2]
    if type( slang ) == "string" then
        local trimmed = mw.text.trim( slang )
        -- a blank language code is handed over as false
        slang = ( trimmed ~= "" )  and  trimmed
    end
    local text = frame.args[ 1 ] or ""
    return Text.quoteUnquoted( text, slang, tonumber( frame.args[3] ) )
end


function p.zip(frame)
    -- Interleave the items of several lists
    --     Output: item 1 of every list, then item 2 of every list etc.
    -- Parameter (#invoke):
    --     1, 2, ...        -- lists
    --     sep1, sep2, ...  -- item separator of list 1, 2, ...
    --     sep              -- separator for lists without own sepN
    --                         (default: empty string)
    --     isep             -- put between the items of one round
    --     osep             -- put between two rounds
    -- Returns: string
    local args = frame.args
    local defaultsep = args["sep"] or ""
    local innersep = args["isep"] or ""
    local outersep = args["osep"] or ""
    local lists, seps = {}, {}

    -- Parse parameters: numeric keys are lists, "sepN" are separators
    for key, value in pairs(args) do
        local index = tonumber(key)
        if index then
            lists[index] = value
        elseif string.sub(key, 1, 3) == "sep" then
            local sepIndex = tonumber(string.sub(key, 4))
            if sepIndex then
                seps[sepIndex] = value
            end
        end
    end

    -- Use the default separator wherever no explicit one was given
    for i = 1, math.max(#seps, #lists) do
        seps[i] = seps[i] or defaultsep
    end

    -- Split the lists
    local longest = 0
    for i = 1, #lists do
        local pieces = mw.text.split(lists[i], seps[i])
        lists[i] = pieces
        if #pieces > longest then
            longest = #pieces
        end
    end

    -- Collect the pieces and join them once at the end
    local out = {}
    for row = 1, longest do
        if row ~= 1 then
            out[#out + 1] = outersep
        end
        for col = 1, #lists do
            if col ~= 1 then
                out[#out + 1] = innersep
            end
            -- lists that have run out contribute an empty string
            out[#out + 1] = lists[col][row] or ""
        end
    end
    return table.concat(out)
end



p.failsafe = function ()
    -- Version identification
    -- Returns: string; the serial field of the Text table
    local serial = Text.serial
    return serial
end



function p.Text()
    -- Access for other Lua modules
    -- Returns: the Text table holding the library functions
    return Text
end -- p.Text

return p