Fully support UTF-8 decoding
`Utf8ToAnsi` now parses all UTF-8 code points; chars not in `utf8_decode` are represented by a `&#u<hex>;` sequence. Note: `&#x...;` and `&#u...;` have different meanings. The former represents a char with an individual PNG glyph, and the latter a char without one (which has to fall back to Unifont).
This commit is contained in:
parent
2da37c48c0
commit
2ba26cc9cc
50
encoding.lua
50
encoding.lua
|
@ -259,9 +259,13 @@ function AnsiToUtf8(s)
|
|||
end
|
||||
|
||||
function Utf8ToAnsi(s)
|
||||
local a, j, r, b, scope = 0, 0, ""
|
||||
local r, b = ""
|
||||
local scope
|
||||
local j, l, u
|
||||
for i = 1, s and s:len() or 0 do
|
||||
b = s:byte(i)
|
||||
|
||||
-- legacy parser
|
||||
if b == 0x26 then
|
||||
r = r .. "&"
|
||||
elseif b < 128 then
|
||||
|
@ -275,15 +279,53 @@ function Utf8ToAnsi(s)
|
|||
scope = scope[b]
|
||||
if "string" == type(scope) then
|
||||
r, scope = r .. scope
|
||||
j = -1 -- supress general UTF-8 parser
|
||||
end
|
||||
else
|
||||
r, scope = r .. "_"
|
||||
scope = nil
|
||||
end
|
||||
elseif utf8_decode[b] then
|
||||
scope = utf8_decode[b]
|
||||
else
|
||||
r = r .. "_"
|
||||
end
|
||||
|
||||
-- general UTF-8 parser
|
||||
if j == -1 then -- supressed by legacy parser
|
||||
j, l, u = nil
|
||||
elseif b < 0x80 then
|
||||
if j then
|
||||
r = r .. "&#ufffd;"
|
||||
j, l, u = nil
|
||||
end
|
||||
-- ASCII handled by legacy parser
|
||||
elseif b >= 0xc0 then
|
||||
if j then
|
||||
r = r .. "&#ufffd;"
|
||||
end
|
||||
j = i
|
||||
if b >= 0xf8 then
|
||||
r = r .. "&#ufffd;"
|
||||
j, l, u = nil
|
||||
elseif b >= 0xf0 then
|
||||
l, u = 4, b % (2 ^ 3)
|
||||
elseif b >= 0xe0 then
|
||||
l, u = 3, b % (2 ^ 4)
|
||||
else
|
||||
l, u = 2, b % (2 ^ 5)
|
||||
end
|
||||
else
|
||||
if j then
|
||||
u = u * (2 ^ 6) + b % (2 ^ 6)
|
||||
if i == j + l - 1 then
|
||||
r = r .. string.format("&#u%x;", u)
|
||||
j, l, u = nil
|
||||
end
|
||||
else
|
||||
r = r .. "&#ufffd;"
|
||||
end
|
||||
end
|
||||
end
|
||||
if j then
|
||||
r = r .. "&#ufffd;"
|
||||
end
|
||||
return r
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue