Substitute accented characters such as "éèà" (which were not displayed) with their ASCII equivalents, e.g. "eea"

This commit is contained in:
crabman77 2015-04-18 17:59:40 +02:00
parent e310eacb47
commit 11e6bccb32
2 changed files with 30 additions and 2 deletions

View File

@ -7,6 +7,20 @@ local b64e = require("mime").b64
irc.hooks = {}
irc.registered_hooks = {}
-- Accented Latin letters paired with the plain ASCII letter that replaces
-- them. Each entry is {pattern, replacement} and is fed to string.gsub by
-- subaccent(); none of the patterns contain Lua pattern magic characters.
-- NOTE(review): the entries for Ì, Î, Ø, Ý and ø were unreadable (shown as
-- the U+FFFD replacement character) and have been reconstructed from their
-- position in Latin-1 order. The final unreadable entry could have been
-- either ý or ÿ, so both are listed and mapped to lowercase "y" -- confirm
-- against the intended character set and this file's encoding.
local accent_chars = {
	-- Uppercase
	{"À", "A"}, {"Á", "A"}, {"Â", "A"}, {"Ã", "A"}, {"Ä", "A"}, {"Å", "A"},
	{"Ç", "C"},
	{"È", "E"}, {"É", "E"}, {"Ê", "E"}, {"Ë", "E"},
	{"Ì", "I"}, {"Í", "I"}, {"Î", "I"}, {"Ï", "I"},
	{"Ñ", "N"},
	{"Ò", "O"}, {"Ó", "O"}, {"Ô", "O"}, {"Õ", "O"}, {"Ö", "O"}, {"Ø", "O"},
	{"Ù", "U"}, {"Ú", "U"}, {"Û", "U"}, {"Ü", "U"},
	{"Ý", "Y"},
	-- Lowercase
	{"à", "a"}, {"á", "a"}, {"â", "a"}, {"ã", "a"}, {"ä", "a"}, {"å", "a"},
	{"ç", "c"},
	{"è", "e"}, {"é", "e"}, {"ê", "e"}, {"ë", "e"},
	{"ì", "i"}, {"í", "i"}, {"î", "i"}, {"ï", "i"},
	{"ñ", "n"},
	{"ò", "o"}, {"ó", "o"}, {"ô", "o"}, {"õ", "o"}, {"ö", "o"}, {"ø", "o"},
	{"ù", "u"}, {"ú", "u"}, {"û", "u"}, {"ü", "u"},
	{"ý", "y"}, {"ÿ", "y"},
}
-- Replace accented Latin letters in `text` with their ASCII equivalents by
-- applying every {pattern, replacement} pair of `accent_chars` in order.
-- Returns the transliterated string as a single value.
local function subaccent(text)
	for _, pair in ipairs(accent_chars) do
		-- Assignment keeps only gsub's first result (the new string).
		text = text:gsub(pair[1], pair[2])
	end
	-- Stripping of `stripped_chars` is left to normalize(), which does it
	-- right after calling this function. `stripped_chars` is declared as a
	-- local *below* this function, so referencing it here would resolve to
	-- an undefined global (nil) and make gsub raise "bad argument #1".
	return text
end
-- TODO: Add proper conversion from CP1252 to UTF-8.
-- Seed list of single bytes to strip from text: "\2" is byte 2 and "\31" is
-- byte 31 (decimal escapes) -- presumably the IRC bold and underline control
-- codes; TODO confirm. The list is later joined into a "[...]"
-- character-class pattern stored under the same name.
local stripped_chars = {"\2", "\31"}
@ -18,7 +32,7 @@ stripped_chars = "["..table.concat(stripped_chars, "").."]"
-- Clean up `text` in three passes: drop color codes ("\3" followed by
-- digits/commas), transliterate accented letters via subaccent(), then
-- remove everything matched by the `stripped_chars` pattern.
-- Returns the cleaned string followed by the substitution count of the
-- final gsub.
local function normalize(text)
	local without_colors = text:gsub("\3[0-9][0-9,]*", "")
	local unaccented = subaccent(without_colors)
	return unaccented:gsub(stripped_chars, "")
end

View File

@ -7,6 +7,20 @@ local b64e = require("mime").b64
irc.hooks = {}
irc.registered_hooks = {}
-- Accented Latin letters paired with the plain ASCII letter that replaces
-- them. Each entry is {pattern, replacement} and is fed to string.gsub by
-- subaccent(); none of the patterns contain Lua pattern magic characters.
-- NOTE(review): the entries for Ì, Î, Ø, Ý and ø were unreadable (shown as
-- the U+FFFD replacement character) and have been reconstructed from their
-- position in Latin-1 order. The final unreadable entry could have been
-- either ý or ÿ, so both are listed and mapped to lowercase "y" -- confirm
-- against the intended character set and this file's encoding.
local accent_chars = {
	-- Uppercase
	{"À", "A"}, {"Á", "A"}, {"Â", "A"}, {"Ã", "A"}, {"Ä", "A"}, {"Å", "A"},
	{"Ç", "C"},
	{"È", "E"}, {"É", "E"}, {"Ê", "E"}, {"Ë", "E"},
	{"Ì", "I"}, {"Í", "I"}, {"Î", "I"}, {"Ï", "I"},
	{"Ñ", "N"},
	{"Ò", "O"}, {"Ó", "O"}, {"Ô", "O"}, {"Õ", "O"}, {"Ö", "O"}, {"Ø", "O"},
	{"Ù", "U"}, {"Ú", "U"}, {"Û", "U"}, {"Ü", "U"},
	{"Ý", "Y"},
	-- Lowercase
	{"à", "a"}, {"á", "a"}, {"â", "a"}, {"ã", "a"}, {"ä", "a"}, {"å", "a"},
	{"ç", "c"},
	{"è", "e"}, {"é", "e"}, {"ê", "e"}, {"ë", "e"},
	{"ì", "i"}, {"í", "i"}, {"î", "i"}, {"ï", "i"},
	{"ñ", "n"},
	{"ò", "o"}, {"ó", "o"}, {"ô", "o"}, {"õ", "o"}, {"ö", "o"}, {"ø", "o"},
	{"ù", "u"}, {"ú", "u"}, {"û", "u"}, {"ü", "u"},
	{"ý", "y"}, {"ÿ", "y"},
}
-- Replace accented Latin letters in `text` with their ASCII equivalents by
-- applying every {pattern, replacement} pair of `accent_chars` in order.
-- Returns the transliterated string as a single value.
local function subaccent(text)
	for _, pair in ipairs(accent_chars) do
		-- Assignment keeps only gsub's first result (the new string).
		text = text:gsub(pair[1], pair[2])
	end
	-- Stripping of `stripped_chars` is left to normalize(), which does it
	-- right after calling this function. `stripped_chars` is declared as a
	-- local *below* this function, so referencing it here would resolve to
	-- an undefined global (nil) and make gsub raise "bad argument #1".
	return text
end
-- TODO: Add proper conversion from CP1252 to UTF-8.
-- Seed list of single bytes to strip from text: "\2" is byte 2 and "\31" is
-- byte 31 (decimal escapes) -- presumably the IRC bold and underline control
-- codes; TODO confirm. The list is later joined into a "[...]"
-- character-class pattern stored under the same name.
local stripped_chars = {"\2", "\31"}
@ -18,7 +32,7 @@ stripped_chars = "["..table.concat(stripped_chars, "").."]"
-- Clean up `text` in three passes: drop color codes ("\3" followed by
-- digits/commas), transliterate accented letters via subaccent(), then
-- remove everything matched by the `stripped_chars` pattern.
-- Returns the cleaned string followed by the substitution count of the
-- final gsub.
local function normalize(text)
	local without_colors = text:gsub("\3[0-9][0-9,]*", "")
	local unaccented = subaccent(without_colors)
	return unaccented:gsub(stripped_chars, "")
end