From 2b096f050d8ef46ec0a669ea4e125ae79745090e Mon Sep 17 00:00:00 2001
From: Christopher Head <chead@chead.ca>
Date: Sat, 13 Jan 2018 11:27:01 -0800
Subject: [PATCH] Limit and optimize digiline_send (#379)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Close vulnerability and optimize digiline_send

`digiline_send` as it previously existed was vulnerable to a
time-of-check-to-time-of-use vulnerability in which a table could be
sent, size-checked, and then modified after the send but before
delivery. This would allow larger tables to be sent. It was also slow
because it called `minetest.serialize`. Fix both of these by
implementing custom message cleanup logic which simultaneously computes
the message’s cost.

* Clean up interaction with Digilines

Use `minetest.global_exists` to avoid an undefined global variable
warning when operating a Luacontroller with Digilines not available. Use
the new `digilines` table in preference to the old `digiline` table.

* Copy received messages

When a Digiline message is received at a Luacontroller, copy it so that
local modifications made by the Luacontroller code will not modify
copies of the table that are being passed to other nodes on the Digiline
network.
---
 mesecons_luacontroller/init.lua | 104 ++++++++++++++++++++++++++++----
 1 file changed, 93 insertions(+), 11 deletions(-)

diff --git a/mesecons_luacontroller/init.lua b/mesecons_luacontroller/init.lua
index 85d16fa..0e3440e 100644
--- a/mesecons_luacontroller/init.lua
+++ b/mesecons_luacontroller/init.lua
@@ -270,27 +270,108 @@ local function get_interrupt(pos)
 end
 
 
+-- Given a message object passed to digiline_send, clean it up into a form
+-- which is safe to transmit over the network and compute its "cost" (a very
+-- rough estimate of its memory usage).
+--
+-- The cleaning comprises the following:
+-- 1. Functions (and userdata, though user scripts ought not to get hold of
+--    those in the first place) are removed, because they break the model of
+--    Digilines as a network that carries basic data, and they could exfiltrate
+--    references to mutable objects from one Luacontroller to another, allowing
+--    inappropriate high-bandwidth, no-wires communication.
+-- 2. Tables are duplicated because, being mutable, they could otherwise be
+--    modified after the send is complete in order to change what data arrives
+--    at the recipient, perhaps in violation of the previous cleaning rule or
+--    in violation of the message size limit.
+--
+-- The cost indication is only approximate; it’s not a perfect measurement of
+-- the number of bytes of memory used by the message object.
+--
+-- Parameters:
+-- msg -- the message to clean
+-- back_references -- for internal use only; do not provide
+--
+-- Returns:
+-- 1. The cleaned object.
+-- 2. The approximate cost of the object.
+local function clean_and_weigh_digiline_message(msg, back_references)
+	local t = type(msg)
+	if t == "string" then
+		-- Strings are immutable so can be passed by reference, and cost their
+		-- length plus the size of the Lua object header (24 bytes on a 64-bit
+		-- platform) plus one byte for the NUL terminator.
+		return msg, #msg + 25
+	elseif t == "number" then
+		-- Numbers are passed by value so need not be touched, and cost 8 bytes
+		-- as all numbers in Lua are doubles.
+		return msg, 8
+	elseif t == "boolean" then
+		-- Booleans are passed by value so need not be touched, and cost 1
+		-- byte.
+		return msg, 1
+	elseif t == "table" then
+		-- Tables are duplicated. Check if this table has been seen before
+		-- (self-referential or shared table); if so, reuse the cleaned value
+		-- of the previous occurrence, maintaining table topology and avoiding
+		-- infinite recursion, and charge zero bytes for this as the object has
+		-- already been counted.
+		back_references = back_references or {}
+		local bref = back_references[msg]
+		if bref then
+			return bref, 0
+		end
+		-- Construct a new table by cleaning all the keys and values and adding
+		-- up their costs, plus 8 bytes as a rough estimate of table overhead.
+		local cost = 8
+		local ret = {}
+		back_references[msg] = ret
+		for k, v in pairs(msg) do
+			local k_cost, v_cost
+			k, k_cost = clean_and_weigh_digiline_message(k, back_references)
+			v, v_cost = clean_and_weigh_digiline_message(v, back_references)
+			if k ~= nil and v ~= nil then
+				-- Only include an element if its key and value are of legal
+				-- types.
+				ret[k] = v
+			end
+			-- If we only counted the cost of a table element when we actually
+			-- used it, we would be vulnerable to the following attack:
+			-- 1. Construct a huge table (too large to pass the cost limit).
+			-- 2. Insert it somewhere in a table, with a function as a key.
+			-- 3. Insert it somewhere in another table, with a number as a key.
+			-- 4. The first occurrence doesn’t pay the cost because functions
+			--    are stripped and therefore the element is dropped.
+			-- 5. The second occurrence doesn’t pay the cost because it’s in
+			--    back_references.
+			-- By counting the costs regardless of whether the objects will be
+			-- included, we avoid this attack; it may overestimate the cost of
+			-- some messages, but only those that won’t be delivered intact
+			-- anyway because they contain illegal object types.
+			cost = cost + k_cost + v_cost
+		end
+		return ret, cost
+	else
+		return nil, 0
+	end
+end
+
+
 local function get_digiline_send(pos)
-	if not digiline then return end
+	if not minetest.global_exists("digilines") then return end
 	return function(channel, msg)
 		-- Make sure channel is string, number or boolean
 		if (type(channel) ~= "string" and type(channel) ~= "number" and type(channel) ~= "boolean") then
 			return false
 		end
 
-		-- It is technically possible to send functions over the wire since
-		-- the high performance impact of stripping those from the data has
-		-- been decided to not be worth the added realism.
-		-- Make sure serialized version of the data is not insanely long to
-		-- prevent DoS-like attacks
-		local msg_ser = minetest.serialize(msg)
-		if #msg_ser > mesecon.setting("luacontroller_digiline_maxlen", 50000) then
+		local msg_cost
+		msg, msg_cost = clean_and_weigh_digiline_message(msg)
+		if msg == nil or msg_cost > mesecon.setting("luacontroller_digiline_maxlen", 50000) then
 			return false
 		end
 
-		minetest.after(0, function()
-			digiline:receptor_send(pos, digiline.rules.default, channel, msg)
-		end)
+		minetest.after(0, digilines.receptor_send, pos, digilines.rules.default, channel, msg)
 		return true
 	end
 end
@@ -518,6 +599,7 @@ local digiline = {
 	receptor = {},
 	effector = {
 		action = function(pos, node, channel, msg)
+			msg = clean_and_weigh_digiline_message(msg)
 			run(pos, {type = "digiline", channel = channel, msg = msg})
 		end
 	}