// minetest/src/client/imagefilters.cpp

/*
Copyright (C) 2015 Aaron Suen <warr1024@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "imagefilters.h"
#include "debug.h"
#include "util/numeric.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <memory>
#include <utility>
#include <vector>
// Simple 2D bitmap class with just the functionality needed here
class Bitmap {
u32 linesize, lines;
std::vector<u8> data;
static inline u32 bytepos(u32 index) { return index >> 3; }
static inline u8 bitpos(u32 index) { return index & 7; }
public:
Bitmap(u32 width, u32 height) : linesize(width), lines(height),
data(bytepos(width * height) + 1) {}
inline bool get(u32 x, u32 y) const {
u32 index = y * linesize + x;
return data[bytepos(index)] & (1 << bitpos(index));
}
inline void set(u32 x, u32 y) {
u32 index = y * linesize + x;
data[bytepos(index)] |= 1 << bitpos(index);
}
inline bool all() const {
for (u32 i = 0; i < data.size() - 1; i++) {
if (data[i] != 0xff)
return false;
}
// last byte not entirely filled
for (u8 i = 0; i < bitpos(linesize * lines); i++) {
bool value_of_bit = data.back() & (1 << i);
if (!value_of_bit)
return false;
}
return true;
}
inline void copy(Bitmap &to) const {
assert(to.linesize == linesize && to.lines == lines);
to.data = data;
}
};
/* Fill the RGB channels of (semi-)transparent pixels by repeatedly averaging
 * in the colors of already-processed neighbor pixels, so that transparent
 * areas bleed outward from the opaque regions.
 *
 * Template parameter IS_A8R8G8B8 selects direct 32-bit memory access for
 * pixels; otherwise the generic (and slower) getPixel()/setPixel() API is
 * used. "threshold": pixels with alpha > threshold are the opaque seeds.
 */
template <bool IS_A8R8G8B8>
static void imageCleanTransparentWithInlining(video::IImage *src, u32 threshold)
{
	void *const src_data = src->getData();
	const core::dimension2d<u32> dim = src->getDimension();

	// Read one pixel — direct memory access when the format is known.
	auto get_pixel = [=](u32 x, u32 y) -> video::SColor {
		if constexpr (IS_A8R8G8B8) {
			return reinterpret_cast<u32 *>(src_data)[y*dim.Width + x];
		} else {
			return src->getPixel(x, y);
		}
	};
	// Write one pixel — same two paths as get_pixel.
	auto set_pixel = [=](u32 x, u32 y, video::SColor color) {
		if constexpr (IS_A8R8G8B8) {
			u32 *dest = &reinterpret_cast<u32 *>(src_data)[y*dim.Width + x];
			*dest = color.color;
		} else {
			src->setPixel(x, y, color);
		}
	};

	// One bit per pixel: "this pixel already has a usable color".
	Bitmap bitmap(dim.Width, dim.Height);

	// First pass: Mark all opaque pixels
	// Note: loop y around x for better cache locality.
	for (u32 ctry = 0; ctry < dim.Height; ctry++)
	for (u32 ctrx = 0; ctrx < dim.Width; ctrx++) {
		if (get_pixel(ctrx, ctry).getAlpha() > threshold)
			bitmap.set(ctrx, ctry);
	}

	// Exit early if all pixels opaque
	if (bitmap.all())
		return;

	Bitmap newmap = bitmap;

	// Cap iterations to keep runtime reasonable, for higher-res textures we can
	// get away with filling less pixels.
	int iter_max = 11 - std::max(dim.Width, dim.Height) / 16;
	iter_max = std::max(iter_max, 2);

	// Then repeatedly look for transparent pixels, filling them in until
	// we're finished.
	for (int iter = 0; iter < iter_max; iter++) {

	for (u32 ctry = 0; ctry < dim.Height; ctry++)
	for (u32 ctrx = 0; ctrx < dim.Width; ctrx++) {
		// Skip pixels we have already processed
		if (bitmap.get(ctrx, ctry))
			continue;

		// Sample size and total weighted r, g, b values
		u32 ss = 0, sr = 0, sg = 0, sb = 0;

		// Walk each neighbor pixel (clipped to image bounds)
		for (u32 sy = (ctry < 1) ? 0 : (ctry - 1);
				sy <= (ctry + 1) && sy < dim.Height; sy++)
		for (u32 sx = (ctrx < 1) ? 0 : (ctrx - 1);
				sx <= (ctrx + 1) && sx < dim.Width; sx++) {
			// Ignore pixels we haven't processed
			if (!bitmap.get(sx, sy))
				continue;

			// Add RGB values weighted by alpha IF the pixel is opaque, otherwise
			// use full weight since we want to propagate colors.
			// FIXME: But why are we weighting them more than opaque pixels?
			video::SColor d = get_pixel(sx, sy);
			u32 a = d.getAlpha() <= threshold ? 255 : d.getAlpha();
			ss += a;
			sr += a * d.getRed();
			sg += a * d.getGreen();
			sb += a * d.getBlue();
		}

		// Set pixel to average weighted by alpha (alpha itself is untouched)
		if (ss > 0) {
			video::SColor c = get_pixel(ctrx, ctry);
			c.setRed(sr / ss);
			c.setGreen(sg / ss);
			c.setBlue(sb / ss);
			set_pixel(ctrx, ctry, c);
			newmap.set(ctrx, ctry);
		}
	}

	if (newmap.all())
		return;

	// Apply changes to bitmap for next run. This is done so we don't introduce
	// a bias in color propagation in the direction pixels are processed.
	newmap.copy(bitmap);

	}
}
/* Experimental replacement for imageCleanTransparentWithInlining<true>
 * (A8R8G8B8 images only; sanity-checked below).
 *
 * Algorithm: build a pyramid of successively half-sized copies of the image
 * (alpha-weighted downscaling, "Step 1"), then walk the pyramid back up
 * ("Step 2"): every pixel whose alpha is <= threshold receives its RGB from
 * the next-smaller level via bilinear sampling, keeping its own alpha. The
 * net effect is that transparent areas inherit the average color of the
 * nearest opaque regions.
 *
 * NOTE(review): still work in progress — "threshold" is forcibly overridden
 * to 0 below, and several commented-out (//~) experiments remain.
 */
static void imageCleanTransparentNew(video::IImage *src, u32 threshold)
{
	// with threshold = 127, the average of the whole texture is far too dominant
	threshold = 0; //TODO

	// One pyramid level: pixel data pointer plus its dimensions.
	using ImgLvl = std::pair<u32 *, core::dimension2d<u32>>;

	sanity_check(src->getColorFormat() == video::ECF_A8R8G8B8);

	// Early return if no alpha < threshold
	{
		const core::dimension2d<u32> dim = src->getDimension();
		u32 *const data = reinterpret_cast<u32 *>(src->getData());
		bool has_transparent = false;
		for (u32 idx = 0; idx < dim.Width * dim.Height; ++idx) {
			if (video::SColor(data[idx]).getAlpha() <= threshold) {
				has_transparent = true;
				break;
			}
		}
		if (!has_transparent)
			return;
	}

	// Step 0: Allocate images
	// levels[i+1] is 2 times smaller than levels[i], rounded up
	// level_ups owns the buffers; levels[0] aliases the source image itself.
	std::vector<std::unique_ptr<u32[]>> level_ups;
	std::vector<ImgLvl> levels;
	{
		core::dimension2d<u32> dim = src->getDimension();
		levels.emplace_back(reinterpret_cast<u32 *>(src->getData()), dim);
		while (dim.Width > 1 || dim.Height > 1) {
			dim.Width = (dim.Width + 1) / 2;
			dim.Height = (dim.Height + 1) / 2;
			auto data = std::unique_ptr<u32[]>(new u32[dim.Width * dim.Height]);
			levels.emplace_back(data.get(), dim);
			level_ups.push_back(std::move(data));
		}
	}
	if (levels.size() <= 1) {
		// just one pixel. can't do anything
		return;
	}

	// Step 1: Scale down
	// Average 4 pixels into one, weighting each color by its alpha so fully
	// transparent inputs contribute no color. Returns 0 (clear black) if all
	// four inputs are transparent.
	auto mix4cols = [](std::array<video::SColor, 4> colors) -> video::SColor {
		u32 sr = 0, sg = 0, sb = 0, sa = 0;
		auto add_color = [&](video::SColor c) {
			u32 alph = c.getAlpha();
			sr += alph * c.getRed();
			sg += alph * c.getGreen();
			sb += alph * c.getBlue();
			sa += alph;
		};
		for (auto c : colors)
			add_color(c);
		if (sa == 0)
			return 0;
		//~ if (sa == 255 * 4) { // common case
		//~ 	sr = 0, sg = 0, sb = 0;
		//~ 	for (auto c : colors) {
		//~ 		sr += c.getRed();
		//~ 		sg += c.getGreen();
		//~ 		sb += c.getBlue();
		//~ 	}
		//~ 	sr /= 4;
		//~ 	sg /= 4;
		//~ 	sb /= 4;
		//~ 	return video::SColor(255, sr, sg, sb);
		//~ }
		//~ u64 d = (1 << 16) / sa;
		//~ sr = (sr * d) >> 16;
		//~ sg = (sg * d) >> 16;
		//~ sb = (sb * d) >> 16;
		//~ sa = ((sa + 1) * d) >> 16;
		sr /= sa;
		sg /= sa;
		sb /= sa;
		sa = (sa + 1) / 4; // +1 for better rounding // TODO: maybe always round up, to make sure colors are preserved? (+3)
		return video::SColor(sa, sr, sg, sb);
	};
	for (size_t lvl = 0; lvl + 1 < levels.size(); ++lvl) {
		u32 *const data_large = levels[lvl].first;
		u32 *const data_small = levels[lvl+1].first;
		const core::dimension2d<u32> dim_large = levels[lvl].second;
		const core::dimension2d<u32> dim_small = levels[lvl+1].second;
		// round dim_large down. odd rows and columns are handled separately
		u32 idx_small = 0;
		u32 idx_large = 0; // index of upper left pixel in large image
		u32 y_small;
		for (y_small = 0; y_small < dim_large.Height / 2; ++y_small) {
			u32 x_small;
			for (x_small = 0; x_small < dim_large.Width / 2; ++x_small) {
				assert(idx_small == y_small * dim_small.Width + x_small);
				assert(idx_large == y_small * 2 * dim_large.Width + x_small * 2);
				// Full 2x2 block available.
				data_small[idx_small] = mix4cols({
					data_large[idx_large],
					data_large[idx_large + 1],
					data_large[idx_large + dim_large.Width],
					data_large[idx_large + dim_large.Width + 1],
				}).color;
				idx_small += 1;
				idx_large += 2;
			}
			// odd column
			// Only a 1x2 block remains; pad with transparent black (0).
			if (x_small != dim_small.Width) {
				assert(idx_small == y_small * dim_small.Width + x_small);
				assert(idx_large == y_small * 2 * dim_large.Width + x_small * 2);
				data_small[idx_small] = mix4cols({
					data_large[idx_large],
					0,
					data_large[idx_large + dim_large.Width],
					0,
				}).color;
				idx_small += 1;
				idx_large += 1;
			}
			// Skip the second row of the processed 2x2 blocks.
			idx_large += dim_large.Width;
		}
		// odd row
		if (y_small != dim_small.Height) {
			u32 x_small;
			for (x_small = 0; x_small < dim_large.Width / 2; ++x_small) {
				assert(idx_small == y_small * dim_small.Width + x_small);
				assert(idx_large == y_small * 2 * dim_large.Width + x_small * 2);
				// Only a 2x1 block remains.
				data_small[idx_small] = mix4cols({
					data_large[idx_large],
					data_large[idx_large + 1],
					0,
					0,
				}).color;
				idx_small += 1;
				idx_large += 2;
			}
			// odd column (corner pixel)
			if (x_small != dim_small.Width) {
				assert(idx_small == y_small * dim_small.Width + x_small);
				assert(idx_large == y_small * 2 * dim_large.Width + x_small * 2);
				//~ data_small[idx_small] = data_large[idx_large];
				data_small[idx_small] = mix4cols({
					data_large[idx_large],
					0,
					0,
					0,
				}).color;
				idx_small += 1;
				idx_large += 1;
			}
		}
	}

	// Step 2: Propagate back
	// If a pixel's alpha is < threshold, we sample the smaller level with bilinear
	// interpolation.
	for (int lvl = levels.size() - 2; lvl >= 0; --lvl) {
		u32 *const data_large = levels[lvl].first;
		u32 *const data_small = levels[lvl+1].first;
		const core::dimension2d<u32> dim_large = levels[lvl].second;
		const core::dimension2d<u32> dim_small = levels[lvl+1].second;
		// Even large dimensions mean the far border maps cleanly onto the
		// small image; odd ones are covered by the corner/border cases above.
		bool even_width = !(dim_large.Width & 1);
		bool even_height = !(dim_large.Height & 1);
		// c0 is near, c1 middle-far
		// Weights 3:1 (with +1 for rounding).
		auto bilinear_filter_2 = [](video::SColor c0, video::SColor c1) -> video::SColor {
			u8 r = std::min<u32>(255, (c0.getRed() * 3 + c1.getRed() + 1) / 4);
			u8 g = std::min<u32>(255, (c0.getGreen() * 3 + c1.getGreen() + 1) / 4);
			u8 b = std::min<u32>(255, (c0.getBlue() * 3 + c1.getBlue() + 1) / 4);
			u8 a = std::min<u32>(255, (c0.getAlpha() * 3 + c1.getAlpha() + 1) / 4);
			return video::SColor(a, r, g, b);
			//~ return c0;
		};
		// c0 is near, c1 and c2 middle-far, c3 far
		// we sample in the quarter of c0:
		// +----+----+
		// |    |    |
		// | c0 | c1 |
		// |   x|    |
		// +----+----+
		// |    |    |
		// | c2 | c3 |
		// |    |    |
		// +----+----+
		// Weights 9:3:3:1 (with +7 for rounding).
		auto bilinear_filter_4 = [](video::SColor c0, video::SColor c1,
				video::SColor c2, video::SColor c3) -> video::SColor {
			//~ return c0 * 0.75 * 0.75 + (c1 + c2) * 0.25 * 0.75 + c3 * 0.25 * 0.25;
			u8 r = std::min<u32>(255, (c0.getRed() * 3 * 3 + (c1.getRed() + c2.getRed()) * 1 * 3 + c3.getRed() * 1 * 1 + 7) / 16);
			u8 g = std::min<u32>(255, (c0.getGreen() * 3 * 3 + (c1.getGreen() + c2.getGreen()) * 1 * 3 + c3.getGreen() * 1 * 1 + 7) / 16);
			u8 b = std::min<u32>(255, (c0.getBlue() * 3 * 3 + (c1.getBlue() + c2.getBlue()) * 1 * 3 + c3.getBlue() * 1 * 1 + 7) / 16);
			u8 a = std::min<u32>(255, (c0.getAlpha() * 3 * 3 + (c1.getAlpha() + c2.getAlpha()) * 1 * 3 + c3.getAlpha() * 1 * 1 + 7) / 16);
			return video::SColor(a, r, g, b);
			//~ return c0;
		};
		// Corners
		// Take RGB straight from one small-level pixel; keep the large
		// pixel's own alpha.
		auto handle_pixel_from_1 = [&](u32 idx_large, u32 idx_small) {
			u8 alpha = video::SColor(data_large[idx_large]).getAlpha();
			if (alpha <= threshold) {
				video::SColor col = data_small[idx_small];
				col.setAlpha(alpha);
				data_large[idx_large] = col.color;
			}
		};
		handle_pixel_from_1(0, 0); // (0,0)
		if (even_width)
			handle_pixel_from_1(dim_large.Width - 1, dim_small.Width - 1); // (b,0)
		if (even_height)
			handle_pixel_from_1(dim_large.Width * (dim_large.Height - 1),
					dim_small.Width * (dim_small.Height - 1)); // (0,b)
		if (even_height && even_width)
			handle_pixel_from_1(dim_large.Width * dim_large.Height - 1,
					dim_small.Width * dim_small.Height - 1); // (b,b)
		// Borders (without corners)
		auto handle_pixel_from_2 = [&](u32 idx_large, u32 idx_small_0, u32 idx_small_1) {
			u8 alpha = video::SColor(data_large[idx_large]).getAlpha();
			if (alpha <= threshold) {
				video::SColor col = bilinear_filter_2(data_small[idx_small_0],
						data_small[idx_small_1]);
				col.setAlpha(alpha);
				data_large[idx_large] = col.color;
			}
		};
		// top row
		{
			u32 idx_large = 1; // (1,0)
			u32 idx_small = 0; // (0,0)
			for (u32 x_small = 0; x_small + 1 < dim_small.Width; ++x_small) {
				// left pixel
				handle_pixel_from_2(idx_large, idx_small, idx_small + 1);
				idx_large += 1;
				// right pixel
				handle_pixel_from_2(idx_large, idx_small + 1, idx_small);
				idx_large += 1;
				idx_small += 1;
			}
		}
		// bottom row
		if (even_height) {
			u32 idx_large = dim_large.Width * (dim_large.Height - 1) + 1; // (1,b)
			u32 idx_small = dim_small.Width * (dim_small.Height - 1); // (0,b)
			for (u32 x_small = 0; x_small + 1 < dim_small.Width; ++x_small) {
				// left pixel
				handle_pixel_from_2(idx_large, idx_small, idx_small + 1);
				idx_large += 1;
				// right pixel
				handle_pixel_from_2(idx_large, idx_small + 1, idx_small);
				idx_large += 1;
				idx_small += 1;
			}
		}
		// left column
		{
			u32 idx_large = dim_large.Width; // (0,1)
			u32 idx_small = 0; // (0,0)
			for (u32 y_small = 0; y_small + 1 < dim_small.Height; ++y_small) {
				// left pixel
				handle_pixel_from_2(idx_large, idx_small, idx_small + dim_small.Width);
				idx_large += dim_large.Width;
				// right pixel
				handle_pixel_from_2(idx_large, idx_small + dim_small.Width, idx_small);
				idx_large += dim_large.Width;
				idx_small += dim_small.Width;
			}
		}
		// right column
		if (even_width) {
			u32 idx_large = dim_large.Width * 2 - 1; // (b,1)
			u32 idx_small = dim_small.Width - 1; // (b,0)
			for (u32 y_small = 0; y_small + 1 < dim_small.Height; ++y_small) {
				// left pixel
				handle_pixel_from_2(idx_large, idx_small, idx_small + dim_small.Width);
				idx_large += dim_large.Width;
				// right pixel
				handle_pixel_from_2(idx_large, idx_small + dim_small.Width, idx_small);
				idx_large += dim_large.Width;
				idx_small += dim_small.Width;
			}
		}
		// Inner pixels
		auto handle_pixel_from_4 = [&](u32 idx_large, u32 idx_small_0, u32 idx_small_1,
				u32 idx_small_2, u32 idx_small_3) {
			u8 alpha = video::SColor(data_large[idx_large]).getAlpha();
			if (alpha <= threshold) {
				video::SColor col = bilinear_filter_4(data_small[idx_small_0],
						data_small[idx_small_1], data_small[idx_small_2],
						data_small[idx_small_3]);
				col.setAlpha(alpha);
				data_large[idx_large] = col.color;
			}
		};
		{
			//~ u32 idx_large = dim_large.Width + 1; // (1,1)
			u32 idx_small = 0; // (0,0)
			for (u32 y_small = 0; y_small + 1 < dim_small.Height; ++y_small) {
				u32 idx_large = (y_small * 2 + 1) * dim_large.Width + 1; // (1,y)
				for (u32 x_small = 0; x_small + 1 < dim_small.Width; ++x_small) {
					assert(idx_small == y_small * dim_small.Width + x_small);
					// Each small pixel (and its 3 relevant neighbors) feeds
					// a 2x2 block of large pixels; the argument order puts
					// the nearest small pixel first.
					// left up
					handle_pixel_from_4(idx_large,
						idx_small,
						idx_small + 1,
						idx_small + dim_small.Width,
						idx_small + dim_small.Width + 1
					);
					// right up
					handle_pixel_from_4(idx_large + 1,
						idx_small + 1,
						idx_small,
						idx_small + dim_small.Width + 1,
						idx_small + dim_small.Width
					);
					// left down
					handle_pixel_from_4(idx_large + dim_large.Width,
						idx_small + dim_small.Width,
						idx_small + dim_small.Width + 1,
						idx_small,
						idx_small + 1
					);
					// right down
					handle_pixel_from_4(idx_large + dim_large.Width + 1,
						idx_small + dim_small.Width + 1,
						idx_small + dim_small.Width,
						idx_small + 1,
						idx_small
					);
					idx_small += 1;
					idx_large += 2;
				}
				// NOTE(review): this increment is redundant — idx_large is
				// reassigned at the top of each y_small iteration.
				idx_large += dim_large.Width;
				idx_small += 1;
			}
		}
	}
}
/* Fill in RGB values for transparent pixels, to correct for odd colors
 * appearing at borders when blending. Many PNG optimizers discard the RGB
 * values of transparent pixels, but those values resurface when the pixel
 * is blended with non-transparent neighbors.
 *
 * The original image is modified in-place.
 *
 * "threshold" is the alpha level at or below which pixels are considered
 * transparent: should be 127 when the texture is used with
 * ALPHA_CHANNEL_REF, 0 when alpha blending is used.
 * FIXME: Why a threshold at all? PNG optimizers only touch pixels with alpha = 0.
 */
void imageCleanTransparent(video::IImage *src, u32 threshold)
{
	if (src->getColorFormat() != video::ECF_A8R8G8B8) {
		// Generic slow path via getPixel()/setPixel().
		imageCleanTransparentWithInlining<false>(src, threshold);
		return;
	}
	// Fast path for 32-bit images.
	//~ imageCleanTransparentWithInlining<true>(src, threshold);
	imageCleanTransparentNew(src, threshold);
}
/* Scale a region of an image into another image, using nearest-neighbor with
 * anti-aliasing; treat pixels as crisp rectangles, but blend them at boundaries
 * to prevent non-integer scaling ratio artifacts. Note that this may cause
 * some blending at the edges where pixels don't line up perfectly, but this
 * filter is designed to produce the most accurate results for both upscaling
 * and downscaling.
 */
void imageScaleNNAA(video::IImage *src, const core::rect<s32> &srcrect, video::IImage *dest)
{
	// Cache the source rectangle origin and extents as doubles.
	const double sox = srcrect.UpperLeftCorner.X * 1.0;
	const double soy = srcrect.UpperLeftCorner.Y * 1.0;
	const double sw = srcrect.getWidth() * 1.0;
	const double sh = srcrect.getHeight() * 1.0;

	video::SColor pxl;

	// Walk each destination image pixel.
	// Note: loop y around x for better cache locality.
	const core::dimension2d<u32> dim = dest->getDimension();
	for (u32 dy = 0; dy < dim.Height; dy++)
	for (u32 dx = 0; dx < dim.Width; dx++) {
		// Floating-point source rectangle covered by this destination
		// pixel, clipped to the source rect; for mirrored/flipped rects
		// ensure min/max end up in the right order.
		double minsx = rangelim(sox + (dx * sw / dim.Width), 0, sox + sw);
		double maxsx = rangelim(minsx + sw / dim.Width, 0, sox + sw);
		if (minsx > maxsx)
			SWAP(double, minsx, maxsx);
		double minsy = rangelim(soy + (dy * sh / dim.Height), 0, soy + sh);
		double maxsy = rangelim(minsy + sh / dim.Height, 0, soy + sh);
		if (minsy > maxsy)
			SWAP(double, minsy, maxsy);

		// Total area, and area-weighted sums of the channel values,
		// accumulated over all covered source pixels.
		double area = 0, ra = 0, ga = 0, ba = 0, aa = 0;

		// Loop over the integral pixel positions described by those bounds.
		for (double sy = floor(minsy); sy < maxsy; sy++)
		for (double sx = floor(minsx); sx < maxsx; sx++) {
			// Fraction of this source pixel's width/height that falls
			// inside the bounds, giving the covered area.
			double pw = 1;
			if (minsx > sx)
				pw += sx - minsx;
			if (maxsx < (sx + 1))
				pw += maxsx - sx - 1;
			double ph = 1;
			if (minsy > sy)
				ph += sy - minsy;
			if (maxsy < (sy + 1))
				ph += maxsy - sy - 1;
			const double pa = pw * ph;

			// Add the source pixel to the totals, weighted by covered area.
			pxl = src->getPixel((u32)sx, (u32)sy);
			area += pa;
			ra += pa * pxl.getRed();
			ga += pa * pxl.getGreen();
			ba += pa * pxl.getBlue();
			aa += pa * pxl.getAlpha();
		}

		// Write the area-weighted average (rounded to nearest); pixels
		// with no covered area become fully clear black.
		if (area > 0) {
			pxl.setRed(ra / area + 0.5);
			pxl.setGreen(ga / area + 0.5);
			pxl.setBlue(ba / area + 0.5);
			pxl.setAlpha(aa / area + 0.5);
		} else {
			pxl.setRed(0);
			pxl.setGreen(0);
			pxl.setBlue(0);
			pxl.setAlpha(0);
		}
		dest->setPixel(dx, dy, pxl);
	}
}
/* Check and align image to npot2 if required by hardware.
 * @param image image to check for npot2 alignment (ownership is taken if a
 *        new image must be created: the input is dropped in that case)
 * @param driver driver to use for image operations
 * @return image unchanged, or a scaled copy aligned to npot2 (may be nullptr
 *         if the driver fails to create the target image)
 */
video::IImage *Align2Npot2(video::IImage *image, video::IVideoDriver *driver)
{
	if (image == nullptr)
		return image;
	// Nothing to do if the hardware supports non-power-of-two textures.
	if (driver->queryFeature(video::EVDF_TEXTURE_NPOT))
		return image;

	const core::dimension2d<u32> dim = image->getDimension();
	unsigned int height = npot2(dim.Height);
	unsigned int width = npot2(dim.Width);
	if (dim.Height == height && dim.Width == width)
		return image; // already aligned

	// npot2() returned a smaller size than the image — go one power up.
	if (dim.Height > height)
		height *= 2;
	if (dim.Width > width)
		width *= 2;

	video::IImage *aligned = driver->createImage(video::ECF_A8R8G8B8,
			core::dimension2d<u32>(width, height));
	if (aligned != nullptr)
		image->copyToScaling(aligned);
	image->drop();
	return aligned;
}