Allow database backend to optimize group-by-XZ operation

This commit is contained in:
sfan5 2025-02-19 22:57:58 +01:00
parent 7685e548f0
commit cd36f16775
11 changed files with 89 additions and 70 deletions

1
.gitignore vendored
View File

@ -13,3 +13,4 @@ Makefile
cmake_install.cmake
cmake_config.h
compile_commands.json
.vscode/

View File

@ -464,26 +464,20 @@ void TileGenerator::loadBlocks()
const int16_t yMin = mod16(m_yMin);
if (m_exhaustiveSearch == EXH_NEVER || m_exhaustiveSearch == EXH_Y) {
std::vector<BlockPos> vec = m_db->getBlockPos(
std::vector<BlockPos> vec = m_db->getBlockPosXZ(
BlockPos(m_geomX, yMin, m_geomY),
BlockPos(m_geomX2, yMax, m_geomY2)
);
for (auto pos : vec) {
assert(pos.x >= m_geomX && pos.x < m_geomX2);
assert(pos.y >= yMin && pos.y < yMax);
assert(pos.z >= m_geomY && pos.z < m_geomY2);
// Adjust minimum and maximum positions to the nearest block
if (pos.x < m_xMin)
m_xMin = pos.x;
if (pos.x > m_xMax)
m_xMax = pos.x;
if (pos.z < m_zMin)
m_zMin = pos.z;
if (pos.z > m_zMax)
m_zMax = pos.z;
m_xMin = mymin<int>(m_xMin, pos.x);
m_xMax = mymax<int>(m_xMax, pos.x);
m_zMin = mymin<int>(m_zMin, pos.z);
m_zMax = mymax<int>(m_zMax, pos.z);
m_positions[pos.z].emplace(pos.x);
}

View File

@ -1,5 +1,6 @@
#include <stdexcept>
#include <sstream>
#include <algorithm>
#include "db-leveldb.h"
#include "types.h"
@ -18,6 +19,12 @@ static inline std::string i64tos(int64_t i)
return os.str();
}
// Binary search helper: yields an iterator to the first element of
// `container` whose x is >= `find_x`, or end() when there is none.
// The elements are expected to be ordered by x for this to be meaningful.
#define lower_bound_x(container, find_x) \
	std::lower_bound((container).begin(), (container).end(), (find_x), \
		[] (const vec2 &elem, int16_t wanted_x) { \
			return elem.x < wanted_x; \
		})
DBLevelDB::DBLevelDB(const std::string &mapdir)
{
@ -25,7 +32,7 @@ DBLevelDB::DBLevelDB(const std::string &mapdir)
options.create_if_missing = false;
leveldb::Status status = leveldb::DB::Open(options, mapdir + "map.db", &db);
if (!status.ok()) {
throw std::runtime_error(std::string("Failed to open Database: ") + status.ToString());
throw std::runtime_error(std::string("Failed to open database: ") + status.ToString());
}
/* LevelDB is a dumb key-value store, so the only optimization we can do
@ -41,18 +48,24 @@ DBLevelDB::~DBLevelDB()
}
std::vector<BlockPos> DBLevelDB::getBlockPos(BlockPos min, BlockPos max)
std::vector<BlockPos> DBLevelDB::getBlockPosXZ(BlockPos min, BlockPos max)
{
std::vector<BlockPos> res;
for (const auto &it : posCache) {
if (it.first < min.z || it.first >= max.z)
const int16_t zpos = it.first;
if (zpos < min.z || zpos >= max.z)
continue;
for (auto pos2 : it.second) {
if (pos2.first < min.x || pos2.first >= max.x)
auto it2 = lower_bound_x(it.second, min.x);
for (; it2 != it.second.end(); it2++) {
const auto &pos2 = *it2;
if (pos2.x >= max.x)
break; // went past
if (pos2.y < min.y || pos2.y >= max.y)
continue;
if (pos2.second < min.y || pos2.second >= max.y)
// skip duplicates
if (!res.empty() && res.back().x == pos2.x && res.back().z == zpos)
continue;
res.emplace_back(pos2.first, pos2.second, it.first);
res.emplace_back(pos2.x, pos2.y, zpos);
}
}
return res;
@ -61,7 +74,7 @@ std::vector<BlockPos> DBLevelDB::getBlockPos(BlockPos min, BlockPos max)
void DBLevelDB::loadPosCache()
{
leveldb::Iterator * it = db->NewIterator(leveldb::ReadOptions());
leveldb::Iterator *it = db->NewIterator(leveldb::ReadOptions());
for (it->SeekToFirst(); it->Valid(); it->Next()) {
int64_t posHash = stoi64(it->key().ToString());
BlockPos pos = decodeBlockPos(posHash);
@ -69,6 +82,9 @@ void DBLevelDB::loadPosCache()
posCache[pos.z].emplace_back(pos.x, pos.y);
}
delete it;
for (auto &it : posCache)
std::sort(it.second.begin(), it.second.end());
}
@ -81,13 +97,18 @@ void DBLevelDB::getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
auto it = posCache.find(z);
if (it == posCache.cend())
return;
for (auto pos2 : it->second) {
if (pos2.first != x)
continue;
if (pos2.second < min_y || pos2.second >= max_y)
auto it2 = lower_bound_x(it->second, x);
if (it2 == it->second.end() || it2->x != x)
return;
// it2 is now pointing to a contiguous part where it2->x == x
for (; it2 != it->second.end(); it2++) {
const auto &pos2 = *it2;
if (pos2.x != x)
break; // went past
if (pos2.y < min_y || pos2.y >= max_y)
continue;
BlockPos pos(x, pos2.second, z);
BlockPos pos(x, pos2.y, z);
status = db->Get(leveldb::ReadOptions(), i64tos(encodeBlockPos(pos)), &datastr);
if (status.ok()) {
blocks.emplace_back(

View File

@ -8,7 +8,7 @@
class DBLevelDB : public DB {
public:
DBLevelDB(const std::string &mapdir);
std::vector<BlockPos> getBlockPos(BlockPos min, BlockPos max) override;
std::vector<BlockPos> getBlockPosXZ(BlockPos min, BlockPos max) override;
void getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
int16_t min_y, int16_t max_y) override;
void getBlocksByPos(BlockList &blocks,
@ -18,11 +18,24 @@ public:
bool preferRangeQueries() const override { return false; }
private:
using pos2d = std::pair<int16_t, int16_t>;
// Pair of 16-bit coordinates with a strict ordering: x is compared first,
// y only breaks ties between equal x values.
struct vec2 {
	int16_t x, y;

	// Default construction yields (0, 0).
	constexpr vec2() : vec2(0, 0) {}
	constexpr vec2(int16_t x, int16_t y) : x(x), y(y) {}

	inline bool operator<(const vec2 &p) const
	{
		if (x != p.x)
			return x < p.x;
		return y < p.y;
	}
};
void loadPosCache();
// indexed by Z, contains all (x,y) position pairs
std::unordered_map<int16_t, std::vector<pos2d>> posCache;
leveldb::DB *db;
std::unordered_map<int16_t, std::vector<vec2>> posCache;
leveldb::DB *db = NULL;
};

View File

@ -27,10 +27,10 @@ DBPostgreSQL::DBPostgreSQL(const std::string &mapdir)
prepareStatement(
"get_block_pos",
"SELECT posX::int4, posY::int4, posZ::int4 FROM blocks WHERE"
"SELECT posX::int4, posZ::int4 FROM blocks WHERE"
" (posX BETWEEN $1::int4 AND $2::int4) AND"
" (posY BETWEEN $3::int4 AND $4::int4) AND"
" (posZ BETWEEN $5::int4 AND $6::int4)"
" (posZ BETWEEN $5::int4 AND $6::int4) GROUP BY posX, posZ"
);
prepareStatement(
"get_blocks",
@ -60,7 +60,7 @@ DBPostgreSQL::~DBPostgreSQL()
}
std::vector<BlockPos> DBPostgreSQL::getBlockPos(BlockPos min, BlockPos max)
std::vector<BlockPos> DBPostgreSQL::getBlockPosXZ(BlockPos min, BlockPos max)
{
int32_t const x1 = htonl(min.x);
int32_t const x2 = htonl(max.x - 1);
@ -83,11 +83,14 @@ std::vector<BlockPos> DBPostgreSQL::getBlockPos(BlockPos min, BlockPos max)
std::vector<BlockPos> positions;
positions.reserve(numrows);
for (int row = 0; row < numrows; ++row)
positions.emplace_back(pg_to_blockpos(results, row, 0));
BlockPos pos;
for (int row = 0; row < numrows; ++row) {
pos.x = pg_binary_to_int(results, row, 0);
pos.z = pg_binary_to_int(results, row, 1);
positions.push_back(pos);
}
PQclear(results);
return positions;
}
@ -215,12 +218,3 @@ int DBPostgreSQL::pg_binary_to_int(PGresult *res, int row, int col)
int32_t* raw = reinterpret_cast<int32_t*>(PQgetvalue(res, row, col));
return ntohl(*raw);
}
// Builds a BlockPos from three consecutive columns of one result row:
// `col` supplies x, `col + 1` supplies y and `col + 2` supplies z, each
// decoded with pg_binary_to_int().
BlockPos DBPostgreSQL::pg_to_blockpos(PGresult *res, int row, int col)
{
	const int x = pg_binary_to_int(res, row, col);
	const int y = pg_binary_to_int(res, row, col + 1);
	const int z = pg_binary_to_int(res, row, col + 2);
	return BlockPos(x, y, z);
}

View File

@ -6,7 +6,7 @@
class DBPostgreSQL : public DB {
public:
DBPostgreSQL(const std::string &mapdir);
std::vector<BlockPos> getBlockPos(BlockPos min, BlockPos max) override;
std::vector<BlockPos> getBlockPosXZ(BlockPos min, BlockPos max) override;
void getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
int16_t min_y, int16_t max_y) override;
void getBlocksByPos(BlockList &blocks,
@ -25,7 +25,6 @@ protected:
bool clear = true
);
int pg_binary_to_int(PGresult *res, int row, int col);
BlockPos pg_to_blockpos(PGresult *res, int row, int col);
private:
PGconn *db;

View File

@ -68,7 +68,7 @@ DBRedis::~DBRedis()
}
std::vector<BlockPos> DBRedis::getBlockPos(BlockPos min, BlockPos max)
std::vector<BlockPos> DBRedis::getBlockPosXZ(BlockPos min, BlockPos max)
{
std::vector<BlockPos> res;
for (const auto &it : posCache) {

View File

@ -9,7 +9,7 @@
class DBRedis : public DB {
public:
DBRedis(const std::string &mapdir);
std::vector<BlockPos> getBlockPos(BlockPos min, BlockPos max) override;
std::vector<BlockPos> getBlockPosXZ(BlockPos min, BlockPos max) override;
void getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
int16_t min_y, int16_t max_y) override;
void getBlocksByPos(BlockList &blocks,

View File

@ -79,9 +79,9 @@ DBSQLite3::DBSQLite3(const std::string &mapdir)
"SELECT data FROM blocks WHERE x = ? AND y = ? AND z = ?"));
SQLOK(prepare(stmt_get_block_pos_range,
"SELECT x, y, z FROM blocks WHERE "
"SELECT x, z FROM blocks WHERE "
"x >= ? AND y >= ? AND z >= ? AND "
"x < ? AND y < ? AND z < ?"));
"x < ? AND y < ? AND z < ? GROUP BY x, z"));
} else {
SQLOK(prepare(stmt_get_blocks_z,
"SELECT pos, data FROM blocks WHERE pos BETWEEN ? AND ?"));
@ -119,7 +119,7 @@ inline void DBSQLite3::getPosRange(int64_t &min, int64_t &max,
}
std::vector<BlockPos> DBSQLite3::getBlockPos(BlockPos min, BlockPos max)
std::vector<BlockPos> DBSQLite3::getBlockPosXZ(BlockPos min, BlockPos max)
{
int result;
sqlite3_stmt *stmt;
@ -152,12 +152,13 @@ std::vector<BlockPos> DBSQLite3::getBlockPos(BlockPos min, BlockPos max)
if (newFormat) {
pos.x = sqlite3_column_int(stmt, 0);
pos.y = sqlite3_column_int(stmt, 1);
pos.z = sqlite3_column_int(stmt, 2);
pos.z = sqlite3_column_int(stmt, 1);
} else {
pos = decodeBlockPos(sqlite3_column_int64(stmt, 0));
if (pos.x < min.x || pos.x >= max.x || pos.y < min.y || pos.y >= max.y)
continue;
// note that we can't try to deduplicate these because the order
// of the encoded pos (if sorted) is ZYX.
}
positions.emplace_back(pos);
}

View File

@ -47,7 +47,7 @@ protected:
class DBSQLite3 : public DB, SQLite3Base {
public:
DBSQLite3(const std::string &mapdir);
std::vector<BlockPos> getBlockPos(BlockPos min, BlockPos max) override;
std::vector<BlockPos> getBlockPosXZ(BlockPos min, BlockPos max) override;
void getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
int16_t min_y, int16_t max_y) override;
void getBlocksByPos(BlockList &blocks,

View File

@ -6,18 +6,15 @@
#include <utility>
#include "types.h"
struct BlockPos {
int16_t x;
int16_t y;
int16_t z;
int16_t x, y, z;
BlockPos() : x(0), y(0), z(0) {}
explicit BlockPos(int16_t v) : x(v), y(v), z(v) {}
BlockPos(int16_t x, int16_t y, int16_t z) : x(x), y(y), z(z) {}
constexpr BlockPos() : x(0), y(0), z(0) {}
explicit constexpr BlockPos(int16_t v) : x(v), y(v), z(v) {}
constexpr BlockPos(int16_t x, int16_t y, int16_t z) : x(x), y(y), z(z) {}
// Implements the inverse ordering so that (2,2,2) < (1,1,1)
bool operator < (const BlockPos &p) const
inline bool operator<(const BlockPos &p) const
{
if (z > p.z)
return true;
@ -27,11 +24,7 @@ struct BlockPos {
return true;
if (y < p.y)
return false;
if (x > p.x)
return true;
if (x < p.x)
return false;
return false;
return x > p.x;
}
};
@ -47,25 +40,28 @@ protected:
static inline BlockPos decodeBlockPos(int64_t hash);
public:
/* Return all block positions inside the range given by min and max,
* so that min.x <= x < max.x, ...
/* Return all unique (X, Z) position pairs inside area given by min and max,
* so that min.x <= x < max.x && min.z <= z < max.z
* Note: duplicates are allowed, but result in wasted time.
*/
virtual std::vector<BlockPos> getBlockPos(BlockPos min, BlockPos max) = 0;
virtual std::vector<BlockPos> getBlockPosXZ(BlockPos min, BlockPos max) = 0;
/* Read all blocks in column given by x and z
* and inside the given Y range (min_y <= y < max_y) into list
*/
virtual void getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
int16_t min_y, int16_t max_y) = 0;
/* Read blocks at given positions into list
*/
virtual void getBlocksByPos(BlockList &blocks,
const std::vector<BlockPos> &positions) = 0;
/* Can this database efficiently do range queries?
* (for large data sets, more efficient than brute force)
*/
virtual bool preferRangeQueries() const = 0;
virtual ~DB() {}
};