|
|
|
@@ -122,6 +122,12 @@ namespace {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void CachedMeshBuffer::drop()
|
|
|
|
|
{
|
|
|
|
|
for (auto *it : buf)
|
|
|
|
|
it->drop();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
ClientMap
|
|
|
|
|
*/
|
|
|
|
@@ -191,6 +197,9 @@ void ClientMap::onSettingChanged(std::string_view name, bool all)
|
|
|
|
|
ClientMap::~ClientMap()
{
	// Unhook this object from the settings system before tearing down
	// anything else.
	g_settings->deregisterAllChangedCallbacks(this);

	// Drop whatever is still held in the dynamic buffer cache.
	for (auto entry = m_dynamic_buffers.begin();
			entry != m_dynamic_buffers.end(); ++entry)
		entry->second.drop();
}
|
|
|
|
|
|
|
|
|
|
void ClientMap::updateCamera(v3f pos, v3f dir, f32 fov, v3s16 offset, video::SColor light_color)
|
|
|
|
@@ -788,27 +797,24 @@ void MeshBufListMaps::addFromBlock(v3s16 block_pos, MapBlockMesh *block_mesh,
|
|
|
|
|
* @param src buffer list
|
|
|
|
|
* @param dst draw order
|
|
|
|
|
* @param get_world_pos returns translation for a buffer
|
|
|
|
|
* @param buffer_trash output container for temporary mesh buffers
|
|
|
|
|
* @param dynamic_buffers cache structure for merged buffers
|
|
|
|
|
* @return number of buffers that were merged
|
|
|
|
|
*/
|
|
|
|
|
template <typename F, typename C>
|
|
|
|
|
template <typename F>
|
|
|
|
|
static u32 transformBuffersToDrawOrder(
|
|
|
|
|
const MeshBufListMaps::MeshBufList &src, DrawDescriptorList &draw_order,
|
|
|
|
|
F get_world_pos, C &buffer_trash)
|
|
|
|
|
F get_world_pos, CachedMeshBuffers &dynamic_buffers)
|
|
|
|
|
{
|
|
|
|
|
/**
|
|
|
|
|
* This is a tradeoff between time spent merging buffers and time spent
|
|
|
|
|
* due to excess drawcalls.
|
|
|
|
|
* Testing has shown that the ideal value is in the low hundreds, as extra
|
|
|
|
|
* CPU work quickly eats up the benefits.
|
|
|
|
|
* CPU work quickly eats up the benefits (though alleviated by a cache).
|
|
|
|
|
* In MTG landscape scenes this was found to save around 20-40% of drawcalls.
|
|
|
|
|
*
|
|
|
|
|
* NOTE: if you attempt to test this with quicktune, it won't give you valid
|
|
|
|
|
* results since HW buffers stick around and Irrlicht handles large amounts
|
|
|
|
|
* inefficiently.
|
|
|
|
|
*
|
|
|
|
|
* TODO: as a next step we should cache merged meshes, so they do not need
|
|
|
|
|
* to be re-built *and* can be kept in GPU memory.
|
|
|
|
|
*/
|
|
|
|
|
const u32 target_min_vertices = g_settings->getU32("mesh_buffer_min_vertices");
|
|
|
|
|
|
|
|
|
@@ -826,6 +832,51 @@ static u32 transformBuffersToDrawOrder(
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// iterate in reverse to get closest blocks first
|
|
|
|
|
std::vector<std::pair<v3f, scene::IMeshBuffer*>> to_merge;
|
|
|
|
|
for (auto it = src.rbegin(); it != src.rend(); ++it) {
|
|
|
|
|
v3f translate = get_world_pos(it->first);
|
|
|
|
|
auto *buf = it->second;
|
|
|
|
|
if (can_merge < 2 || buf->getVertexCount() >= target_min_vertices) {
|
|
|
|
|
draw_order.emplace_back(translate, buf);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
to_merge.emplace_back(translate, buf);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Tracking buffers, their contents and modifications would be quite complicated
|
|
|
|
|
* so we opt for something simple here: We identify buffers by their location
|
|
|
|
|
* in memory.
|
|
|
|
|
* This imposes the following assumptions:
|
|
|
|
|
* - buffers don't move in memory
|
|
|
|
|
* - vertex and index data is immutable
|
|
|
|
|
* - we know when to invalidate (invalidateMapBlockMesh does this)
|
|
|
|
|
*/
|
|
|
|
|
std::sort(to_merge.begin(), to_merge.end(), [] (const auto &l, const auto &r) {
|
|
|
|
|
return static_cast<void*>(l.second) < static_cast<void*>(r.second);
|
|
|
|
|
});
|
|
|
|
|
// cache key is a string of sorted raw pointers
|
|
|
|
|
std::string key;
|
|
|
|
|
key.reserve(sizeof(void*) * to_merge.size());
|
|
|
|
|
for (auto &it : to_merge)
|
|
|
|
|
key.append(reinterpret_cast<const char*>(&it.second), sizeof(void*));
|
|
|
|
|
|
|
|
|
|
// try to take from cache
|
|
|
|
|
auto it2 = dynamic_buffers.find(key);
|
|
|
|
|
if (it2 != dynamic_buffers.end()) {
|
|
|
|
|
g_profiler->avg("CM::transformBuffersToDO: cache hit rate", 1);
|
|
|
|
|
const auto &use_mat = to_merge.front().second->getMaterial();
|
|
|
|
|
for (auto *buf : it2->second.buf) {
|
|
|
|
|
// material is not part of the cache key, so make sure it still matches
|
|
|
|
|
buf->getMaterial() = use_mat;
|
|
|
|
|
draw_order.emplace_back(v3f(0), buf);
|
|
|
|
|
}
|
|
|
|
|
it2->second.age = 0;
|
|
|
|
|
} else if (!key.empty()) {
|
|
|
|
|
g_profiler->avg("CM::transformBuffersToDO: cache hit rate", 0);
|
|
|
|
|
// merge and save to cache
|
|
|
|
|
auto &put_buffers = dynamic_buffers[key];
|
|
|
|
|
scene::SMeshBuffer *tmp = nullptr;
|
|
|
|
|
const auto &finish_buf = [&] () {
|
|
|
|
|
if (tmp) {
|
|
|
|
@@ -842,14 +893,9 @@ static u32 transformBuffersToDrawOrder(
|
|
|
|
|
tmp = nullptr;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// iterate in reverse to get closest blocks first
|
|
|
|
|
for (auto it = src.rbegin(); it != src.rend(); ++it) {
|
|
|
|
|
v3f translate = get_world_pos(it->first);
|
|
|
|
|
auto *buf = it->second;
|
|
|
|
|
if (can_merge < 2 || buf->getVertexCount() >= target_min_vertices) {
|
|
|
|
|
draw_order.emplace_back(translate, buf);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
for (auto &it : to_merge) {
|
|
|
|
|
v3f translate = it.first;
|
|
|
|
|
auto *buf = it.second;
|
|
|
|
|
|
|
|
|
|
bool new_buffer = false;
|
|
|
|
|
if (!tmp)
|
|
|
|
@@ -859,16 +905,18 @@ static u32 transformBuffersToDrawOrder(
|
|
|
|
|
if (new_buffer) {
|
|
|
|
|
finish_buf();
|
|
|
|
|
tmp = new scene::SMeshBuffer();
|
|
|
|
|
buffer_trash.push_back(tmp);
|
|
|
|
|
put_buffers.buf.push_back(tmp);
|
|
|
|
|
assert(tmp->getPrimitiveType() == buf->getPrimitiveType());
|
|
|
|
|
tmp->Material = buf->getMaterial();
|
|
|
|
|
// preallocate
|
|
|
|
|
tmp->Vertices->Data.reserve(total_vtx);
|
|
|
|
|
// preallocate approximately
|
|
|
|
|
tmp->Vertices->Data.reserve(MYMIN(U16_MAX, total_vtx));
|
|
|
|
|
tmp->Indices->Data.reserve(total_idx);
|
|
|
|
|
}
|
|
|
|
|
appendToMeshBuffer(tmp, buf, translate);
|
|
|
|
|
}
|
|
|
|
|
finish_buf();
|
|
|
|
|
assert(!put_buffers.buf.empty());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// first call needs to set the material
|
|
|
|
|
if (draw_order.size() > draw_order_pre)
|
|
|
|
@@ -921,7 +969,6 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass)
|
|
|
|
|
TimeTaker tt_collect("");
|
|
|
|
|
|
|
|
|
|
MeshBufListMaps grouped_buffers;
|
|
|
|
|
std::vector<scene::IMeshBuffer*> buffer_trash;
|
|
|
|
|
DrawDescriptorList draw_order;
|
|
|
|
|
|
|
|
|
|
auto is_frustum_culled = m_client->getCamera()->getFrustumCuller();
|
|
|
|
@@ -979,7 +1026,7 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass)
|
|
|
|
|
for (auto &map : grouped_buffers.maps) {
|
|
|
|
|
for (auto &list : map) {
|
|
|
|
|
merged_count += transformBuffersToDrawOrder(
|
|
|
|
|
list.second, draw_order, get_block_wpos, buffer_trash);
|
|
|
|
|
list.second, draw_order, get_block_wpos, m_dynamic_buffers);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -1036,6 +1083,20 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass)
|
|
|
|
|
if (pass == scene::ESNRP_SOLID) {
|
|
|
|
|
g_profiler->avg("renderMap(): animated meshes [#]", mesh_animate_count);
|
|
|
|
|
g_profiler->avg(prefix + "merged buffers [#]", merged_count);
|
|
|
|
|
|
|
|
|
|
u32 cached_count = 0;
|
|
|
|
|
for (auto it = m_dynamic_buffers.begin(); it != m_dynamic_buffers.end(); ) {
|
|
|
|
|
// prune aggressively since every new/changed block or camera
|
|
|
|
|
// rotation can have big effects
|
|
|
|
|
if (++it->second.age > 1) {
|
|
|
|
|
it->second.drop();
|
|
|
|
|
it = m_dynamic_buffers.erase(it);
|
|
|
|
|
} else {
|
|
|
|
|
cached_count += it->second.buf.size();
|
|
|
|
|
it++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
g_profiler->avg(prefix + "merged buffers in cache [#]", cached_count);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (pass == scene::ESNRP_TRANSPARENT) {
|
|
|
|
@@ -1045,9 +1106,51 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass)
|
|
|
|
|
g_profiler->avg(prefix + "vertices drawn [#]", vertex_count);
|
|
|
|
|
g_profiler->avg(prefix + "drawcalls [#]", drawcall_count);
|
|
|
|
|
g_profiler->avg(prefix + "material swaps [#]", material_swaps);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (auto &x : buffer_trash)
|
|
|
|
|
x->drop();
|
|
|
|
|
/**
 * Removes every entry of m_dynamic_buffers whose key refers to a mesh
 * buffer of the given block mesh.
 *
 * Keys are read as a sequence of raw pointer values in ascending order.
 * An entry is dropped and erased as soon as one of those pointers equals
 * the address of a buffer collected from `mesh`.
 *
 * @param mesh block mesh whose buffers must no longer be referenced by the cache
 */
void ClientMap::invalidateMapBlockMesh(MapBlockMesh *mesh)
{
	// Gather the buffers of this block the same way rendering groups them.
	MeshBufListMaps block_buffers;
	block_buffers.addFromBlock(v3s16(), mesh, getSceneManager()->getVideoDriver());

	// Addresses to look for, plus the largest one for the early exit below.
	std::vector<void*> stale;
	void *highest = nullptr;
	for (auto &outer : block_buffers.maps) {
		for (auto &group : outer) {
			for (auto &entry : group.second) {
				// keys store untyped addresses, so compare as void*
				void *const addr = entry.second;
				stale.push_back(addr);
				if (highest < addr)
					highest = addr;
			}
		}
	}
	if (stale.empty())
		return;

	// Returns true if the given cache key contains any of the stale addresses.
	const auto &key_is_stale = [&] (const std::string &key) {
		assert(key.size() % sizeof(void*) == 0);
		for (size_t pos = 0; pos < key.size(); pos += sizeof(void*)) {
			// The string data has no alignment guarantee, so the pointer
			// has to be copied out rather than read through a cast.
			void *addr;
			memcpy(&addr, &key[pos], sizeof(void*));
			// Pointers in a key are sorted: once past the largest stale
			// address nothing further in this key can match.
			if (addr > highest)
				return false;
			if (CONTAINS(stale, addr))
				return true;
		}
		return false;
	};

	// Walk the cache, dropping and erasing each affected entry.
	auto cur = m_dynamic_buffers.begin();
	while (cur != m_dynamic_buffers.end()) {
		if (!key_is_stale(cur->first)) {
			++cur;
			continue;
		}
		cur->second.drop();
		cur = m_dynamic_buffers.erase(cur);
	}
}
|
|
|
|
|
|
|
|
|
|
static bool getVisibleBrightness(Map *map, const v3f &p0, v3f dir, float step,
|
|
|
|
@@ -1263,7 +1366,6 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
MeshBufListMaps grouped_buffers;
|
|
|
|
|
std::vector<scene::IMeshBuffer*> buffer_trash;
|
|
|
|
|
DrawDescriptorList draw_order;
|
|
|
|
|
|
|
|
|
|
std::size_t count = 0;
|
|
|
|
@@ -1308,7 +1410,7 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver,
|
|
|
|
|
for (auto &map : grouped_buffers.maps) {
|
|
|
|
|
for (auto &list : map) {
|
|
|
|
|
transformBuffersToDrawOrder(
|
|
|
|
|
list.second, draw_order, get_block_wpos, buffer_trash);
|
|
|
|
|
list.second, draw_order, get_block_wpos, m_dynamic_buffers);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -1373,9 +1475,6 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver,
|
|
|
|
|
g_profiler->avg(prefix + "vertices drawn [#]", vertex_count);
|
|
|
|
|
g_profiler->avg(prefix + "drawcalls [#]", drawcall_count);
|
|
|
|
|
g_profiler->avg(prefix + "material swaps [#]", material_swaps);
|
|
|
|
|
|
|
|
|
|
for (auto &x : buffer_trash)
|
|
|
|
|
x->drop();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|