From c684f2433cfe65e93d6ff31ae82e98644964520b Mon Sep 17 00:00:00 2001 From: Chris Xiong Date: Sat, 17 Sep 2022 23:07:21 -0400 Subject: Continue hollowing out the testdrive application... ...and stuff everything into signature_db which is now becoming the new ragbag. Includes half-finished disjoint set implementation to absorb some logic originally in mingui's main.cpp into this ever-growing signature_db. Changed how batching is handled. Now different type of batches can be interleaved. --- signature_db.cpp | 219 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 194 insertions(+), 25 deletions(-) (limited to 'signature_db.cpp') diff --git a/signature_db.cpp b/signature_db.cpp index 607d1ad..71f2142 100644 --- a/signature_db.cpp +++ b/signature_db.cpp @@ -2,26 +2,36 @@ //License: MPL-2.0 #include +#include +#include + #include "signature_db.hpp" +#include "subslice_signature.hpp" +#include "thread_pool.hpp" -const int SIGDB_VERSION = 2; +const int SIGDB_VERSION = 3; enum batch_status { - single = 0, + none = 0, putsub, - findsub + findsub, + setpar, + getpar, + + BATCH_STATUS_MAX }; struct signature_db_priv { sqlite3 *db; sqlite3_mutex *mtx; - sqlite3_stmt *bst; - batch_status batch_mode; + sqlite3_stmt *bst[batch_status::BATCH_STATUS_MAX]; void init_db(); bool verify_db(); + + void batch_end(batch_status s); }; void signature_db_priv::init_db() @@ -62,7 +72,14 @@ void signature_db_priv::init_db() id2 integer, dist real, primary key (id1, id2), - foreign key (id1, id2) references images (id, id) + constraint fk_ids foreign key (id1, id2) references images (id, id) + ); + )sql", nullptr, nullptr, nullptr); + sqlite3_exec(db, R"sql( + create table dspar( + id integer, + parent integer, + constraint fk_ids foreign key (id, parent) references images (id, id) ); )sql", nullptr, nullptr, nullptr); } @@ -77,6 +94,13 @@ bool signature_db_priv::verify_db() return true; } +void signature_db_priv::batch_end(batch_status s) +{ + if (!db || !bst[s]) [[ unlikely ]] return; + sqlite3_finalize(bst[s]); + bst[s] = nullptr; +} + signature_db::signature_db(const fs::path &dbpath) { p = new signature_db_priv(); @@ -97,8 +121,8 @@ signature_db::signature_db(const fs::path &dbpath) } p->mtx = sqlite3_db_mutex(p->db); - p->bst = nullptr; - p->batch_mode = batch_status::single; + for (int i = 0; i < batch_status::BATCH_STATUS_MAX; ++i) + p->bst[i] = nullptr; if (!p->verify_db()) { sqlite3_close(p->db); @@ -114,8 +138,9 @@ signature_db::~signature_db() delete p; return; } - if (p->bst) - sqlite3_finalize(p->bst); + for (int i = 0; i < batch_status::BATCH_STATUS_MAX; ++i) + if (p->bst[i]) + sqlite3_finalize(p->bst[i]); sqlite3_close(p->db); delete p; } @@ -172,16 +197,15 @@ std::pair signature_db::get_signature(size_t id) void signature_db::batch_put_subslice_begin() { if (!p->db) [[ unlikely ]] return; - p->batch_mode = batch_status::putsub; - sqlite3_prepare_v2(p->db, "insert into subslices (image, slice, slicesig) values(?, ?, ?);", -1, &p->bst, 0); + sqlite3_prepare_v2(p->db, "insert into subslices (image, slice, slicesig) values(?, ?, ?);", -1, &p->bst[batch_status::putsub], 0); } void signature_db::put_subslice(size_t id, size_t slice, const signature &slicesig) { if (!p->db) [[ unlikely ]] return; sqlite3_stmt *st = nullptr; - if (p->batch_mode == batch_status::putsub) - st = p->bst; + if (p->bst[batch_status::putsub]) + st = p->bst[batch_status::putsub]; else sqlite3_prepare_v2(p->db, "insert into subslices (image, slice, slicesig) values(?, ?, ?);", -1, &st, 0); sqlite3_bind_int(st, 1, id); @@ -189,25 +213,29 @@ void signature_db::put_subslice(size_t id, size_t slice, const signature &slices std::string slicesigs = slicesig.to_string(); sqlite3_bind_text(st, 3, slicesigs.c_str(), -1, nullptr); sqlite3_step(st); - if (p->batch_mode == batch_status::putsub) + if (p->bst[batch_status::putsub]) sqlite3_reset(st); else sqlite3_finalize(st); } +void signature_db::batch_put_subslice_end() +{ + p->batch_end(batch_status::putsub); +} + void signature_db::batch_find_subslice_begin() { if (!p->db) [[ unlikely ]] return; - p->batch_mode = batch_status::findsub; - sqlite3_prepare_v2(p->db, "select image, slice from subslices where slicesig = ?;", -1, &p->bst, 0); + sqlite3_prepare_v2(p->db, "select image, slice from subslices where slicesig = ?;", -1, &p->bst[batch_status::findsub], 0); } std::vector signature_db::find_subslice(const signature &slicesig) { if (!p->db) [[ unlikely ]] return {}; sqlite3_stmt *st = nullptr; - if (p->batch_mode == batch_status::findsub) - st = p->bst; + if (p->bst[batch_status::findsub]) + st = p->bst[batch_status::findsub]; else sqlite3_prepare_v2(p->db, "select image, slice from subslices where slicesig = ?;", -1, &st, 0); @@ -223,19 +251,16 @@ std::vector signature_db::find_subslice(const signature &slicesig) size_t sl = sqlite3_column_int(st, 1); ret.push_back({im, sl}); } - if (p->batch_mode == batch_status::findsub) + if (p->bst[batch_status::findsub]) sqlite3_reset(st); else sqlite3_finalize(st); return ret; } -void signature_db::batch_end() +void signature_db::batch_find_subslice_end() { - if (!p->db) [[ unlikely ]] return; - p->batch_mode = batch_status::single; - sqlite3_finalize(p->bst); - p->bst = nullptr; + p->batch_end(batch_status::findsub); } void signature_db::put_dupe_pair(size_t ida, size_t idb, double dist) @@ -328,3 +353,147 @@ bool signature_db::from_db_file(const fs::path &path) ret &= (SQLITE_OK == sqlite3_close(src)); return ret; } + +void signature_db::populate(const std::vector &paths, const populate_cfg_t &cfg) +{ + std::atomic count(0); + auto job_func = [&, this](int thid, const fs::path& path) + { + subsliced_signature ss = subsliced_signature::from_path(path, cfg.nsliceh, cfg.nslicev, cfg.scfg_full, cfg.scfg_subslice); + + this->lock(); + std::set v; + size_t dbid = this->put_signature(path, ss.full); + + this->batch_find_subslice_begin(); + for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i) + { + std::vector ssmatches = this->find_subslice(ss.subslices[i]); + for (auto &match : ssmatches) + { + if (match.slice == i && v.find(match.id) == v.end()) + { + signature othersig; + std::tie(std::ignore, othersig) = this->get_signature(match.id); + double dist = ss.full.distance(othersig); + if (dist < cfg.threshold) + this->put_dupe_pair(dbid, match.id, dist); + } + } + } + this->batch_find_subslice_end(); + + this->batch_put_subslice_begin(); + for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i) + this->put_subslice(dbid, i, ss.subslices[i]); + this->batch_put_subslice_end(); + + this->unlock(); + ++count; + cfg.callback(count.load(), thid); + }; + + thread_pool tp(cfg.njobs); + for(size_t i = 0; i < paths.size(); ++i) + { + tp.create_task(job_func, paths[i]); + } + tp.wait(); +} + +void signature_db::ds_init() +{ + sqlite3_exec(p->db, R"sql( + delete from dspar; + insert into dspar (id, parent) select id, id from images; + )sql", nullptr, nullptr, nullptr); +} + +void signature_db::batch_ds_get_parent_begin() +{ + if (!p->db) [[ unlikely ]] return; + sqlite3_prepare_v2(p->db, "select parent from dspar where id = ?;", -1, &p->bst[batch_status::getpar], 0); +} + +size_t signature_db::ds_get_parent(size_t id) +{ + if (!p->db) [[ unlikely ]] return ~size_t(0); + sqlite3_stmt *st = nullptr; + if (p->bst[batch_status::getpar]) + st = p->bst[batch_status::getpar]; + else + sqlite3_prepare_v2(p->db, "select parent from dspar where id = ?;", -1, &st, 0); + + sqlite3_bind_int(st, 1, id); + + size_t ret = ~size_t(0); + if (sqlite3_step(st) == SQLITE_ROW) + ret = sqlite3_column_int(st, 0); + + if (p->bst[batch_status::getpar]) + sqlite3_reset(st); + else + sqlite3_finalize(st); + return ret; +} + +void signature_db::batch_ds_get_parent_end() +{ + p->batch_end(batch_status::getpar); +} + +void signature_db::batch_ds_set_parent_begin() +{ + if (!p->db) [[ unlikely ]] return; + sqlite3_prepare_v2(p->db, "update dspar set parent = ? where id = ?;", -1, &p->bst[batch_status::setpar], 0); +} + +size_t signature_db::ds_set_parent(size_t id, size_t par) +{ + if (!p->db) [[ unlikely ]] return par; + sqlite3_stmt *st = nullptr; + if (p->bst[batch_status::setpar]) + st = p->bst[batch_status::setpar]; + else + sqlite3_prepare_v2(p->db, "update dspar set parent = ? where id = ?;", -1, &st, 0); + + sqlite3_bind_int(st, 1, par); + sqlite3_bind_int(st, 2, id); + + sqlite3_step(st); + + if (p->bst[batch_status::setpar]) + sqlite3_reset(st); + else + sqlite3_finalize(st); + return par; +} + +void signature_db::batch_ds_set_parent_end() +{ + p->batch_end(batch_status::setpar); +} + +size_t signature_db::ds_find(size_t id) +{ + size_t p = ds_get_parent(id); + if (id != p) + return ds_set_parent(id, ds_find(p)); + return id; +} + +void signature_db::ds_merge(size_t id1, size_t id2) +{ + id1 = ds_find(id1); + id2 = ds_find(id2); + ds_set_parent(id1, id2); +} + +void signature_db::group_similar() +{ +} + +std::vector> signature_db::groups_get() +{ + return {}; +} -- cgit v1.2.3