From 8ece6d3ec1b0105047c192c0aa044e4257118e01 Mon Sep 17 00:00:00 2001
From: Chris Xiong
Date: Thu, 22 Sep 2022 00:03:01 -0400
Subject: Add "reverse image search". Fixed a stupid performance degradation in the signature library in the process.

---
 xsig/include/signature_db.hpp |  2 ++
 xsig/src/signature.cpp        |  6 ++--
 xsig/src/signature_db.cpp     | 79 +++++++++++++++++++++++++++----------------
 3 files changed, 54 insertions(+), 33 deletions(-)
 (limited to 'xsig')

diff --git a/xsig/include/signature_db.hpp b/xsig/include/signature_db.hpp
index a74e90b..9b14fbb 100644
--- a/xsig/include/signature_db.hpp
+++ b/xsig/include/signature_db.hpp
@@ -87,6 +87,8 @@ public:
     void populate(const std::vector &paths, const populate_cfg_t &cfg);
     void populate_interrupt();
 
+    std::vector> search_image(const fs::path &path, const populate_cfg_t &cfg, bool insert = false);
+
     //disjoint set for keeping similar images in the same group
     //some of these probably shouldn't be public. TBD...
     void ds_init();
diff --git a/xsig/src/signature.cpp b/xsig/src/signature.cpp
index 1f0ec28..0f0b2e9 100644
--- a/xsig/src/signature.cpp
+++ b/xsig/src/signature.cpp
@@ -226,19 +226,19 @@ signature signature::clone() const
 
 double signature::length() const
 {
-    if (!p) {fprintf(stderr, "length: null signature"); return -1;}
+    if (!p) {fprintf(stderr, "length: null signature\n"); return -1;}
     return p->length();
 }
 
 double signature::distance(const signature &o) const
 {
-    if (!p || !o.p) {fprintf(stderr, "distance: null signature"); return -1;}
+    if (!p || !o.p) {fprintf(stderr, "distance: null signature\n"); return -1;}
     return p->distance(*o.p);
 }
 
 bool signature::operator==(const signature &o) const
 {
-    if (!p || !o.p) {fprintf(stderr, "eq: null signature"); return false;}
+    if (!p || !o.p) {fprintf(stderr, "eq: null signature\n"); return false;}
     return *p == *o.p;
 }
 
diff --git a/xsig/src/signature_db.cpp b/xsig/src/signature_db.cpp
index 6b328d6..5396d1d 100644
--- a/xsig/src/signature_db.cpp
+++ b/xsig/src/signature_db.cpp
@@ -398,36 +398,7 @@ void signature_db::populate(const std::vector &paths, const populate_c
     std::atomic count(0);
     auto job_func = [&, this](int thid, const fs::path& path)
     {
-        subsliced_signature ss = subsliced_signature::from_path(path, cfg.nsliceh, cfg.nslicev, cfg.scfg_full, cfg.scfg_subslice);
-
-        this->lock();
-        std::set v;
-        size_t dbid = this->put_signature(path, ss.full);
-
-        this->batch_find_subslice_begin();
-        for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i)
-        {
-            std::vector ssmatches = this->find_subslice(ss.subslices[i]);
-            for (auto &match : ssmatches)
-            {
-                if (match.slice == i && v.find(match.id) == v.end())
-                {
-                    signature othersig;
-                    std::tie(std::ignore, othersig) = this->get_signature(match.id);
-                    double dist = ss.full.distance(othersig);
-                    if (dist < cfg.threshold)
-                        this->put_dupe_pair(dbid, match.id, dist);
-                }
-            }
-        }
-        this->batch_find_subslice_end();
-
-        this->batch_put_subslice_begin();
-        for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i)
-            this->put_subslice(dbid, i, ss.subslices[i]);
-        this->batch_put_subslice_end();
-
-        this->unlock();
+        this->search_image(path, cfg, true);
         ++count;
         cfg.callback(count.load(), thid);
     };
@@ -441,12 +412,60 @@ void signature_db::populate(const std::vector &paths, const populate_c
     delete p->tp;
     p->tp = nullptr;
 }
+
 void signature_db::populate_interrupt()
 {
     if (p->tp)
         p->tp->terminate();
 }
 
+std::vector> signature_db::search_image(const fs::path &path, const populate_cfg_t &cfg, bool insert)
+{
+    subsliced_signature ss = subsliced_signature::from_path(path, cfg.nsliceh, cfg.nslicev, cfg.scfg_full, cfg.scfg_subslice);
+    if (!ss.full.valid()) return {};
+
+    this->lock();
+    std::set v;
+    std::vector> ret;
+    size_t dbid = 0;
+    if (insert) dbid = this->put_signature(path, ss.full);
+
+    this->batch_find_subslice_begin();
+    for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i)
+    {
+        std::vector ssmatches = this->find_subslice(ss.subslices[i]);
+        for (auto &match : ssmatches)
+        {
+            if (match.slice == i && v.find(match.id) == v.end())
+            {
+                signature othersig;
+                std::tie(std::ignore, othersig) = this->get_signature(match.id);
+                double dist = ss.full.distance(othersig);
+                if (dist < cfg.threshold)
+                {
+                    if (insert)
+                        this->put_dupe_pair(dbid, match.id, dist);
+                    else
+                        ret.emplace_back(match.id, dist);
+                    v.insert(match.id);
+                }
+            }
+        }
+    }
+    this->batch_find_subslice_end();
+
+    if (insert)
+    {
+        this->batch_put_subslice_begin();
+        for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i)
+            this->put_subslice(dbid, i, ss.subslices[i]);
+        this->batch_put_subslice_end();
+    }
+
+    this->unlock();
+    return ret;
+}
+
 void signature_db::ds_init()
 {
     sqlite3_exec(p->db, R"sql(
-- 
cgit v1.2.3