diff options
author | Chris Xiong <chirs241097@gmail.com> | 2022-09-22 00:03:01 -0400 |
---|---|---|
committer | Chris Xiong <chirs241097@gmail.com> | 2022-09-22 00:03:01 -0400 |
commit | 8ece6d3ec1b0105047c192c0aa044e4257118e01 (patch) | |
tree | ca4202e6e41d31c6f73fc7514c237a5b2b2c2764 /xsig | |
parent | 1d41325a9685cf677f8eeaa4940f032931fd8780 (diff) | |
download | deduper-8ece6d3ec1b0105047c192c0aa044e4257118e01.tar.xz |
Add "reverse image search".
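Fix a performance regression in the signature library in the process:
the set of already-matched image ids was checked but never populated,
so the same matches were re-fetched and re-compared for every subslice.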
Diffstat (limited to 'xsig')
-rw-r--r-- | xsig/include/signature_db.hpp | 2 | ||||
-rw-r--r-- | xsig/src/signature.cpp | 6 | ||||
-rw-r--r-- | xsig/src/signature_db.cpp | 79 |
3 files changed, 54 insertions, 33 deletions
diff --git a/xsig/include/signature_db.hpp b/xsig/include/signature_db.hpp index a74e90b..9b14fbb 100644 --- a/xsig/include/signature_db.hpp +++ b/xsig/include/signature_db.hpp @@ -87,6 +87,8 @@ public: void populate(const std::vector<fs::path> &paths, const populate_cfg_t &cfg); void populate_interrupt(); + std::vector<std::pair<size_t, double>> search_image(const fs::path &path, const populate_cfg_t &cfg, bool insert = false); + //disjoint set for keeping similar images in the same group //some of these probably shouldn't be public. TBD... void ds_init(); diff --git a/xsig/src/signature.cpp b/xsig/src/signature.cpp index 1f0ec28..0f0b2e9 100644 --- a/xsig/src/signature.cpp +++ b/xsig/src/signature.cpp @@ -226,19 +226,19 @@ signature signature::clone() const double signature::length() const { - if (!p) {fprintf(stderr, "length: null signature"); return -1;} + if (!p) {fprintf(stderr, "length: null signature\n"); return -1;} return p->length(); } double signature::distance(const signature &o) const { - if (!p || !o.p) {fprintf(stderr, "distance: null signature"); return -1;} + if (!p || !o.p) {fprintf(stderr, "distance: null signature\n"); return -1;} return p->distance(*o.p); } bool signature::operator==(const signature &o) const { - if (!p || !o.p) {fprintf(stderr, "eq: null signature"); return false;} + if (!p || !o.p) {fprintf(stderr, "eq: null signature\n"); return false;} return *p == *o.p; } diff --git a/xsig/src/signature_db.cpp b/xsig/src/signature_db.cpp index 6b328d6..5396d1d 100644 --- a/xsig/src/signature_db.cpp +++ b/xsig/src/signature_db.cpp @@ -398,36 +398,7 @@ void signature_db::populate(const std::vector<fs::path> &paths, const populate_c std::atomic<size_t> count(0); auto job_func = [&, this](int thid, const fs::path& path) { - subsliced_signature ss = subsliced_signature::from_path(path, cfg.nsliceh, cfg.nslicev, cfg.scfg_full, cfg.scfg_subslice); - - this->lock(); - std::set<size_t> v; - size_t dbid = this->put_signature(path, ss.full); - - 
this->batch_find_subslice_begin(); - for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i) - { - std::vector<subslice_t> ssmatches = this->find_subslice(ss.subslices[i]); - for (auto &match : ssmatches) - { - if (match.slice == i && v.find(match.id) == v.end()) - { - signature othersig; - std::tie(std::ignore, othersig) = this->get_signature(match.id); - double dist = ss.full.distance(othersig); - if (dist < cfg.threshold) - this->put_dupe_pair(dbid, match.id, dist); - } - } - } - this->batch_find_subslice_end(); - - this->batch_put_subslice_begin(); - for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i) - this->put_subslice(dbid, i, ss.subslices[i]); - this->batch_put_subslice_end(); - - this->unlock(); + this->search_image(path, cfg, true); ++count; cfg.callback(count.load(), thid); }; @@ -441,12 +412,60 @@ void signature_db::populate(const std::vector<fs::path> &paths, const populate_c delete p->tp; p->tp = nullptr; } + void signature_db::populate_interrupt() { if (p->tp) p->tp->terminate(); } +std::vector<std::pair<size_t, double>> signature_db::search_image(const fs::path &path, const populate_cfg_t &cfg, bool insert) +{ + subsliced_signature ss = subsliced_signature::from_path(path, cfg.nsliceh, cfg.nslicev, cfg.scfg_full, cfg.scfg_subslice); + if (!ss.full.valid()) return {}; + + this->lock(); + std::set<size_t> v; + std::vector<std::pair<size_t, double>> ret; + size_t dbid = 0; + if (insert) dbid = this->put_signature(path, ss.full); + + this->batch_find_subslice_begin(); + for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i) + { + std::vector<subslice_t> ssmatches = this->find_subslice(ss.subslices[i]); + for (auto &match : ssmatches) + { + if (match.slice == i && v.find(match.id) == v.end()) + { + signature othersig; + std::tie(std::ignore, othersig) = this->get_signature(match.id); + double dist = ss.full.distance(othersig); + if (dist < cfg.threshold) + { + if (insert) + this->put_dupe_pair(dbid, match.id, dist); + else + ret.emplace_back(match.id, 
dist); + v.insert(match.id); + } + } + } + } + this->batch_find_subslice_end(); + + if (insert) + { + this->batch_put_subslice_begin(); + for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i) + this->put_subslice(dbid, i, ss.subslices[i]); + this->batch_put_subslice_end(); + } + + this->unlock(); + return ret; +} + void signature_db::ds_init() { sqlite3_exec(p->db, R"sql( |