From 8ece6d3ec1b0105047c192c0aa044e4257118e01 Mon Sep 17 00:00:00 2001
From: Chris Xiong <chirs241097@gmail.com>
Date: Thu, 22 Sep 2022 00:03:01 -0400
Subject: Add "reverse image search".

Fixed a stupid performance degradation in the signature library
in the process.
---
 xsig/src/signature.cpp    |  6 ++--
 xsig/src/signature_db.cpp | 79 +++++++++++++++++++++++++++++------------------
 2 files changed, 52 insertions(+), 33 deletions(-)

(limited to 'xsig/src')

diff --git a/xsig/src/signature.cpp b/xsig/src/signature.cpp
index 1f0ec28..0f0b2e9 100644
--- a/xsig/src/signature.cpp
+++ b/xsig/src/signature.cpp
@@ -226,19 +226,19 @@ signature signature::clone() const
 
 double signature::length() const
 {
-    if (!p) {fprintf(stderr, "length: null signature"); return -1;}
+    if (!p) {fprintf(stderr, "length: null signature\n"); return -1;} // p is null: report on stderr and return -1 as the error value
     return p->length();
 }
 
 double signature::distance(const signature &o) const
 {
-    if (!p || !o.p) {fprintf(stderr, "distance: null signature"); return -1;}
+    if (!p || !o.p) {fprintf(stderr, "distance: null signature\n"); return -1;} // either side null: report on stderr and return -1, which callers must not treat as a real distance
     return p->distance(*o.p);
 }
 
 bool signature::operator==(const signature &o) const
 {
-    if (!p || !o.p) {fprintf(stderr, "eq: null signature"); return false;}
+    if (!p || !o.p) {fprintf(stderr, "eq: null signature\n"); return false;} // either side null: report on stderr and compare unequal (so two null signatures are not equal)
     return *p == *o.p;
 }
 
diff --git a/xsig/src/signature_db.cpp b/xsig/src/signature_db.cpp
index 6b328d6..5396d1d 100644
--- a/xsig/src/signature_db.cpp
+++ b/xsig/src/signature_db.cpp
@@ -398,36 +398,7 @@ void signature_db::populate(const std::vector<fs::path> &paths, const populate_c
     std::atomic<size_t> count(0);
     auto job_func = [&, this](int thid, const fs::path& path)
     {
-        subsliced_signature ss = subsliced_signature::from_path(path, cfg.nsliceh, cfg.nslicev, cfg.scfg_full, cfg.scfg_subslice);
-
-        this->lock();
-        std::set<size_t> v;
-        size_t dbid = this->put_signature(path, ss.full);
-
-        this->batch_find_subslice_begin();
-        for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i)
-        {
-            std::vector<subslice_t> ssmatches = this->find_subslice(ss.subslices[i]);
-            for (auto &match : ssmatches)
-            {
-                if (match.slice == i && v.find(match.id) == v.end())
-                {
-                    signature othersig;
-                    std::tie(std::ignore, othersig) = this->get_signature(match.id);
-                    double dist = ss.full.distance(othersig);
-                    if (dist < cfg.threshold)
-                        this->put_dupe_pair(dbid, match.id, dist);
-                }
-            }
-        }
-        this->batch_find_subslice_end();
-
-        this->batch_put_subslice_begin();
-        for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i)
-            this->put_subslice(dbid, i, ss.subslices[i]);
-        this->batch_put_subslice_end();
-
-        this->unlock();
+        this->search_image(path, cfg, true);
         ++count;
         cfg.callback(count.load(), thid);
     };
@@ -441,12 +412,60 @@ void signature_db::populate(const std::vector<fs::path> &paths, const populate_c
     delete p->tp;
     p->tp = nullptr;
 }
+
 void signature_db::populate_interrupt()
 {
     if (p->tp)
         p->tp->terminate();
 }
 
+// Compares the image at `path` against the stored signatures: with insert=false the (id, distance) pairs under
+// cfg.threshold are returned; with insert=true the image is stored, its dupe pairs are recorded, and the result is empty.
+std::vector<std::pair<size_t, double>> signature_db::search_image(const fs::path &path, const populate_cfg_t &cfg, bool insert)
+{
+    subsliced_signature ss = subsliced_signature::from_path(path, cfg.nsliceh, cfg.nslicev, cfg.scfg_full, cfg.scfg_subslice);
+    if (!ss.full.valid()) return {}; // ss.full is not valid for this path: nothing is searched or stored
+
+    this->lock();
+    std::set<size_t> visited; // ids whose full signature has already been compared with ss.full
+    std::vector<std::pair<size_t, double>> ret;
+    const size_t dbid = insert ? this->put_signature(path, ss.full) : 0; // only used when insert is set
+
+    this->batch_find_subslice_begin();
+    for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i)
+    {
+        std::vector<subslice_t> ssmatches = this->find_subslice(ss.subslices[i]);
+        for (auto &match : ssmatches)
+        {
+            // Mark every candidate on first sight, not just the close ones, so a far one is fetched and compared only once.
+            if (match.slice == i && visited.insert(match.id).second)
+            {
+                signature othersig;
+                std::tie(std::ignore, othersig) = this->get_signature(match.id);
+                double dist = ss.full.distance(othersig);
+                if (dist >= 0 && dist < cfg.threshold) // distance() reports a null signature as -1; that is not a match
+                {
+                    if (insert)
+                        this->put_dupe_pair(dbid, match.id, dist);
+                    else
+                        ret.emplace_back(match.id, dist);
+                }
+            }
+        }
+    }
+    this->batch_find_subslice_end();
+
+    if (insert)
+    {
+        this->batch_put_subslice_begin();
+        for (size_t i = 0; i < cfg.nsliceh * cfg.nslicev; ++i)
+            this->put_subslice(dbid, i, ss.subslices[i]);
+        this->batch_put_subslice_end();
+    }
+    this->unlock();
+    return ret;
+}
+
 void signature_db::ds_init()
 {
     sqlite3_exec(p->db, R"sql(
-- 
cgit v1.2.3