aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--signature_db.cpp35
-rw-r--r--signature_db.hpp3
-rw-r--r--tests/testdrive_sqlite.cpp21
3 files changed, 58 insertions, 1 deletions
diff --git a/signature_db.cpp b/signature_db.cpp
index 71f2142..429581d 100644
--- a/signature_db.cpp
+++ b/signature_db.cpp
@@ -491,9 +491,42 @@ void signature_db::ds_merge(size_t id1, size_t id2)
void signature_db::group_similar()
{
+    // Group similar images: merge the two ids of every pair reported by
+    // dupe_pairs(). The merges run between the batch begin/end calls for
+    // the parent getter and setter.
+    // NOTE(review): ds_init() presumably resets the grouping state - confirm.
+    ds_init();
+    batch_ds_get_parent_begin();
+    batch_ds_set_parent_begin();
+    const auto duplicates = this->dupe_pairs();
+    for (const auto &dup : duplicates)
+        ds_merge(dup.id1, dup.id2);
+    batch_ds_get_parent_end();
+    batch_ds_set_parent_end();
}
std::vector<std::vector<size_t>> signature_db::groups_get()
{
- return {};
+    // Returns one list of ids per distinct `parent` value in the dspar
+    // table; each list holds the ids of all rows sharing that parent.
+    // Groups of any size are returned, including single-element ones.
+    // If either query fails to compile, an empty result is returned.
+    sqlite3_stmt *roots = nullptr;
+    sqlite3_stmt *members = nullptr;
+    std::vector<std::vector<size_t>> ret;
+
+    if (sqlite3_prepare_v2(p->db, "select distinct parent from dspar;", -1, &roots, 0) == SQLITE_OK &&
+        sqlite3_prepare_v2(p->db, "select id from dspar where parent = ?;", -1, &members, 0) == SQLITE_OK)
+    {
+        while (sqlite3_step(roots) == SQLITE_ROW)
+        {
+            // Use the 64-bit accessors so ids are not truncated to a
+            // 32-bit int on their way through SQLite.
+            sqlite3_bind_int64(members, 1, sqlite3_column_int64(roots, 0));
+            // Build the group in place instead of copying it into ret.
+            ret.emplace_back();
+            std::vector<size_t> &group = ret.back();
+            while (sqlite3_step(members) == SQLITE_ROW)
+                group.push_back(static_cast<size_t>(sqlite3_column_int64(members, 0)));
+            // Rewind the inner statement so it can be re-bound for the next parent.
+            sqlite3_reset(members);
+        }
+    }
+    // sqlite3_finalize() accepts a null statement, so this is safe even
+    // when a prepare call above failed.
+    sqlite3_finalize(roots);
+    sqlite3_finalize(members);
+    return ret;
}
diff --git a/signature_db.hpp b/signature_db.hpp
index c7e3997..a56ae1f 100644
--- a/signature_db.hpp
+++ b/signature_db.hpp
@@ -97,7 +97,10 @@ public:
size_t ds_find(size_t id);
void ds_merge(size_t id1, size_t id2);
+ //group similar images together using results from dupe_pairs()
+ //usually very fast, unless you have a crack ton of duplicates...
void group_similar();
+ //get all groups, each contained in its own list.
std::vector<std::vector<size_t>> groups_get();
};
diff --git a/tests/testdrive_sqlite.cpp b/tests/testdrive_sqlite.cpp
index 3f1fe40..c9e9aad 100644
--- a/tests/testdrive_sqlite.cpp
+++ b/tests/testdrive_sqlite.cpp
@@ -226,6 +226,9 @@ int main(int argc,char** argv)
};
sdb->populate(files, pcfg);
+ puts("grouping similar images...");
+ sdb->group_similar();
+
std::vector<dupe_t> dupes = sdb->dupe_pairs();
for (auto &p : dupes)
{
@@ -238,6 +241,24 @@ int main(int argc,char** argv)
printf("%s %s %f\n", p1.c_str(), p2.c_str(), p.distance);
#endif
}
+
+ std::vector<std::vector<size_t>> gp = sdb->groups_get();
+ for (auto gi = gp.begin(); gi != gp.end(); ++gi)
+ {
+ if (gi->size() < 2) continue;
+ printf("group #%lu:\n", gi - gp.begin());
+ for (auto &id : *gi)
+ {
+ fs::path p;
+ std::tie(p, std::ignore) = sdb->get_signature(id);
+#if PATH_VALSIZE == 2
+ wprintf(L"\t%ls\n", p.c_str());
+#else
+ printf("\t%s\n", p.c_str());
+#endif
+ }
+ }
+
sdb->to_db_file("test.sigdb");
delete sdb;
return 0;