From 96fc17b99d56eb636c894c5be9ab39bfdb4ba454 Mon Sep 17 00:00:00 2001 From: Chris Xiong Date: Sat, 27 Aug 2022 00:55:38 -0400 Subject: Initial code dump. --- tests/CMakeLists.txt | 31 +++++ tests/compressed_vector.cpp | 72 +++++++++++ tests/deduper_legacy.cpp | 194 ++++++++++++++++++++++++++++ tests/image_util_tests.cpp | 46 +++++++ tests/img/contrh.png | Bin 0 -> 681 bytes tests/img/luxmarket_tshirt01.jpg | Bin 0 -> 41128 bytes tests/img/luxmarket_tshirt01_sal.jpg | Bin 0 -> 24646 bytes tests/img/luxmarket_tshirt01_sheum.jpg | Bin 0 -> 16128 bytes tests/img/pic-a-0.jpg | Bin 0 -> 13946 bytes tests/img/pic-a-1.jpg | Bin 0 -> 27407 bytes tests/img/wrongextension.gif | 1 + tests/img/wrongextension.jpg | Bin 0 -> 151051 bytes tests/img/x.jpg | Bin 0 -> 202454 bytes tests/img/y.png | Bin 0 -> 51465 bytes tests/img/z.jpg | Bin 0 -> 130763 bytes tests/signature_test.cpp | 20 +++ tests/testdrive.cpp | 226 +++++++++++++++++++++++++++++++++ 17 files changed, 590 insertions(+) create mode 100644 tests/CMakeLists.txt create mode 100644 tests/compressed_vector.cpp create mode 100644 tests/deduper_legacy.cpp create mode 100644 tests/image_util_tests.cpp create mode 100644 tests/img/contrh.png create mode 100644 tests/img/luxmarket_tshirt01.jpg create mode 100644 tests/img/luxmarket_tshirt01_sal.jpg create mode 100644 tests/img/luxmarket_tshirt01_sheum.jpg create mode 100644 tests/img/pic-a-0.jpg create mode 100644 tests/img/pic-a-1.jpg create mode 120000 tests/img/wrongextension.gif create mode 100644 tests/img/wrongextension.jpg create mode 100644 tests/img/x.jpg create mode 100644 tests/img/y.png create mode 100644 tests/img/z.jpg create mode 100644 tests/signature_test.cpp create mode 100644 tests/testdrive.cpp (limited to 'tests') diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..2190875 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,31 @@ +add_executable(compressed_vector compressed_vector.cpp) +target_link_libraries(compressed_vector + ${OpenCV_LIBS} + xsig +) + +add_executable(image_util_tests image_util_tests.cpp) +target_link_libraries(image_util_tests + ${OpenCV_LIBS} + xsig +) + +add_executable(signature_test signature_test.cpp) +target_link_libraries(signature_test + ${OpenCV_LIBS} + xsig +) + +#add_executable(deduper_legacy deduper_legacy.cpp) +#target_link_libraries(deduper_legacy +# ${OpenCV_LIBS} +# ${CMAKE_THREAD_LIBS_INIT} +# xsig +#) + +add_executable(testdrive testdrive.cpp) +target_link_libraries(testdrive + ${OpenCV_LIBS} + ${CMAKE_THREAD_LIBS_INIT} + xsig +) diff --git a/tests/compressed_vector.cpp b/tests/compressed_vector.cpp new file mode 100644 index 0000000..a5d76e4 --- /dev/null +++ b/tests/compressed_vector.cpp @@ -0,0 +1,72 @@ +#include "compressed_vector.hpp" + +#include +#include +#include +#include +#include +#include + +int main() +{ + compressed_vector cv; + compressed_vector cv2; + std::vector v; + srand(time(NULL)); + for (int i = 0; i < 100; ++i) + { + int r = rand() % 8; + cv.push_back(r); + v.push_back(r); + } + for (int i = 0; i < 100; ++i) + { + if (cv.get(i) != v[i]) + { + printf("%u <=> %u @ %d\n", cv.get(i), v[i], i); + throw std::runtime_error(std::to_string(__LINE__)); + } + } + for (int i = 0; i < 1000; ++i) + { + if (rand() % 3) + { + int r = rand() % 8; + cv.push_back(r); + v.push_back(r); + } + else + { + if (cv.back() != v.back()) + throw std::runtime_error(std::to_string(__LINE__)); + cv.pop_back(); + v.pop_back(); + } + } + if (cv.size() != v.size()) + throw std::runtime_error(std::to_string(__LINE__)); + for (size_t i = 0; i < v.size(); ++i) + { + if (cv.get(i) != v[i]) + { + printf("%u <=> %u @ %lu\n", cv.get(i), v[i], i); + throw std::runtime_error(std::to_string(__LINE__)); + } + cv2.push_back(cv.get(i)); + } + for (size_t i = 0; i < v.size(); ++i) + { + if (cv.get(i) != cv2.get(i)) + { + throw std::runtime_error(std::to_string(__LINE__)); + } + } + size_t h1 = compressed_vector_hash{}(cv); + size_t h2 = compressed_vector_hash{}(cv2); + if (h1 != h2) + { + printf("%lu <=> %lu\n", h1, h2); + throw std::runtime_error(std::to_string(__LINE__)); + } + return 0; +} diff --git a/tests/deduper_legacy.cpp b/tests/deduper_legacy.cpp new file mode 100644 index 0000000..bcd8514 --- /dev/null +++ b/tests/deduper_legacy.cpp @@ -0,0 +1,194 @@ +#include "signature.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "thread_pool.hpp" + +int ctr; +int recursive; +int njobs=1; +double threshold=0.3; +std::vector paths; + +int parse_arguments(int argc,char **argv) +{ + recursive=0; + int help=0; + option longopt[]= + { + {"recursive",no_argument ,&recursive,1}, +// {"destdir" ,required_argument,0 ,'D'}, + {"jobs" ,required_argument,0 ,'j'}, + {"threshold",required_argument,0 ,'d'}, + {"help" ,no_argument ,&help ,1}, + {0 ,0 ,0 ,0} + }; + while(1) + { + int idx=0; + int c=getopt_long(argc,argv,"rhj:d:",longopt,&idx); + if(!~c)break; + switch(c) + { + case 0: + if(longopt[idx].flag)break; + if(std::string("jobs")==longopt[idx].name) + sscanf(optarg,"%d",&njobs); + if(std::string("threshold")==longopt[idx].name) + sscanf(optarg,"%lf",&threshold); + break; + case 'r': + recursive=1; + break; + case 'h': + help=1; + break; + case 'j': + sscanf(optarg,"%d",&njobs); + break; + case 'd': + sscanf(optarg,"%lf",&threshold); + break; + } + } + for(;optind1||threshold<0) + { + puts("Invalid threshold value."); + return 2; + } + if(threshold<1e-6)threshold=1e-6; + if(!paths.size()) + { + puts("Missing image path."); + return 2; + } + return 0; +} + +void build_file_list(std::filesystem::path path,bool recursive,std::vector&out) +{ + if(recursive) + { + auto dirit=std::filesystem::recursive_directory_iterator(path); + for(auto &p:dirit) + { + FILE* fp=fopen(p.path().c_str(),"r"); + char c[8]; + fread((void*)c,1,6,fp); + if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3)) + out.push_back(p.path().string()); + fclose(fp); + } + } + else + { + auto dirit=std::filesystem::directory_iterator(path); + for(auto &p:dirit) + { + FILE* fp=fopen(p.path().c_str(),"r"); + char c[8]; + fread((void*)c,1,6,fp); + if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3)) + out.push_back(p.path().string()); + fclose(fp); + } + } +} + +void compute_signature_vectors(const std::vector&files,std::vector&output) +{ + thread_pool tp(njobs); + for(size_t i=0;i&vec,std::vector>&out) +{ + thread_pool tp(njobs); + for(size_t i=0;ifile#%lu\n",thid,ida,idb); + if(true) + { + double d=vec[ida].distance(vec[idb]); + double l=vec[ida].length()+vec[idb].length(); + d/=l; + if(dfile#%lu: %lf\n",ida,idb,d); + } + printf("%d/%lu\r",++ctr,vec.size()*(vec.size()-1)/2); + fflush(stdout); + }; + tp.create_task(job_func,i,j); + }} + tp.wait(); +} + +int main(int argc,char** argv) +{ + if(int pr=parse_arguments(argc,argv))return pr-1; + puts("building list of files to compare..."); + std::vector x; + for(auto&p:paths) + build_file_list(p,recursive,x); + printf("%lu files to compare.\n",x.size()); + puts("computing signature vectors..."); + std::vector cvecs; + cvecs.resize(x.size()); + compute_signature_vectors(x,cvecs); + /*for(auto &v:cvecs) + { + fprintf(stderr,"%lu:",v.sizeof_vec); + for(size_t i=0;i> r; + compare_signature_vectors(cvecs,r); + puts(""); + for(auto &t:r) + printf("%s<->%s: %lf\n",x[std::get<0>(t)].c_str(),x[std::get<1>(t)].c_str(),std::get<2>(t)); + printf("%lu similar images.",r.size()); + cvecs.clear(); + return 0; +} diff --git a/tests/image_util_tests.cpp b/tests/image_util_tests.cpp new file mode 100644 index 0000000..af77f75 --- /dev/null +++ b/tests/image_util_tests.cpp @@ -0,0 +1,46 @@ +#include "imageutil.hpp" + +#include +#include + +#include +#include +#include + +int main(int argc, char** argv) +{ + if (argc < 2) + { + printf("usage: %s \n", argv[0]); + return 1; + } + cv::Mat i = cv::imread(argv[1], cv::IMREAD_UNCHANGED); + if (i.data == NULL) + { + printf("invalid image.\n"); + return 1; + } + cv::Mat fi, bw; + double sc = 1; + switch (i.depth()) + { + case CV_8U: sc = 1. / 255; break; + case CV_16U: sc = 1. / 65535; break; + } + i.convertTo(fi, CV_32F, sc); + if (fi.channels() == 4) + fi = image_util::blend_white(fi); + cv::cvtColor(fi, bw, cv::COLOR_RGB2GRAY); + cv::imshow(std::string("test"), bw); + cv::Range xr, yr; + double contrast_threshold = 0.05; + double max_crop_ratio = 0.25; + xr = image_util::crop_axis(bw, 0, contrast_threshold, max_crop_ratio); + yr = image_util::crop_axis(bw, 1, contrast_threshold, max_crop_ratio); + cv::Mat cfi = image_util::crop(bw, contrast_threshold, max_crop_ratio); + cv::imshow(std::string("cropped"), cfi); + printf("xxx [%d, %d) [%d, %d)\n", yr.start, yr.end, xr.start, xr.end); + puts("press q to quit."); + while (cv::waitKey(0) != 'q'); + return 0; +} diff --git a/tests/img/contrh.png b/tests/img/contrh.png new file mode 100644 index 0000000..e703342 Binary files /dev/null and b/tests/img/contrh.png differ diff --git a/tests/img/luxmarket_tshirt01.jpg b/tests/img/luxmarket_tshirt01.jpg new file mode 100644 index 0000000..ffaf7eb Binary files /dev/null and b/tests/img/luxmarket_tshirt01.jpg differ diff --git a/tests/img/luxmarket_tshirt01_sal.jpg b/tests/img/luxmarket_tshirt01_sal.jpg new file mode 100644 index 0000000..cb0cefe Binary files /dev/null and b/tests/img/luxmarket_tshirt01_sal.jpg differ diff --git a/tests/img/luxmarket_tshirt01_sheum.jpg b/tests/img/luxmarket_tshirt01_sheum.jpg new file mode 100644 index 0000000..185393c Binary files /dev/null and b/tests/img/luxmarket_tshirt01_sheum.jpg differ diff --git a/tests/img/pic-a-0.jpg b/tests/img/pic-a-0.jpg new file mode 100644 index 0000000..3dd4a3b Binary files /dev/null and b/tests/img/pic-a-0.jpg differ diff --git a/tests/img/pic-a-1.jpg b/tests/img/pic-a-1.jpg new file mode 100644 index 0000000..95f0e77 Binary files /dev/null and b/tests/img/pic-a-1.jpg differ diff --git a/tests/img/wrongextension.gif b/tests/img/wrongextension.gif new file mode 120000 index 0000000..151d738 --- /dev/null +++ b/tests/img/wrongextension.gif @@ -0,0 +1 @@ +/home/chrisoft/devel/deduper/tests/img/wrongextension.jpg \ No newline at end of file diff --git a/tests/img/wrongextension.jpg b/tests/img/wrongextension.jpg new file mode 100644 index 0000000..e699209 Binary files /dev/null and b/tests/img/wrongextension.jpg differ diff --git a/tests/img/x.jpg b/tests/img/x.jpg new file mode 100644 index 0000000..44574fc Binary files /dev/null and b/tests/img/x.jpg differ diff --git a/tests/img/y.png b/tests/img/y.png new file mode 100644 index 0000000..2a24968 Binary files /dev/null and b/tests/img/y.png differ diff --git a/tests/img/z.jpg b/tests/img/z.jpg new file mode 100644 index 0000000..5e81d31 Binary files /dev/null and b/tests/img/z.jpg differ diff --git a/tests/signature_test.cpp b/tests/signature_test.cpp new file mode 100644 index 0000000..0b6b1f9 --- /dev/null +++ b/tests/signature_test.cpp @@ -0,0 +1,20 @@ +#include +#include +#include "signature.hpp" +//#include +int main() +{ + std::vector a; + a.push_back(std::move(signature::from_file("img/x.jpg"))); + a.push_back(std::move(signature::from_file("img/z.jpg"))); + for (size_t i = 0; i < a.size(); ++i) + for (size_t j = 0; j < a.size(); ++j) + { + printf("%lu <-> %lu:", i, j); + double d = a[i].distance(a[j]); + double l = a[i].length() + a[j].length(); + printf("%f\n", d / l); + } + //while (cv::waitKey(0) != 'q'); + return 0; +} diff --git a/tests/testdrive.cpp b/tests/testdrive.cpp new file mode 100644 index 0000000..c104e8a --- /dev/null +++ b/tests/testdrive.cpp @@ -0,0 +1,226 @@ +#include "signature.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "thread_pool.hpp" + +int ctr; +int recursive; +int njobs=1; +double threshold=0.3; +std::vector paths; +std::vector files; + +int nsliceh = 3; +int nslicev = 3; + +struct sig_eq +{ + bool operator()(const signature& a, const signature& b) const + { + //return a.distance(b) < 0.1; + return a == b; + } +}; + +typedef std::pair slice_info; +std::unordered_map, signature_hash, sig_eq> slices; +std::vector signatures; +std::mutex sigmtx; +std::vector> out; + +int parse_arguments(int argc,char **argv) +{ + recursive=0; + int help=0; + option longopt[]= + { + {"recursive",no_argument ,&recursive,1}, +// {"destdir" ,required_argument,0 ,'D'}, + {"jobs" ,required_argument,0 ,'j'}, +// {"threshold",required_argument,0 ,'d'}, + {"help" ,no_argument ,&help ,1}, + {0 ,0 ,0 ,0} + }; + while(1) + { + int idx=0; + int c=getopt_long(argc,argv,"rhj:",longopt,&idx); + if(!~c)break; + switch(c) + { + case 0: + if(longopt[idx].flag)break; + if(std::string("jobs")==longopt[idx].name) + sscanf(optarg,"%d",&njobs); + //if(std::string("threshold")==longopt[idx].name) + //sscanf(optarg,"%lf",&threshold); + break; + case 'r': + recursive=1; + break; + case 'h': + help=1; + break; + case 'j': + sscanf(optarg,"%d",&njobs); + break; + case 'd': + sscanf(optarg,"%lf",&threshold); + break; + } + } + for(;optind1||threshold<0) + { + puts("Invalid threshold value."); + return 2; + } + if(threshold<1e-6)threshold=1e-6; + if(!paths.size()) + { + puts("Missing image path."); + return 2; + } + return 0; +} + +void build_file_list(std::filesystem::path path,bool recursive,std::vector&out) +{ + if(recursive) + { + auto dirit=std::filesystem::recursive_directory_iterator(path); + for(auto &p:dirit) + { + FILE* fp = fopen(p.path().c_str(),"r"); + char c[8]; + size_t sz = fread((void*)c,1,6,fp); + if (sz < 6) continue; + if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3)) + out.push_back(p.path().string()); + fclose(fp); + } + } + else + { + auto dirit=std::filesystem::directory_iterator(path); + for(auto &p:dirit) + { + FILE* fp = fopen(p.path().c_str(),"r"); + char c[8]; + size_t sz = fread((void*)c,1,6,fp); + if (sz < 6) continue; + if(!memcmp(c,"\x89PNG\r\n",6)||!memcmp(c,"\xff\xd8\xff",3)) + out.push_back(p.path().string()); + fclose(fp); + } + } +} + +void job_func(int thid, size_t id) +{ + cv::Mat img = cv::imread(files[id].c_str(), cv::IMREAD_UNCHANGED); + signature s = signature::from_cvmatrix(img); + int ssw = img.size().width / nsliceh; + int ssh = img.size().height / nslicev; + std::vector subsigs; + for (int i = 0; i < nsliceh; ++i) + for (int j = 0; j < nslicev; ++j) + { + int l = i * ssw; + int r = (i == nsliceh) ? img.size().width : (i + 1) * ssw; + int t = j * ssh; + int b = (j == nslicev) ? img.size().height : (j + 1) * ssh; + subsigs.push_back(std::move(signature::from_cvmatrix(img(cv::Range(t, b), cv::Range(l, r))))); + } + + printf("%d %lu\r", thid, id); + fflush(stdout); + + sigmtx.lock(); + std::vector v; + v.resize(files.size()); + for (int i = 0; i < nsliceh * nslicev; ++i) + { + auto it = slices.find(subsigs[i]); + if (it != slices.end()) + { + for (auto &si : it->second) + { + if (si.second == i) + { + if (!v[si.first] && s.distance(signatures[si.first]) < threshold) + { + out.emplace_back(id, std::move(si.first)); + } + v[si.first] = true; + } + } + it->second.emplace_back(id, i); + } + else + { + slices.emplace(std::move(subsigs[i].clone()), + std::vector{{id, i}}); + } + } + signatures[id] = std::move(s); + sigmtx.unlock(); +} + +void run() +{ + thread_pool tp(njobs); + for(size_t i=0;i