00001 /* 00002 Copyright (C) 2008 Wei Dong <wdong@princeton.edu>. All Rights Reserved. 00003 00004 This file is part of LSHKIT. 00005 00006 LSHKIT is free software: you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation, either version 3 of the License, or 00009 (at your option) any later version. 00010 00011 LSHKIT is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with LSHKIT. If not, see <http://www.gnu.org/licenses/>. 00018 */ 00019 00020 00021 #ifndef __LSHKIT_SKETCH__ 00022 #define __LSHKIT_SKETCH__ 00023 00104 #include <lshkit/matrix.h> 00105 00106 00107 namespace lshkit { 00108 00110 template <typename LSH, typename CHUNK = unsigned char> 00111 class Sketch 00112 { 00113 BOOST_CONCEPT_ASSERT((DeltaLshConcept<LSH>)); 00114 00115 unsigned dim_; 00116 std::vector<LSH> lsh_; 00117 00118 public: 00120 typedef typename LSH::Parameter Parameter; 00122 typedef typename LSH::Domain Domain; 00124 static const unsigned CHUNK_BIT = sizeof(CHUNK) * 8; // #bits in CHUNK 00125 00128 Sketch() { 00129 } 00130 00136 template <typename Engine> 00137 void reset (unsigned chunks, Parameter param, Engine &engine) { 00138 dim_ = chunks; 00139 lsh_.resize(dim_ * CHUNK_BIT); 00140 for (unsigned i = 0; i < lsh_.size(); ++i) lsh_[i].reset(param, engine); 00141 } 00142 00148 template <typename Engine> 00149 Sketch(unsigned chunks, Parameter param, Engine &engine) : 00150 dim_(chunks), lsh_(dim_ * CHUNK_BIT) 00151 { 00152 for (unsigned i = 0; i < lsh_.size(); ++i) lsh_[i].reset(param, engine); 00153 } 00154 00156 void apply (Domain in, CHUNK *out) const 00157 { 00158 unsigned l = 0; 00159 for (unsigned i = 0; i < dim_; ++i) { 00160 out[i] = 0; 00161 for (unsigned j = 0; j < CHUNK_BIT; ++j) { 00162 unsigned k = lsh_[l++](in); 00163 out[i] = out[i] | (k << j); 00164 } 00165 } 00166 } 00167 00169 00174 void apply (Domain in, CHUNK *out, float *asym) const 00175 { 00176 unsigned l = 0; 00177 for (unsigned i = 0; i < dim_; ++i) { 00178 out[i] = 0; 00179 for (unsigned j = 0; j < CHUNK_BIT; ++j) { 00180 unsigned k = lsh_[l](in, &asym[l]); 00181 out[i] = out[i] | (k << j); 00182 l++; 00183 } 00184 } 00185 } 00186 00188 template<class Archive> 00189 void serialize(Archive & ar, const unsigned int version) 00190 { 00191 ar & dim_; 00192 ar & lsh_; 00193 } 00194 00196 void load (std::istream &is) { 00197 serialize(is, 0); 00198 } 00199 00201 void save (std::ostream &os) { 00202 serialize(os, 0); 00203 } 00204 00206 unsigned getBits () const { 00207 return dim_ * CHUNK_BIT; 00208 } 00209 00211 unsigned getChunks () const { 00212 return dim_; 00213 } 00214 }; 00215 00217 00228 template <typename CHUNK = unsigned char> 00229 class WeightedHammingHelper 00230 { 00231 public: 00232 static const unsigned CHUNK_BIT = sizeof(CHUNK) * 8; 00234 00237 WeightedHammingHelper(unsigned chunks) 00238 : nchunk_(chunks), lookup_(1 << CHUNK_BIT, chunks) 00239 { 00240 } 00241 00243 00247 void update (const CHUNK *in, const float *asym) 00248 { 00249 unsigned l = 0; 00250 for (unsigned i = 0; i < nchunk_; ++i) { 00251 CHUNK q = in[i]; 00252 CHUNK p = 0; 00253 for (;;) { 00254 CHUNK c = q ^ p; 00255 float v = 0; 00256 for (unsigned j = 0; j < CHUNK_BIT; ++j) { 00257 if (c & (1 << j)) { 00258 v += asym[l + j]; 00259 } 00260 } 00261 lookup_[i][p] = v; 00262 ++p; 00263 if (p == 0) break; 00264 } 00265 l += CHUNK_BIT; 00266 } 00267 } 00268 00270 float distTo (const CHUNK *in) 00271 { 00272 float dist = 0; 00273 for (unsigned i = 0; i < nchunk_; ++i) { 00274 dist += lookup_[i][in[i]]; 00275 } 00276 return dist; 00277 } 00278 00279 private: 00280 unsigned nchunk_; 00281 Matrix<float> lookup_; 00282 }; 00283 00284 00285 } 00286 00287 #endif 00288