include/lshkit/sketch.h

Go to the documentation of this file.
00001 /* 
00002  Copyright (C) 2008 Wei Dong <wdong@princeton.edu>. All Rights Reserved.
00003 
00004  This file is part of LSHKIT.
00005 
00006  LSHKIT is free software: you can redistribute it and/or modify
00007  it under the terms of the GNU General Public License as published by
00008  the Free Software Foundation, either version 3 of the License, or
00009  (at your option) any later version.
00010 
00011  LSHKIT is distributed in the hope that it will be useful,
00012  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00014  GNU General Public License for more details.
00015 
00016  You should have received a copy of the GNU General Public License
00017  along with LSHKIT. If not, see <http://www.gnu.org/licenses/>.
00018 */
00019 
00020 
00021 #ifndef __LSHKIT_SKETCH__
00022 #define __LSHKIT_SKETCH__
00023 
00104 #include <lshkit/matrix.h>
00105 
00106 
00107 namespace lshkit {
00108 
00110 template <typename LSH, typename CHUNK = unsigned char>
00111 class Sketch
00112 {
00113 BOOST_CONCEPT_ASSERT((DeltaLshConcept<LSH>));
00114 
00115 unsigned dim_;
00116 std::vector<LSH> lsh_;
00117 
00118 public:
00120 typedef typename LSH::Parameter Parameter;
00122 typedef typename LSH::Domain Domain; 
00124 static const unsigned CHUNK_BIT = sizeof(CHUNK) * 8; // #bits in CHUNK
00125 
00128 Sketch() {
00129 }
00130 
00136 template <typename Engine>
00137 void reset (unsigned chunks, Parameter param, Engine &engine) {
00138 dim_ = chunks;
00139 lsh_.resize(dim_ * CHUNK_BIT);
00140 for (unsigned i = 0; i < lsh_.size(); ++i) lsh_[i].reset(param, engine);
00141 }
00142 
00148 template <typename Engine>
00149 Sketch(unsigned chunks, Parameter param, Engine &engine) : 
00150 dim_(chunks), lsh_(dim_ * CHUNK_BIT)
00151 {
00152 for (unsigned i = 0; i < lsh_.size(); ++i) lsh_[i].reset(param, engine);
00153 }
00154 
00156 void apply (Domain in, CHUNK *out) const
00157 {
00158 unsigned l = 0;
00159 for (unsigned i = 0; i < dim_; ++i) {
00160 out[i] = 0;
00161 for (unsigned j = 0; j < CHUNK_BIT; ++j) {
00162 unsigned k = lsh_[l++](in);
00163 out[i] = out[i] | (k << j);
00164 }
00165 }
00166 }
00167 
00169 
00174 void apply (Domain in, CHUNK *out, float *asym) const
00175 {
00176 unsigned l = 0;
00177 for (unsigned i = 0; i < dim_; ++i) {
00178 out[i] = 0;
00179 for (unsigned j = 0; j < CHUNK_BIT; ++j) {
00180 unsigned k = lsh_[l](in, &asym[l]);
00181 out[i] = out[i] | (k << j);
00182 l++;
00183 }
00184 }
00185 }
00186 
00188 template<class Archive>
00189 void serialize(Archive & ar, const unsigned int version)
00190 {
00191 ar & dim_;
00192 ar & lsh_;
00193 }
00194 
00196 void load (std::istream &is) {
00197 serialize(is, 0);
00198 }
00199 
00201 void save (std::ostream &os) {
00202 serialize(os, 0);
00203 }
00204 
00206 unsigned getBits () const {
00207 return dim_ * CHUNK_BIT;
00208 }
00209 
00211 unsigned getChunks () const {
00212 return dim_;
00213 }
00214 };
00215 
00217 
00228 template <typename CHUNK = unsigned char>
00229 class WeightedHammingHelper
00230 {
00231 public:
00232 static const unsigned CHUNK_BIT = sizeof(CHUNK) * 8;
00234 
00237 WeightedHammingHelper(unsigned chunks)
00238 : nchunk_(chunks), lookup_(1 << CHUNK_BIT, chunks)
00239 {
00240 }
00241 
00243 
00247 void update (const CHUNK *in, const float *asym)
00248 {
00249 unsigned l = 0;
00250 for (unsigned i = 0; i < nchunk_; ++i) {
00251 CHUNK q = in[i];
00252 CHUNK p = 0;
00253 for (;;) {
00254 CHUNK c = q ^ p;
00255 float v = 0;
00256 for (unsigned j = 0; j < CHUNK_BIT; ++j) {
00257 if (c & (1 << j)) {
00258 v += asym[l + j];
00259 }
00260 }
00261 lookup_[i][p] = v;
00262 ++p;
00263 if (p == 0) break;
00264 }
00265 l += CHUNK_BIT;
00266 }
00267 }
00268 
00270 float distTo (const CHUNK *in)
00271 {
00272 float dist = 0;
00273 for (unsigned i = 0; i < nchunk_; ++i) {
00274 dist += lookup_[i][in[i]];
00275 }
00276 return dist;
00277 }
00278 
00279 private:
00280 unsigned nchunk_;
00281 Matrix<float> lookup_;
00282 };
00283 
00284 
00285 }
00286 
00287 #endif
00288 

Get LSHKIT at SourceForge.net. Fast, secure and Free Open Source software downloads doxygen

AltStyle によって変換されたページ (->オリジナル) /