tesseract  3.04.00
intmatcher.cpp
1 /******************************************************************************
2  ** Filename: intmatcher.cpp
3  ** Purpose: Generic high level classification routines.
4  ** Author: Robert Moss
5  ** History: Wed Feb 13 17:35:28 MST 1991, RWM, Created.
6  ** Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add
7  ** support for adaptive matching.
8  ** (c) Copyright Hewlett-Packard Company, 1988.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  ******************************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 /*----------------------------------------------------------------------------
26  Include Files and Type Defines
27 ----------------------------------------------------------------------------*/
28 #include "intmatcher.h"
29 
30 #include "fontinfo.h"
31 #include "intproto.h"
32 #include "callcpp.h"
33 #include "scrollview.h"
34 #include "float2int.h"
35 #include "globals.h"
36 #include "helpers.h"
37 #include "classify.h"
38 #include "shapetable.h"
39 #include <math.h>
40 
41 using tesseract::ScoredFont;
42 using tesseract::UnicharRating;
43 
44 /*----------------------------------------------------------------------------
45  Global Data Definitions and Declarations
46 ----------------------------------------------------------------------------*/
47 // Parameters of the sigmoid used to convert similarity to evidence in the
48 // similarity_evidence_table_ that converts the distance metric to an
49 // 8-bit evidence value in the secondary matcher. (See IntegerMatcher::Init.)
50 const float IntegerMatcher::kSEExponentialMultiplier = 0.0;
51 const float IntegerMatcher::kSimilarityCenter = 0.0075;
52 
53 #define offset_table_entries \
54  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
55  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
56  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, \
57  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, \
58  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
59  0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
60  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, \
61  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, \
62  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
63  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
64  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
65 
66 #define INTMATCHER_OFFSET_TABLE_SIZE 256
67 
68 #define next_table_entries \
69  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e, \
70  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, \
71  0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, \
72  0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32, \
73  0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e, \
74  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, \
75  0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, \
76  0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62, \
77  0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e, \
78  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a, \
79  0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, \
80  0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92, \
81  0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e, \
82  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa, \
83  0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, \
84  0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2, \
85  0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce, \
86  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda, \
87  0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, \
88  0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2, \
89  0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
90 
91 // See http://b/19318793 (#6) for a complete discussion. Merging arrays
92 // offset_table and next_table helps improve performance of PIE code.
93 static const uinT8 data_table[512] = {offset_table_entries, next_table_entries};
94 
95 static const uinT8* const offset_table = &data_table[0];
96 static const uinT8* const next_table =
97  &data_table[INTMATCHER_OFFSET_TABLE_SIZE];
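// Illustrative helper (for exposition only; VisitSetBits is a hypothetical
// name and is not used by the matcher): offset_table[b] holds the index of
// the lowest set bit of byte b (255 for b == 0) and next_table[b] is b with
// that bit cleared, so the set bits of a byte can be visited without shifting
// one bit at a time. UpdateTablesForFeature below uses exactly this pattern
// on the proto and config words.
static inline void VisitSetBits(uinT8 byte, void (*visit)(int bit_index)) {
  while (byte != 0) {
    visit(offset_table[byte]);  // index of the lowest set bit still present
    byte = next_table[byte];    // clear that bit and continue
  }
}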
98 
99 namespace tesseract {
100 
101 // Encapsulation of the intermediate data and computations made by the class
102 // pruner. The class pruner implements a simple linear classifier on binary
103 // features by heavily quantizing the feature space, and applying
104 // NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
105 // weights is compensated by a non-constant bias that is dependent on the
106 // number of features present.
107 class ClassPruner {
108  public:
109  ClassPruner(int max_classes) {
110  // The unrolled loop in ComputeScores means that the array sizes need to
111  // be rounded up so that the array is big enough to accommodate the extra
112  // entries accessed by the unrolling. Each pruner word is of size
113  // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
114  // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
115  // See ComputeScores.
116  max_classes_ = max_classes;
117  max_classes_ = max_classes;
117  rounded_classes_ = RoundUp(
118      max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS);
119  class_count_ = new int[rounded_classes_];
120  norm_count_ = new int[rounded_classes_];
121  sort_key_ = new int[rounded_classes_ + 1];
122  sort_index_ = new int[rounded_classes_ + 1];
123  for (int i = 0; i < rounded_classes_; i++) {
124  class_count_[i] = 0;
125  }
126  pruning_threshold_ = 0;
127  num_features_ = 0;
128  num_classes_ = 0;
129  }
130 
131  ~ClassPruner() {
132  delete []class_count_;
133  delete []norm_count_;
134  delete []sort_key_;
135  delete []sort_index_;
136  }
137 
138  // Computes the scores for every class in the character set by summing the
139  // weights for each feature, and stores the sums internally in class_count_.
140  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
141  int num_features, const INT_FEATURE_STRUCT* features) {
142  num_features_ = num_features;
143  int num_pruners = int_templates->NumClassPruners;
144  for (int f = 0; f < num_features; ++f) {
145  const INT_FEATURE_STRUCT* feature = &features[f];
146  // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
147  int x = feature->X * NUM_CP_BUCKETS >> 8;
148  int y = feature->Y * NUM_CP_BUCKETS >> 8;
149  int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
150  int class_id = 0;
151  // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
152  // we need a collection of them, indexed by pruner_set.
153  for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
154  // Look up quantized feature in a 3-D array, an array of weights for
155  // each class.
156  const uinT32* pruner_word_ptr =
157  int_templates->ClassPruners[pruner_set]->p[x][y][theta];
158  for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
159  uinT32 pruner_word = *pruner_word_ptr++;
160  // This inner loop is unrolled to speed up the ClassPruner.
161  // Currently gcc will not unroll it unless the -O3 optimization
162  // level or -funroll-loops is specified.
163  /*
164  uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
165  for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
166  class_count_[class_id++] += pruner_word & class_mask;
167  pruner_word >>= NUM_BITS_PER_CLASS;
168  }
169  */
170  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
171  pruner_word >>= NUM_BITS_PER_CLASS;
172  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
173  pruner_word >>= NUM_BITS_PER_CLASS;
174  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
175  pruner_word >>= NUM_BITS_PER_CLASS;
176  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
177  pruner_word >>= NUM_BITS_PER_CLASS;
178  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
179  pruner_word >>= NUM_BITS_PER_CLASS;
180  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
181  pruner_word >>= NUM_BITS_PER_CLASS;
182  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
183  pruner_word >>= NUM_BITS_PER_CLASS;
184  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
185  pruner_word >>= NUM_BITS_PER_CLASS;
186  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
187  pruner_word >>= NUM_BITS_PER_CLASS;
188  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
189  pruner_word >>= NUM_BITS_PER_CLASS;
190  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
191  pruner_word >>= NUM_BITS_PER_CLASS;
192  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
193  pruner_word >>= NUM_BITS_PER_CLASS;
194  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
195  pruner_word >>= NUM_BITS_PER_CLASS;
196  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
197  pruner_word >>= NUM_BITS_PER_CLASS;
198  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
199  pruner_word >>= NUM_BITS_PER_CLASS;
200  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
201  }
202  }
203  }
204  }
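 // Worked example of the 2-bit unpacking above (values chosen purely for
 // illustration): a pruner word of 0x000000E4 packs the weights 0, 1, 2 and 3
 // for four consecutive classes, since 0xE4 is 11 10 01 00 in binary. Masking
 // with CLASS_PRUNER_CLASS_MASK (3 when NUM_BITS_PER_CLASS is 2) and shifting
 // by NUM_BITS_PER_CLASS extracts them in class order, so a single feature
 // can add at most 3 to any class_count_ entry.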
205 
206  // Adjusts the scores according to the number of expected features. Used
207  // in lieu of a constant bias, this penalizes classes that expect more
208  // features than are present. Thus an actual c will score higher for c
209  // than e, even though almost all the features match e as well as c, because
210  // e expects more features to be present.
211  void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
212  int cutoff_strength) {
213  for (int class_id = 0; class_id < max_classes_; ++class_id) {
214  if (num_features_ < expected_num_features[class_id]) {
215  int deficit = expected_num_features[class_id] - num_features_;
216  class_count_[class_id] -= class_count_[class_id] * deficit /
217  (num_features_ * cutoff_strength + deficit);
218  }
219  }
220  }
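 // Worked example of the penalty above (all numbers are illustrative): with
 // num_features_ = 50, expected_num_features[class_id] = 80 and
 // cutoff_strength = 7, the deficit is 30 and the score is reduced by
 // 30 / (50 * 7 + 30) = 30 / 380, i.e. roughly 8% of its current value.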
221 
222  // Zeros the scores for classes disabled in the unicharset.
223  // Implements the black-list to recognize a subset of the character set.
224  void DisableDisabledClasses(const UNICHARSET& unicharset) {
225  for (int class_id = 0; class_id < max_classes_; ++class_id) {
226  if (!unicharset.get_enabled(class_id))
227  class_count_[class_id] = 0; // This char is disabled!
228  }
229  }
230 
231  // Zeros the scores of fragments.
232  void DisableFragments(const UNICHARSET& unicharset) {
233  for (int class_id = 0; class_id < max_classes_; ++class_id) {
234  // Do not include character fragments in the class pruner
235  // results if disable_character_fragments is true.
236  if (unicharset.get_fragment(class_id)) {
237  class_count_[class_id] = 0;
238  }
239  }
240  }
241 
242  // Normalizes the counts for xheight, putting the normalized result in
243  // norm_count_. Applies a simple subtractive penalty for incorrect vertical
244  // position provided by the normalization_factors array, indexed by
245  // character class, and scaled by the norm_multiplier.
246  void NormalizeForXheight(int norm_multiplier,
247  const uinT8* normalization_factors) {
248  for (int class_id = 0; class_id < max_classes_; class_id++) {
249  norm_count_[class_id] = class_count_[class_id] -
250  ((norm_multiplier * normalization_factors[class_id]) >> 8);
251  }
252  }
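 // For example (illustrative numbers only): with norm_multiplier = 15 and a
 // normalization_factors entry of 128, the subtractive penalty is
 // (15 * 128) >> 8 = 7 counts taken off that class's score.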
253 
254  // The nop normalization copies the class_count_ array to norm_count_.
255  void NoNormalization() {
256  for (int class_id = 0; class_id < max_classes_; class_id++) {
257  norm_count_[class_id] = class_count_[class_id];
258  }
259  }
260 
261  // Prunes the classes using <the maximum count> * pruning_factor/256 as a
262  // threshold for keeping classes. If max_of_non_fragments, then ignore
263  // fragments in computing the maximum count.
264  void PruneAndSort(int pruning_factor, int keep_this,
265  bool max_of_non_fragments, const UNICHARSET& unicharset) {
266  int max_count = 0;
267  for (int c = 0; c < max_classes_; ++c) {
268  if (norm_count_[c] > max_count &&
269  // This additional check is added in order to ensure that
270  // the classifier will return at least one non-fragmented
271  // character match.
272  // TODO(daria): verify that this helps accuracy and does not
273  // hurt performance.
274  (!max_of_non_fragments || !unicharset.get_fragment(c))) {
275  max_count = norm_count_[c];
276  }
277  }
278  // Prune Classes.
279  pruning_threshold_ = (max_count * pruning_factor) >> 8;
280  // Select Classes.
281  if (pruning_threshold_ < 1)
282  pruning_threshold_ = 1;
283  num_classes_ = 0;
284  for (int class_id = 0; class_id < max_classes_; class_id++) {
285  if (norm_count_[class_id] >= pruning_threshold_ ||
286  class_id == keep_this) {
287  ++num_classes_;
288  sort_index_[num_classes_] = class_id;
289  sort_key_[num_classes_] = norm_count_[class_id];
290  }
291  }
292 
293  // Sort Classes using Heapsort Algorithm.
294  if (num_classes_ > 1)
295  HeapSort(num_classes_, sort_key_, sort_index_);
296  }
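 // For example (illustrative numbers only): with a maximum normalized count
 // of 100 and pruning_factor = 229, the threshold is (100 * 229) >> 8 = 89,
 // so only classes scoring at least 89 (or the keep_this class) survive into
 // the sorted short-list.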
297 
298  // Prints debug info on the class pruner matches for the pruned classes only.
299  void DebugMatch(const Classify& classify,
300  const INT_TEMPLATES_STRUCT* int_templates,
301  const INT_FEATURE_STRUCT* features) const {
302  int num_pruners = int_templates->NumClassPruners;
303  int max_num_classes = int_templates->NumClasses;
304  for (int f = 0; f < num_features_; ++f) {
305  const INT_FEATURE_STRUCT* feature = &features[f];
306  tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
307  // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
308  int x = feature->X * NUM_CP_BUCKETS >> 8;
309  int y = feature->Y * NUM_CP_BUCKETS >> 8;
310  int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
311  int class_id = 0;
312  for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
313  // Look up quantized feature in a 3-D array, an array of weights for
314  // each class.
315  const uinT32* pruner_word_ptr =
316  int_templates->ClassPruners[pruner_set]->p[x][y][theta];
317  for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
318  uinT32 pruner_word = *pruner_word_ptr++;
319  for (int word_class = 0; word_class < 16 &&
320  class_id < max_num_classes; ++word_class, ++class_id) {
321  if (norm_count_[class_id] >= pruning_threshold_) {
322  tprintf(" %s=%d,",
323  classify.ClassIDToDebugStr(int_templates,
324  class_id, 0).string(),
325  pruner_word & CLASS_PRUNER_CLASS_MASK);
326  }
327  pruner_word >>= NUM_BITS_PER_CLASS;
328  }
329  }
330  tprintf("\n");
331  }
332  }
333  }
334 
335  // Prints a summary of the pruner result.
336  void SummarizeResult(const Classify& classify,
337  const INT_TEMPLATES_STRUCT* int_templates,
338  const uinT16* expected_num_features,
339  int norm_multiplier,
340  const uinT8* normalization_factors) const {
341  tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
342  for (int i = 0; i < num_classes_; ++i) {
343  int class_id = sort_index_[num_classes_ - i];
344  STRING class_string = classify.ClassIDToDebugStr(int_templates,
345  class_id, 0);
346  tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
347  class_string.string(),
348  class_count_[class_id],
349  expected_num_features[class_id],
350  (norm_multiplier * normalization_factors[class_id]) >> 8,
351  sort_key_[num_classes_ - i],
352  100.0 - 100.0 * sort_key_[num_classes_ - i] /
353  (CLASS_PRUNER_CLASS_MASK * num_features_));
354  }
355  }
356 
357  // Copies the pruned, sorted classes into the output results and returns
358  // the number of classes.
359  int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const {
360  CP_RESULT_STRUCT empty;
361  results->init_to_size(num_classes_, empty);
362  for (int c = 0; c < num_classes_; ++c) {
363  (*results)[c].Class = sort_index_[num_classes_ - c];
364  (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
365  (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
366  }
367  return num_classes_;
368  }
369 
370  private:
371  // Array[rounded_classes_] of initial counts for each class.
372  int *class_count_;
373  // Array[rounded_classes_] of modified counts for each class after normalizing
374  // for expected number of features, disabled classes, fragments, and xheights.
375  int *norm_count_;
376  // Array[rounded_classes_ +1] of pruned counts that gets sorted
377  int *sort_key_;
378  // Array[rounded_classes_ +1] of classes corresponding to sort_key_.
379  int *sort_index_;
380  // Number of classes in this class pruner.
381  int max_classes_;
382  // Rounded up number of classes used for array sizes.
383  int rounded_classes_;
384  // Threshold count applied to prune classes.
385  int pruning_threshold_;
386  // The number of features used to compute the scores.
387  int num_features_;
388  // Final number of pruned classes.
389  int num_classes_;
390 };
391 
392 /*----------------------------------------------------------------------------
393  Public Code
394 ----------------------------------------------------------------------------*/
395 /*---------------------------------------------------------------------------*/
396 // Runs the class pruner from int_templates on the given features, returning
397 // the number of classes output in results.
398 // int_templates Class pruner tables
399 // num_features Number of features in blob
400 // features Array of features
401 // normalization_factors Array of fudge factors from blob
402 // normalization process (by CLASS_INDEX)
403 // expected_num_features Array of expected number of features
404 // for each class (by CLASS_INDEX)
405 // results Sorted Array of pruned classes. Must be an array
406 // of size at least int_templates->NumClasses.
407 int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
408  int num_features, int keep_this,
409  const INT_FEATURE_STRUCT* features,
410  const uinT8* normalization_factors,
411  const uinT16* expected_num_features,
412  GenericVector<CP_RESULT_STRUCT>* results) {
413 /*
414  ** Operation:
415  ** Prunes the classes using a modified fast match table.
416  ** Returns a sorted list of classes along with the number
417  ** of pruned classes in that list.
418  ** Return: Number of pruned classes.
419  ** Exceptions: none
420  ** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
421  */
422  ClassPruner pruner(int_templates->NumClasses);
423  // Compute initial match scores for all classes.
424  pruner.ComputeScores(int_templates, num_features, features);
425  // Adjust match scores for number of expected features.
426  pruner.AdjustForExpectedNumFeatures(expected_num_features,
427  classify_cp_cutoff_strength);
428  // Apply disabled classes in unicharset - only works without a shape_table.
429  if (shape_table_ == NULL)
430  pruner.DisableDisabledClasses(unicharset);
431  // If fragments are disabled, remove them, also only without a shape table.
432  if (disable_character_fragments && shape_table_ == NULL)
433  pruner.DisableFragments(unicharset);
434 
435  // If we have good x-heights, apply the given normalization factors.
436  if (normalization_factors != NULL) {
437  pruner.NormalizeForXheight(classify_class_pruner_multiplier,
438  normalization_factors);
439  } else {
440  pruner.NoNormalization();
441  }
442  // Do the actual pruning and sort the short-list.
443  pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
444  shape_table_ == NULL, unicharset);
445 
446  if (classify_debug_level > 2) {
447  pruner.DebugMatch(*this, int_templates, features);
448  }
449  if (classify_debug_level > 1) {
450  pruner.SummarizeResult(*this, int_templates, expected_num_features,
451  classify_class_pruner_multiplier,
452  normalization_factors);
453  }
454  // Convert to the expected output format.
455  return pruner.SetupResults(results);
456 }
457 
458 } // namespace tesseract
459 
460 /*---------------------------------------------------------------------------*/
461 void IntegerMatcher::Match(INT_CLASS ClassTemplate,
462  BIT_VECTOR ProtoMask,
463  BIT_VECTOR ConfigMask,
464  inT16 NumFeatures,
465  const INT_FEATURE_STRUCT* Features,
466  UnicharRating* Result,
467  int AdaptFeatureThreshold,
468  int Debug,
469  bool SeparateDebugWindows) {
470 /*
471  ** Parameters:
472  ** ClassTemplate Prototypes & tables for a class
473  ** BlobLength Length of unnormalized blob
474  ** NumFeatures Number of features in blob
475  ** Features Array of features
476  ** NormalizationFactor Fudge factor from blob
477  ** normalization process
478  ** Result Class rating & configuration:
479  ** (0.0 -> 1.0), 0=bad, 1=good
480  ** Debug Debugger flag: 1=debugger on
481  ** Globals:
482  ** local_matcher_multiplier_ Normalization factor multiplier
483  ** Operation:
484  ** IntegerMatcher returns the best configuration and rating
485  ** for a single class. The class matched against is determined
486  ** by the uniqueness of the ClassTemplate parameter. The
487  ** best rating and its associated configuration are returned.
488  ** Return:
489  ** Exceptions: none
490  ** History: Tue Feb 19 16:36:23 MST 1991, RWM, Created.
491  */
492  ScratchEvidence *tables = new ScratchEvidence();
493  int Feature;
494  int BestMatch;
495 
496  if (MatchDebuggingOn (Debug))
497  cprintf ("Integer Matcher -------------------------------------------\n");
498 
499  tables->Clear(ClassTemplate);
500  Result->feature_misses = 0;
501 
502  for (Feature = 0; Feature < NumFeatures; Feature++) {
503  int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
504  Feature, &Features[Feature],
505  tables, Debug);
506  // Count features that were missed over all configs.
507  if (csum == 0)
508  ++Result->feature_misses;
509  }
510 
511 #ifndef GRAPHICS_DISABLED
512  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
513  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
514  NumFeatures, Debug);
515  }
516 
517  if (DisplayProtoMatchesOn(Debug)) {
518  DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask,
519  *tables, SeparateDebugWindows);
520  }
521 
522  if (DisplayFeatureMatchesOn(Debug)) {
523  DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
524  Features, AdaptFeatureThreshold, Debug,
525  SeparateDebugWindows);
526  }
527 #endif
528 
529  tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures);
530  tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures);
531 
532  BestMatch = FindBestMatch(ClassTemplate, *tables, Result);
533 
534 #ifndef GRAPHICS_DISABLED
535  if (PrintMatchSummaryOn(Debug))
536  Result->Print();
537 
538  if (MatchDebuggingOn(Debug))
539  cprintf("Match Complete --------------------------------------------\n");
540 #endif
541 
542  delete tables;
543 }
544 
545 
546 /*---------------------------------------------------------------------------*/
547 int IntegerMatcher::FindGoodProtos(
548  INT_CLASS ClassTemplate,
549  BIT_VECTOR ProtoMask,
550  BIT_VECTOR ConfigMask,
551  uinT16 BlobLength,
552  inT16 NumFeatures,
553  INT_FEATURE_ARRAY Features,
554  PROTO_ID *ProtoArray,
555  int AdaptProtoThreshold,
556  int Debug) {
557 /*
558  ** Parameters:
559  ** ClassTemplate Prototypes & tables for a class
560  ** ProtoMask AND Mask for proto word
561  ** ConfigMask AND Mask for config word
562  ** BlobLength Length of unnormalized blob
563  ** NumFeatures Number of features in blob
564  ** Features Array of features
565  ** ProtoArray Array of good protos
566  ** AdaptProtoThreshold Threshold for good protos
567  ** Debug Debugger flag: 1=debugger on
568  ** Globals:
569  ** local_matcher_multiplier_ Normalization factor multiplier
570  ** Operation:
571  ** FindGoodProtos finds all protos whose normalized proto-evidence
572  ** exceeds classify_adapt_proto_thresh. The list is ordered by increasing
573  ** proto id number.
574  ** Return:
575  ** Number of good protos in ProtoArray.
576  ** Exceptions: none
577  ** History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
578  */
579  ScratchEvidence *tables = new ScratchEvidence();
580  int NumGoodProtos = 0;
581 
582  /* DEBUG opening heading */
583  if (MatchDebuggingOn (Debug))
584  cprintf
585  ("Find Good Protos -------------------------------------------\n");
586 
587  tables->Clear(ClassTemplate);
588 
589  for (int Feature = 0; Feature < NumFeatures; Feature++)
590  UpdateTablesForFeature(
591  ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
592  tables, Debug);
593 
594 #ifndef GRAPHICS_DISABLED
595  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
596  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
597  NumFeatures, Debug);
598 #endif
599 
600  /* Average Proto Evidences & Find Good Protos */
601  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
602  /* Compute Average for Actual Proto */
603  int Temp = 0;
604  for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++)
605  Temp += tables->proto_evidence_[proto][i];
606 
607  Temp /= ClassTemplate->ProtoLengths[proto];
608 
609  /* Find Good Protos */
610  if (Temp >= AdaptProtoThreshold) {
611  *ProtoArray = proto;
612  ProtoArray++;
613  NumGoodProtos++;
614  }
615  }
616 
617  if (MatchDebuggingOn (Debug))
618  cprintf ("Match Complete --------------------------------------------\n");
619  delete tables;
620 
621  return NumGoodProtos;
622 }
623 
624 
625 /*---------------------------------------------------------------------------*/
626 int IntegerMatcher::FindBadFeatures(
627  INT_CLASS ClassTemplate,
628  BIT_VECTOR ProtoMask,
629  BIT_VECTOR ConfigMask,
630  uinT16 BlobLength,
631  inT16 NumFeatures,
632  INT_FEATURE_ARRAY Features,
633  FEATURE_ID *FeatureArray,
634  int AdaptFeatureThreshold,
635  int Debug) {
636 /*
637  ** Parameters:
638  ** ClassTemplate Prototypes & tables for a class
639  ** ProtoMask AND Mask for proto word
640  ** ConfigMask AND Mask for config word
641  ** BlobLength Length of unnormalized blob
642  ** NumFeatures Number of features in blob
643  ** Features Array of features
644  ** FeatureArray Array of bad features
645  ** AdaptFeatureThreshold Threshold for bad features
646  ** Debug Debugger flag: 1=debugger on
647  ** Operation:
648  ** FindBadFeatures finds all features with maximum feature-evidence <
649  ** AdaptFeatureThreshold. The list is ordered by increasing feature number.
650  ** Return:
651  ** Number of bad features in FeatureArray.
652  ** History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
653  */
654  ScratchEvidence *tables = new ScratchEvidence();
655  int NumBadFeatures = 0;
656 
657  /* DEBUG opening heading */
658  if (MatchDebuggingOn(Debug))
659  cprintf("Find Bad Features -------------------------------------------\n");
660 
661  tables->Clear(ClassTemplate);
662 
663  for (int Feature = 0; Feature < NumFeatures; Feature++) {
664  UpdateTablesForFeature(
665  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
666  tables, Debug);
667 
668  /* Find Best Evidence for Current Feature */
669  int best = 0;
670  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
671  if (tables->feature_evidence_[i] > best)
672  best = tables->feature_evidence_[i];
673 
674  /* Find Bad Features */
675  if (best < AdaptFeatureThreshold) {
676  *FeatureArray = Feature;
677  FeatureArray++;
678  NumBadFeatures++;
679  }
680  }
681 
682 #ifndef GRAPHICS_DISABLED
683  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
684  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
685  NumFeatures, Debug);
686 #endif
687 
688  if (MatchDebuggingOn(Debug))
689  cprintf("Match Complete --------------------------------------------\n");
690 
691  delete tables;
692  return NumBadFeatures;
693 }
694 
695 
696 /*---------------------------------------------------------------------------*/
697 void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
698  classify_debug_level_ = classify_debug_level;
699 
700  /* Initialize table for evidence to similarity lookup */
701  for (int i = 0; i < SE_TABLE_SIZE; i++) {
702  uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
703  double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0;
704  double evidence = Similarity / kSimilarityCenter;
705  evidence = 255.0 / (evidence * evidence + 1.0);
706 
707  if (kSEExponentialMultiplier > 0.0) {
708  double scale = 1.0 - exp(-kSEExponentialMultiplier) *
709  exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE));
710  evidence *= ClipToRange(scale, 0.0, 1.0);
711  }
712 
713  similarity_evidence_table_[i] = (uinT8) (evidence + 0.5);
714  }
715 
716  /* Initialize evidence computation variables */
717  evidence_table_mask_ =
718  ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
719  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
720  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
721  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
722 }
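// Plain floating-point sketch of the curve that Init() tabulates above
// (SimilarityToEvidence is a hypothetical name added only for illustration;
// kCenter duplicates the kSimilarityCenter value so the sketch stands alone):
//   evidence(s) = 255 / ((s / kSimilarityCenter)^2 + 1)
// which gives 255 at s == 0, about 128 at s == kSimilarityCenter and about 51
// at s == 2 * kSimilarityCenter, before the optional exponential scaling.
static double SimilarityToEvidence(double similarity) {
  const double kCenter = 0.0075;  // same value as kSimilarityCenter above
  double x = similarity / kCenter;
  return 255.0 / (x * x + 1.0);
}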
723 
724 
728 void ScratchEvidence::Clear(const INT_CLASS class_template) {
729  memset(sum_feature_evidence_, 0,
730  class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
731  memset(proto_evidence_, 0,
732  class_template->NumProtos * sizeof(proto_evidence_[0]));
733 }
734 
735 void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
736  memset(feature_evidence_, 0,
737  class_template->NumConfigs * sizeof(feature_evidence_[0]));
738 }
739 
740 
741 
742 /*---------------------------------------------------------------------------*/
743 void IMDebugConfiguration(int FeatureNum,
744  uinT16 ActualProtoNum,
745  uinT8 Evidence,
746  BIT_VECTOR ConfigMask,
747  uinT32 ConfigWord) {
748 /*
749  ** Parameters:
750  ** Globals:
751  ** Operation:
752  ** Print debugging information for Configurations
753  ** Return:
754  ** Exceptions: none
755  ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
756  */
757  cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
758  FeatureNum, (int) ActualProtoNum, (int) Evidence);
759  while (ConfigWord) {
760  if (ConfigWord & 1)
761  cprintf ("1");
762  else
763  cprintf ("0");
764  ConfigWord >>= 1;
765  }
766  cprintf ("\n");
767 }
768 
769 
770 /*---------------------------------------------------------------------------*/
771 void IMDebugConfigurationSum(int FeatureNum,
772  uinT8 *FeatureEvidence,
773  inT32 ConfigCount) {
774 /*
775  ** Parameters:
776  ** Globals:
777  ** Operation:
778  ** Print debugging information for Configurations
779  ** Return:
780  ** Exceptions: none
781  ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
782  */
783  cprintf("F=%3d, C=", FeatureNum);
784  for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
785  cprintf("%4d", FeatureEvidence[ConfigNum]);
786  }
787  cprintf("\n");
788 }
789 
790 
791 
792 /*---------------------------------------------------------------------------*/
793 int IntegerMatcher::UpdateTablesForFeature(
794  INT_CLASS ClassTemplate,
795  BIT_VECTOR ProtoMask,
796  BIT_VECTOR ConfigMask,
797  int FeatureNum,
798  const INT_FEATURE_STRUCT* Feature,
799  ScratchEvidence *tables,
800  int Debug) {
801 /*
802  ** Parameters:
803  ** ClassTemplate Prototypes & tables for a class
804  ** FeatureNum Current feature number (for DEBUG only)
805  ** Feature Pointer to a feature struct
806  ** tables Evidence tables
807  ** Debug Debugger flag: 1=debugger on
808  ** Operation:
809  ** For the given feature: prune protos, compute evidence,
810  ** update Feature Evidence, Proto Evidence, and Sum of Feature
811  ** Evidence tables.
812  ** Return:
813  */
814  register uinT32 ConfigWord;
815  register uinT32 ProtoWord;
816  register uinT32 ProtoNum;
817  register uinT32 ActualProtoNum;
818  uinT8 proto_byte;
819  inT32 proto_word_offset;
820  inT32 proto_offset;
821  uinT8 config_byte;
822  inT32 config_offset;
823  PROTO_SET ProtoSet;
824  uinT32 *ProtoPrunerPtr;
825  INT_PROTO Proto;
826  int ProtoSetIndex;
827  uinT8 Evidence;
828  uinT32 XFeatureAddress;
829  uinT32 YFeatureAddress;
830  uinT32 ThetaFeatureAddress;
831  register uinT8 *UINT8Pointer;
832  register int ProtoIndex;
833  uinT8 Temp;
834  register int *IntPointer;
835  int ConfigNum;
836  register inT32 M3;
837  register inT32 A3;
838  register uinT32 A4;
839 
840  tables->ClearFeatureEvidence(ClassTemplate);
841 
842  /* Precompute Feature Address offset for Proto Pruning */
843  XFeatureAddress = ((Feature->X >> 2) << 1);
844  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
845  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);
846 
847  for (ProtoSetIndex = 0, ActualProtoNum = 0;
848  ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
849  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
850  ProtoPrunerPtr = (uinT32 *) ((*ProtoSet).ProtoPruner);
851  for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
852  ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
853  (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
854  /* Prune Protos of current Proto Set */
855  ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
856  ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
857  ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
858  ProtoWord &= *ProtoMask;
859 
860  if (ProtoWord != 0) {
861  proto_byte = ProtoWord & 0xff;
862  ProtoWord >>= 8;
863  proto_word_offset = 0;
864  while (ProtoWord != 0 || proto_byte != 0) {
865  while (proto_byte == 0) {
866  proto_byte = ProtoWord & 0xff;
867  ProtoWord >>= 8;
868  proto_word_offset += 8;
869  }
870  proto_offset = offset_table[proto_byte] + proto_word_offset;
871  proto_byte = next_table[proto_byte];
872  Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
873  ConfigWord = Proto->Configs[0];
874  A3 = (((Proto->A * (Feature->X - 128)) << 1)
875  - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9));
876  M3 =
877  (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1;
878 
879  if (A3 < 0)
880  A3 = ~A3;
881  if (M3 < 0)
882  M3 = ~M3;
883  A3 >>= mult_trunc_shift_bits_;
884  M3 >>= mult_trunc_shift_bits_;
885  if (A3 > evidence_mult_mask_)
886  A3 = evidence_mult_mask_;
887  if (M3 > evidence_mult_mask_)
888  M3 = evidence_mult_mask_;
889 
890  A4 = (A3 * A3) + (M3 * M3);
891  A4 >>= table_trunc_shift_bits_;
892  if (A4 > evidence_table_mask_)
893  Evidence = 0;
894  else
895  Evidence = similarity_evidence_table_[A4];
896 
897  if (PrintFeatureMatchesOn (Debug))
898  IMDebugConfiguration (FeatureNum,
899  ActualProtoNum + proto_offset,
900  Evidence, ConfigMask, ConfigWord);
901 
902  ConfigWord &= *ConfigMask;
903 
904  UINT8Pointer = tables->feature_evidence_ - 8;
905  config_byte = 0;
906  while (ConfigWord != 0 || config_byte != 0) {
907  while (config_byte == 0) {
908  config_byte = ConfigWord & 0xff;
909  ConfigWord >>= 8;
910  UINT8Pointer += 8;
911  }
912  config_offset = offset_table[config_byte];
913  config_byte = next_table[config_byte];
914  if (Evidence > UINT8Pointer[config_offset])
915  UINT8Pointer[config_offset] = Evidence;
916  }
917 
918  UINT8Pointer =
919  &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
920  for (ProtoIndex =
921  ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
922  ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
923  if (Evidence > *UINT8Pointer) {
924  Temp = *UINT8Pointer;
925  *UINT8Pointer = Evidence;
926  Evidence = Temp;
927  }
928  else if (Evidence == 0)
929  break;
930  }
931  }
932  }
933  }
934  }
935 
936  if (PrintFeatureMatchesOn(Debug)) {
937  IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
938  ClassTemplate->NumConfigs);
939  }
940 
941  IntPointer = tables->sum_feature_evidence_;
942  UINT8Pointer = tables->feature_evidence_;
943  int SumOverConfigs = 0;
944  for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
945  int evidence = *UINT8Pointer++;
946  SumOverConfigs += evidence;
947  *IntPointer++ += evidence;
948  }
949  return SumOverConfigs;
950 }
951 
952 
953 /*---------------------------------------------------------------------------*/
954 #ifndef GRAPHICS_DISABLED
955 void IntegerMatcher::DebugFeatureProtoError(
956  INT_CLASS ClassTemplate,
957  BIT_VECTOR ProtoMask,
958  BIT_VECTOR ConfigMask,
959  const ScratchEvidence& tables,
960  inT16 NumFeatures,
961  int Debug) {
962 /*
963  ** Parameters:
964  ** Globals:
965  ** Operation:
966  ** Print debugging information for Configurations
967  ** Return:
968  ** Exceptions: none
969  ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
970  */
971  FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];
972  int ConfigNum;
973  uinT32 ConfigWord;
974  int ProtoSetIndex;
975  uinT16 ProtoNum;
976  uinT8 ProtoWordNum;
977  PROTO_SET ProtoSet;
978  uinT16 ActualProtoNum;
979 
980  if (PrintMatchSummaryOn(Debug)) {
981  cprintf("Configuration Mask:\n");
982  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
983  cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
984  cprintf("\n");
985 
986  cprintf("Feature Error for Configurations:\n");
987  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
988  cprintf(
989  " %5.1f",
990  100.0 * (1.0 -
991  (FLOAT32) tables.sum_feature_evidence_[ConfigNum]
992  / NumFeatures / 256.0));
993  }
994  cprintf("\n\n\n");
995  }
996 
997  if (PrintMatchSummaryOn (Debug)) {
998  cprintf ("Proto Mask:\n");
999  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1000  ProtoSetIndex++) {
1001  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
1002  for (ProtoWordNum = 0; ProtoWordNum < 2;
1003  ProtoWordNum++, ProtoMask++) {
1004  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
1005  for (ProtoNum = 0;
1006  ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
1007  && (ActualProtoNum < ClassTemplate->NumProtos));
1008  ProtoNum++, ActualProtoNum++)
1009  cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
1010  cprintf ("\n");
1011  }
1012  }
1013  cprintf ("\n");
1014  }
1015 
1016  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
1017  ProtoConfigs[i] = 0;
1018 
1019  if (PrintProtoMatchesOn (Debug)) {
1020  cprintf ("Proto Evidence:\n");
1021  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1022  ProtoSetIndex++) {
1023  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1024  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
1025  for (ProtoNum = 0;
1026  ((ProtoNum < PROTOS_PER_PROTO_SET) &&
1027  (ActualProtoNum < ClassTemplate->NumProtos));
1028  ProtoNum++, ActualProtoNum++) {
1029  cprintf ("P %3d =", ActualProtoNum);
1030  int temp = 0;
1031  for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
1032  uinT8 data = tables.proto_evidence_[ActualProtoNum][j];
1033  cprintf(" %d", data);
1034  temp += data;
1035  }
1036 
1037  cprintf(" = %6.4f%%\n",
1038  temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);
1039 
1040  ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1041  ConfigNum = 0;
1042  while (ConfigWord) {
1043  cprintf ("%5d", ConfigWord & 1 ? temp : 0);
1044  if (ConfigWord & 1)
1045  ProtoConfigs[ConfigNum] += temp;
1046  ConfigNum++;
1047  ConfigWord >>= 1;
1048  }
1049  cprintf("\n");
1050  }
1051  }
1052  }
1053 
1054  if (PrintMatchSummaryOn (Debug)) {
1055  cprintf ("Proto Error for Configurations:\n");
1056  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1057  cprintf (" %5.1f",
1058  100.0 * (1.0 -
1059  ProtoConfigs[ConfigNum] /
1060  ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
1061  cprintf ("\n\n");
1062  }
1063 
1064  if (PrintProtoMatchesOn (Debug)) {
1065  cprintf ("Proto Sum for Configurations:\n");
1066  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1067  cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
1068  cprintf ("\n\n");
1069 
1070  cprintf ("Proto Length for Configurations:\n");
1071  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1072  cprintf (" %4.1f",
1073  (float) ClassTemplate->ConfigLengths[ConfigNum]);
1074  cprintf ("\n\n");
1075  }
1076 
1077 }
1078 
1079 
1080 /*---------------------------------------------------------------------------*/
1081 void IntegerMatcher::DisplayProtoDebugInfo(
1082  INT_CLASS ClassTemplate,
1083  BIT_VECTOR ProtoMask,
1084  BIT_VECTOR ConfigMask,
1085  const ScratchEvidence& tables,
1086  bool SeparateDebugWindows) {
1087  uinT16 ProtoNum;
1088  uinT16 ActualProtoNum;
1089  PROTO_SET ProtoSet;
1090  int ProtoSetIndex;
1091 
1092  InitIntMatchWindowIfReqd();
1093  if (SeparateDebugWindows) {
1094  InitFeatureDisplayWindowIfReqd();
1095  InitProtoDisplayWindowIfReqd();
1096  }
1097 
1098 
1099  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1100  ProtoSetIndex++) {
1101  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1102  ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
1103  for (ProtoNum = 0;
1104  ((ProtoNum < PROTOS_PER_PROTO_SET) &&
1105  (ActualProtoNum < ClassTemplate->NumProtos));
1106  ProtoNum++, ActualProtoNum++) {
1107  /* Compute Average for Actual Proto */
1108  int temp = 0;
1109  for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1110  temp += tables.proto_evidence_[ActualProtoNum][i];
1111 
1112  temp /= ClassTemplate->ProtoLengths[ActualProtoNum];
1113 
1114  if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
1115  DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
1116  }
1117  }
1118  }
1119 }
1120 
1121 
1122 /*---------------------------------------------------------------------------*/
1123 void IntegerMatcher::DisplayFeatureDebugInfo(
1124  INT_CLASS ClassTemplate,
1125  BIT_VECTOR ProtoMask,
1126  BIT_VECTOR ConfigMask,
1127  inT16 NumFeatures,
1128  const INT_FEATURE_STRUCT* Features,
1129  int AdaptFeatureThreshold,
1130  int Debug,
1131  bool SeparateDebugWindows) {
1132  ScratchEvidence *tables = new ScratchEvidence();
1133 
1134  tables->Clear(ClassTemplate);
1135 
1136  InitIntMatchWindowIfReqd();
1137  if (SeparateDebugWindows) {
1138  InitFeatureDisplayWindowIfReqd();
1139  InitProtoDisplayWindowIfReqd();
1140  }
1141 
1142  for (int Feature = 0; Feature < NumFeatures; Feature++) {
1143  UpdateTablesForFeature(
1144  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
1145  tables, 0);
1146 
1147  /* Find Best Evidence for Current Feature */
1148  int best = 0;
1149  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
1150  if (tables->feature_evidence_[i] > best)
1151  best = tables->feature_evidence_[i];
1152 
1153  /* Update display for current feature */
1154  if (ClipMatchEvidenceOn(Debug)) {
1155  if (best < AdaptFeatureThreshold)
1156  DisplayIntFeature(&Features[Feature], 0.0);
1157  else
1158  DisplayIntFeature(&Features[Feature], 1.0);
1159  } else {
1160  DisplayIntFeature(&Features[Feature], best / 255.0);
1161  }
1162  }
1163 
1164  delete tables;
1165 }
1166 #endif
1167 
1168 /*---------------------------------------------------------------------------*/
1169 // Add sum of Proto Evidences into Sum Of Feature Evidence Array
1170 void ScratchEvidence::UpdateSumOfProtoEvidences(
1171  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {
1172 
1173  int *IntPointer;
1174  uinT32 ConfigWord;
1175  int ProtoSetIndex;
1176  uinT16 ProtoNum;
1177  PROTO_SET ProtoSet;
1178  int NumProtos;
1179  uinT16 ActualProtoNum;
1180 
1181  NumProtos = ClassTemplate->NumProtos;
1182 
1183  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1184  ProtoSetIndex++) {
1185  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1186  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
1187  for (ProtoNum = 0;
1188  ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
1189  ProtoNum++, ActualProtoNum++) {
1190  int temp = 0;
1191  for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1192  temp += proto_evidence_[ActualProtoNum] [i];
1193 
1194  ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1195  ConfigWord &= *ConfigMask;
1196  IntPointer = sum_feature_evidence_;
1197  while (ConfigWord) {
1198  if (ConfigWord & 1)
1199  *IntPointer += temp;
1200  IntPointer++;
1201  ConfigWord >>= 1;
1202  }
1203  }
1204  }
1205 }
1206 
1207 
1208 
1209 /*---------------------------------------------------------------------------*/
1210 // Normalize Sum of Proto and Feature Evidence by dividing by the sum of
1211 // the Feature Lengths and the Proto Lengths for each configuration.
1212 void ScratchEvidence::NormalizeSums(
1213  INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) {
1214 
1215  for (int i = 0; i < ClassTemplate->NumConfigs; i++) {
1216  sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) /
1217  (NumFeatures + ClassTemplate->ConfigLengths[i]);
1218  }
1219 }
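// Worked example (illustrative numbers only): with NumFeatures = 60, a config
// whose ConfigLengths entry is 40 and a raw evidence sum of 12000, the
// normalized value is (12000 << 8) / (60 + 40) = 30720, which FindBestMatch
// below maps to a rating of 30720 / 65536, or about 0.47.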
1220 
1221 
1222 /*---------------------------------------------------------------------------*/
1223 int IntegerMatcher::FindBestMatch(
1224  INT_CLASS class_template,
1225  const ScratchEvidence &tables,
1226  UnicharRating* result) {
1227 /*
1228  ** Parameters:
1229  ** Globals:
1230  ** Operation:
1231  ** Find the best match for the current class and update the Result
1232  ** with the configuration and match rating.
1233  ** Return:
1234  ** The best normalized sum of evidences
1235  ** Exceptions: none
1236  ** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
1237  */
1238  int best_match = 0;
1239  result->config = 0;
1240  result->fonts.truncate(0);
1241  result->fonts.reserve(class_template->NumConfigs);
1242 
1243  /* Find best match */
1244  for (int c = 0; c < class_template->NumConfigs; ++c) {
1245  int rating = tables.sum_feature_evidence_[c];
1246  if (*classify_debug_level_ > 2)
1247  tprintf("Config %d, rating=%d\n", c, rating);
1248  if (rating > best_match) {
1249  result->config = c;
1250  best_match = rating;
1251  }
1252  result->fonts.push_back(ScoredFont(c, rating));
1253  }
1254 
1255  // Compute confidence on a Probability scale.
1256  result->rating = best_match / 65536.0f;
1257 
1258  return best_match;
1259 }
1260 
1261 // Applies the CN normalization factor to the given rating and returns
1262 // the modified rating.
1263 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
1264  int normalization_factor,
1265  int matcher_multiplier) {
1266  return (rating * blob_length +
1267  matcher_multiplier * normalization_factor / 256.0) /
1268  (blob_length + matcher_multiplier);
1269 }
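// The correction is a weighted average of the matcher rating and the
// normalization term normalization_factor / 256, with weights blob_length and
// matcher_multiplier. For example (illustrative numbers only): rating = 0.8,
// blob_length = 40, normalization_factor = 128 and matcher_multiplier = 10
// give (0.8 * 40 + 10 * 0.5) / (40 + 10) = 0.74.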
1270 
1271 /*---------------------------------------------------------------------------*/
1272 void
1273 HeapSort (int n, register int ra[], register int rb[]) {
1274 /*
1275  ** Parameters:
1276  ** n Number of elements to sort
1277  ** ra Key array [1..n]
1278  ** rb Index array [1..n]
1279  ** Globals:
1280  ** Operation:
1281  ** Sort Key array in ascending order using heap sort
1282  ** algorithm. Also sort Index array that is tied to
1283  ** the key array.
1284  ** Return:
1285  ** Exceptions: none
1286  ** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
1287  */
1288  register int i, rra, rrb;
1289  int l, j, ir;
1290 
1291  l = (n >> 1) + 1;
1292  ir = n;
1293  for (;;) {
1294  if (l > 1) {
1295  rra = ra[--l];
1296  rrb = rb[l];
1297  }
1298  else {
1299  rra = ra[ir];
1300  rrb = rb[ir];
1301  ra[ir] = ra[1];
1302  rb[ir] = rb[1];
1303  if (--ir == 1) {
1304  ra[1] = rra;
1305  rb[1] = rrb;
1306  return;
1307  }
1308  }
1309  i = l;
1310  j = l << 1;
1311  while (j <= ir) {
1312  if (j < ir && ra[j] < ra[j + 1])
1313  ++j;
1314  if (rra < ra[j]) {
1315  ra[i] = ra[j];
1316  rb[i] = rb[j];
1317  j += (i = j);
1318  }
1319  else
1320  j = ir + 1;
1321  }
1322  ra[i] = rra;
1323  rb[i] = rrb;
1324  }
1325 }
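// Usage sketch of HeapSort (illustrative values; mirrors the PruneAndSort
// call above). Both arrays are treated as 1-based, so element 0 is unused:
//   int key[5]   = {0, 42, 7, 99, 13};   // keys to sort, in key[1..4]
//   int index[5] = {0, 10, 20, 30, 40};  // payload tied to each key
//   HeapSort(4, key, index);
//   // key[1..4] is now {7, 13, 42, 99}, and index[1..4] follows the keys:
//   // {20, 40, 10, 30}.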