tesseract  3.04.00
fixspace.cpp File Reference
#include <ctype.h>
#include "reject.h"
#include "statistc.h"
#include "control.h"
#include "fixspace.h"
#include "genblob.h"
#include "tessvars.h"
#include "tessbox.h"
#include "globals.h"
#include "tesseractclass.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define PERFECT_WERDS   999
 
#define MAXSPACING   128 /*max expected spacing in pix */
 

Functions

void initialise_search (WERD_RES_LIST &src_list, WERD_RES_LIST &new_list)
 
transform_to_next_perm()

Examines the current word list to find the smallest word gap size. Then walks the word list, closing any gaps of this size by either inserting new combination words or extending existing ones.

The routine COULD be limited to stop it building words longer than N blobs.

If there are no more gaps then it DELETES the entire list and returns the empty list to cause termination.

void transform_to_next_perm (WERD_RES_LIST &words)
 
void fixspace_dbg (WERD_RES *word)
 

Macro Definition Documentation

#define MAXSPACING   128 /*max expected spacing in pix */

Definition at line 34 of file fixspace.cpp.

#define PERFECT_WERDS   999

Definition at line 33 of file fixspace.cpp.

Function Documentation

void fixspace_dbg ( WERD_RES *  word)

Definition at line 796 of file fixspace.cpp.

796  {
797  TBOX box = word->word->bounding_box();
798  BOOL8 show_map_detail = FALSE;
799  inT16 i;
800 
801  box.print();
802  tprintf(" \"%s\" ", word->best_choice->unichar_string().string());
803  tprintf("Blob count: %d (word); %d/%d (rebuild word)\n",
804  word->word->cblob_list()->length(),
805  word->rebuild_word->NumBlobs(),
806  word->box_word->length());
807  word->reject_map.print(debug_fp);
808  tprintf("\n");
809  if (show_map_detail) {
810  tprintf("\"%s\"\n", word->best_choice->unichar_string().string());
811  for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
812  tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
813  word->reject_map[i].full_print(debug_fp);
814  }
815  }
816 
817  tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
818  tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
819 }
TBOX bounding_box() const
Definition: werd.cpp:160
void print(FILE *fp)
Definition: rejctmap.cpp:394
#define tprintf(...)
Definition: tprintf.h:31
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
void full_print(FILE *fp)
Definition: rejctmap.cpp:406
const int length() const
Definition: boxword.h:85
TWERD * rebuild_word
Definition: pageres.h:244
int NumBlobs() const
Definition: blobs.h:425
#define FALSE
Definition: capi.h:29
TBOX
Definition: rect.h:30
const char * string() const
Definition: strngs.cpp:193
const STRING & unichar_string() const
Definition: ratngs.h:524
short inT16
Definition: host.h:100
BOOL8 done
Definition: pageres.h:282
WERD * word
Definition: pageres.h:175
unsigned char BOOL8
Definition: host.h:113
BOOL8 tess_accepted
Definition: pageres.h:280
tesseract::BoxWord * box_word
Definition: pageres.h:250
WERD_CHOICE * best_choice
Definition: pageres.h:219
REJMAP reject_map
Definition: pageres.h:271
FILE * debug_fp
Definition: tessvars.cpp:24
void print() const
Definition: rect.h:270
void initialise_search ( WERD_RES_LIST &  src_list,
WERD_RES_LIST &  new_list 
)

Definition at line 177 of file fixspace.cpp.

177  {
178  WERD_RES_IT src_it(&src_list);
179  WERD_RES_IT new_it(&new_list);
180  WERD_RES *src_wd;
181  WERD_RES *new_wd;
182 
183  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
184  src_wd = src_it.data();
185  if (!src_wd->combination) {
186  new_wd = WERD_RES::deep_copy(src_wd);
187  new_wd->combination = FALSE;
188  new_wd->part_of_combo = FALSE;
189  new_it.add_after_then_move(new_wd);
190  }
191  }
192 }
#define FALSE
Definition: capi.h:29
static WERD_RES * deep_copy(const WERD_RES *src)
Definition: pageres.h:630
BOOL8 combination
Definition: pageres.h:315
BOOL8 part_of_combo
Definition: pageres.h:316
void transform_to_next_perm ( WERD_RES_LIST &  words)

Definition at line 373 of file fixspace.cpp.

373  {
374  WERD_RES_IT word_it(&words);
375  WERD_RES_IT prev_word_it(&words);
376  WERD_RES *word;
377  WERD_RES *prev_word;
378  WERD_RES *combo;
379  WERD *copy_word;
380  inT16 prev_right = -MAX_INT16;
381  TBOX box;
382  inT16 gap;
383  inT16 min_gap = MAX_INT16;
384 
385  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
386  word = word_it.data();
387  if (!word->part_of_combo) {
388  box = word->word->bounding_box();
389  if (prev_right > -MAX_INT16) {
390  gap = box.left() - prev_right;
391  if (gap < min_gap)
392  min_gap = gap;
393  }
394  prev_right = box.right();
395  }
396  }
397  if (min_gap < MAX_INT16) {
398  prev_right = -MAX_INT16; // back to start
399  word_it.set_to_list(&words);
400  // Note: we can't use cycle_pt due to inserted combos at start of list.
401  for (; (prev_right == -MAX_INT16) || !word_it.at_first();
402  word_it.forward()) {
403  word = word_it.data();
404  if (!word->part_of_combo) {
405  box = word->word->bounding_box();
406  if (prev_right > -MAX_INT16) {
407  gap = box.left() - prev_right;
408  if (gap <= min_gap) {
409  prev_word = prev_word_it.data();
410  if (prev_word->combination) {
411  combo = prev_word;
412  } else {
413  /* Make a new combination and insert before
414  * the first word being joined. */
415  copy_word = new WERD;
416  *copy_word = *(prev_word->word);
417  // deep copy
418  combo = new WERD_RES(copy_word);
419  combo->combination = TRUE;
420  combo->x_height = prev_word->x_height;
421  prev_word->part_of_combo = TRUE;
422  prev_word_it.add_before_then_move(combo);
423  }
424  combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
425  if (word->combination) {
426  combo->word->join_on(word->word);
427  // Move blobs to combo
428  // old combo no longer needed
429  delete word_it.extract();
430  } else {
431  // Copy current wd to combo
432  combo->copy_on(word);
433  word->part_of_combo = TRUE;
434  }
435  combo->done = FALSE;
436  combo->ClearResults();
437  } else {
438  prev_word_it = word_it; // catch up
439  }
440  }
441  prev_right = box.right();
442  }
443  }
444  } else {
445  words.clear(); // signal termination
446  }
447 }
TBOX bounding_box() const
Definition: werd.cpp:160
#define MAX_INT16
Definition: host.h:119
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
W_EOL
Definition: werd.h:36
inT16 left() const
Definition: rect.h:68
inT16 right() const
Definition: rect.h:75
void ClearResults()
Definition: pageres.cpp:1140
WERD
Definition: werd.h:60
#define FALSE
Definition: capi.h:29
float x_height
Definition: pageres.h:295
TBOX
Definition: rect.h:30
BOOL8 combination
Definition: pageres.h:315
short inT16
Definition: host.h:100
BOOL8 part_of_combo
Definition: pageres.h:316
void copy_on(WERD_RES *word_res)
Definition: pageres.h:641
BOOL8 done
Definition: pageres.h:282
WERD * word
Definition: pageres.h:175
#define TRUE
Definition: capi.h:28
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
void join_on(WERD *other)
Definition: werd.cpp:211