tesseract  3.04.00
tesseract::CubeUtils Class Reference

#include <cube_utils.h>

Public Member Functions

 CubeUtils ()
 
 ~CubeUtils ()
 

Static Public Member Functions

static int Prob2Cost (double prob_val)
 
static double Cost2Prob (int cost)
 
static int StrLen (const char_32 *str)
 
static int StrCmp (const char_32 *str1, const char_32 *str2)
 
static char_32 * StrDup (const char_32 *str)
 
static CharSamp * CharSampleFromPix (Pix *pix, int left, int top, int wid, int hgt)
 
static Pix * PixFromCharSample (CharSamp *char_samp)
 
static bool ReadFileToString (const string &file_name, string *str)
 
static void SplitStringUsing (const string &str, const string &delims, vector< string > *str_vec)
 
static void UTF8ToUTF32 (const char *utf8_str, string_32 *str32)
 
static void UTF32ToUTF8 (const char_32 *utf32_str, string *str)
 
static bool IsCaseInvariant (const char_32 *str32, CharSet *char_set)
 
static char_32 * ToLower (const char_32 *str32, CharSet *char_set)
 
static char_32 * ToUpper (const char_32 *str32, CharSet *char_set)
 

Detailed Description

Definition at line 35 of file cube_utils.h.

Constructor & Destructor Documentation

tesseract::CubeUtils::CubeUtils ( )

Definition at line 28 of file cube_utils.cpp.

28  {
29 }
tesseract::CubeUtils::~CubeUtils ( )

Definition at line 31 of file cube_utils.cpp.

31  {
32 }

Member Function Documentation

CharSamp * tesseract::CubeUtils::CharSampleFromPix ( Pix *  pix,
int  left,
int  top,
int  wid,
int  hgt 
)
static

Definition at line 92 of file cube_utils.cpp.

93  {
94  // get the raw img data from the image
95  unsigned char *temp_buff = GetImageData(pix, left, top, wid, hgt);
96  if (temp_buff == NULL) {
97  return NULL;
98  }
99 
100  // create a char samp from temp buffer
101  CharSamp *char_samp = CharSamp::FromRawData(left, top, wid, hgt, temp_buff);
102 
103  // clean up temp buffer
104  delete []temp_buff;
105  return char_samp;
106 }
#define NULL
Definition: host.h:144
static CharSamp * FromRawData(int left, int top, int wid, int hgt, unsigned char *data)
Definition: char_samp.cpp:273
double tesseract::CubeUtils::Cost2Prob ( int  cost)
static

Definition at line 43 of file cube_utils.cpp.

43  {
44  return exp(-cost / PROB2COST_SCALE);
45 }
#define PROB2COST_SCALE
Definition: cube_const.h:24
bool tesseract::CubeUtils::IsCaseInvariant ( const char_32 *  str32,
CharSet *  char_set 
)
static

Definition at line 270 of file cube_utils.cpp.

270  {
271  bool all_one_case = true;
272  bool capitalized;
273  bool prev_upper;
274  bool prev_lower;
275  bool first_upper;
276  bool first_lower;
277  bool cur_upper;
278  bool cur_lower;
279 
280  string str8;
281  if (!char_set) {
282  // If cube char_set is missing, use C-locale-dependent functions
283  // on UTF8 characters to determine case properties.
284  first_upper = isupper(str32[0]);
285  first_lower = islower(str32[0]);
286  if (first_upper)
287  capitalized = true;
288  prev_upper = first_upper;
289  prev_lower = islower(str32[0]);
290  for (int c = 1; str32[c] != 0; ++c) {
291  cur_upper = isupper(str32[c]);
292  cur_lower = islower(str32[c]);
293  if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
294  all_one_case = false;
295  if (cur_upper)
296  capitalized = false;
297  prev_upper = cur_upper;
298  prev_lower = cur_lower;
299  }
300  } else {
301  UNICHARSET *unicharset = char_set->InternalUnicharset();
302  // Use UNICHARSET functions to determine case properties
303  first_upper = unicharset->get_isupper(char_set->ClassID(str32[0]));
304  first_lower = unicharset->get_islower(char_set->ClassID(str32[0]));
305  if (first_upper)
306  capitalized = true;
307  prev_upper = first_upper;
308  prev_lower = unicharset->get_islower(char_set->ClassID(str32[0]));
309 
310  for (int c = 1; c < StrLen(str32); ++c) {
311  cur_upper = unicharset->get_isupper(char_set->ClassID(str32[c]));
312  cur_lower = unicharset->get_islower(char_set->ClassID(str32[c]));
313  if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
314  all_one_case = false;
315  if (cur_upper)
316  capitalized = false;
317  prev_upper = cur_upper;
318  prev_lower = cur_lower;
319  }
320  }
321  return all_one_case || capitalized;
322 }
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:48
bool get_islower(UNICHAR_ID unichar_id) const
Definition: unicharset.h:456
bool get_isupper(UNICHAR_ID unichar_id) const
Definition: unicharset.h:463
Pix * tesseract::CubeUtils::PixFromCharSample ( CharSamp *  char_samp)
static

Definition at line 109 of file cube_utils.cpp.

109  {
110  // parameter check
111  if (char_samp == NULL) {
112  return NULL;
113  }
114 
115  // get the raw data
116  int stride = char_samp->Stride();
117  int wid = char_samp->Width();
118  int hgt = char_samp->Height();
119 
120  Pix *pix = pixCreate(wid, hgt, 1);
121  if (pix == NULL) {
122  return NULL;
123  }
124 
125  // copy the contents
126  unsigned char *line = char_samp->RawData();
127  for (int y = 0; y < hgt ; y++, line += stride) {
128  for (int x = 0; x < wid; x++) {
129  if (line[x] != 0) {
130  pixSetPixel(pix, x, y, 0);
131  } else {
132  pixSetPixel(pix, x, y, 255);
133  }
134  }
135  }
136 
137  return pix;
138 }
#define NULL
Definition: host.h:144
int tesseract::CubeUtils::Prob2Cost ( double  prob_val)
static

Definition at line 35 of file cube_utils.cpp.

35  {
36  if (prob_val < MIN_PROB) {
37  return MIN_PROB_COST;
38  }
39  return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
40 }
#define MIN_PROB
Definition: cube_const.h:28
#define PROB2COST_SCALE
Definition: cube_const.h:24
#define MIN_PROB_COST
Definition: cube_const.h:26
bool tesseract::CubeUtils::ReadFileToString ( const string &  file_name,
string *  str 
)
static

Definition at line 177 of file cube_utils.cpp.

177  {
178  str->clear();
179  FILE *fp = fopen(file_name.c_str(), "rb");
180  if (fp == NULL) {
181  return false;
182  }
183 
184  // get the size of the size
185  fseek(fp, 0, SEEK_END);
186  int file_size = ftell(fp);
187  if (file_size < 1) {
188  fclose(fp);
189  return false;
190  }
191  // adjust string size
192  str->reserve(file_size);
193  // read the contents
194  rewind(fp);
195  char *buff = new char[file_size];
196  if (buff == NULL) {
197  fclose(fp);
198  return false;
199  }
200  int read_bytes = fread(buff, 1, static_cast<int>(file_size), fp);
201  if (read_bytes == file_size) {
202  str->append(buff, file_size);
203  }
204  delete []buff;
205  fclose(fp);
206  return (read_bytes == file_size);
207 }
#define NULL
Definition: host.h:144
void tesseract::CubeUtils::SplitStringUsing ( const string &  str,
const string &  delims,
vector< string > *  str_vec 
)
static

Definition at line 210 of file cube_utils.cpp.

212  {
213  // Optimize the common case where delims is a single character.
214  if (delims[0] != '\0' && delims[1] == '\0') {
215  char c = delims[0];
216  const char* p = str.data();
217  const char* end = p + str.size();
218  while (p != end) {
219  if (*p == c) {
220  ++p;
221  } else {
222  const char* start = p;
223  while (++p != end && *p != c);
224  str_vec->push_back(string(start, p - start));
225  }
226  }
227  return;
228  }
229 
230  string::size_type begin_index, end_index;
231  begin_index = str.find_first_not_of(delims);
232  while (begin_index != string::npos) {
233  end_index = str.find_first_of(delims, begin_index);
234  if (end_index == string::npos) {
235  str_vec->push_back(str.substr(begin_index));
236  return;
237  }
238  str_vec->push_back(str.substr(begin_index, (end_index - begin_index)));
239  begin_index = str.find_first_not_of(delims, end_index);
240  }
241 }
int tesseract::CubeUtils::StrCmp ( const char_32 *  str1,
const char_32 *  str2 
)
static

Definition at line 58 of file cube_utils.cpp.

58  {
59  const char_32 *pch1 = str1;
60  const char_32 *pch2 = str2;
61 
62  for (; (*pch1) != 0 && (*pch2) != 0; pch1++, pch2++) {
63  if ((*pch1) != (*pch2)) {
64  return (*pch1) - (*pch2);
65  }
66  }
67 
68  if ((*pch1) == 0) {
69  if ((*pch2) == 0) {
70  return 0;
71  } else {
72  return -1;
73  }
74  } else {
75  return 1;
76  }
77 }
signed int char_32
Definition: string_32.h:40
char_32 * tesseract::CubeUtils::StrDup ( const char_32 *  str)
static

Definition at line 80 of file cube_utils.cpp.

80  {
81  int len = StrLen(str32);
82  char_32 *new_str = new char_32[len + 1];
83  if (new_str == NULL) {
84  return NULL;
85  }
86  memcpy(new_str, str32, len * sizeof(*str32));
87  new_str[len] = 0;
88  return new_str;
89 }
#define NULL
Definition: host.h:144
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:48
signed int char_32
Definition: string_32.h:40
int tesseract::CubeUtils::StrLen ( const char_32 *  str)
static

Definition at line 48 of file cube_utils.cpp.

48  {
49  if (char_32_ptr == NULL) {
50  return 0;
51  }
52  int len = -1;
53  while (char_32_ptr[++len]);
54  return len;
55 }
#define NULL
Definition: host.h:144
char_32 * tesseract::CubeUtils::ToLower ( const char_32 *  str32,
CharSet *  char_set 
)
static

Definition at line 324 of file cube_utils.cpp.

324  {
325  if (!char_set) {
326  return NULL;
327  }
328  UNICHARSET *unicharset = char_set->InternalUnicharset();
329  int len = StrLen(str32);
330  char_32 *lower = new char_32[len + 1];
331  if (!lower)
332  return NULL;
333  for (int i = 0; i < len; ++i) {
334  char_32 ch = str32[i];
335  if (ch == INVALID_UNICHAR_ID) {
336  delete [] lower;
337  return NULL;
338  }
339  // convert upper-case characters to lower-case
340  if (unicharset->get_isupper(char_set->ClassID(ch))) {
341  UNICHAR_ID uid_lower = unicharset->get_other_case(char_set->ClassID(ch));
342  const char_32 *str32_lower = char_set->ClassString(uid_lower);
343  // expect lower-case version of character to be a single character
344  if (!str32_lower || StrLen(str32_lower) != 1) {
345  delete [] lower;
346  return NULL;
347  }
348  lower[i] = str32_lower[0];
349  } else {
350  lower[i] = ch;
351  }
352  }
353  lower[len] = 0;
354  return lower;
355 }
#define NULL
Definition: host.h:144
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:48
int UNICHAR_ID
Definition: unichar.h:33
signed int char_32
Definition: string_32.h:40
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
Definition: unicharset.h:631
bool get_isupper(UNICHAR_ID unichar_id) const
Definition: unicharset.h:463
char_32 * tesseract::CubeUtils::ToUpper ( const char_32 *  str32,
CharSet *  char_set 
)
static

Definition at line 357 of file cube_utils.cpp.

357  {
358  if (!char_set) {
359  return NULL;
360  }
361  UNICHARSET *unicharset = char_set->InternalUnicharset();
362  int len = StrLen(str32);
363  char_32 *upper = new char_32[len + 1];
364  if (!upper)
365  return NULL;
366  for (int i = 0; i < len; ++i) {
367  char_32 ch = str32[i];
368  if (ch == INVALID_UNICHAR_ID) {
369  delete [] upper;
370  return NULL;
371  }
372  // convert lower-case characters to upper-case
373  if (unicharset->get_islower(char_set->ClassID(ch))) {
374  UNICHAR_ID uid_upper = unicharset->get_other_case(char_set->ClassID(ch));
375  const char_32 *str32_upper = char_set->ClassString(uid_upper);
376  // expect upper-case version of character to be a single character
377  if (!str32_upper || StrLen(str32_upper) != 1) {
378  delete [] upper;
379  return NULL;
380  }
381  upper[i] = str32_upper[0];
382  } else {
383  upper[i] = ch;
384  }
385  }
386  upper[len] = 0;
387  return upper;
388 }
#define NULL
Definition: host.h:144
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:48
int UNICHAR_ID
Definition: unichar.h:33
bool get_islower(UNICHAR_ID unichar_id) const
Definition: unicharset.h:456
signed int char_32
Definition: string_32.h:40
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
Definition: unicharset.h:631
void tesseract::CubeUtils::UTF32ToUTF8 ( const char_32 *  utf32_str,
string *  str 
)
static

Definition at line 258 of file cube_utils.cpp.

258  {
259  str->clear();
260  for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {
261  UNICHAR uni_ch((*ch_32));
262  char *utf8 = uni_ch.utf8_str();
263  if (utf8 != NULL) {
264  (*str) += utf8;
265  delete []utf8;
266  }
267  }
268 }
#define NULL
Definition: host.h:144
signed int char_32
Definition: string_32.h:40
void tesseract::CubeUtils::UTF8ToUTF32 ( const char *  utf8_str,
string_32 *  str32 
)
static

Definition at line 244 of file cube_utils.cpp.

244  {
245  str32->clear();
246  int len = strlen(utf8_str);
247  int step = 0;
248  for (int ch = 0; ch < len; ch += step) {
249  step = UNICHAR::utf8_step(utf8_str + ch);
250  if (step > 0) {
251  UNICHAR uni_ch(utf8_str + ch, step);
252  (*str32) += uni_ch.first_uni();
253  }
254  }
255 }
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:134

The documentation for this class was generated from the following files: