tesseract  3.04.00
picofeat.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: picofeat.cpp
3  ** Purpose: Definition of pico-features.
4  ** Author: Dan Johnson
5  ** History: 9/4/90, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
21 #include "picofeat.h"
22 
23 #include "classify.h"
24 #include "efio.h"
25 #include "featdefs.h"
26 #include "fpoint.h"
27 #include "mfoutline.h"
28 #include "ocrfeatures.h"
29 #include "params.h"
30 #include "trainingsample.h"
31 
32 #include <math.h>
33 #include <stdio.h>
34 
35 /*---------------------------------------------------------------------------
36  Variables
37 ----------------------------------------------------------------------------*/
38 
39 double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length");
40 
41 /*---------------------------------------------------------------------------
42  Private Function Prototypes
43 ----------------------------------------------------------------------------*/
45  FPOINT *End,
46  FEATURE_SET FeatureSet);
47 
48 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
49 
50 void NormalizePicoX(FEATURE_SET FeatureSet);
51 
55 /*---------------------------------------------------------------------------*/
56 namespace tesseract {
58 /*
59  ** Parameters:
60  ** Blob blob to extract pico-features from
61  ** LineStats statistics on text row blob is in
62  ** Globals:
63  ** classify_norm_method normalization method currently specified
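64  ** Operation: Converts the blob to outlines, normalizes them, extracts pico-features from each outline, then re-centers the features in x.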
65  ** Return: Pico-features for Blob.
66  ** Exceptions: none
67  ** History: 9/4/90, DSJ, Created.
68  */
69  LIST Outlines;
70  LIST RemainingOutlines;
71  MFOUTLINE Outline;
72  FEATURE_SET FeatureSet;
73  FLOAT32 XScale, YScale;
74 
75  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
76  Outlines = ConvertBlob(Blob);
77  NormalizeOutlines(Outlines, &XScale, &YScale);
78  RemainingOutlines = Outlines;
79  iterate(RemainingOutlines) {
80  Outline = (MFOUTLINE) first_node (RemainingOutlines);
81  ConvertToPicoFeatures2(Outline, FeatureSet);
82  }
84  NormalizePicoX(FeatureSet);
85  FreeOutlines(Outlines);
86  return (FeatureSet);
87 
88 } /* ExtractPicoFeatures */
89 } // namespace tesseract
90 
94 /*---------------------------------------------------------------------------*/
96  FPOINT *End,
97  FEATURE_SET FeatureSet) {
98 /*
99  ** Parameters:
100  ** Start starting point of pico-feature
101  ** End ending point of pico-feature
102  ** FeatureSet set to add pico-feature to
103  ** Globals:
104  ** classify_pico_feature_length length of a single pico-feature
105  ** Operation: This routine converts an entire segment of an outline
106  ** into a set of pico features which are added to
107  ** FeatureSet. The length of the segment is rounded to the
108  ** nearest whole number of pico-features. The pico-features
109  ** are spaced evenly over the entire segment.
110  ** Return: none (results are placed in FeatureSet)
111  ** Exceptions: none
112  ** History: Tue Apr 30 15:44:34 1991, DSJ, Created.
113  */
114  FEATURE Feature;
115  FLOAT32 Angle;
116  FLOAT32 Length;
117  int NumFeatures;
118  FPOINT Center;
119  FPOINT Delta;
120  int i;
121 
122  Angle = NormalizedAngleFrom (Start, End, 1.0);
123  Length = DistanceBetween (*Start, *End);
124  NumFeatures = (int) floor (Length / classify_pico_feature_length + 0.5);
125  if (NumFeatures < 1)
126  NumFeatures = 1;
127 
128  /* compute vector for one pico feature */
129  Delta.x = XDelta (*Start, *End) / NumFeatures;
130  Delta.y = YDelta (*Start, *End) / NumFeatures;
131 
132  /* compute position of first pico feature */
133  Center.x = Start->x + Delta.x / 2.0;
134  Center.y = Start->y + Delta.y / 2.0;
135 
136  /* compute each pico feature in segment and add to feature set */
137  for (i = 0; i < NumFeatures; i++) {
138  Feature = NewFeature (&PicoFeatDesc);
139  Feature->Params[PicoFeatDir] = Angle;
140  Feature->Params[PicoFeatX] = Center.x;
141  Feature->Params[PicoFeatY] = Center.y;
142  AddFeature(FeatureSet, Feature);
143 
144  Center.x += Delta.x;
145  Center.y += Delta.y;
146  }
147 } /* ConvertSegmentToPicoFeat */
148 
149 
150 /*---------------------------------------------------------------------------*/
151 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
152 /*
153  ** Parameters:
154  ** Outline outline to extract micro-features from
155  ** FeatureSet set of features to add pico-features to
156  ** Globals:
157  ** classify_pico_feature_length
158  ** length of features to be extracted
159  ** Operation:
160  ** This routine steps thru the specified outline and cuts it
161  ** up into pieces of equal length. These pieces become the
162  ** desired pico-features. Each segment in the outline
163  ** is converted into an integral number of pico-features.
164  ** Return: none (results are returned in FeatureSet)
165  ** Exceptions: none
166  ** History: 4/30/91, DSJ, Adapted from ConvertToPicoFeatures().
167  */
168  MFOUTLINE Next;
169  MFOUTLINE First;
170  MFOUTLINE Current;
171 
172  if (DegenerateOutline(Outline))
173  return;
174 
175  First = Outline;
176  Current = First;
177  Next = NextPointAfter(Current);
178  do {
179  /* note that an edge is hidden if the ending point of the edge is
180  marked as hidden. This situation happens because the order of
181  the outlines is reversed when they are converted from the old
182  format. In the old format, a hidden edge is marked by the
183  starting point for that edge. */
184  if (!(PointAt(Next)->Hidden))
185  ConvertSegmentToPicoFeat (&(PointAt(Current)->Point),
186  &(PointAt(Next)->Point), FeatureSet);
187 
188  Current = Next;
189  Next = NextPointAfter(Current);
190  }
191  while (Current != First);
192 
193 } /* ConvertToPicoFeatures2 */
194 
195 
196 /*---------------------------------------------------------------------------*/
197 void NormalizePicoX(FEATURE_SET FeatureSet) {
198 /*
199  ** Parameters:
200  ** FeatureSet pico-features to be normalized
201  ** Globals: none
202  ** Operation: This routine computes the average x position over all
203  ** of the pico-features in FeatureSet and then renormalizes
204  ** the pico-features to force this average to be the x origin
205  ** (i.e. x=0).
206  ** Return: none (FeatureSet is changed)
207  ** Exceptions: none
208  ** History: Tue Sep 4 16:50:08 1990, DSJ, Created.
209  */
210  int i;
211  FEATURE Feature;
212  FLOAT32 Origin = 0.0;
213 
214  for (i = 0; i < FeatureSet->NumFeatures; i++) {
215  Feature = FeatureSet->Features[i];
216  Origin += Feature->Params[PicoFeatX];
217  }
218  Origin /= FeatureSet->NumFeatures;
219 
220  for (i = 0; i < FeatureSet->NumFeatures; i++) {
221  Feature = FeatureSet->Features[i];
222  Feature->Params[PicoFeatX] -= Origin;
223  }
224 } /* NormalizePicoX */
225 
226 namespace tesseract {
227 /*---------------------------------------------------------------------------*/
229  const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
230 /*
231  ** Parameters:
232  ** blob blob to extract features from
233  ** fx_info INT_FX_RESULT_STRUCT for the blob; copied locally before feature extraction.
234  ** Return: Integer character-normalized features for blob.
235  ** Exceptions: none
236  ** History: 8/8/2011, rays, Created.
237  */
238  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
241  blob, false, &local_fx_info, &bl_features);
242  if (sample == NULL) return NULL;
243 
244  int num_features = sample->num_features();
245  const INT_FEATURE_STRUCT* features = sample->features();
246  FEATURE_SET feature_set = NewFeatureSet(num_features);
247  for (int f = 0; f < num_features; ++f) {
248  FEATURE feature = NewFeature(&IntFeatDesc);
249 
250  feature->Params[IntX] = features[f].X;
251  feature->Params[IntY] = features[f].Y;
252  feature->Params[IntDir] = features[f].Theta;
253  AddFeature(feature_set, feature);
254  }
255  delete sample;
256 
257  return feature_set;
258 } /* ExtractIntCNFeatures */
259 
260 /*---------------------------------------------------------------------------*/
262  const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
263 /*
264  ** Parameters:
265  ** blob blob to extract features from
266  ** fx_info INT_FX_RESULT_STRUCT for the blob; copied locally before feature extraction.
267  ** Return: Geometric (top/bottom/width) features for blob.
268  ** Exceptions: none
269  ** History: 8/8/2011, rays, Created.
270  */
271  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
274  blob, false, &local_fx_info, &bl_features);
275  if (sample == NULL) return NULL;
276 
277  FEATURE_SET feature_set = NewFeatureSet(1);
278  FEATURE feature = NewFeature(&IntFeatDesc);
279 
280  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
281  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
282  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
283  AddFeature(feature_set, feature);
284  delete sample;
285 
286  return feature_set;
287 } /* ExtractIntGeoFeatures */
288 
289 } // namespace tesseract.
#define NextPointAfter(E)
Definition: mfoutline.h:68
Definition: fpoint.h:29
FLOAT32 y
Definition: fpoint.h:31
#define YDelta(A, B)
Definition: fpoint.h:40
#define double_VAR(name, val, comment)
Definition: params.h:286
LIST MFOUTLINE
Definition: mfoutline.h:33
int geo_feature(int index) const
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:197
FEATURE Features[1]
Definition: ocrfeatures.h:72
#define NULL
Definition: host.h:144
const INT_FEATURE_STRUCT * features() const
#define PointAt(O)
Definition: mfoutline.h:67
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:39
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:57
const FEATURE_DESC_STRUCT IntFeatDesc
#define XDelta(A, B)
Definition: fpoint.h:39
#define DegenerateOutline(O)
Definition: mfoutline.h:66
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:228
const FEATURE_DESC_STRUCT PicoFeatDesc
#define iterate(l)
Definition: oldlist.h:159
Definition: picofeat.h:30
Definition: cluster.h:32
#define MAX_PICO_FEATURES
Definition: picofeat.h:47
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:35
Definition: picofeat.h:29
void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:95
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
FLOAT32 DistanceBetween(FPOINT A, FPOINT B)
Definition: fpoint.cpp:31
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:261
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:295
FEATURE_SET NewFeatureSet(int NumFeatures)
Definition: blobs.h:261
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:151
FLOAT32 x
Definition: fpoint.h:31
FLOAT32 NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, FLOAT32 FullScale)
Definition: fpoint.cpp:39
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:175
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
#define first_node(l)
Definition: oldlist.h:139
float FLOAT32
Definition: host.h:111
double classify_pico_feature_length
Definition: picofeat.cpp:39