60 file_name = data_file_path +
lang;
61 file_name +=
".cube.word-freq";
69 vector<string> str_vec;
71 if (str_vec.size() < 2) {
77 if (word_unigrams_obj ==
NULL) {
78 fprintf(stderr,
"Cube ERROR (WordUnigrams::Create): could not create " 79 "word unigrams object.\n");
83 int full_len = str.length();
84 int word_cnt = str_vec.size() / 2;
85 word_unigrams_obj->words_ =
new char*[word_cnt];
86 word_unigrams_obj->costs_ =
new int[word_cnt];
88 if (word_unigrams_obj->words_ ==
NULL ||
89 word_unigrams_obj->costs_ ==
NULL) {
90 fprintf(stderr,
"Cube ERROR (WordUnigrams::Create): error allocating " 91 "word unigram fields.\n");
92 delete word_unigrams_obj;
96 word_unigrams_obj->words_[0] =
new char[full_len];
97 if (word_unigrams_obj->words_[0] ==
NULL) {
98 fprintf(stderr,
"Cube ERROR (WordUnigrams::Create): error allocating " 99 "word unigram fields.\n");
100 delete word_unigrams_obj;
105 word_unigrams_obj->word_cnt_ = 0;
106 char *char_buff = word_unigrams_obj->words_[0];
110 for (
int wrd = 0; wrd < str_vec.size(); wrd += 2) {
111 word_unigrams_obj->words_[word_cnt] = char_buff;
113 strcpy(char_buff, str_vec[wrd].c_str());
114 char_buff += (str_vec[wrd].length() + 1);
116 if (sscanf(str_vec[wrd + 1].c_str(),
"%d",
117 word_unigrams_obj->costs_ + word_cnt) != 1) {
118 fprintf(stderr,
"Cube ERROR (WordUnigrams::Create): error reading " 119 "word unigram data.\n");
120 delete word_unigrams_obj;
124 max_cost =
MAX(max_cost, word_unigrams_obj->costs_[word_cnt]);
127 word_unigrams_obj->word_cnt_ = word_cnt;
140 word_unigrams_obj->not_in_list_cost_ = max_cost +
143 return word_unigrams_obj;
static bool ReadFileToString(const string &file_name, string *str)
static void SplitStringUsing(const string &str, const string &delims, vector< string > *str_vec)
static int Prob2Cost(double prob_val)