PocketSphinx 5prealpha
cmdln_macro.h
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
 * Copyright (c) 2006 Carnegie Mellon University. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 *
 * This work was supported in part by funding from the Defense Advanced
 * Research Projects Agency and the National Science Foundation of the
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */

/* cmdln_macro.h - Command line definitions for PocketSphinx */

40#ifndef __PS_CMDLN_MACRO_H__
41#define __PS_CMDLN_MACRO_H__
42
43#include <sphinxbase/cmd_ln.h>
44#include <sphinxbase/feat.h>
45#include <sphinxbase/fe.h>
46
/**
 * Complete PocketSphinx command-line option list.
 *
 * Expands to one comma-separated sequence of option entries: the two
 * front-end macro invocations first, then every option group defined in this
 * header (acoustic model, beams, search, dictionary, n-gram, FSG, keyphrase
 * spotting, debugging), in that order.
 *
 * The groups are defined further down in the file; that is fine because a
 * macro body is only expanded where POCKETSPHINX_OPTIONS is used.
 *
 * The expansion has no trailing comma and no terminating entry, so the user
 * is expected to close the table themselves (presumably with
 * CMDLN_EMPTY_OPTION -- confirm against the callers).
 *
 * NOTE(review): waveform_to_cepstral_command_line_macro() and
 * cepstral_to_feature_command_line_macro() are not defined here; presumably
 * they are provided by <sphinxbase/fe.h> and <sphinxbase/feat.h>.
 */
#define POCKETSPHINX_OPTIONS \
    waveform_to_cepstral_command_line_macro(), \
    cepstral_to_feature_command_line_macro(), \
    POCKETSPHINX_ACMOD_OPTIONS, \
    POCKETSPHINX_BEAM_OPTIONS, \
    POCKETSPHINX_SEARCH_OPTIONS, \
    POCKETSPHINX_DICT_OPTIONS, \
    POCKETSPHINX_NGRAM_OPTIONS, \
    POCKETSPHINX_FSG_OPTIONS, \
    POCKETSPHINX_KWS_OPTIONS, \
    POCKETSPHINX_DEBUG_OPTIONS

/**
 * Options for debugging and logging.
 *
 * Expands to 5 comma-separated entries of the form
 * { option name, type constant, default value string, help text }.
 * Every entry here has a NULL default. The expansion carries no trailing
 * comma, so it can be followed by another group or by a terminating entry.
 */
#define POCKETSPHINX_DEBUG_OPTIONS \
    { "-logfn", ARG_STRING, NULL, \
      "File to write log messages in" }, \
    { "-debug", ARG_INT32, NULL, \
      "Verbosity level for debugging messages" }, \
    { "-mfclogdir", ARG_STRING, NULL, \
      "Directory to log feature files to" }, \
    { "-rawlogdir", ARG_STRING, NULL, \
      "Directory to log raw audio files to" }, \
    { "-senlogdir", ARG_STRING, NULL, \
      "Directory to log senone score files to" }

/**
 * Options defining beam width parameters for tuning the search.
 *
 * Expands to 12 comma-separated entries of the form
 * { option name, type constant, default value string, help text }.
 * Defaults are given as strings (e.g. "1e-48"), not as numeric literals.
 *
 * The final entry deliberately ends without a line continuation: a trailing
 * backslash there would splice whatever follows the macro into its body.
 */
#define POCKETSPHINX_BEAM_OPTIONS \
    { "-beam", ARG_FLOAT64, "1e-48", \
      "Beam width applied to every frame in Viterbi search (smaller values mean wider beam)" }, \
    { "-wbeam", ARG_FLOAT64, "7e-29", \
      "Beam width applied to word exits" }, \
    { "-pbeam", ARG_FLOAT64, "1e-48", \
      "Beam width applied to phone transitions" }, \
    { "-lpbeam", ARG_FLOAT64, "1e-40", \
      "Beam width applied to last phone in words" }, \
    { "-lponlybeam", ARG_FLOAT64, "7e-29", \
      "Beam width applied to last phone in single-phone words" }, \
    { "-fwdflatbeam", ARG_FLOAT64, "1e-64", \
      "Beam width applied to every frame in second-pass flat search" }, \
    { "-fwdflatwbeam", ARG_FLOAT64, "7e-29", \
      "Beam width applied to word exits in second-pass flat search" }, \
    { "-pl_window", ARG_INT32, "5", \
      "Phoneme lookahead window size, in frames" }, \
    { "-pl_beam", ARG_FLOAT64, "1e-10", \
      "Beam width applied to phone loop search for lookahead" }, \
    { "-pl_pbeam", ARG_FLOAT64, "1e-10", \
      "Beam width applied to phone loop transitions for lookahead" }, \
    { "-pl_pip", ARG_FLOAT32, "1.0", \
      "Phone insertion penalty for phone loop" }, \
    { "-pl_weight", ARG_FLOAT64, "3.0", \
      "Weight for phoneme lookahead penalties" }

/**
 * Options defining other parameters for tuning the search.
 *
 * Expands to 11 comma-separated entries of the form
 * { option name, type constant, default value string, help text }.
 * Boolean defaults are spelled "yes" / "no"; integer defaults are decimal
 * strings. The help strings are reproduced verbatim (including the trailing
 * space in the "-fwdflatsfwin" text) so user-visible output does not change.
 */
#define POCKETSPHINX_SEARCH_OPTIONS \
    { "-compallsen", ARG_BOOLEAN, "no", \
      "Compute all senone scores in every frame (can be faster when there are many senones)" }, \
    { "-fwdtree", ARG_BOOLEAN, "yes", \
      "Run forward lexicon-tree search (1st pass)" }, \
    { "-fwdflat", ARG_BOOLEAN, "yes", \
      "Run forward flat-lexicon search over word lattice (2nd pass)" }, \
    { "-bestpath", ARG_BOOLEAN, "yes", \
      "Run bestpath (Dijkstra) search over word lattice (3rd pass)" }, \
    { "-backtrace", ARG_BOOLEAN, "no", \
      "Print results and backtraces to log." }, \
    { "-latsize", ARG_INT32, "5000", \
      "Initial backpointer table size" }, \
    { "-maxwpf", ARG_INT32, "-1", \
      "Maximum number of distinct word exits at each frame (or -1 for no pruning)" }, \
    { "-maxhmmpf", ARG_INT32, "30000", \
      "Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)" }, \
    { "-min_endfr", ARG_INT32, "0", \
      "Nodes ignored in lattice construction if they persist for fewer than N frames" }, \
    { "-fwdflatefwid", ARG_INT32, "4", \
      "Minimum number of end frames for a word to be searched in fwdflat search" }, \
    { "-fwdflatsfwin", ARG_INT32, "25", \
      "Window of frames in lattice to search for successor words in fwdflat search " }

/**
 * Command-line options for keyphrase spotting.
 *
 * Expands to 5 comma-separated entries of the form
 * { option name, type constant, default value string, help text }.
 * "-keyphrase" and "-kws" have NULL defaults; the numeric options carry
 * their defaults as strings.
 */
#define POCKETSPHINX_KWS_OPTIONS \
    { "-keyphrase", ARG_STRING, NULL, \
      "Keyphrase to spot"}, \
    { "-kws", ARG_STRING, NULL, \
      "A file with keyphrases to spot, one per line"}, \
    { "-kws_plp", ARG_FLOAT64, "1e-1", \
      "Phone loop probability for keyphrase spotting" }, \
    { "-kws_delay", ARG_INT32, "10", \
      "Delay to wait for best detection score" }, \
    { "-kws_threshold", ARG_FLOAT64, "1", \
      "Threshold for p(hyp)/p(alternatives) ratio" }

/**
 * Command-line options for finite state grammars.
 *
 * Expands to 5 comma-separated entries of the form
 * { option name, type constant, default value string, help text }.
 * The three file/rule options default to NULL; the two boolean options
 * default to "yes".
 */
#define POCKETSPHINX_FSG_OPTIONS \
    { "-fsg", ARG_STRING, NULL, \
      "Sphinx format finite state grammar file"}, \
    { "-jsgf", ARG_STRING, NULL, \
      "JSGF grammar file" }, \
    { "-toprule", ARG_STRING, NULL, \
      "Start rule for JSGF (first public rule is default)" }, \
    { "-fsgusealtpron", ARG_BOOLEAN, "yes", \
      "Add alternate pronunciations to FSG"}, \
    { "-fsgusefiller", ARG_BOOLEAN, "yes", \
      "Insert filler words at each state."}

/**
 * Command-line options for statistical language models.
 *
 * Expands to 15 comma-separated entries of the form
 * { option name, type constant, default value string, help text }.
 * Model/file selection options default to NULL; weights, penalties and
 * probabilities carry their defaults as strings.
 *
 * The final entry deliberately ends without a line continuation: a trailing
 * backslash there would splice whatever follows the macro into its body.
 */
#define POCKETSPHINX_NGRAM_OPTIONS \
    { "-allphone", ARG_STRING, NULL, \
      "Perform phoneme decoding with phonetic lm" }, \
    { "-allphone_ci", ARG_BOOLEAN, "no", \
      "Perform phoneme decoding with phonetic lm and context-independent units only" }, \
    { "-lm", ARG_STRING, NULL, \
      "Word trigram language model input file" }, \
    { "-lmctl", ARG_STRING, NULL, \
      "Specify a set of language model"}, \
    { "-lmname", ARG_STRING, NULL, \
      "Which language model in -lmctl to use by default"}, \
    { "-lw", ARG_FLOAT32, "6.5", \
      "Language model probability weight" }, \
    { "-fwdflatlw", ARG_FLOAT32, "8.5", \
      "Language model probability weight for flat lexicon (2nd pass) decoding" }, \
    { "-bestpathlw", ARG_FLOAT32, "9.5", \
      "Language model probability weight for bestpath search" }, \
    { "-ascale", ARG_FLOAT32, "20.0", \
      "Inverse of acoustic model scale for confidence score calculation" }, \
    { "-wip", ARG_FLOAT32, "0.65", \
      "Word insertion penalty" }, \
    { "-nwpen", ARG_FLOAT32, "1.0", \
      "New word transition penalty" }, \
    { "-pip", ARG_FLOAT32, "1.0", \
      "Phone insertion penalty" }, \
    { "-uw", ARG_FLOAT32, "1.0", \
      "Unigram weight" }, \
    { "-silprob", ARG_FLOAT32, "0.005", \
      "Silence word transition probability" }, \
    { "-fillprob", ARG_FLOAT32, "1e-8", \
      "Filler word transition probability" }

/**
 * Command-line options for dictionaries.
 *
 * Expands to 3 comma-separated entries of the form
 * { option name, type constant, default value string, help text }.
 * "-dict" is the only entry in this header declared with REQARG_STRING
 * rather than ARG_STRING (presumably marking it as a required argument --
 * confirm against sphinxbase cmd_ln.h).
 *
 * The final entry deliberately ends without a line continuation: a trailing
 * backslash there would splice whatever follows the macro into its body.
 */
#define POCKETSPHINX_DICT_OPTIONS \
    { "-dict", REQARG_STRING, NULL, \
      "Main pronunciation dictionary (lexicon) input file" }, \
    { "-fdict", ARG_STRING, NULL, \
      "Noise word pronunciation dictionary input file" }, \
    { "-dictcase", ARG_BOOLEAN, "no", \
      "Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)" }

/**
 * Command-line options for acoustic modeling.
 *
 * Expands to 19 comma-separated entries of the form
 * { option name, type constant, default value string, help text }.
 * Model file/directory options default to NULL; floors, counts and the log
 * base carry their defaults as strings. Note that "-topn_beam" is declared
 * as ARG_STRING with default "0": per its help text it may hold either a
 * single beam or a per-feature list.
 */
#define POCKETSPHINX_ACMOD_OPTIONS \
    { "-hmm", ARG_STRING, NULL, \
      "Directory containing acoustic model files."}, \
    { "-featparams", ARG_STRING, NULL, \
      "File containing feature extraction parameters."}, \
    { "-mdef", ARG_STRING, NULL, \
      "Model definition input file" }, \
    { "-senmgau", ARG_STRING, NULL, \
      "Senone to codebook mapping input file (usually not needed)" }, \
    { "-tmat", ARG_STRING, NULL, \
      "HMM state transition matrix input file" }, \
    { "-tmatfloor", ARG_FLOAT32, "0.0001", \
      "HMM state transition probability floor (applied to -tmat file)" }, \
    { "-mean", ARG_STRING, NULL, \
      "Mixture gaussian means input file" }, \
    { "-var", ARG_STRING, NULL, \
      "Mixture gaussian variances input file" }, \
    { "-varfloor", ARG_FLOAT32, "0.0001", \
      "Mixture gaussian variance floor (applied to data from -var file)" }, \
    { "-mixw", ARG_STRING, NULL, \
      "Senone mixture weights input file (uncompressed)" }, \
    { "-mixwfloor", ARG_FLOAT32, "0.0000001", \
      "Senone mixture weights floor (applied to data from -mixw file)" }, \
    { "-aw", ARG_INT32, "1", \
      "Inverse weight applied to acoustic scores." }, \
    { "-sendump", ARG_STRING, NULL, \
      "Senone dump (compressed mixture weights) input file" }, \
    { "-mllr", ARG_STRING, NULL, \
      "MLLR transformation to apply to means and variances" }, \
    { "-mmap", ARG_BOOLEAN, "yes", \
      "Use memory-mapped I/O (if possible) for model files" }, \
    { "-ds", ARG_INT32, "1", \
      "Frame GMM computation downsampling ratio" }, \
    { "-topn", ARG_INT32, "4", \
      "Maximum number of top Gaussians to use in scoring." }, \
    { "-topn_beam", ARG_STRING, "0", \
      "Beam width used to determine top-N Gaussians (or a list, per-feature)" }, \
    { "-logbase", ARG_FLOAT32, "1.0001", \
      "Base in which all log-likelihoods calculated" }

/**
 * All-empty option entry: NULL name, type 0, NULL default, NULL help text.
 *
 * Presumably used as the terminating sentinel after an option list such as
 * POCKETSPHINX_OPTIONS -- confirm against the code that builds the tables.
 */
#define CMDLN_EMPTY_OPTION { NULL, 0, NULL, NULL }

388#endif /* __PS_CMDLN_MACRO_H__ */