/home/taku/proj/crfpp/crfpp.h
00001 /*
00002   CRF++ -- Yet Another CRF toolkit
00003 
00004   $Id: crfpp.h 1592 2007-02-12 09:40:53Z taku $;
00005 
00006   Copyright(C) 2005-2007 Taku Kudo <taku@chasen.org>
00007 */
00008 #ifndef CRFPP_CRFPP_H_
00009 #define CRFPP_CRFPP_H_
00010 
00011 /* C interface  */
00012 #ifdef __cplusplus
00013 #include <cstdio>
00014 #else
00015 #include <stdio.h>
00016 #endif
00017 
/*
 * Symbol import/export decoration.
 *
 * On Windows a build that defines DLL_EXPORT marks functions and classes
 * as dllexport; any other Windows build marks functions as dllimport.
 * Everywhere else (and for classes in a non-exporting Windows build) the
 * fallbacks below apply: plain `extern` for functions and nothing for
 * classes.
 *
 * This section is deliberately placed before the extern "C" region so the
 * system header is not included while C language linkage is in effect.
 */
#ifdef _WIN32
#include <windows.h>
#  ifdef DLL_EXPORT
#    define CRFPP_DLL_EXTERN  __declspec(dllexport)
#    define CRFPP_DLL_CLASS_EXTERN  __declspec(dllexport)
#  else
#    define CRFPP_DLL_EXTERN  __declspec(dllimport)
#  endif
#endif

#ifndef CRFPP_DLL_EXTERN
#  define CRFPP_DLL_EXTERN extern
#endif

#ifndef CRFPP_DLL_CLASS_EXTERN
#  define CRFPP_DLL_CLASS_EXTERN
#endif

#ifdef __cplusplus
extern "C" {
#endif

#ifndef SWIG
  /* Opaque handles; the struct bodies are not defined in this header. */
  typedef struct crfpp_t crfpp_t;
  typedef struct crfpp_model_t crfpp_model_t;

  /*
   * C interface.
   *
   * Parameter names are taken from the like-named CRFPP::Model and
   * CRFPP::Tagger methods in the C++ section of this header (i: token
   * index, j/k: tag ids, argv[] e.g. {"CRF++", "-m", "model", "-v3"},
   * arg e.g. "-m model -v3").
   * NOTE(review): the functions are presumed to wrap those methods; the
   * implementation and the meaning of the int return values are not
   * visible in this header -- confirm against the library source.
   */

  /* ---- model handle ---- */
  CRFPP_DLL_EXTERN crfpp_model_t *crfpp_model_new(int argc, char **argv);
  CRFPP_DLL_EXTERN crfpp_model_t *crfpp_model_new2(const char *arg);
  CRFPP_DLL_EXTERN void           crfpp_model_destroy(crfpp_model_t *model);
  CRFPP_DLL_EXTERN const char    *crfpp_model_strerror(crfpp_model_t *model);
  CRFPP_DLL_EXTERN crfpp_t       *crfpp_model_new_tagger(crfpp_model_t *model);

  /* ---- tagger handle: creation, destruction, input ---- */
  CRFPP_DLL_EXTERN crfpp_t *crfpp_new(int argc, char **argv);
  CRFPP_DLL_EXTERN crfpp_t *crfpp_new2(const char *arg);
  CRFPP_DLL_EXTERN void     crfpp_destroy(crfpp_t *tagger);
  CRFPP_DLL_EXTERN int      crfpp_set_model(crfpp_t *tagger,
                                            crfpp_model_t *model);
  CRFPP_DLL_EXTERN int      crfpp_add2(crfpp_t *tagger, size_t size,
                                       const char **str);
  CRFPP_DLL_EXTERN int      crfpp_add(crfpp_t *tagger, const char *str);

  /* ---- sizes, weights and per-token results ---- */
  CRFPP_DLL_EXTERN size_t   crfpp_size(crfpp_t *tagger);
  CRFPP_DLL_EXTERN size_t   crfpp_xsize(crfpp_t *tagger);
  CRFPP_DLL_EXTERN size_t   crfpp_dsize(crfpp_t *tagger);
  CRFPP_DLL_EXTERN const float *crfpp_weight_vector(crfpp_t *tagger);
  CRFPP_DLL_EXTERN size_t   crfpp_result(crfpp_t *tagger, size_t i);
  CRFPP_DLL_EXTERN size_t   crfpp_answer(crfpp_t *tagger, size_t i);
  CRFPP_DLL_EXTERN size_t   crfpp_y(crfpp_t *tagger, size_t i);
  CRFPP_DLL_EXTERN size_t   crfpp_ysize(crfpp_t *tagger);

  /* ---- probabilities, penalties and lattice costs ---- */
  CRFPP_DLL_EXTERN double   crfpp_prob(crfpp_t *tagger, size_t i, size_t j);
  CRFPP_DLL_EXTERN double   crfpp_prob2(crfpp_t *tagger, size_t i);
  CRFPP_DLL_EXTERN double   crfpp_prob3(crfpp_t *tagger);
  CRFPP_DLL_EXTERN void     crfpp_set_penalty(crfpp_t *tagger, size_t i,
                                              size_t j, double penalty);
  CRFPP_DLL_EXTERN double   crfpp_penalty(crfpp_t *tagger, size_t i, size_t j);
  CRFPP_DLL_EXTERN double   crfpp_alpha(crfpp_t *tagger, size_t i, size_t j);
  CRFPP_DLL_EXTERN double   crfpp_beta(crfpp_t *tagger, size_t i, size_t j);
  /*
   * NOTE: "emisstion" (here) and "emittion" (crfpp_emittion_vector below)
   * are misspelled, but these are the declared public symbol names; they
   * are kept as-is so existing callers continue to compile and link.
   */
  CRFPP_DLL_EXTERN double   crfpp_emisstion_cost(crfpp_t *tagger, size_t i,
                                                 size_t j);
  CRFPP_DLL_EXTERN double   crfpp_next_transition_cost(crfpp_t *tagger,
                                                       size_t i, size_t j,
                                                       size_t k);
  CRFPP_DLL_EXTERN double   crfpp_prev_transition_cost(crfpp_t *tagger,
                                                       size_t i, size_t j,
                                                       size_t k);
  CRFPP_DLL_EXTERN double   crfpp_best_cost(crfpp_t *tagger, size_t i,
                                            size_t j);
  CRFPP_DLL_EXTERN const int *crfpp_emittion_vector(crfpp_t *tagger, size_t i,
                                                    size_t j);
  CRFPP_DLL_EXTERN const int *crfpp_next_transition_vector(crfpp_t *tagger,
                                                           size_t i, size_t j,
                                                           size_t k);
  CRFPP_DLL_EXTERN const int *crfpp_prev_transition_vector(crfpp_t *tagger,
                                                           size_t i, size_t j,
                                                           size_t k);
  CRFPP_DLL_EXTERN double   crfpp_Z(crfpp_t *tagger);

  /* ---- parsing and context control ---- */
  CRFPP_DLL_EXTERN int      crfpp_parse(crfpp_t *tagger);
  CRFPP_DLL_EXTERN int      crfpp_empty(crfpp_t *tagger);
  CRFPP_DLL_EXTERN int      crfpp_clear(crfpp_t *tagger);
  CRFPP_DLL_EXTERN int      crfpp_next(crfpp_t *tagger);

  /*
   * ---- whole-program entry points ----
   * NOTE(review): presumably the test and training tools driven by
   * argv-style or single-string options -- confirm.
   */
  CRFPP_DLL_EXTERN int      crfpp_test(int argc, char **argv);
  CRFPP_DLL_EXTERN int      crfpp_test2(const char *arg);
  CRFPP_DLL_EXTERN int      crfpp_learn(int argc, char **argv);
  CRFPP_DLL_EXTERN int      crfpp_learn2(const char *arg);

  /* ---- string accessors ---- */
  CRFPP_DLL_EXTERN const char  *crfpp_strerror(crfpp_t *tagger);
  CRFPP_DLL_EXTERN const char  *crfpp_yname(crfpp_t *tagger, size_t i);
  CRFPP_DLL_EXTERN const char  *crfpp_y2(crfpp_t *tagger, size_t i);
  CRFPP_DLL_EXTERN const char  *crfpp_x(crfpp_t *tagger, size_t i, size_t j);
  CRFPP_DLL_EXTERN const char **crfpp_x2(crfpp_t *tagger, size_t i);
  CRFPP_DLL_EXTERN const char  *crfpp_parse_tostr(crfpp_t *tagger,
                                                  const char *str);
  CRFPP_DLL_EXTERN const char  *crfpp_parse_tostr2(crfpp_t *tagger,
                                                   const char *str,
                                                   size_t size);
  CRFPP_DLL_EXTERN const char  *crfpp_parse_tostr3(crfpp_t *tagger,
                                                   const char *str,
                                                   size_t size1,
                                                   char *result,
                                                   size_t size2);
  CRFPP_DLL_EXTERN const char  *crfpp_tostr(crfpp_t *tagger);
  CRFPP_DLL_EXTERN const char  *crfpp_tostr2(crfpp_t *tagger, char *result,
                                             size_t size);

  /* ---- tagger options ---- */
  CRFPP_DLL_EXTERN void         crfpp_set_vlevel(crfpp_t *tagger,
                                                 unsigned int vlevel);
  CRFPP_DLL_EXTERN unsigned int crfpp_vlevel(crfpp_t *tagger);
  CRFPP_DLL_EXTERN void         crfpp_set_cost_factor(crfpp_t *tagger,
                                                      float cost_factor);
  CRFPP_DLL_EXTERN float        crfpp_cost_factor(crfpp_t *tagger);
  CRFPP_DLL_EXTERN void         crfpp_set_nbest(crfpp_t *tagger, size_t nbest);
#endif

#ifdef __cplusplus
}
#endif
00115 
00116 /* C++ interface */
00117 #ifdef __cplusplus
00118 
00119 namespace CRFPP {
00120 
00121 class Tagger;
00122 
00123 class CRFPP_DLL_CLASS_EXTERN Model {
00124  public:
00125 #ifndef SWIG
00126   // open model with parameters in argv[]
00127   // e.g, argv[] = {"CRF++", "-m", "model", "-v3"};
00128   virtual bool open(int argc,  char** argv) = 0;
00129 
00130   // open model with parameter arg, e.g. arg = "-m model -v3";
00131   virtual bool open(const char* arg) = 0;
00132 #endif
00133 
00134   // create Tagger object. Returned object shared the same
00135   // model object
00136   virtual Tagger *createTagger() const = 0;
00137 
00138   virtual const char* what() = 0;
00139 
00140   virtual ~Model() {}
00141 };
00142 
00143 class CRFPP_DLL_CLASS_EXTERN Tagger {
00144  public:
00145 #ifndef SWIG
00146   // open model with parameters in argv[]
00147   // e.g, argv[] = {"CRF++", "-m", "model", "-v3"};
00148   virtual bool open(int argc,  char** argv) = 0;
00149 
00150   // open model with parameter arg, e.g. arg = "-m model -v3";
00151   virtual bool open(const char* arg) = 0;
00152 
00153   // add str[] as tokens to the current context
00154   virtual bool add(size_t size, const char **str) = 0;
00155 
00156   // close the current model
00157   virtual void close() = 0;
00158 
00159   // return parameter vector. the size should be dsize();
00160   virtual const float *weight_vector() const = 0;
00161 #endif
00162 
00163   // set Model
00164   virtual bool set_model(const Model &model) = 0;
00165 
00166   // set vlevel
00167   virtual void set_vlevel(unsigned int vlevel) = 0;
00168 
00169   // get vlevel
00170   virtual unsigned int vlevel() const = 0;
00171 
00172   // set cost factor
00173   virtual void set_cost_factor(float cost_factor) = 0;
00174 
00175   // get cost factor
00176   virtual float cost_factor() const = 0;
00177 
00178   // set nbest
00179   virtual void set_nbest(size_t nbest) = 0;
00180 
00181   // get nbest
00182   virtual size_t nbest() const = 0;
00183 
00184   // add one line to the current context
00185   virtual bool add(const char* str) = 0;
00186 
00187   // return size of tokens(lines)
00188   virtual size_t size() const = 0;
00189 
00190   // return size of column
00191   virtual size_t xsize() const = 0;
00192 
00193   // return size of features
00194   virtual size_t dsize() const = 0;
00195 
00196   // return output tag-id of i-th token
00197   virtual size_t result(size_t i) const = 0;
00198 
00199   // return answer tag-id of i-th token if it is available
00200   virtual size_t answer(size_t i) const = 0;
00201 
00202   // alias of result(i)
00203   virtual size_t y(size_t i) const = 0;
00204 
00205   // return output tag of i-th token as string
00206   virtual const char*   y2(size_t i) const = 0;
00207 
00208   // return i-th tag-id as string
00209   virtual const char*   yname(size_t i) const = 0;
00210 
00211   // return token at [i,j] as string(i:token j:column)
00212   virtual const char*   x(size_t i, size_t j) const = 0;
00213 
00214 #ifndef SWIG
00215   // return an array of strings at i-th tokens
00216   virtual const char**  x(size_t) const = 0;
00217 #endif
00218 
00219   // return size of output tags
00220   virtual size_t ysize() const = 0;
00221 
00222   // return marginal probability of j-th tag id at i-th token
00223   virtual double prob(size_t i, size_t j) const = 0;
00224 
00225   // return marginal probability of output tag at i-th token
00226   // same as prob(i, tagger->y(i));
00227   virtual double prob(size_t i) const = 0;
00228 
00229   // return conditional probability of enter output
00230   virtual double prob() const = 0;
00231 
00232   // set token-level penalty. It would be useful for implementing
00233   // Dual decompositon decoding.
00234   // e.g.
00235   // "Dual Decomposition for Parsing with Non-Projective Head Automata"
00236   // Terry Koo Alexander M. Rush Michael Collins Tommi Jaakkola David Sontag
00237   virtual void set_penalty(size_t i, size_t j, double penalty) = 0;
00238   virtual double penalty(size_t i, size_t j) const = 0;
00239 
00240   // return forward log-prob of the j-th tag at i-th token
00241   virtual double alpha(size_t i, size_t j) const = 0;
00242 
00243   // return backward log-prob of the j-th tag at i-th token
00244   virtual double beta(size_t i, size_t j) const = 0;
00245 
00246   // return emission cost of the j-th tag at i-th token
00247   virtual double emission_cost(size_t i, size_t j) const = 0;
00248 
00249   // return transition cost of [j-th tag at i-th token] to
00250   // [k-th tag at(i+1)-th token]
00251   virtual double next_transition_cost(size_t i,
00252                                       size_t j, size_t k) const = 0;
00253 
00254   // return transition cost of [j-th tag at i-th token] to
00255   // [k-th tag at(i-1)-th token]
00256   virtual double prev_transition_cost(size_t i,
00257                                       size_t j, size_t k) const = 0;
00258 
00259   //  return the best accumulative cost to the j-th tag at i-th token
00260   // used in viterbi search
00261   virtual double best_cost(size_t i, size_t j) const = 0;
00262 
00263 #ifndef SWIG
00264   // return emission feature vector of the j-th tag at i-th token
00265   virtual const int* emission_vector(size_t i, size_t j) const = 0;
00266 
00267   // return transition feature vector of [j-th tag at i-th token] to
00268   // [k-th tag at(i+1)-th token]
00269   virtual const int* next_transition_vector(size_t i,
00270                                             size_t j, size_t k) const = 0;
00271 
00272   // return transition feature vector of [j-th tag at i-th token] to
00273   // [k-th tag at(i-1)-th token]
00274   virtual const int* prev_transition_vector(size_t i,
00275                                             size_t j, size_t k) const = 0;
00276 #endif
00277 
00278   // normalizing factor(log-prob)
00279   virtual double Z() const = 0;
00280 
00281   // do parse and change the internal status, if failed, returns false
00282   virtual bool parse() = 0;
00283 
00284   // return true if the context is empty
00285   virtual bool empty() const = 0;
00286 
00287   // clear all context
00288   virtual bool clear() = 0;
00289 
00290   // change the internal state to output next-optimal output.
00291   // calling it n-th times, can get n-best results,
00292   // Neeed to specify -nN option to use this function, where
00293   // N>=2
00294   virtual bool next() = 0;
00295 
00296   // parse 'str' and return result as string
00297   // 'str' must be written in CRF++'s input format
00298   virtual const char* parse(const char* str) = 0;
00299 
00300 #ifndef SWIG
00301   // return parsed result as string
00302   virtual const char* toString() = 0;
00303 
00304   // return parsed result as string.
00305   // Result is saved in the buffer 'result', 'size' is the
00306   // size of the buffer. if failed, return NULL
00307   virtual const char* toString(char* result , size_t size) = 0;
00308 
00309   // parse 'str' and return parsed result.
00310   // You don't need to delete return value, but the buffer
00311   // is rewritten whenever you call parse method.
00312   // if failed, return NULL
00313   virtual const char* parse(const char *str, size_t size) = 0;
00314 
00315   // parse 'str' and return parsed result.
00316   // The result is stored in the buffer 'result'.
00317   // 'size2' is the size of the buffer. if failed, return NULL
00318   virtual const char* parse(const char *str, size_t size1,
00319                             char *result, size_t size2) = 0;
00320 #endif
00321   // return internal error code as string
00322   virtual const char* what() = 0;
00323 
00324   virtual ~Tagger() {}
00325 };
00326 
00327 /* factory method */
00328 
00329 // create CRFPP::Tagger instance with parameters in argv[]
00330 // e.g, argv[] = {"CRF++", "-m", "model", "-v3"};
00331 CRFPP_DLL_EXTERN Tagger *createTagger(int argc, char **argv);
00332 
00333 // create CRFPP::Tagger instance with parameter in arg
00334 // e.g. arg = "-m model -v3";
00335 CRFPP_DLL_EXTERN Tagger *createTagger(const char *arg);
00336 
00337 // create CRFPP::Model instance with parameters in argv[]
00338 // e.g, argv[] = {"CRF++", "-m", "model", "-v3"};
00339 CRFPP_DLL_EXTERN Model *createModel(int argc, char **argv);
00340 
00341 // load model from [buf, buf+size].
00342 CRFPP_DLL_EXTERN Model *createModelFromArray(int argc, char **argv,
00343                                              const char *buf, size_t size);
00344 
00345 // create CRFPP::Model instance with parameter in arg
00346 // e.g. arg = "-m model -v3";
00347 CRFPP_DLL_EXTERN Model *createModel(const char *arg);
00348 
00349 // load model from [buf, buf+size].
00350 CRFPP_DLL_EXTERN Model *createModelFromArray(const char *arg,
00351                                              const char *buf, size_t size);
00352 
00353 // return error code of createTagger();
00354 CRFPP_DLL_EXTERN const char *getTaggerError();
00355 
00356 // alias of getTaggerError();
00357 CRFPP_DLL_EXTERN const char *getLastError();
00358 }
00359 
00360 #endif
00361 #endif