xref: /plan9/sys/src/libscribble/scribbleimpl.h (revision 767832599f05417fe9452317de3fa53a733ada57)
1 /*
2  *  scribble.h:			User-Level API for Handwriting Recognition
3  *  Author:				James Kempf
4  *  Created On:			Mon Nov  2 14:01:25 1992
5  *  Last Modified By:	Sape Mullender
6  *  Last Modified On:	Fri Aug 25 10:24:50 EDT 2000
7  *  Copyright (c) 1994 by Sun Microsystems Computer Company
8  *  All rights reserved.
9  *
10  *  Use and copying of this software and preparation of
11  *  derivative works based upon this software are permitted.
12  *  Any distribution of this software or derivative works
13  *  must comply with all applicable United States export control
14  *  laws.
15  *
16  *  This software is made available as is, and Sun Microsystems
17  *  Computer Company makes no warranty about the software, its
18  *  performance, or its conformity to any specification
19  */
20 
/*
 * Handle to a recognizer.  struct _Recognizer is left incomplete in
 * this header on purpose: the toolkit is expected to go through the
 * access functions declared below instead of touching the struct.
 */
#pragma incomplete struct _Recognizer
typedef struct _Recognizer *recognizer;
27 
28 /*
29  * Opaque type for recognizers to implement dictionaries.
30  */
31 
32 typedef struct _wordset		*wordset;
33 typedef struct rc		rc;
34 typedef struct rec_correlation	rec_correlation;
35 typedef struct rec_alternative	rec_alternative;
36 typedef struct rec_element	rec_element;
37 typedef struct gesture		gesture;
38 typedef uint			wchar_t;
39 
/* Scalar type definitions */

/* Boolean type and its two values, for better readability. */
typedef int bool;

#define false 0
#define true 1
48 
/*
 * Pointer type for the extra (extension) functions on a recognizer.
 * The parameter list is deliberately left unspecified.
 */
typedef void (*rec_fn)();
52 
53 /*
54  * rec_confidence is an integer between 0-100 giving the confidence of the
55  * recognizer in a particular result.
56  */
57 
58 typedef uchar rec_confidence;
59 
60 /**************** RECOGNIZER CONFIGURATION INFORMATION *******************/
61 
/*
 * Recognizer information: the locale of the character set the
 * recognizer returns, the recognizer's pathname, and the subsets to
 * which recognition can be limited.  The locale should be suitable for
 * setlocale(3).  Recognizers that do not produce text can report a
 * blank locale and list the graphics types they recognize in the
 * subset array.
 */
typedef struct {
	char *ri_locale;	/* locale of the character set */
	char *ri_name;		/* complete pathname to the recognizer */
	char **ri_subset;	/* null-terminated list of supported subsets */
} rec_info;
76 
/* Names for a set of commonly used character subsets. */

#define GESTURE			"GESTURE"		/* only gestures */
#define MATHSET			"MATHSET"		/* %^*()_+={}<>,/. */
#define MONEYSET		"MONEYSET"		/* $, possibly cent, pound and yen */
#define WHITESPACE		"WHITESPACE"		/* gaps are recognized as space */
#define KANJI_JIS1		"KANJI_JIS1"		/* only the JIS1 kanji */
#define KANJI_JIS1_PLUS		"KANJI_JIS1_PLUS"	/* JIS1 plus some of JIS2 */
#define KANJI_JIS2		"KANJI_JIS2"		/* the JIS1 + JIS2 kanji */
#define HIRIGANA		"HIRIGANA"		/* the hiragana (name spelled as in the original API) */
#define KATAKANA		"KATAKANA"		/* the katakana */
#define UPPERCASE		"UPPERCASE"		/* upper-case alphabetics, no digits */
#define LOWERCASE		"LOWERCASE"		/* lower-case alphabetics, no digits */
#define DIGITS			"DIGITS"		/* only the digits 0-9 */
#define PUNCTUATION		"PUNCTUATION"		/* \!-;'"?()&., */
#define NONALPHABETIC		"NONALPHABETIC"		/* all nonalphabetics, no digits */
#define ASCII			"ASCII"			/* the ASCII character set */
#define ISO_LATIN12		"ISO_LATIN12"		/* the ISO Latin 12 characters */
95 
96 
97 /********************  RECOGNITION INPUT STRUCTURES ***********************/
98 
99 /*
100  * WINDOW SYSTEM INTERFACE
101 */
102 
103 /*Bounding box. Structurally identical to Rectangle.*/
104 
105 typedef Rectangle pen_rect;
106 
107 
108 /*
109  * RECOGNITION CONTEXT
110  */
111 
112 /* Structure for reporting writing area geometric constraints. */
113 
114 typedef struct {
115 	pen_rect pr_area;
116 	short pr_row, pr_col;
117 } pen_frame;
118 
119 /*
120  * Structure for describing a set of letters to constrain recognition.
121  * ls_type is the same as the re_type field for rec_element below.
122 */
123 
124 typedef struct _letterset {
125         char ls_type;
126         union _ls_set {
127                 char* aval;
128                 wchar_t* wval;
129         } ls_set;
130 } letterset;
131 
132 /********************* RECOGNITION RETURN VALUES *************************/
133 
134 
/*
 * Type flags for the result union.  "Other" means the caller needs a
 * cast to get at the value.
 */
#define REC_NONE	0x0	/* no return value */
#define REC_GESTURE	0x1	/* gesture */
#define REC_ASCII	0x2	/* array of 8-bit ASCII */
#define REC_VAR		0x4	/* array of variable-width characters */
#define REC_WCHAR	0x8	/* array of Unicode (wide) characters */
#define REC_OTHER	0x10	/* undefined type */
#define REC_CORR	0x20	/* rec_correlation struct */
144 
145 /*
146  * Recognition elements. A recognition element is a structure having a
147  * confidence level member, and a union, along with a flag indicating
148  * the union type. The union contains a pointer to the result. This
149  * is the basic recognition return value, corresponding to one
150  * recognized word, letter, or group of letters.
151 */
152 
153 struct rec_element {
154 	char			re_type;		/*Union type flag.*/
155 	union {
156 		gesture	*			gval;	/*Gesture.*/
157 		char*				aval;	/*ASCII and variable width.*/
158 		wchar_t*			wval;	/*Unicode.*/
159 		rec_correlation*	rcval;	/*rec_correlation*/
160 	} re_result;
161 	rec_confidence	re_conf;        /*Confidence (0-100).*/
162 };
163 
164 /*
165  * Recognition alternative. The recognition alternative gives
166  * a translated element for a particular segmentation, and
167  * a pointer to an array of alternatives for the next position
168  * in the segmentation thread.
169 */
170 
171 struct rec_alternative {
172 	rec_element			ra_elem; 	/*the translated element*/
173 	uint				ra_nalter;	/*number of next alternatives*/
174 	rec_alternative*	ra_next;	/*the array of next alternatives*/
175 };
176 
177 /**************************  GESTURES  **************************/
178 
179 /*
180  * Gestures. The toolkit initializes the recognizer with a
181  * set of gestures having appropriate callbacks.
182  * When a gesture is recognized, it is returned as part of a
183  * recognition element. The recognizer fills in the bounding
184  * box and hotspots. The toolkit fills in any additional values,
185  * such as the current window, and calls the callback.
186 */
187 
188 struct gesture {
189 	char*		g_name;			/*The gesture's name.*/
190 	uint			g_nhs;			/*Number of hotspots.*/
191 	pen_point*	g_hspots;			/*The hotspots.*/
192 	pen_rect		g_bbox;			/*The bounding box.*/
193 	void	  		(*g_action)(gesture*);	/*Pointer to execution function.*/
194 	void*		g_wsinfo;			/*For toolkit to fill in.*/
195 };
196 
197 typedef void (*xgesture)(gesture*);
198 
199 /*
200  * Recognition correlation. A recognition correlation is a recognition
201  * of the stroke input along with a correlation between the stroke
202  * input and the recognized text. The rec_correlation struct contains
203  * a pointer to an arrray of pointers to strokes, and
204  * two arrays of integers, giving the starting point and
205  * stopping point of each corresponding recogition element returned
206  * in the strokes.
207  */
208 
209 struct rec_correlation {
210 	rec_element	ro_elem;			/*The recognized alternative.*/
211 	uint		ro_nstrokes;		/*Number of strokes.*/
212 	Stroke*	ro_strokes;			/*Array of strokes.*/
213 	uint*		ro_start;			/*Starting index of points.*/
214 	uint*		ro_stop;			/*Stopping index of points.*/
215 };
216 
217 /*
218  * ADMINISTRATION
219  */
220 
221 /*
222  * recognizer_load - If directory is not NULL, then use it as a pathname
223  * to find the recognizer. Otherwise, use the default naming conventions
224  * to find the recognizer having file name name. The subset argument
225  * contains a null-terminated array of names for character subsets which
226  * the recognizer should translate.
227  */
228 
229 recognizer	recognizer_load(char*, char*, char**);
230 
231 /*
232  * recognizer_unload - Unload the recognizer.
233  */
234 
235 int			recognizer_unload(recognizer);
236 
237 /*
238  * recognizer_get_info-Get a pointer to a rec_info
239  * giving the locale and subsets supported by the recognizer, and shared
240  * library pathname.
241  */
242 
243 const rec_info*	recognizer_get_info(recognizer);
244 
245 /*
246  * recognizer_manager_version-Return the version number string of the
247  * recognition manager.
248  */
249 
250 const char*	recognizer_manager_version(recognizer);
251 
252 /*
253  * recognizer_load_state-Get any recognizer state associated with name
254  * in dir. Note that name may not be simple file name, since
255  * there may be more than one file involved. Return 0 if successful,
256  * -1 if not.
257  */
258 
259 int			recognizer_load_state(recognizer, char*, char*);
260 
261 /*
262  * recognizer_save_state-Save any recognizer state to name
263  * in dir. Note that name may not be a simple file name, since
264  * there may be more than one file involved. Return 0 if successful,
265  * -1 if not.
266  */
267 
268 int			recognizer_save_state(recognizer, char*, char*);
269 
270 /*
271  * recognizer_error-Return the last error message, or NULL if none.
272  */
273 
274 char*		recognizer_error(recognizer);
275 
276 /*
277  * DICTIONARIES
278  */
279 
280 /* recognizer_load_dictionary-Load a dictionary from the directory
281  * dir and file name. Return the dictionary pointer if successful,
282  * otherwise NULL.
283  */
284 
285 wordset		recognizer_load_dictionary(recognizer, char*, char*);
286 
287 /* recoginzer_save_dictionary-Save the dictionary to the file. Return 0
288  * successful, -1 if error occurs.
289  */
290 
291 int			recognizer_save_dictionary(recognizer, char*, char*, wordset);
292 
293 /*
294  * recognizer_free_dictionary-Free the dictionary. Return 0 if successful,
295  * -1 if error occurs.
296  */
297 
298 int			recognizer_free_dictionary(recognizer, wordset);
299 
300 /*
301  * recognizer_add_to_dictionary-Add the word to the dictionary. Return 0
302  * if successful, -1 if error occurs.
303  */
304 
305 int			recognizer_add_to_dictionary(recognizer, letterset*, wordset);
306 
307 /*
308  * recognizer_delete_from_dictionary-Delete the word from the dictionary.
309  * Return 0 if successful, -1 if error occurs.
310  */
311 
312 int			recognizer_delete_from_dictionary(recognizer, letterset*, wordset);
313 
314 /*
315  * TRANSLATION
316  */
317 
318 /* recognizer_set/get_context - Set/get the recognition context for
319  * subsequent buffering and translation. recognizer_set_context()
320  * returns -1 if an error occurs, otherwise 0. recognizer_get_context()
321  * returns NULL if no context has been set. The context is copied to avoid
322  * potential memory deallocation problems.
323  */
324 
325 int			recognizer_set_context(recognizer, rc*);
326 rc*			recognizer_get_context(recognizer);
327 
328 /* recognizer_clear - Set stroke buffer to NULL and clear the context.
329  * Returns -1 if an error occurred, otherwise 0. Both the context and the
330  * stroke buffer are deallocated. If delete_points_p is true, delete the
331  * points also.
332  */
333 
334 int			recognizer_clear(recognizer, bool);
335 
336 /* recognizer_get/set_buffer - Get/set the stroke buffer. The stroke buffer
337  * is copied to avoid potential memory allocation problems. Returns -1 if
338  * an error occurs, otherwise 0.
339  */
340 
341 int			recognizer_get_buffer(recognizer, uint*, Stroke**);
342 int			recognizer_set_buffer(recognizer, uint, Stroke*);
343 
344 /* recognizer_translate - Copy the strokes argument into the stroke buffer and
345  * translate the buffer. If correlate_p is true, then provide stroke
346  * correlations as well. If either nstrokes is 0 or strokes is NULL, then
347  * just translate the stroke buffer and return the translation. Return an
348  * array of alternative translation segmentations in the ret pointer and the
349  * number of alternatives in nret, or NULL and 0 if there is no translation.
350  * The direction of segmentation is as specified by the rc_direction field in
351  * the buffered recognition context. Returns -1 if an error occurred,
352  * otherwise 0.
353  */
354 
355 int			recognizer_translate(recognizer, uint, Stroke*, bool,
356 				int*, rec_alternative**);
357 
358 /*
359  * recognizer_get_extension_functions-Return a null terminated array
360  * of functions providing extended functionality. Their interfaces
361  * will change depending on the recognizer.
362  */
363 
364 rec_fn*		recognizer_get_extension_functions(recognizer);
365 
366 /*
367  * GESTURE SUPPORT
368 */
369 
370 /*
371  * recognizer_get_gesture_names - Return a null terminated array of
372  * character strings containing the gesture names.
373  */
374 
375 char**		recognizer_get_gesture_names(recognizer);
376 
377 /*
378  * recognizer_set_gesture_action-Set the action function associated with the
379  *  name.
380  */
381 
382 xgesture	recognizer_set_gesture_action(recognizer, char*, xgesture, void*);
383 
384 /*
385  * The following functions are for deleting data structures returned
386  *   by the API functions.
387  */
388 
389 void		delete_rec_alternative_array(uint, rec_alternative*, bool);
390 void		delete_rec_correlation(rec_correlation*, bool);
391 
392 /*
393  * These are used by clients to create arrays for passing to API
394  *  functions.
395  */
396 
397 Stroke*	make_Stroke_array(uint);
398 void		delete_Stroke_array(uint, Stroke*, bool);
399 
400 pen_point* 	make_pen_point_array(uint);
401 void 		delete_pen_point_array(pen_point*);
402 
403 Stroke*	copy_Stroke_array(uint, Stroke*);
404 
/* Extension function interfaces and their indices. */

#define LI_ISA_LI	0	/* is this a li recognizer? */
#define LI_TRAIN	1	/* train the recognizer */
#define LI_CLEAR	2	/* ari's clear-state extension function */
#define LI_GET_CLASSES	3	/* ari's get-classes extension function */
#define LI_NUM_EX_FNS	4	/* number of extension functions */
412 
413 typedef bool	(*li_isa_li)(recognizer r);
414 typedef int		(*li_recognizer_train)(recognizer, rc*, uint,
415 					Stroke*, rec_element*, bool);
416 typedef int		(*li_recognizer_clearState)(recognizer);
417 typedef int		(*li_recognizer_getClasses)(recognizer, char ***, int *);
418