xref: /plan9/sys/src/libscribble/scribbleimpl.h (revision a84536681645e23c630ce4ef2e5c3b284d4c590b)
1 /*
2  *  scribble.h:			User-Level API for Handwriting Recognition
3  *  Author:				James Kempf
4  *  Created On:			Mon Nov  2 14:01:25 1992
5  *  Last Modified By:	Sape Mullender
6  *  Last Modified On:	Fri Aug 25 10:24:50 EDT 2000
7  *  Copyright (c) 1994 by Sun Microsystems Computer Company
8  *  All rights reserved.
9  *
10  *  Use and copying of this software and preparation of
11  *  derivative works based upon this software are permitted.
12  *  Any distribution of this software or derivative works
13  *  must comply with all applicable United States export control
14  *  laws.
15  *
16  *  This software is made available as is, and Sun Microsystems
17  *  Computer Company makes no warranty about the software, its
18  *  performance, or its conformity to any specification
19  */
20 
21 /*
22  * Opaque handle to a recognizer. struct _Recognizer is not defined in this
23  * header, so the toolkit must go through the recognizer_* access functions.
24  */
25 
26 typedef struct _Recognizer* recognizer;
27 /* NOTE(review): presumably tells the Plan 9 compiler the type is deliberately left incomplete -- confirm. */
28 #pragma incomplete recognizer
29 
30 /*
31  * wordset is the opaque type for recognizers to implement dictionaries.
32  */
33 /* The struct typedefs below are forward declarations; wchar_t is the wide character type. */
34 typedef struct _wordset		*wordset;
35 typedef struct rc		rc;		/* recognition context; struct rc is not defined in this header */
36 typedef struct rec_correlation	rec_correlation;
37 typedef struct rec_alternative	rec_alternative;
38 typedef struct rec_element	rec_element;
39 typedef struct gesture		gesture;
40 typedef uint			wchar_t;	/* element type of the Unicode (REC_WCHAR) results */
41 
42 /* Scalar Type Definitions */
43 
44 /* For better readability: a boolean type (an int) and its two values. */
45 
46 typedef int bool;
47 
48 #define true 1
49 #define false 0
50 
51 /* Generic pointer type for extra (extension) functions on a recognizer; no parameter list is declared. */
52 /* See recognizer_get_extension_functions() and the li_* typedefs for concrete interfaces. */
53 typedef void (*rec_fn)();
54 
55 /*
56  * rec_confidence is an integer between 0 and 100 giving the confidence of
57  * the recognizer in a particular result. It is stored in a uchar.
58  */
59 
60 typedef uchar rec_confidence;
61 
62 /**************** RECOGNIZER CONFIGURATION INFORMATION *******************/
63 
64 /*
65  * Recognizer information. Gives the locale, category of the character
66  * set returned by the recognizer, and any subsets to which the
67  * recognition can be limited. The locale and category should be
68  * suitable for setlocale(3). Those recognizers which don't do text
69  * can simply report a blank locale and category, and report the
70  * graphics types they recognize in the subset. NOTE(review): the struct below has no separate category member.
71  */
72 
73 typedef struct {
74     char* ri_locale;        /*The locale of the character set.*/
75     char* ri_name;          /*Complete pathname to the recognizer.*/
76     char** ri_subset;       /*Null terminated list of subsets supported*/
77 } rec_info;
78 
79 /*These define a set of common character subset names. Each macro expands to its own name as a string.*/
80 
81 #define GESTURE		"GESTURE"		/* gestures only */
82 #define MATHSET		"MATHSET"		/* %^*()_+={}<>,/. */
83 #define MONEYSET	"MONEYSET"		/* $, maybe cent, pound, and yen */
84 #define WHITESPACE	"WHITESPACE"	/* gaps are recognized as space */
85 #define KANJI_JIS1	"KANJI_JIS1"	/* the JIS1 kanji only */
86 #define KANJI_JIS1_PLUS	"KANJI_JIS1_PLUS" /* JIS1 plus some JIS2 */
87 #define KANJI_JIS2	"KANJI_JIS2"	/* the JIS1 + JIS2 kanji */
88 #define HIRIGANA	"HIRIGANA"		/* the hiragana (the identifier and string keep the original spelling) */
89 #define KATAKANA	"KATAKANA"		/* the katakana */
90 #define UPPERCASE	"UPPERCASE"		/* upper case alphabetics, no digits */
91 #define LOWERCASE	"LOWERCASE"		/* lower case alphabetics, no digits */
92 #define DIGITS		"DIGITS"		/* digits 0-9 only */
93 #define PUNCTUATION	"PUNCTUATION"	/* \!-;'"?()&., */
94 #define NONALPHABETIC	"NONALPHABETIC" /* all nonalphabetics, no digits */
95 #define ASCII		"ASCII"			/* the ASCII character set */
96 #define ISO_LATIN12	"ISO_LATIN12"	/* The ISO Latin 12 characters */
97 
98 
99 /********************  RECOGNITION INPUT STRUCTURES ***********************/
100 
101 /*
102  * WINDOW SYSTEM INTERFACE
103 */
104 
105 /*Bounding box. An alias for Rectangle, which is declared outside this header.*/
106 
107 typedef Rectangle pen_rect;
108 
109 
110 /*
111  * RECOGNITION CONTEXT
112  */
113 
114 /* Structure for reporting writing area geometric constraints. */
115 
116 typedef struct {
117 	pen_rect pr_area;	/* rectangle of the writing area */
118 	short pr_row, pr_col;	/* NOTE(review): presumably the row/column layout of the area -- confirm */
119 } pen_frame;
120 
121 /*
122  * Structure for describing a set of letters to constrain recognition.
123  * ls_type takes the same values as the re_type field of rec_element below.
124 */
125 
126 typedef struct _letterset {
127         char ls_type;			/* type flag for ls_set */
128         union _ls_set {
129                 char* aval;		/* char-based letters (cf. rec_element aval) */
130                 wchar_t* wval;		/* wide-character letters (cf. rec_element wval) */
131         } ls_set;
132 } letterset;
133 
134 /********************* RECOGNITION RETURN VALUES *************************/
135 
136 
137 /*Different types in the result union, used as the re_type (and ls_type) flag. "Other" indicates a cast is needed.*/
138 
139 #define REC_NONE    0x0             /*No return value*/
140 #define REC_GESTURE 0x1             /*Gesture.*/
141 #define REC_ASCII   0x2             /*Array of 8 bit ASCII*/
142 #define REC_VAR     0x4             /*Array of variable width characters. */
143 #define REC_WCHAR   0x8             /*Array of Unicode (wide) characters. */
144 #define REC_OTHER   0x10            /*Undefined type.*/
145 #define REC_CORR    0x20	    /*rec_correlation struct*/
146 
147 /*
148  * Recognition elements. A recognition element is a structure having a
149  * confidence level member, and a union, along with a flag indicating
150  * the union type (one of the REC_* values above). The union contains a
151  * pointer to the result. This is the basic recognition return value,
152  * corresponding to one recognized word, letter, or group of letters.
153 */
154 
155 struct rec_element {
156 	char			re_type;		/*Union type flag (REC_*).*/
157 	union {
158 		gesture	*			gval;	/*Gesture.*/
159 		char*				aval;	/*ASCII and variable width.*/
160 		wchar_t*			wval;	/*Unicode.*/
161 		rec_correlation*	rcval;	/*rec_correlation*/
162 	} re_result;
163 	rec_confidence	re_conf;        /*Confidence (0-100).*/
164 };
165 
166 /*
167  * Recognition alternative. The recognition alternative gives
168  * a translated element for a particular segmentation, and
169  * a pointer to an array of ra_nalter alternatives for the next
170  * position in the segmentation thread.
171 */
172 
173 struct rec_alternative {
174 	rec_element			ra_elem; 	/*the translated element*/
175 	uint				ra_nalter;	/*number of next alternatives*/
176 	rec_alternative*	ra_next;	/*the array of next alternatives*/
177 };
178 
179 /**************************  GESTURES  **************************/
180 
181 /*
182  * Gestures. The toolkit initializes the recognizer with a
183  * set of gestures having appropriate callbacks.
184  * When a gesture is recognized, it is returned as part of a
185  * recognition element (the gval member). The recognizer fills in the
186  * bounding box and hotspots. The toolkit fills in any additional values,
187  * such as the current window (g_wsinfo), and calls the callback (g_action).
188 */
189 
190 struct gesture {
191 	char*		g_name;			/*The gesture's name.*/
192 	uint			g_nhs;			/*Number of hotspots.*/
193 	pen_point*	g_hspots;			/*The hotspots; pen_point is declared outside this header.*/
194 	pen_rect		g_bbox;			/*The bounding box.*/
195 	void	  		(*g_action)(gesture*);	/*Pointer to execution function.*/
196 	void*		g_wsinfo;			/*For toolkit to fill in.*/
197 };
198 
199 typedef void (*xgesture)(gesture*);	/* gesture action function; same signature as gesture.g_action */
200 
201 /*
202  * Recognition correlation. A recognition correlation is a recognition
203  * of the stroke input along with a correlation between the stroke
204  * input and the recognized text. The rec_correlation struct contains
205  * a pointer to an array of strokes, and
206  * two arrays of integers, giving the starting point and
207  * stopping point of each corresponding recognition element returned
208  * in the strokes.
209  */
210 
211 struct rec_correlation {
212 	rec_element	ro_elem;			/*The recognized alternative.*/
213 	uint		ro_nstrokes;		/*Number of strokes.*/
214 	Stroke*	ro_strokes;			/*Array of strokes.*/
215 	uint*		ro_start;			/*Starting index of points.*/
216 	uint*		ro_stop;			/*Stopping index of points.*/
217 };
218 
219 /*
220  * ADMINISTRATION
221  */
222 
223 /*
224  * recognizer_load - If directory is not NULL, then use it as a pathname
225  * to find the recognizer. Otherwise, use the default naming conventions
226  * to find the recognizer having file name name. The subset argument
227  * contains a null-terminated array of names for character subsets which
228  * the recognizer should translate. NOTE(review): arguments are presumably (directory, name, subset) -- confirm.
229  */
230 
231 recognizer	recognizer_load(char*, char*, char**);
232 
233 /*
234  * recognizer_unload - Unload the recognizer. NOTE(review): the int result is not described; presumably 0 on success, -1 on error -- confirm.
235  */
236 
237 int			recognizer_unload(recognizer);
238 
239 /*
240  * recognizer_get_info - Get a pointer to a rec_info
241  * giving the locale and subsets supported by the recognizer, and shared
242  * library pathname. The result is const-qualified; callers must not modify it.
243  */
244 
245 const rec_info*	recognizer_get_info(recognizer);
246 
247 /*
248  * recognizer_manager_version - Return the version number string of the
249  * recognition manager. The string is const-qualified; callers must not modify it.
250  */
251 
252 const char*	recognizer_manager_version(recognizer);
253 
254 /*
255  * recognizer_load_state - Get any recognizer state associated with name
256  * in dir. Note that name may not be a simple file name, since
257  * there may be more than one file involved. Return 0 if successful,
258  * -1 if not. NOTE(review): the order of the two char* arguments (dir, name) is not stated -- confirm.
259  */
260 
261 int			recognizer_load_state(recognizer, char*, char*);
262 
263 /*
264  * recognizer_save_state - Save any recognizer state to name
265  * in dir. Note that name may not be a simple file name, since
266  * there may be more than one file involved. Return 0 if successful,
267  * -1 if not. NOTE(review): the order of the two char* arguments (dir, name) is not stated -- confirm.
268  */
269 
270 int			recognizer_save_state(recognizer, char*, char*);
271 
272 /*
273  * recognizer_error - Return the last error message for the recognizer, or NULL if there is none.
274  */
275 
276 char*		recognizer_error(recognizer);
277 
278 /*
279  * DICTIONARIES
280  */
281 
282 /* recognizer_load_dictionary - Load a dictionary from the directory
283  * dir and file name. Return the dictionary pointer (a wordset) if
284  * successful, otherwise NULL.
285  */
286 
287 wordset		recognizer_load_dictionary(recognizer, char*, char*);
288 
289 /* recognizer_save_dictionary - Save the dictionary to the file. Return 0
290  * if successful, -1 if an error occurs.
291  */
292 
293 int			recognizer_save_dictionary(recognizer, char*, char*, wordset);
294 
295 /*
296  * recognizer_free_dictionary - Free the dictionary. Return 0 if successful,
297  * -1 if an error occurs.
298  */
299 
300 int			recognizer_free_dictionary(recognizer, wordset);
301 
302 /*
303  * recognizer_add_to_dictionary - Add the word (passed as a letterset) to the
304  * dictionary. Return 0 if successful, -1 if an error occurs.
305  */
306 
307 int			recognizer_add_to_dictionary(recognizer, letterset*, wordset);
308 
309 /*
310  * recognizer_delete_from_dictionary - Delete the word (passed as a letterset)
311  * from the dictionary. Return 0 if successful, -1 if an error occurs.
312  */
313 
314 int			recognizer_delete_from_dictionary(recognizer, letterset*, wordset);
315 
316 /*
317  * TRANSLATION
318  */
319 
320 /* recognizer_set_context/recognizer_get_context - Set/get the recognition
321  * context for subsequent buffering and translation. recognizer_set_context()
322  * returns -1 if an error occurs, otherwise 0. recognizer_get_context()
323  * returns NULL if no context has been set. The context is copied to avoid
324  * potential memory deallocation problems.
325  */
326 
327 int			recognizer_set_context(recognizer, rc*);
328 rc*			recognizer_get_context(recognizer);
329 
330 /* recognizer_clear - Set stroke buffer to NULL and clear the context.
331  * Returns -1 if an error occurred, otherwise 0. Both the context and the
332  * stroke buffer are deallocated. If delete_points_p (the bool argument) is
333  * true, delete the points also.
334  */
335 
336 int			recognizer_clear(recognizer, bool);
337 
338 /* recognizer_get_buffer/recognizer_set_buffer - Get/set the stroke buffer
339  * (a stroke count and a Stroke array). The stroke buffer is copied to avoid
340  * potential memory allocation problems. Returns -1 if an error occurs, otherwise 0.
341  */
342 
343 int			recognizer_get_buffer(recognizer, uint*, Stroke**);
344 int			recognizer_set_buffer(recognizer, uint, Stroke*);
345 
346 /* recognizer_translate - Copy the strokes argument into the stroke buffer and
347  * translate the buffer. If correlate_p is true, then provide stroke
348  * correlations as well. If either nstrokes is 0 or strokes is NULL, then
349  * just translate the stroke buffer and return the translation. Return an
350  * array of alternative translation segmentations in the ret pointer and the
351  * number of alternatives in nret, or NULL and 0 if there is no translation.
352  * The direction of segmentation is as specified by the rc_direction field in
353  * the buffered recognition context. Returns -1 if an error occurred,
354  * otherwise 0. Note that nret is an int*, while other counts in this header are uint.
355  */
356 
357 int			recognizer_translate(recognizer, uint, Stroke*, bool,
358 				int*, rec_alternative**);
359 
360 /*
361  * recognizer_get_extension_functions - Return a null terminated array
362  * of functions (as generic rec_fn pointers) providing extended
363  * functionality. Their interfaces will change depending on the recognizer.
364  */
365 
366 rec_fn*		recognizer_get_extension_functions(recognizer);
367 
368 /*
369  * GESTURE SUPPORT
370 */
371 
372 /*
373  * recognizer_get_gesture_names - Return a null terminated array of
374  * character strings containing the names of the gestures.
375  */
376 
377 char**		recognizer_get_gesture_names(recognizer);
378 
379 /*
380  * recognizer_set_gesture_action - Set the action function associated with the
381  *  name. NOTE(review): the xgesture result and the void* argument are not described; presumably the previous action and the g_wsinfo value -- confirm.
382  */
383 
384 xgesture	recognizer_set_gesture_action(recognizer, char*, xgesture, void*);
385 
386 /*
387  * The following functions are for deleting data structures returned
388  *   by the API functions. NOTE(review): the uint is presumably the array length; the bool's meaning is not stated here -- confirm.
389  */
390 
391 void		delete_rec_alternative_array(uint, rec_alternative*, bool);
392 void		delete_rec_correlation(rec_correlation*, bool);
393 
394 /*
395  * These are used by clients to create, copy and delete the Stroke and
396  *  pen_point arrays passed to API functions. NOTE(review): the uint arguments are presumably element counts -- confirm.
397  */
398 
399 Stroke*	make_Stroke_array(uint);
400 void		delete_Stroke_array(uint, Stroke*, bool);
401 
402 pen_point* 	make_pen_point_array(uint);
403 void 		delete_pen_point_array(pen_point*);
404 
405 Stroke*	copy_Stroke_array(uint, Stroke*);
406 
407 /*Extension function interfaces and indices. NOTE(review): presumably indices into the array from recognizer_get_extension_functions -- confirm.*/
408 
409 #define LI_ISA_LI		0	/*Is this a li recognizer?*/
410 #define LI_TRAIN		1	/*Train recognizer*/
411 #define LI_CLEAR		2	/* ari's clear-state extension fn. */
412 #define LI_GET_CLASSES	3	/* ari's get-classes extension fn. */
413 #define LI_NUM_EX_FNS	4	/*Number of extension functions*/
414 
415 typedef bool	(*li_isa_li)(recognizer r);	/* presumably the LI_ISA_LI function's interface */
416 typedef int		(*li_recognizer_train)(recognizer, rc*, uint,
417 					Stroke*, rec_element*, bool);	/* presumably the LI_TRAIN function's interface */
418 typedef int		(*li_recognizer_clearState)(recognizer);	/* presumably the LI_CLEAR function's interface */
419 typedef int		(*li_recognizer_getClasses)(recognizer, char ***, int *);	/* presumably the LI_GET_CLASSES function's interface */
420