xref: /netbsd-src/external/bsd/tre/dist/python/tre-python.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /*
2   tre-python.c - TRE Python language bindings
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7   The original version of this code was contributed by
8   Nikolai Saoukh <nms+python@otdel1.org>.
9 
10 */
11 
12 
13 #include "Python.h"
14 #include "structmember.h"
15 
16 #include <tre/tre.h>
17 
18 #define	TRE_MODULE	"tre"
19 
20 typedef struct {
21   PyObject_HEAD
22   regex_t rgx;
23   int flags;
24 } TrePatternObject;
25 
26 typedef struct {
27   PyObject_HEAD
28   regaparams_t ap;
29 } TreFuzzynessObject;
30 
31 typedef struct {
32   PyObject_HEAD
33   regamatch_t am;
34   PyObject *targ;	  /* string we matched against */
35   TreFuzzynessObject *fz; /* fuzzyness used during match */
36 } TreMatchObject;
37 
38 
39 static PyObject *ErrorObject;
40 
41 static void
42 _set_tre_err(int rc, regex_t *rgx)
43 {
44   PyObject *errval;
45   char emsg[256];
46   size_t elen;
47 
48   elen = tre_regerror(rc, rgx, emsg, sizeof(emsg));
49   if (emsg[elen] == '\0')
50     elen--;
51   errval = Py_BuildValue("s#", emsg, elen);
52   PyErr_SetObject(ErrorObject, errval);
53   Py_XDECREF(errval);
54 }
55 
56 static PyObject *
57 TreFuzzyness_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
58 {
59   static char *kwlist[] = {
60     "delcost", "inscost", "maxcost", "subcost",
61     "maxdel", "maxerr", "maxins", "maxsub",
62     NULL
63   };
64 
65   TreFuzzynessObject *self;
66 
67   self = (TreFuzzynessObject*)type->tp_alloc(type, 0);
68   if (self == NULL)
69     return NULL;
70   tre_regaparams_default(&self->ap);
71   if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiiii", kwlist,
72 				   &self->ap.cost_del, &self->ap.cost_ins,
73 				   &self->ap.max_cost, &self->ap.cost_subst,
74 				   &self->ap.max_del, &self->ap.max_err,
75 				   &self->ap.max_ins, &self->ap.max_subst))
76     {
77       Py_DECREF(self);
78       return NULL;
79     }
80   return (PyObject*)self;
81 }
82 
83 static PyObject *
84 TreFuzzyness_repr(PyObject *obj)
85 {
86   TreFuzzynessObject *self = (TreFuzzynessObject*)obj;
87   PyObject *o;
88 
89   o = PyString_FromFormat("%s(delcost=%d,inscost=%d,maxcost=%d,subcost=%d,"
90 			  "maxdel=%d,maxerr=%d,maxins=%d,maxsub=%d)",
91 			  self->ob_type->tp_name, self->ap.cost_del,
92 			  self->ap.cost_ins, self->ap.max_cost,
93 			  self->ap.cost_subst, self->ap.max_del,
94 			  self->ap.max_err, self->ap.max_ins,
95 			  self->ap.max_subst);
96   return o;
97 }
98 
99 static PyMemberDef TreFuzzyness_members[] = {
100   { "delcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_del), 0,
101     "The cost of a deleted character" },
102   { "inscost", T_INT, offsetof(TreFuzzynessObject, ap.cost_ins), 0,
103     "The cost of an inserted character" },
104   { "maxcost", T_INT, offsetof(TreFuzzynessObject, ap.max_cost), 0,
105     "The maximum allowed cost of a match. If this is set to zero, an exact "
106     "match is searched for" },
107   { "subcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_subst), 0,
108     "The cost of a substituted character" },
109   { "maxdel", T_INT, offsetof(TreFuzzynessObject, ap.max_del), 0,
110     "Maximum allowed number of deleted characters" },
111   { "maxerr", T_INT, offsetof(TreFuzzynessObject, ap.max_err), 0,
112     "Maximum allowed number of errors (inserts + deletes + substitutes)" },
113   { "maxins", T_INT, offsetof(TreFuzzynessObject, ap.max_ins), 0,
114     "Maximum allowed number of inserted characters" },
115   { "maxsub", T_INT, offsetof(TreFuzzynessObject, ap.max_subst), 0,
116     "Maximum allowed number of substituted characters" },
117   { NULL }
118 };
119 
120 static PyTypeObject TreFuzzynessType = {
121   PyObject_HEAD_INIT(NULL)
122   0,			        /* ob_size */
123   TRE_MODULE ".Fuzzyness",	/* tp_name */
124   sizeof(TreFuzzynessObject),	/* tp_basicsize */
125   0,			        /* tp_itemsize */
126   /* methods */
127   0,				/* tp_dealloc */
128   0,				/* tp_print */
129   0,				/* tp_getattr */
130   0,				/* tp_setattr */
131   0,				/* tp_compare */
132   TreFuzzyness_repr,		/* tp_repr */
133   0,				/* tp_as_number */
134   0,				/* tp_as_sequence */
135   0,				/* tp_as_mapping */
136   0,				/* tp_hash */
137   0,				/* tp_call */
138   0,				/* tp_str */
139   0,				/* tp_getattro */
140   0,				/* tp_setattro */
141   0,				/* tp_as_buffer */
142   Py_TPFLAGS_DEFAULT,		/* tp_flags */
143   /* tp_doc */
144   TRE_MODULE ".fuzzyness object holds approximation parameters for match",
145   0,				/* tp_traverse */
146   0,				/* tp_clear */
147   0,				/* tp_richcompare */
148   0,				/* tp_weaklistoffset */
149   0,				/* tp_iter */
150   0,				/* tp_iternext */
151   0,				/* tp_methods */
152   TreFuzzyness_members,		/* tp_members */
153   0,				/* tp_getset */
154   0,				/* tp_base */
155   0,				/* tp_dict */
156   0,				/* tp_descr_get */
157   0,				/* tp_descr_set */
158   0,				/* tp_dictoffset */
159   0,				/* tp_init */
160   0,				/* tp_alloc */
161   TreFuzzyness_new		/* tp_new */
162 };
163 
164 static PyObject *
165 PyTreMatch_groups(TreMatchObject *self, PyObject *dummy)
166 {
167   PyObject *result;
168   size_t i;
169 
170   if (self->am.nmatch < 1)
171     {
172       Py_INCREF(Py_None);
173       return Py_None;
174     }
175   result = PyTuple_New(self->am.nmatch);
176   for (i = 0; i < self->am.nmatch; i++)
177     {
178       PyObject *range;
179       regmatch_t *rm = &self->am.pmatch[i];
180 
181       if (rm->rm_so == (-1) && rm->rm_eo == (-1))
182 	{
183 	  Py_INCREF(Py_None);
184 	  range = Py_None;
185 	}
186       else
187 	{
188 	  range = Py_BuildValue("(ii)", rm->rm_so, rm->rm_eo);
189 	}
190       PyTuple_SetItem(result, i, range);
191     }
192   return (PyObject*)result;
193 }
194 
195 static PyObject *
196 PyTreMatch_groupi(PyObject *obj, int gn)
197 {
198   TreMatchObject *self = (TreMatchObject*)obj;
199   PyObject *result;
200   regmatch_t *rm;
201 
202   if (gn < 0 || (size_t)gn > self->am.nmatch - 1)
203     {
204       PyErr_SetString(PyExc_ValueError, "out of bounds");
205       return NULL;
206     }
207   rm = &self->am.pmatch[gn];
208   if (rm->rm_so == (-1) && rm->rm_eo == (-1))
209     {
210       Py_INCREF(Py_None);
211       return Py_None;
212     }
213   result = PySequence_GetSlice(self->targ, rm->rm_so, rm->rm_eo);
214   return result;
215 }
216 
217 static PyObject *
218 PyTreMatch_group(TreMatchObject *self, PyObject *grpno)
219 {
220   PyObject *result;
221   long gn;
222 
223   gn = PyInt_AsLong(grpno);
224 
225   if (PyErr_Occurred())
226     return NULL;
227 
228   result = PyTreMatch_groupi((PyObject*)self, gn);
229   return result;
230 }
231 
232 static PyMethodDef TreMatch_methods[] = {
233   {"group", (PyCFunction)PyTreMatch_group, METH_O,
234    "return submatched string or None if a parenthesized subexpression did "
235    "not participate in a match"},
236   {"groups", (PyCFunction)PyTreMatch_groups, METH_NOARGS,
237    "return the tuple of slice tuples for all parenthesized subexpressions "
238    "(None for not participated)"},
239   {NULL, NULL}
240 };
241 
242 static PyMemberDef TreMatch_members[] = {
243   { "cost", T_INT, offsetof(TreMatchObject, am.cost), READONLY,
244     "Cost of the match" },
245   { "numdel", T_INT, offsetof(TreMatchObject, am.num_del), READONLY,
246     "Number of deletes in the match" },
247   { "numins", T_INT, offsetof(TreMatchObject, am.num_ins), READONLY,
248     "Number of inserts in the match" },
249   { "numsub", T_INT, offsetof(TreMatchObject, am.num_subst), READONLY,
250     "Number of substitutes in the match" },
251   { "fuzzyness", T_OBJECT, offsetof(TreMatchObject, fz), READONLY,
252     "Fuzzyness used during match" },
253   { NULL }
254 };
255 
256 static void
257 PyTreMatch_dealloc(TreMatchObject *self)
258 {
259   Py_XDECREF(self->targ);
260   Py_XDECREF(self->fz);
261   if (self->am.pmatch != NULL)
262     PyMem_Del(self->am.pmatch);
263   PyObject_Del(self);
264 }
265 
266 static PySequenceMethods TreMatch_as_sequence_methods = {
267   0, /* sq_length */
268   0, /* sq_concat */
269   0, /* sq_repeat */
270   PyTreMatch_groupi, /* sq_item */
271   0, /* sq_slice */
272   0, /* sq_ass_item */
273   0, /* sq_ass_slice */
274   0, /* sq_contains */
275   0, /* sq_inplace_concat */
276   0 /* sq_inplace_repeat */
277 };
278 
279 static PyTypeObject TreMatchType = {
280   PyObject_HEAD_INIT(NULL)
281   0,			        /* ob_size */
282   TRE_MODULE ".Match",		/* tp_name */
283   sizeof(TreMatchObject),	/* tp_basicsize */
284   0,			        /* tp_itemsize */
285   /* methods */
286   (destructor)PyTreMatch_dealloc, /* tp_dealloc */
287   0,			        /* tp_print */
288   0,				/* tp_getattr */
289   0,				/* tp_setattr */
290   0,				/* tp_compare */
291   0,				/* tp_repr */
292   0,				/* tp_as_number */
293   &TreMatch_as_sequence_methods,	/* tp_as_sequence */
294   0,				/* tp_as_mapping */
295   0,				/* tp_hash */
296   0,				/* tp_call */
297   0,				/* tp_str */
298   0,				/* tp_getattro */
299   0,				/* tp_setattro */
300   0,				/* tp_as_buffer */
301   Py_TPFLAGS_DEFAULT,		/* tp_flags */
302   TRE_MODULE ".match object holds result of successful match",	/* tp_doc */
303   0,				/* tp_traverse */
304   0,				/* tp_clear */
305   0,				/* tp_richcompare */
306   0,				/* tp_weaklistoffset */
307   0,				/* tp_iter */
308   0,				/* tp_iternext */
309   TreMatch_methods,		/* tp_methods */
310   TreMatch_members		/* tp_members */
311 };
312 
313 static TreMatchObject *
314 newTreMatchObject(void)
315 {
316   TreMatchObject *self;
317 
318   self = PyObject_New(TreMatchObject, &TreMatchType);
319   if (self == NULL)
320     return NULL;
321   memset(&self->am, '\0', sizeof(self->am));
322   self->targ = NULL;
323   self->fz = NULL;
324   return self;
325 }
326 
327 static PyObject *
328 PyTrePattern_search(TrePatternObject *self, PyObject *args)
329 {
330   PyObject *pstring;
331   int eflags = 0;
332   TreMatchObject *mo;
333   TreFuzzynessObject *fz;
334   size_t nsub;
335   int rc;
336   regmatch_t *pm;
337   char *targ;
338   size_t tlen;
339 
340   if (!PyArg_ParseTuple(args, "SO!|i:match", &pstring, &TreFuzzynessType,
341 			&fz, &eflags))
342     return NULL;
343 
344   mo = newTreMatchObject();
345   if (mo == NULL)
346     return NULL;
347 
348   nsub = self->rgx.re_nsub + 1;
349   pm = PyMem_New(regmatch_t, nsub);
350   if (pm != NULL)
351     {
352       mo->am.nmatch = nsub;
353       mo->am.pmatch = pm;
354     }
355   else
356     {
357       /* XXX */
358       Py_DECREF(mo);
359       return NULL;
360     }
361 
362   targ = PyString_AsString(pstring);
363   tlen = PyString_Size(pstring);
364 
365   rc = tre_reganexec(&self->rgx, targ, tlen, &mo->am, fz->ap, eflags);
366 
367   if (PyErr_Occurred())
368     {
369       Py_DECREF(mo);
370       return NULL;
371     }
372 
373   if (rc == REG_OK)
374     {
375       Py_INCREF(pstring);
376       mo->targ = pstring;
377       Py_INCREF(fz);
378       mo->fz = fz;
379       return (PyObject*)mo;
380     }
381 
382   if (rc == REG_NOMATCH)
383     {
384       Py_DECREF(mo);
385       Py_INCREF(Py_None);
386       return Py_None;
387     }
388   _set_tre_err(rc, &self->rgx);
389   Py_DECREF(mo);
390   return NULL;
391 }
392 
393 static PyMethodDef TrePattern_methods[] = {
394   { "search", (PyCFunction)PyTrePattern_search, METH_VARARGS,
395     "try to match against given string, returning " TRE_MODULE ".match object "
396     "or None on failure" },
397   {NULL, NULL}
398 };
399 
400 static PyMemberDef TrePattern_members[] = {
401   { "nsub", T_INT, offsetof(TrePatternObject, rgx.re_nsub), READONLY,
402     "Number of parenthesized subexpressions in regex" },
403   { NULL }
404 };
405 
406 static void
407 PyTrePattern_dealloc(TrePatternObject *self)
408 {
409   tre_regfree(&self->rgx);
410   PyObject_Del(self);
411 }
412 
413 static PyTypeObject TrePatternType = {
414   PyObject_HEAD_INIT(NULL)
415   0,			        /* ob_size */
416   TRE_MODULE ".Pattern",	/* tp_name */
417   sizeof(TrePatternObject),	/* tp_basicsize */
418   0,			        /* tp_itemsize */
419   /* methods */
420   (destructor)PyTrePattern_dealloc, /*tp_dealloc*/
421   0,				/* tp_print */
422   0,				/* tp_getattr */
423   0,				/* tp_setattr */
424   0,				/* tp_compare */
425   0,				/* tp_repr */
426   0,				/* tp_as_number */
427   0,				/* tp_as_sequence */
428   0,				/* tp_as_mapping */
429   0,				/* tp_hash */
430   0,				/* tp_call */
431   0,				/* tp_str */
432   0,				/* tp_getattro */
433   0,				/* tp_setattro */
434   0,				/* tp_as_buffer */
435   Py_TPFLAGS_DEFAULT,		/* tp_flags */
436   TRE_MODULE ".pattern object holds compiled tre regex",	/* tp_doc */
437   0,				/* tp_traverse */
438   0,				/* tp_clear */
439   0,				/* tp_richcompare */
440   0,				/* tp_weaklistoffset */
441   0,				/* tp_iter */
442   0,				/* tp_iternext */
443   TrePattern_methods,		/* tp_methods */
444   TrePattern_members		/* tp_members */
445 };
446 
447 static TrePatternObject *
448 newTrePatternObject(PyObject *args)
449 {
450   TrePatternObject *self;
451 
452   self = PyObject_New(TrePatternObject, &TrePatternType);
453   if (self == NULL)
454     return NULL;
455   self->flags = 0;
456   return self;
457 }
458 
459 static PyObject *
460 PyTre_ncompile(PyObject *self, PyObject *args)
461 {
462   TrePatternObject *rv;
463   char *pattern;
464   int pattlen;
465   int cflags = 0;
466   int rc;
467 
468   if (!PyArg_ParseTuple(args, "s#|i:compile", &pattern, &pattlen, &cflags))
469     return NULL;
470 
471   rv = newTrePatternObject(args);
472   if (rv == NULL)
473     return NULL;
474 
475   rc = tre_regncomp(&rv->rgx, (char*)pattern, pattlen, cflags);
476   if (rc != REG_OK)
477     {
478       if (!PyErr_Occurred())
479 	_set_tre_err(rc, &rv->rgx);
480       Py_DECREF(rv);
481       return NULL;
482     }
483   rv->flags = cflags;
484   return (PyObject*)rv;
485 }
486 
487 static PyMethodDef tre_methods[] = {
488   { "compile", PyTre_ncompile, METH_VARARGS,
489     "Compile a regular expression pattern, returning a "
490     TRE_MODULE ".pattern object" },
491   { NULL, NULL }
492 };
493 
/* Module docstring passed to Py_InitModule3(). */
static char *tre_doc =
  "Python module for TRE library\n\n"
  "Module exports the only function: compile";
497 
498 static struct _tre_flags {
499   char *name;
500   int val;
501 } tre_flags[] = {
502   { "EXTENDED", REG_EXTENDED },
503   { "ICASE", REG_ICASE },
504   { "NEWLINE", REG_NEWLINE },
505   { "NOSUB", REG_NOSUB },
506   { "LITERAL", REG_LITERAL },
507 
508   { "NOTBOL", REG_NOTBOL },
509   { "NOTEOL", REG_NOTEOL },
510   { NULL, 0 }
511 };
512 
513 PyMODINIT_FUNC
514 inittre(void)
515 {
516   PyObject *m;
517   struct _tre_flags *fp;
518 
519   if (PyType_Ready(&TreFuzzynessType) < 0)
520     return;
521   if (PyType_Ready(&TreMatchType) < 0)
522     return;
523   if (PyType_Ready(&TrePatternType) < 0)
524     return;
525 
526   /* Create the module and add the functions */
527   m = Py_InitModule3(TRE_MODULE, tre_methods, tre_doc);
528   if (m == NULL)
529     return;
530 
531   Py_INCREF(&TreFuzzynessType);
532   if (PyModule_AddObject(m, "Fuzzyness", (PyObject*)&TreFuzzynessType) < 0)
533     return;
534   Py_INCREF(&TreMatchType);
535   if (PyModule_AddObject(m, "Match", (PyObject*)&TreMatchType) < 0)
536     return;
537   Py_INCREF(&TrePatternType);
538   if (PyModule_AddObject(m, "Pattern", (PyObject*)&TrePatternType) < 0)
539     return;
540   ErrorObject = PyErr_NewException(TRE_MODULE ".Error", NULL, NULL);
541   Py_INCREF(ErrorObject);
542   if (PyModule_AddObject(m, "Error", ErrorObject) < 0)
543     return;
544 
545   /* Insert the flags */
546   for (fp = tre_flags; fp->name != NULL; fp++)
547     if (PyModule_AddIntConstant(m, fp->name, fp->val) < 0)
548       return;
549 }
550