xref: /netbsd-src/external/bsd/tre/dist/python/tre-python.c (revision 1580a27b92f58fcdcb23fdfbc04a7c2b54a0b7c8)
1 /*
2   tre-python.c - TRE Python language bindings
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7   The original version of this code was contributed by
8   Nikolai Saoukh <nms+python@otdel1.org>.
9 
10 */
11 
12 
13 #include "Python.h"
14 #include "structmember.h"
15 
16 #include <tre/tre.h>
17 
/* Name under which the extension module is registered.  It is also pasted
   into the exported type names and into several documentation strings. */
#define TRE_MODULE "tre"
19 
20 typedef struct {
21   PyObject_HEAD
22   regex_t rgx;
23   int flags;
24 } TrePatternObject;
25 
26 typedef struct {
27   PyObject_HEAD
28   regaparams_t ap;
29 } TreFuzzynessObject;
30 
31 typedef struct {
32   PyObject_HEAD
33   regamatch_t am;
34   PyObject *targ;	  /* string we matched against */
35   TreFuzzynessObject *fz; /* fuzzyness used during match */
36 } TreMatchObject;
37 
38 
39 static PyObject *ErrorObject;
40 
41 static void
_set_tre_err(int rc,regex_t * rgx)42 _set_tre_err(int rc, regex_t *rgx)
43 {
44   PyObject *errval;
45   char emsg[256];
46   size_t elen;
47 
48   elen = tre_regerror(rc, rgx, emsg, sizeof(emsg));
49   if (emsg[elen] == '\0')
50     elen--;
51   errval = Py_BuildValue("s#", emsg, elen);
52   PyErr_SetObject(ErrorObject, errval);
53   Py_XDECREF(errval);
54 }
55 
56 static PyObject *
TreFuzzyness_new(PyTypeObject * type,PyObject * args,PyObject * kwds)57 TreFuzzyness_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
58 {
59   static char *kwlist[] = {
60     "delcost", "inscost", "maxcost", "subcost",
61     "maxdel", "maxerr", "maxins", "maxsub",
62     NULL
63   };
64 
65   TreFuzzynessObject *self;
66 
67   self = (TreFuzzynessObject*)type->tp_alloc(type, 0);
68   if (self == NULL)
69     return NULL;
70   tre_regaparams_default(&self->ap);
71   if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiiii", kwlist,
72 				   &self->ap.cost_del, &self->ap.cost_ins,
73 				   &self->ap.max_cost, &self->ap.cost_subst,
74 				   &self->ap.max_del, &self->ap.max_err,
75 				   &self->ap.max_ins, &self->ap.max_subst))
76     {
77       Py_DECREF(self);
78       return NULL;
79     }
80   return (PyObject*)self;
81 }
82 
83 static PyObject *
TreFuzzyness_repr(PyObject * obj)84 TreFuzzyness_repr(PyObject *obj)
85 {
86   TreFuzzynessObject *self = (TreFuzzynessObject*)obj;
87   PyObject *o;
88 
89   o = PyString_FromFormat("%s(delcost=%d,inscost=%d,maxcost=%d,subcost=%d,"
90 			  "maxdel=%d,maxerr=%d,maxins=%d,maxsub=%d)",
91 			  self->ob_type->tp_name, self->ap.cost_del,
92 			  self->ap.cost_ins, self->ap.max_cost,
93 			  self->ap.cost_subst, self->ap.max_del,
94 			  self->ap.max_err, self->ap.max_ins,
95 			  self->ap.max_subst);
96   return o;
97 }
98 
99 static PyMemberDef TreFuzzyness_members[] = {
100   { "delcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_del), 0,
101     "The cost of a deleted character" },
102   { "inscost", T_INT, offsetof(TreFuzzynessObject, ap.cost_ins), 0,
103     "The cost of an inserted character" },
104   { "maxcost", T_INT, offsetof(TreFuzzynessObject, ap.max_cost), 0,
105     "The maximum allowed cost of a match. If this is set to zero, an exact "
106     "match is searched for" },
107   { "subcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_subst), 0,
108     "The cost of a substituted character" },
109   { "maxdel", T_INT, offsetof(TreFuzzynessObject, ap.max_del), 0,
110     "Maximum allowed number of deleted characters" },
111   { "maxerr", T_INT, offsetof(TreFuzzynessObject, ap.max_err), 0,
112     "Maximum allowed number of errors (inserts + deletes + substitutes)" },
113   { "maxins", T_INT, offsetof(TreFuzzynessObject, ap.max_ins), 0,
114     "Maximum allowed number of inserted characters" },
115   { "maxsub", T_INT, offsetof(TreFuzzynessObject, ap.max_subst), 0,
116     "Maximum allowed number of substituted characters" },
117   { NULL }
118 };
119 
120 static PyTypeObject TreFuzzynessType = {
121   PyObject_HEAD_INIT(NULL)
122   0,			        /* ob_size */
123   TRE_MODULE ".Fuzzyness",	/* tp_name */
124   sizeof(TreFuzzynessObject),	/* tp_basicsize */
125   0,			        /* tp_itemsize */
126   /* methods */
127   0,				/* tp_dealloc */
128   0,				/* tp_print */
129   0,				/* tp_getattr */
130   0,				/* tp_setattr */
131   0,				/* tp_compare */
132   TreFuzzyness_repr,		/* tp_repr */
133   0,				/* tp_as_number */
134   0,				/* tp_as_sequence */
135   0,				/* tp_as_mapping */
136   0,				/* tp_hash */
137   0,				/* tp_call */
138   0,				/* tp_str */
139   0,				/* tp_getattro */
140   0,				/* tp_setattro */
141   0,				/* tp_as_buffer */
142   Py_TPFLAGS_DEFAULT,		/* tp_flags */
143   /* tp_doc */
144   TRE_MODULE ".fuzzyness object holds approximation parameters for match",
145   0,				/* tp_traverse */
146   0,				/* tp_clear */
147   0,				/* tp_richcompare */
148   0,				/* tp_weaklistoffset */
149   0,				/* tp_iter */
150   0,				/* tp_iternext */
151   0,				/* tp_methods */
152   TreFuzzyness_members,		/* tp_members */
153   0,				/* tp_getset */
154   0,				/* tp_base */
155   0,				/* tp_dict */
156   0,				/* tp_descr_get */
157   0,				/* tp_descr_set */
158   0,				/* tp_dictoffset */
159   0,				/* tp_init */
160   0,				/* tp_alloc */
161   TreFuzzyness_new		/* tp_new */
162 };
163 
164 static PyObject *
PyTreMatch_groups(TreMatchObject * self,PyObject * dummy)165 PyTreMatch_groups(TreMatchObject *self, PyObject *dummy)
166 {
167   PyObject *result;
168   size_t i;
169 
170   if (self->am.nmatch < 1)
171     {
172       Py_INCREF(Py_None);
173       return Py_None;
174     }
175   result = PyTuple_New(self->am.nmatch);
176   for (i = 0; i < self->am.nmatch; i++)
177     {
178       PyObject *range;
179       regmatch_t *rm = &self->am.pmatch[i];
180 
181       if (rm->rm_so == (-1) && rm->rm_eo == (-1))
182 	{
183 	  Py_INCREF(Py_None);
184 	  range = Py_None;
185 	}
186       else
187 	{
188 	  range = Py_BuildValue("(ii)", rm->rm_so, rm->rm_eo);
189 	}
190       PyTuple_SetItem(result, i, range);
191     }
192   return (PyObject*)result;
193 }
194 
195 static PyObject *
PyTreMatch_groupi(PyObject * obj,int gn)196 PyTreMatch_groupi(PyObject *obj, int gn)
197 {
198   TreMatchObject *self = (TreMatchObject*)obj;
199   PyObject *result;
200   regmatch_t *rm;
201 
202   if (gn < 0 || (size_t)gn > self->am.nmatch - 1)
203     {
204       PyErr_SetString(PyExc_ValueError, "out of bounds");
205       return NULL;
206     }
207   rm = &self->am.pmatch[gn];
208   if (rm->rm_so == (-1) && rm->rm_eo == (-1))
209     {
210       Py_INCREF(Py_None);
211       return Py_None;
212     }
213   result = PySequence_GetSlice(self->targ, rm->rm_so, rm->rm_eo);
214   return result;
215 }
216 
217 static PyObject *
PyTreMatch_group(TreMatchObject * self,PyObject * grpno)218 PyTreMatch_group(TreMatchObject *self, PyObject *grpno)
219 {
220   PyObject *result;
221   long gn;
222 
223   gn = PyInt_AsLong(grpno);
224 
225   if (PyErr_Occurred())
226     return NULL;
227 
228   result = PyTreMatch_groupi((PyObject*)self, gn);
229   return result;
230 }
231 
232 static PyMethodDef TreMatch_methods[] = {
233   {"group", (PyCFunction)PyTreMatch_group, METH_O,
234    "return submatched string or None if a parenthesized subexpression did "
235    "not participate in a match"},
236   {"groups", (PyCFunction)PyTreMatch_groups, METH_NOARGS,
237    "return the tuple of slice tuples for all parenthesized subexpressions "
238    "(None for not participated)"},
239   {NULL, NULL}
240 };
241 
242 static PyMemberDef TreMatch_members[] = {
243   { "cost", T_INT, offsetof(TreMatchObject, am.cost), READONLY,
244     "Cost of the match" },
245   { "numdel", T_INT, offsetof(TreMatchObject, am.num_del), READONLY,
246     "Number of deletes in the match" },
247   { "numins", T_INT, offsetof(TreMatchObject, am.num_ins), READONLY,
248     "Number of inserts in the match" },
249   { "numsub", T_INT, offsetof(TreMatchObject, am.num_subst), READONLY,
250     "Number of substitutes in the match" },
251   { "fuzzyness", T_OBJECT, offsetof(TreMatchObject, fz), READONLY,
252     "Fuzzyness used during match" },
253   { NULL }
254 };
255 
256 static void
PyTreMatch_dealloc(TreMatchObject * self)257 PyTreMatch_dealloc(TreMatchObject *self)
258 {
259   Py_XDECREF(self->targ);
260   Py_XDECREF(self->fz);
261   if (self->am.pmatch != NULL)
262     PyMem_Del(self->am.pmatch);
263   PyObject_Del(self);
264 }
265 
266 static PySequenceMethods TreMatch_as_sequence_methods = {
267   0, /* sq_length */
268   0, /* sq_concat */
269   0, /* sq_repeat */
270   PyTreMatch_groupi, /* sq_item */
271   0, /* sq_slice */
272   0, /* sq_ass_item */
273   0, /* sq_ass_slice */
274   0, /* sq_contains */
275   0, /* sq_inplace_concat */
276   0 /* sq_inplace_repeat */
277 };
278 
279 static PyTypeObject TreMatchType = {
280   PyObject_HEAD_INIT(NULL)
281   0,			        /* ob_size */
282   TRE_MODULE ".Match",		/* tp_name */
283   sizeof(TreMatchObject),	/* tp_basicsize */
284   0,			        /* tp_itemsize */
285   /* methods */
286   (destructor)PyTreMatch_dealloc, /* tp_dealloc */
287   0,			        /* tp_print */
288   0,				/* tp_getattr */
289   0,				/* tp_setattr */
290   0,				/* tp_compare */
291   0,				/* tp_repr */
292   0,				/* tp_as_number */
293   &TreMatch_as_sequence_methods,	/* tp_as_sequence */
294   0,				/* tp_as_mapping */
295   0,				/* tp_hash */
296   0,				/* tp_call */
297   0,				/* tp_str */
298   0,				/* tp_getattro */
299   0,				/* tp_setattro */
300   0,				/* tp_as_buffer */
301   Py_TPFLAGS_DEFAULT,		/* tp_flags */
302   TRE_MODULE ".match object holds result of successful match",	/* tp_doc */
303   0,				/* tp_traverse */
304   0,				/* tp_clear */
305   0,				/* tp_richcompare */
306   0,				/* tp_weaklistoffset */
307   0,				/* tp_iter */
308   0,				/* tp_iternext */
309   TreMatch_methods,		/* tp_methods */
310   TreMatch_members		/* tp_members */
311 };
312 
313 static TreMatchObject *
newTreMatchObject(void)314 newTreMatchObject(void)
315 {
316   TreMatchObject *self;
317 
318   self = PyObject_New(TreMatchObject, &TreMatchType);
319   if (self == NULL)
320     return NULL;
321   memset(&self->am, '\0', sizeof(self->am));
322   self->targ = NULL;
323   self->fz = NULL;
324   return self;
325 }
326 
327 static PyObject *
PyTrePattern_search(TrePatternObject * self,PyObject * args)328 PyTrePattern_search(TrePatternObject *self, PyObject *args)
329 {
330   PyObject *pstring;
331   int eflags = 0;
332   TreMatchObject *mo;
333   TreFuzzynessObject *fz;
334   size_t nsub;
335   int rc;
336   regmatch_t *pm;
337   char *targ;
338   size_t tlen;
339 
340   if (PyTuple_Size(args) > 0 && PyUnicode_Check(PyTuple_GetItem(args, 0)))
341     {
342       if (!PyArg_ParseTuple(args, "UO!|i:search", &pstring, &TreFuzzynessType,
343 			&fz, &eflags))
344       return NULL;
345     }
346   else
347     {
348       if (!PyArg_ParseTuple(args, "SO!|i:search", &pstring, &TreFuzzynessType,
349 			&fz, &eflags))
350       return NULL;
351     }
352 
353   mo = newTreMatchObject();
354   if (mo == NULL)
355     return NULL;
356 
357   nsub = self->rgx.re_nsub + 1;
358   pm = PyMem_New(regmatch_t, nsub);
359   if (!pm)
360     {
361       Py_DECREF(mo);
362       return PyErr_NoMemory();
363     }
364 
365   mo->am.nmatch = nsub;
366   mo->am.pmatch = pm;
367 
368   if (PyUnicode_Check(pstring))
369     {
370       Py_ssize_t len = PyUnicode_GetSize(pstring);
371       wchar_t *buf = calloc(sizeof(wchar_t), len);
372       if(!buf)
373         {
374           Py_DECREF(mo);
375           return PyErr_NoMemory();
376         }
377       PyUnicode_AsWideChar(pstring, buf, len);
378       rc = tre_regawnexec(&self->rgx, buf, len, &mo->am, fz->ap, eflags);
379       free(buf);
380     }
381   else
382     {
383       targ = PyString_AsString(pstring);
384       tlen = PyString_Size(pstring);
385 
386       rc = tre_reganexec(&self->rgx, targ, tlen, &mo->am, fz->ap, eflags);
387     }
388 
389   if (PyErr_Occurred())
390     {
391       Py_DECREF(mo);
392       return NULL;
393     }
394 
395   if (rc == REG_OK)
396     {
397       Py_INCREF(pstring);
398       mo->targ = pstring;
399       Py_INCREF(fz);
400       mo->fz = fz;
401       return (PyObject*)mo;
402     }
403 
404   if (rc == REG_NOMATCH)
405     {
406       Py_DECREF(mo);
407       Py_INCREF(Py_None);
408       return Py_None;
409     }
410   _set_tre_err(rc, &self->rgx);
411   Py_DECREF(mo);
412   return NULL;
413 }
414 
415 static PyMethodDef TrePattern_methods[] = {
416   { "search", (PyCFunction)PyTrePattern_search, METH_VARARGS,
417     "try to search in the given string, returning " TRE_MODULE ".match object "
418     "or None on failure" },
419   {NULL, NULL}
420 };
421 
422 static PyMemberDef TrePattern_members[] = {
423   { "nsub", T_INT, offsetof(TrePatternObject, rgx.re_nsub), READONLY,
424     "Number of parenthesized subexpressions in regex" },
425   { NULL }
426 };
427 
428 static void
PyTrePattern_dealloc(TrePatternObject * self)429 PyTrePattern_dealloc(TrePatternObject *self)
430 {
431   tre_regfree(&self->rgx);
432   PyObject_Del(self);
433 }
434 
435 static PyTypeObject TrePatternType = {
436   PyObject_HEAD_INIT(NULL)
437   0,			        /* ob_size */
438   TRE_MODULE ".Pattern",	/* tp_name */
439   sizeof(TrePatternObject),	/* tp_basicsize */
440   0,			        /* tp_itemsize */
441   /* methods */
442   (destructor)PyTrePattern_dealloc, /*tp_dealloc*/
443   0,				/* tp_print */
444   0,				/* tp_getattr */
445   0,				/* tp_setattr */
446   0,				/* tp_compare */
447   0,				/* tp_repr */
448   0,				/* tp_as_number */
449   0,				/* tp_as_sequence */
450   0,				/* tp_as_mapping */
451   0,				/* tp_hash */
452   0,				/* tp_call */
453   0,				/* tp_str */
454   0,				/* tp_getattro */
455   0,				/* tp_setattro */
456   0,				/* tp_as_buffer */
457   Py_TPFLAGS_DEFAULT,		/* tp_flags */
458   TRE_MODULE ".pattern object holds compiled tre regex",	/* tp_doc */
459   0,				/* tp_traverse */
460   0,				/* tp_clear */
461   0,				/* tp_richcompare */
462   0,				/* tp_weaklistoffset */
463   0,				/* tp_iter */
464   0,				/* tp_iternext */
465   TrePattern_methods,		/* tp_methods */
466   TrePattern_members		/* tp_members */
467 };
468 
469 static TrePatternObject *
newTrePatternObject()470 newTrePatternObject()
471 {
472   TrePatternObject *self;
473 
474   self = PyObject_New(TrePatternObject, &TrePatternType);
475   if (self == NULL)
476     return NULL;
477   self->flags = 0;
478   return self;
479 }
480 
481 static PyObject *
PyTre_ncompile(PyObject * self,PyObject * args)482 PyTre_ncompile(PyObject *self, PyObject *args)
483 {
484   TrePatternObject *rv;
485   PyUnicodeObject *upattern = NULL;
486   char *pattern = NULL;
487   int pattlen;
488   int cflags = 0;
489   int rc;
490 
491   if (PyTuple_Size(args) > 0 && PyUnicode_Check(PyTuple_GetItem(args, 0)))
492     {
493       if (!PyArg_ParseTuple(args, "U|i:compile", &upattern, &cflags))
494         return NULL;
495     }
496   else
497     {
498       if (!PyArg_ParseTuple(args, "s#|i:compile", &pattern, &pattlen, &cflags))
499         return NULL;
500     }
501 
502   rv = newTrePatternObject();
503   if (rv == NULL)
504     return NULL;
505 
506   if (upattern != NULL)
507     {
508       Py_ssize_t len = PyUnicode_GetSize(upattern);
509       wchar_t *buf = calloc(sizeof(wchar_t), len);
510       if(!buf)
511         {
512           Py_DECREF(rv);
513           return PyErr_NoMemory();
514         }
515       PyUnicode_AsWideChar(upattern, buf, len);
516       rc = tre_regwncomp(&rv->rgx, buf, len, cflags);
517       free(buf);
518     }
519   else
520     rc = tre_regncomp(&rv->rgx, (char*)pattern, pattlen, cflags);
521 
522   if (rc != REG_OK)
523     {
524       if (!PyErr_Occurred())
525 	_set_tre_err(rc, &rv->rgx);
526       Py_DECREF(rv);
527       return NULL;
528     }
529   rv->flags = cflags;
530   return (PyObject*)rv;
531 }
532 
533 static PyMethodDef tre_methods[] = {
534   { "compile", PyTre_ncompile, METH_VARARGS,
535     "Compile a regular expression pattern, returning a "
536     TRE_MODULE ".pattern object" },
537   { NULL, NULL }
538 };
539 
/* Docstring installed on the module by inittre(). */
static char *tre_doc =
  "Python module for TRE library\n\nModule exports "
  "the only function: compile";
543 
544 static struct _tre_flags {
545   char *name;
546   int val;
547 } tre_flags[] = {
548   { "EXTENDED", REG_EXTENDED },
549   { "ICASE", REG_ICASE },
550   { "NEWLINE", REG_NEWLINE },
551   { "NOSUB", REG_NOSUB },
552   { "LITERAL", REG_LITERAL },
553 
554   { "NOTBOL", REG_NOTBOL },
555   { "NOTEOL", REG_NOTEOL },
556   { NULL, 0 }
557 };
558 
559 PyMODINIT_FUNC
inittre(void)560 inittre(void)
561 {
562   PyObject *m;
563   struct _tre_flags *fp;
564 
565   if (PyType_Ready(&TreFuzzynessType) < 0)
566     return;
567   if (PyType_Ready(&TreMatchType) < 0)
568     return;
569   if (PyType_Ready(&TrePatternType) < 0)
570     return;
571 
572   /* Create the module and add the functions */
573   m = Py_InitModule3(TRE_MODULE, tre_methods, tre_doc);
574   if (m == NULL)
575     return;
576 
577   Py_INCREF(&TreFuzzynessType);
578   if (PyModule_AddObject(m, "Fuzzyness", (PyObject*)&TreFuzzynessType) < 0)
579     return;
580   Py_INCREF(&TreMatchType);
581   if (PyModule_AddObject(m, "Match", (PyObject*)&TreMatchType) < 0)
582     return;
583   Py_INCREF(&TrePatternType);
584   if (PyModule_AddObject(m, "Pattern", (PyObject*)&TrePatternType) < 0)
585     return;
586   ErrorObject = PyErr_NewException(TRE_MODULE ".Error", NULL, NULL);
587   Py_INCREF(ErrorObject);
588   if (PyModule_AddObject(m, "Error", ErrorObject) < 0)
589     return;
590 
591   /* Insert the flags */
592   for (fp = tre_flags; fp->name != NULL; fp++)
593     if (PyModule_AddIntConstant(m, fp->name, fp->val) < 0)
594       return;
595 }
596