/*
  tre-python.c - TRE Python language bindings

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

  The original version of this code was contributed by
  Nikolai Saoukh <nms+python@otdel1.org>.

*/
1163d4abf0Sagc
1263d4abf0Sagc
1363d4abf0Sagc #include "Python.h"
1463d4abf0Sagc #include "structmember.h"
1563d4abf0Sagc
1663d4abf0Sagc #include <tre/tre.h>
1763d4abf0Sagc
1863d4abf0Sagc #define TRE_MODULE "tre"
1963d4abf0Sagc
2063d4abf0Sagc typedef struct {
2163d4abf0Sagc PyObject_HEAD
2263d4abf0Sagc regex_t rgx;
2363d4abf0Sagc int flags;
2463d4abf0Sagc } TrePatternObject;
2563d4abf0Sagc
2663d4abf0Sagc typedef struct {
2763d4abf0Sagc PyObject_HEAD
2863d4abf0Sagc regaparams_t ap;
2963d4abf0Sagc } TreFuzzynessObject;
3063d4abf0Sagc
3163d4abf0Sagc typedef struct {
3263d4abf0Sagc PyObject_HEAD
3363d4abf0Sagc regamatch_t am;
3463d4abf0Sagc PyObject *targ; /* string we matched against */
3563d4abf0Sagc TreFuzzynessObject *fz; /* fuzzyness used during match */
3663d4abf0Sagc } TreMatchObject;
3763d4abf0Sagc
3863d4abf0Sagc
3963d4abf0Sagc static PyObject *ErrorObject;
4063d4abf0Sagc
4163d4abf0Sagc static void
_set_tre_err(int rc,regex_t * rgx)4263d4abf0Sagc _set_tre_err(int rc, regex_t *rgx)
4363d4abf0Sagc {
4463d4abf0Sagc PyObject *errval;
4563d4abf0Sagc char emsg[256];
4663d4abf0Sagc size_t elen;
4763d4abf0Sagc
4863d4abf0Sagc elen = tre_regerror(rc, rgx, emsg, sizeof(emsg));
4963d4abf0Sagc if (emsg[elen] == '\0')
5063d4abf0Sagc elen--;
5163d4abf0Sagc errval = Py_BuildValue("s#", emsg, elen);
5263d4abf0Sagc PyErr_SetObject(ErrorObject, errval);
5363d4abf0Sagc Py_XDECREF(errval);
5463d4abf0Sagc }
5563d4abf0Sagc
5663d4abf0Sagc static PyObject *
TreFuzzyness_new(PyTypeObject * type,PyObject * args,PyObject * kwds)5763d4abf0Sagc TreFuzzyness_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
5863d4abf0Sagc {
5963d4abf0Sagc static char *kwlist[] = {
6063d4abf0Sagc "delcost", "inscost", "maxcost", "subcost",
6163d4abf0Sagc "maxdel", "maxerr", "maxins", "maxsub",
6263d4abf0Sagc NULL
6363d4abf0Sagc };
6463d4abf0Sagc
6563d4abf0Sagc TreFuzzynessObject *self;
6663d4abf0Sagc
6763d4abf0Sagc self = (TreFuzzynessObject*)type->tp_alloc(type, 0);
6863d4abf0Sagc if (self == NULL)
6963d4abf0Sagc return NULL;
7063d4abf0Sagc tre_regaparams_default(&self->ap);
7163d4abf0Sagc if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiiii", kwlist,
7263d4abf0Sagc &self->ap.cost_del, &self->ap.cost_ins,
7363d4abf0Sagc &self->ap.max_cost, &self->ap.cost_subst,
7463d4abf0Sagc &self->ap.max_del, &self->ap.max_err,
7563d4abf0Sagc &self->ap.max_ins, &self->ap.max_subst))
7663d4abf0Sagc {
7763d4abf0Sagc Py_DECREF(self);
7863d4abf0Sagc return NULL;
7963d4abf0Sagc }
8063d4abf0Sagc return (PyObject*)self;
8163d4abf0Sagc }
8263d4abf0Sagc
8363d4abf0Sagc static PyObject *
TreFuzzyness_repr(PyObject * obj)8463d4abf0Sagc TreFuzzyness_repr(PyObject *obj)
8563d4abf0Sagc {
8663d4abf0Sagc TreFuzzynessObject *self = (TreFuzzynessObject*)obj;
8763d4abf0Sagc PyObject *o;
8863d4abf0Sagc
8963d4abf0Sagc o = PyString_FromFormat("%s(delcost=%d,inscost=%d,maxcost=%d,subcost=%d,"
9063d4abf0Sagc "maxdel=%d,maxerr=%d,maxins=%d,maxsub=%d)",
9163d4abf0Sagc self->ob_type->tp_name, self->ap.cost_del,
9263d4abf0Sagc self->ap.cost_ins, self->ap.max_cost,
9363d4abf0Sagc self->ap.cost_subst, self->ap.max_del,
9463d4abf0Sagc self->ap.max_err, self->ap.max_ins,
9563d4abf0Sagc self->ap.max_subst);
9663d4abf0Sagc return o;
9763d4abf0Sagc }
9863d4abf0Sagc
9963d4abf0Sagc static PyMemberDef TreFuzzyness_members[] = {
10063d4abf0Sagc { "delcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_del), 0,
10163d4abf0Sagc "The cost of a deleted character" },
10263d4abf0Sagc { "inscost", T_INT, offsetof(TreFuzzynessObject, ap.cost_ins), 0,
10363d4abf0Sagc "The cost of an inserted character" },
10463d4abf0Sagc { "maxcost", T_INT, offsetof(TreFuzzynessObject, ap.max_cost), 0,
10563d4abf0Sagc "The maximum allowed cost of a match. If this is set to zero, an exact "
10663d4abf0Sagc "match is searched for" },
10763d4abf0Sagc { "subcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_subst), 0,
10863d4abf0Sagc "The cost of a substituted character" },
10963d4abf0Sagc { "maxdel", T_INT, offsetof(TreFuzzynessObject, ap.max_del), 0,
11063d4abf0Sagc "Maximum allowed number of deleted characters" },
11163d4abf0Sagc { "maxerr", T_INT, offsetof(TreFuzzynessObject, ap.max_err), 0,
11263d4abf0Sagc "Maximum allowed number of errors (inserts + deletes + substitutes)" },
11363d4abf0Sagc { "maxins", T_INT, offsetof(TreFuzzynessObject, ap.max_ins), 0,
11463d4abf0Sagc "Maximum allowed number of inserted characters" },
11563d4abf0Sagc { "maxsub", T_INT, offsetof(TreFuzzynessObject, ap.max_subst), 0,
11663d4abf0Sagc "Maximum allowed number of substituted characters" },
11763d4abf0Sagc { NULL }
11863d4abf0Sagc };
11963d4abf0Sagc
12063d4abf0Sagc static PyTypeObject TreFuzzynessType = {
12163d4abf0Sagc PyObject_HEAD_INIT(NULL)
12263d4abf0Sagc 0, /* ob_size */
12363d4abf0Sagc TRE_MODULE ".Fuzzyness", /* tp_name */
12463d4abf0Sagc sizeof(TreFuzzynessObject), /* tp_basicsize */
12563d4abf0Sagc 0, /* tp_itemsize */
12663d4abf0Sagc /* methods */
12763d4abf0Sagc 0, /* tp_dealloc */
12863d4abf0Sagc 0, /* tp_print */
12963d4abf0Sagc 0, /* tp_getattr */
13063d4abf0Sagc 0, /* tp_setattr */
13163d4abf0Sagc 0, /* tp_compare */
13263d4abf0Sagc TreFuzzyness_repr, /* tp_repr */
13363d4abf0Sagc 0, /* tp_as_number */
13463d4abf0Sagc 0, /* tp_as_sequence */
13563d4abf0Sagc 0, /* tp_as_mapping */
13663d4abf0Sagc 0, /* tp_hash */
13763d4abf0Sagc 0, /* tp_call */
13863d4abf0Sagc 0, /* tp_str */
13963d4abf0Sagc 0, /* tp_getattro */
14063d4abf0Sagc 0, /* tp_setattro */
14163d4abf0Sagc 0, /* tp_as_buffer */
14263d4abf0Sagc Py_TPFLAGS_DEFAULT, /* tp_flags */
14363d4abf0Sagc /* tp_doc */
14463d4abf0Sagc TRE_MODULE ".fuzzyness object holds approximation parameters for match",
14563d4abf0Sagc 0, /* tp_traverse */
14663d4abf0Sagc 0, /* tp_clear */
14763d4abf0Sagc 0, /* tp_richcompare */
14863d4abf0Sagc 0, /* tp_weaklistoffset */
14963d4abf0Sagc 0, /* tp_iter */
15063d4abf0Sagc 0, /* tp_iternext */
15163d4abf0Sagc 0, /* tp_methods */
15263d4abf0Sagc TreFuzzyness_members, /* tp_members */
15363d4abf0Sagc 0, /* tp_getset */
15463d4abf0Sagc 0, /* tp_base */
15563d4abf0Sagc 0, /* tp_dict */
15663d4abf0Sagc 0, /* tp_descr_get */
15763d4abf0Sagc 0, /* tp_descr_set */
15863d4abf0Sagc 0, /* tp_dictoffset */
15963d4abf0Sagc 0, /* tp_init */
16063d4abf0Sagc 0, /* tp_alloc */
16163d4abf0Sagc TreFuzzyness_new /* tp_new */
16263d4abf0Sagc };
16363d4abf0Sagc
16463d4abf0Sagc static PyObject *
PyTreMatch_groups(TreMatchObject * self,PyObject * dummy)16563d4abf0Sagc PyTreMatch_groups(TreMatchObject *self, PyObject *dummy)
16663d4abf0Sagc {
16763d4abf0Sagc PyObject *result;
16863d4abf0Sagc size_t i;
16963d4abf0Sagc
17063d4abf0Sagc if (self->am.nmatch < 1)
17163d4abf0Sagc {
17263d4abf0Sagc Py_INCREF(Py_None);
17363d4abf0Sagc return Py_None;
17463d4abf0Sagc }
17563d4abf0Sagc result = PyTuple_New(self->am.nmatch);
17663d4abf0Sagc for (i = 0; i < self->am.nmatch; i++)
17763d4abf0Sagc {
17863d4abf0Sagc PyObject *range;
17963d4abf0Sagc regmatch_t *rm = &self->am.pmatch[i];
18063d4abf0Sagc
18163d4abf0Sagc if (rm->rm_so == (-1) && rm->rm_eo == (-1))
18263d4abf0Sagc {
18363d4abf0Sagc Py_INCREF(Py_None);
18463d4abf0Sagc range = Py_None;
18563d4abf0Sagc }
18663d4abf0Sagc else
18763d4abf0Sagc {
18863d4abf0Sagc range = Py_BuildValue("(ii)", rm->rm_so, rm->rm_eo);
18963d4abf0Sagc }
19063d4abf0Sagc PyTuple_SetItem(result, i, range);
19163d4abf0Sagc }
19263d4abf0Sagc return (PyObject*)result;
19363d4abf0Sagc }
19463d4abf0Sagc
19563d4abf0Sagc static PyObject *
PyTreMatch_groupi(PyObject * obj,int gn)19663d4abf0Sagc PyTreMatch_groupi(PyObject *obj, int gn)
19763d4abf0Sagc {
19863d4abf0Sagc TreMatchObject *self = (TreMatchObject*)obj;
19963d4abf0Sagc PyObject *result;
20063d4abf0Sagc regmatch_t *rm;
20163d4abf0Sagc
20263d4abf0Sagc if (gn < 0 || (size_t)gn > self->am.nmatch - 1)
20363d4abf0Sagc {
20463d4abf0Sagc PyErr_SetString(PyExc_ValueError, "out of bounds");
20563d4abf0Sagc return NULL;
20663d4abf0Sagc }
20763d4abf0Sagc rm = &self->am.pmatch[gn];
20863d4abf0Sagc if (rm->rm_so == (-1) && rm->rm_eo == (-1))
20963d4abf0Sagc {
21063d4abf0Sagc Py_INCREF(Py_None);
21163d4abf0Sagc return Py_None;
21263d4abf0Sagc }
21363d4abf0Sagc result = PySequence_GetSlice(self->targ, rm->rm_so, rm->rm_eo);
21463d4abf0Sagc return result;
21563d4abf0Sagc }
21663d4abf0Sagc
21763d4abf0Sagc static PyObject *
PyTreMatch_group(TreMatchObject * self,PyObject * grpno)21863d4abf0Sagc PyTreMatch_group(TreMatchObject *self, PyObject *grpno)
21963d4abf0Sagc {
22063d4abf0Sagc PyObject *result;
22163d4abf0Sagc long gn;
22263d4abf0Sagc
22363d4abf0Sagc gn = PyInt_AsLong(grpno);
22463d4abf0Sagc
22563d4abf0Sagc if (PyErr_Occurred())
22663d4abf0Sagc return NULL;
22763d4abf0Sagc
22863d4abf0Sagc result = PyTreMatch_groupi((PyObject*)self, gn);
22963d4abf0Sagc return result;
23063d4abf0Sagc }
23163d4abf0Sagc
23263d4abf0Sagc static PyMethodDef TreMatch_methods[] = {
23363d4abf0Sagc {"group", (PyCFunction)PyTreMatch_group, METH_O,
23463d4abf0Sagc "return submatched string or None if a parenthesized subexpression did "
23563d4abf0Sagc "not participate in a match"},
23663d4abf0Sagc {"groups", (PyCFunction)PyTreMatch_groups, METH_NOARGS,
23763d4abf0Sagc "return the tuple of slice tuples for all parenthesized subexpressions "
23863d4abf0Sagc "(None for not participated)"},
23963d4abf0Sagc {NULL, NULL}
24063d4abf0Sagc };
24163d4abf0Sagc
24263d4abf0Sagc static PyMemberDef TreMatch_members[] = {
24363d4abf0Sagc { "cost", T_INT, offsetof(TreMatchObject, am.cost), READONLY,
24463d4abf0Sagc "Cost of the match" },
24563d4abf0Sagc { "numdel", T_INT, offsetof(TreMatchObject, am.num_del), READONLY,
24663d4abf0Sagc "Number of deletes in the match" },
24763d4abf0Sagc { "numins", T_INT, offsetof(TreMatchObject, am.num_ins), READONLY,
24863d4abf0Sagc "Number of inserts in the match" },
24963d4abf0Sagc { "numsub", T_INT, offsetof(TreMatchObject, am.num_subst), READONLY,
25063d4abf0Sagc "Number of substitutes in the match" },
25163d4abf0Sagc { "fuzzyness", T_OBJECT, offsetof(TreMatchObject, fz), READONLY,
25263d4abf0Sagc "Fuzzyness used during match" },
25363d4abf0Sagc { NULL }
25463d4abf0Sagc };
25563d4abf0Sagc
25663d4abf0Sagc static void
PyTreMatch_dealloc(TreMatchObject * self)25763d4abf0Sagc PyTreMatch_dealloc(TreMatchObject *self)
25863d4abf0Sagc {
25963d4abf0Sagc Py_XDECREF(self->targ);
26063d4abf0Sagc Py_XDECREF(self->fz);
26163d4abf0Sagc if (self->am.pmatch != NULL)
26263d4abf0Sagc PyMem_Del(self->am.pmatch);
26363d4abf0Sagc PyObject_Del(self);
26463d4abf0Sagc }
26563d4abf0Sagc
26663d4abf0Sagc static PySequenceMethods TreMatch_as_sequence_methods = {
26763d4abf0Sagc 0, /* sq_length */
26863d4abf0Sagc 0, /* sq_concat */
26963d4abf0Sagc 0, /* sq_repeat */
27063d4abf0Sagc PyTreMatch_groupi, /* sq_item */
27163d4abf0Sagc 0, /* sq_slice */
27263d4abf0Sagc 0, /* sq_ass_item */
27363d4abf0Sagc 0, /* sq_ass_slice */
27463d4abf0Sagc 0, /* sq_contains */
27563d4abf0Sagc 0, /* sq_inplace_concat */
27663d4abf0Sagc 0 /* sq_inplace_repeat */
27763d4abf0Sagc };
27863d4abf0Sagc
27963d4abf0Sagc static PyTypeObject TreMatchType = {
28063d4abf0Sagc PyObject_HEAD_INIT(NULL)
28163d4abf0Sagc 0, /* ob_size */
28263d4abf0Sagc TRE_MODULE ".Match", /* tp_name */
28363d4abf0Sagc sizeof(TreMatchObject), /* tp_basicsize */
28463d4abf0Sagc 0, /* tp_itemsize */
28563d4abf0Sagc /* methods */
28663d4abf0Sagc (destructor)PyTreMatch_dealloc, /* tp_dealloc */
28763d4abf0Sagc 0, /* tp_print */
28863d4abf0Sagc 0, /* tp_getattr */
28963d4abf0Sagc 0, /* tp_setattr */
29063d4abf0Sagc 0, /* tp_compare */
29163d4abf0Sagc 0, /* tp_repr */
29263d4abf0Sagc 0, /* tp_as_number */
29363d4abf0Sagc &TreMatch_as_sequence_methods, /* tp_as_sequence */
29463d4abf0Sagc 0, /* tp_as_mapping */
29563d4abf0Sagc 0, /* tp_hash */
29663d4abf0Sagc 0, /* tp_call */
29763d4abf0Sagc 0, /* tp_str */
29863d4abf0Sagc 0, /* tp_getattro */
29963d4abf0Sagc 0, /* tp_setattro */
30063d4abf0Sagc 0, /* tp_as_buffer */
30163d4abf0Sagc Py_TPFLAGS_DEFAULT, /* tp_flags */
30263d4abf0Sagc TRE_MODULE ".match object holds result of successful match", /* tp_doc */
30363d4abf0Sagc 0, /* tp_traverse */
30463d4abf0Sagc 0, /* tp_clear */
30563d4abf0Sagc 0, /* tp_richcompare */
30663d4abf0Sagc 0, /* tp_weaklistoffset */
30763d4abf0Sagc 0, /* tp_iter */
30863d4abf0Sagc 0, /* tp_iternext */
30963d4abf0Sagc TreMatch_methods, /* tp_methods */
31063d4abf0Sagc TreMatch_members /* tp_members */
31163d4abf0Sagc };
31263d4abf0Sagc
31363d4abf0Sagc static TreMatchObject *
newTreMatchObject(void)31463d4abf0Sagc newTreMatchObject(void)
31563d4abf0Sagc {
31663d4abf0Sagc TreMatchObject *self;
31763d4abf0Sagc
31863d4abf0Sagc self = PyObject_New(TreMatchObject, &TreMatchType);
31963d4abf0Sagc if (self == NULL)
32063d4abf0Sagc return NULL;
32163d4abf0Sagc memset(&self->am, '\0', sizeof(self->am));
32263d4abf0Sagc self->targ = NULL;
32363d4abf0Sagc self->fz = NULL;
32463d4abf0Sagc return self;
32563d4abf0Sagc }
32663d4abf0Sagc
32763d4abf0Sagc static PyObject *
PyTrePattern_search(TrePatternObject * self,PyObject * args)32863d4abf0Sagc PyTrePattern_search(TrePatternObject *self, PyObject *args)
32963d4abf0Sagc {
33063d4abf0Sagc PyObject *pstring;
33163d4abf0Sagc int eflags = 0;
33263d4abf0Sagc TreMatchObject *mo;
33363d4abf0Sagc TreFuzzynessObject *fz;
33463d4abf0Sagc size_t nsub;
33563d4abf0Sagc int rc;
33663d4abf0Sagc regmatch_t *pm;
33763d4abf0Sagc char *targ;
33863d4abf0Sagc size_t tlen;
33963d4abf0Sagc
340*1580a27bSrin if (PyTuple_Size(args) > 0 && PyUnicode_Check(PyTuple_GetItem(args, 0)))
341*1580a27bSrin {
342*1580a27bSrin if (!PyArg_ParseTuple(args, "UO!|i:search", &pstring, &TreFuzzynessType,
34363d4abf0Sagc &fz, &eflags))
34463d4abf0Sagc return NULL;
345*1580a27bSrin }
346*1580a27bSrin else
347*1580a27bSrin {
348*1580a27bSrin if (!PyArg_ParseTuple(args, "SO!|i:search", &pstring, &TreFuzzynessType,
349*1580a27bSrin &fz, &eflags))
350*1580a27bSrin return NULL;
351*1580a27bSrin }
35263d4abf0Sagc
35363d4abf0Sagc mo = newTreMatchObject();
35463d4abf0Sagc if (mo == NULL)
35563d4abf0Sagc return NULL;
35663d4abf0Sagc
35763d4abf0Sagc nsub = self->rgx.re_nsub + 1;
35863d4abf0Sagc pm = PyMem_New(regmatch_t, nsub);
359*1580a27bSrin if (!pm)
36063d4abf0Sagc {
361*1580a27bSrin Py_DECREF(mo);
362*1580a27bSrin return PyErr_NoMemory();
363*1580a27bSrin }
364*1580a27bSrin
36563d4abf0Sagc mo->am.nmatch = nsub;
36663d4abf0Sagc mo->am.pmatch = pm;
367*1580a27bSrin
368*1580a27bSrin if (PyUnicode_Check(pstring))
369*1580a27bSrin {
370*1580a27bSrin Py_ssize_t len = PyUnicode_GetSize(pstring);
371*1580a27bSrin wchar_t *buf = calloc(sizeof(wchar_t), len);
372*1580a27bSrin if(!buf)
373*1580a27bSrin {
374*1580a27bSrin Py_DECREF(mo);
375*1580a27bSrin return PyErr_NoMemory();
376*1580a27bSrin }
377*1580a27bSrin PyUnicode_AsWideChar(pstring, buf, len);
378*1580a27bSrin rc = tre_regawnexec(&self->rgx, buf, len, &mo->am, fz->ap, eflags);
379*1580a27bSrin free(buf);
38063d4abf0Sagc }
38163d4abf0Sagc else
38263d4abf0Sagc {
38363d4abf0Sagc targ = PyString_AsString(pstring);
38463d4abf0Sagc tlen = PyString_Size(pstring);
38563d4abf0Sagc
38663d4abf0Sagc rc = tre_reganexec(&self->rgx, targ, tlen, &mo->am, fz->ap, eflags);
387*1580a27bSrin }
38863d4abf0Sagc
38963d4abf0Sagc if (PyErr_Occurred())
39063d4abf0Sagc {
39163d4abf0Sagc Py_DECREF(mo);
39263d4abf0Sagc return NULL;
39363d4abf0Sagc }
39463d4abf0Sagc
39563d4abf0Sagc if (rc == REG_OK)
39663d4abf0Sagc {
39763d4abf0Sagc Py_INCREF(pstring);
39863d4abf0Sagc mo->targ = pstring;
39963d4abf0Sagc Py_INCREF(fz);
40063d4abf0Sagc mo->fz = fz;
40163d4abf0Sagc return (PyObject*)mo;
40263d4abf0Sagc }
40363d4abf0Sagc
40463d4abf0Sagc if (rc == REG_NOMATCH)
40563d4abf0Sagc {
40663d4abf0Sagc Py_DECREF(mo);
40763d4abf0Sagc Py_INCREF(Py_None);
40863d4abf0Sagc return Py_None;
40963d4abf0Sagc }
41063d4abf0Sagc _set_tre_err(rc, &self->rgx);
41163d4abf0Sagc Py_DECREF(mo);
41263d4abf0Sagc return NULL;
41363d4abf0Sagc }
41463d4abf0Sagc
41563d4abf0Sagc static PyMethodDef TrePattern_methods[] = {
41663d4abf0Sagc { "search", (PyCFunction)PyTrePattern_search, METH_VARARGS,
417*1580a27bSrin "try to search in the given string, returning " TRE_MODULE ".match object "
41863d4abf0Sagc "or None on failure" },
41963d4abf0Sagc {NULL, NULL}
42063d4abf0Sagc };
42163d4abf0Sagc
42263d4abf0Sagc static PyMemberDef TrePattern_members[] = {
42363d4abf0Sagc { "nsub", T_INT, offsetof(TrePatternObject, rgx.re_nsub), READONLY,
42463d4abf0Sagc "Number of parenthesized subexpressions in regex" },
42563d4abf0Sagc { NULL }
42663d4abf0Sagc };
42763d4abf0Sagc
42863d4abf0Sagc static void
PyTrePattern_dealloc(TrePatternObject * self)42963d4abf0Sagc PyTrePattern_dealloc(TrePatternObject *self)
43063d4abf0Sagc {
43163d4abf0Sagc tre_regfree(&self->rgx);
43263d4abf0Sagc PyObject_Del(self);
43363d4abf0Sagc }
43463d4abf0Sagc
43563d4abf0Sagc static PyTypeObject TrePatternType = {
43663d4abf0Sagc PyObject_HEAD_INIT(NULL)
43763d4abf0Sagc 0, /* ob_size */
43863d4abf0Sagc TRE_MODULE ".Pattern", /* tp_name */
43963d4abf0Sagc sizeof(TrePatternObject), /* tp_basicsize */
44063d4abf0Sagc 0, /* tp_itemsize */
44163d4abf0Sagc /* methods */
44263d4abf0Sagc (destructor)PyTrePattern_dealloc, /*tp_dealloc*/
44363d4abf0Sagc 0, /* tp_print */
44463d4abf0Sagc 0, /* tp_getattr */
44563d4abf0Sagc 0, /* tp_setattr */
44663d4abf0Sagc 0, /* tp_compare */
44763d4abf0Sagc 0, /* tp_repr */
44863d4abf0Sagc 0, /* tp_as_number */
44963d4abf0Sagc 0, /* tp_as_sequence */
45063d4abf0Sagc 0, /* tp_as_mapping */
45163d4abf0Sagc 0, /* tp_hash */
45263d4abf0Sagc 0, /* tp_call */
45363d4abf0Sagc 0, /* tp_str */
45463d4abf0Sagc 0, /* tp_getattro */
45563d4abf0Sagc 0, /* tp_setattro */
45663d4abf0Sagc 0, /* tp_as_buffer */
45763d4abf0Sagc Py_TPFLAGS_DEFAULT, /* tp_flags */
45863d4abf0Sagc TRE_MODULE ".pattern object holds compiled tre regex", /* tp_doc */
45963d4abf0Sagc 0, /* tp_traverse */
46063d4abf0Sagc 0, /* tp_clear */
46163d4abf0Sagc 0, /* tp_richcompare */
46263d4abf0Sagc 0, /* tp_weaklistoffset */
46363d4abf0Sagc 0, /* tp_iter */
46463d4abf0Sagc 0, /* tp_iternext */
46563d4abf0Sagc TrePattern_methods, /* tp_methods */
46663d4abf0Sagc TrePattern_members /* tp_members */
46763d4abf0Sagc };
46863d4abf0Sagc
46963d4abf0Sagc static TrePatternObject *
newTrePatternObject()470*1580a27bSrin newTrePatternObject()
47163d4abf0Sagc {
47263d4abf0Sagc TrePatternObject *self;
47363d4abf0Sagc
47463d4abf0Sagc self = PyObject_New(TrePatternObject, &TrePatternType);
47563d4abf0Sagc if (self == NULL)
47663d4abf0Sagc return NULL;
47763d4abf0Sagc self->flags = 0;
47863d4abf0Sagc return self;
47963d4abf0Sagc }
48063d4abf0Sagc
48163d4abf0Sagc static PyObject *
PyTre_ncompile(PyObject * self,PyObject * args)48263d4abf0Sagc PyTre_ncompile(PyObject *self, PyObject *args)
48363d4abf0Sagc {
48463d4abf0Sagc TrePatternObject *rv;
485*1580a27bSrin PyUnicodeObject *upattern = NULL;
486*1580a27bSrin char *pattern = NULL;
48763d4abf0Sagc int pattlen;
48863d4abf0Sagc int cflags = 0;
48963d4abf0Sagc int rc;
49063d4abf0Sagc
491*1580a27bSrin if (PyTuple_Size(args) > 0 && PyUnicode_Check(PyTuple_GetItem(args, 0)))
492*1580a27bSrin {
493*1580a27bSrin if (!PyArg_ParseTuple(args, "U|i:compile", &upattern, &cflags))
494*1580a27bSrin return NULL;
495*1580a27bSrin }
496*1580a27bSrin else
497*1580a27bSrin {
49863d4abf0Sagc if (!PyArg_ParseTuple(args, "s#|i:compile", &pattern, &pattlen, &cflags))
49963d4abf0Sagc return NULL;
500*1580a27bSrin }
50163d4abf0Sagc
502*1580a27bSrin rv = newTrePatternObject();
50363d4abf0Sagc if (rv == NULL)
50463d4abf0Sagc return NULL;
50563d4abf0Sagc
506*1580a27bSrin if (upattern != NULL)
507*1580a27bSrin {
508*1580a27bSrin Py_ssize_t len = PyUnicode_GetSize(upattern);
509*1580a27bSrin wchar_t *buf = calloc(sizeof(wchar_t), len);
510*1580a27bSrin if(!buf)
511*1580a27bSrin {
512*1580a27bSrin Py_DECREF(rv);
513*1580a27bSrin return PyErr_NoMemory();
514*1580a27bSrin }
515*1580a27bSrin PyUnicode_AsWideChar(upattern, buf, len);
516*1580a27bSrin rc = tre_regwncomp(&rv->rgx, buf, len, cflags);
517*1580a27bSrin free(buf);
518*1580a27bSrin }
519*1580a27bSrin else
52063d4abf0Sagc rc = tre_regncomp(&rv->rgx, (char*)pattern, pattlen, cflags);
521*1580a27bSrin
52263d4abf0Sagc if (rc != REG_OK)
52363d4abf0Sagc {
52463d4abf0Sagc if (!PyErr_Occurred())
52563d4abf0Sagc _set_tre_err(rc, &rv->rgx);
52663d4abf0Sagc Py_DECREF(rv);
52763d4abf0Sagc return NULL;
52863d4abf0Sagc }
52963d4abf0Sagc rv->flags = cflags;
53063d4abf0Sagc return (PyObject*)rv;
53163d4abf0Sagc }
53263d4abf0Sagc
53363d4abf0Sagc static PyMethodDef tre_methods[] = {
53463d4abf0Sagc { "compile", PyTre_ncompile, METH_VARARGS,
53563d4abf0Sagc "Compile a regular expression pattern, returning a "
53663d4abf0Sagc TRE_MODULE ".pattern object" },
53763d4abf0Sagc { NULL, NULL }
53863d4abf0Sagc };
53963d4abf0Sagc
/*
  Module docstring passed to Py_InitModule3().  Declared as a pointer to
  const because it refers to a string literal, which must not be written
  through.
*/
static const char *tre_doc =
  "Python module for TRE library\n\nModule exports "
  "the only function: compile";
54363d4abf0Sagc
54463d4abf0Sagc static struct _tre_flags {
54563d4abf0Sagc char *name;
54663d4abf0Sagc int val;
54763d4abf0Sagc } tre_flags[] = {
54863d4abf0Sagc { "EXTENDED", REG_EXTENDED },
54963d4abf0Sagc { "ICASE", REG_ICASE },
55063d4abf0Sagc { "NEWLINE", REG_NEWLINE },
55163d4abf0Sagc { "NOSUB", REG_NOSUB },
55263d4abf0Sagc { "LITERAL", REG_LITERAL },
55363d4abf0Sagc
55463d4abf0Sagc { "NOTBOL", REG_NOTBOL },
55563d4abf0Sagc { "NOTEOL", REG_NOTEOL },
55663d4abf0Sagc { NULL, 0 }
55763d4abf0Sagc };
55863d4abf0Sagc
55963d4abf0Sagc PyMODINIT_FUNC
inittre(void)56063d4abf0Sagc inittre(void)
56163d4abf0Sagc {
56263d4abf0Sagc PyObject *m;
56363d4abf0Sagc struct _tre_flags *fp;
56463d4abf0Sagc
56563d4abf0Sagc if (PyType_Ready(&TreFuzzynessType) < 0)
56663d4abf0Sagc return;
56763d4abf0Sagc if (PyType_Ready(&TreMatchType) < 0)
56863d4abf0Sagc return;
56963d4abf0Sagc if (PyType_Ready(&TrePatternType) < 0)
57063d4abf0Sagc return;
57163d4abf0Sagc
57263d4abf0Sagc /* Create the module and add the functions */
57363d4abf0Sagc m = Py_InitModule3(TRE_MODULE, tre_methods, tre_doc);
57463d4abf0Sagc if (m == NULL)
57563d4abf0Sagc return;
57663d4abf0Sagc
57763d4abf0Sagc Py_INCREF(&TreFuzzynessType);
57863d4abf0Sagc if (PyModule_AddObject(m, "Fuzzyness", (PyObject*)&TreFuzzynessType) < 0)
57963d4abf0Sagc return;
58063d4abf0Sagc Py_INCREF(&TreMatchType);
58163d4abf0Sagc if (PyModule_AddObject(m, "Match", (PyObject*)&TreMatchType) < 0)
58263d4abf0Sagc return;
58363d4abf0Sagc Py_INCREF(&TrePatternType);
58463d4abf0Sagc if (PyModule_AddObject(m, "Pattern", (PyObject*)&TrePatternType) < 0)
58563d4abf0Sagc return;
58663d4abf0Sagc ErrorObject = PyErr_NewException(TRE_MODULE ".Error", NULL, NULL);
58763d4abf0Sagc Py_INCREF(ErrorObject);
58863d4abf0Sagc if (PyModule_AddObject(m, "Error", ErrorObject) < 0)
58963d4abf0Sagc return;
59063d4abf0Sagc
59163d4abf0Sagc /* Insert the flags */
59263d4abf0Sagc for (fp = tre_flags; fp->name != NULL; fp++)
59363d4abf0Sagc if (PyModule_AddIntConstant(m, fp->name, fp->val) < 0)
59463d4abf0Sagc return;
59563d4abf0Sagc }
596