Friday, April 17, 2009

shparse

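Wanting to learn the basics of writing Python extension modules in C, I started with this simple function. All it does is split a string into a list of "arguments" using shell-style rules: words are separated by spaces, and backslashes, single quotes, and double quotes work as escapes and quoting.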

shparse.c

#include <Python.h>
/* Initial length of the parser's scratch buffer, and the step by which its
 * tracked length is increased whenever it fills up (see shparse_parse). */
#define INCS 64

/*
 * Parser state flags, combined as a bit mask while scanning the input.
 */
enum EMODE {
    EMODE_ESC   = 1 << 0,   /* previous character was a backslash */
    EMODE_QUOT  = 1 << 1,   /* inside a '...' section */
    EMODE_DQUOT = 1 << 2    /* inside a "..." section */
};


static PyObject * shparse_parse(PyObject * self, PyObject * args) {
const Py_UNICODE * cmdstring;
size_t i, x = 0, cplen;
char mode = 0;
PyObject * ret;
Py_UNICODE c, cp[4];

Py_UNICODE * buff = calloc(INCS, sizeof(Py_UNICODE)), * tmp;
size_t bufflen = INCS;



if(!PyArg_ParseTuple(args, "u", &cmdstring))
return NULL;

ret = PyList_New(0);
for(i = 0; cmdstring[i]; i++) {
c = cmdstring[i];
cp[0] = 0; cp[1] = 0; cp[2] = 0; cp[3] = 0; cplen = 0;
if(mode & EMODE_ESC) {
cp[0] = c;
cplen = 1;
mode &= ~EMODE_ESC;
}
else if(mode & EMODE_QUOT) {
switch(c) {
case '\'':
mode &= ~EMODE_QUOT;
break;
default:
cp[0] = c;
cplen = 1;
break;
}
}
else if(mode & EMODE_DQUOT) {
switch(c) {
case '\\':
mode |= EMODE_ESC;
break;
case '"':
mode &= ~EMODE_DQUOT;
break;
default:
cp[0] = c;
cplen = 1;
break;
}
}
else {
switch(c) {
case '\\':
mode |= EMODE_ESC;
break;
case '\'':
mode |= EMODE_QUOT;
break;
case '"':
mode |= EMODE_DQUOT;
break;
case ' ':
if(x > 0) {
x = 0;
PyList_Append(ret, Py_BuildValue("u", buff));
}
break;
default:
cp[0] = c;
cplen = 1;
break;
}
}


if(cp[0]) {
while(x + cplen + 1 >= bufflen) {
bufflen += INCS;
buff = realloc(buff, bufflen);
}
buff[x++] = cp[0];
if(cplen >= 2) {
buff[x++] = cp[1];
if(cplen >= 3) {
buff[x++] = cp[2];
if(cplen == 4)
buff[x++] = cp[2];
else
buff[x] = '\0';
}
else
buff[x] = '\0';
}
else
buff[x] = '\0';
}
}

if(x > 0)
PyList_Append(ret, Py_BuildValue("u", buff));
free(buff);


return ret;
}

static PyMethodDef Methods[] = {
{"parse", shparse_parse, METH_VARARGS, "Parse an input string."},
{NULL, NULL, 0, NULL}
};


/* Module initialisation entry point: registers the "shparse" module with its
 * method table. */
PyMODINIT_FUNC initshparse(void)
{
    /* The returned module object is not needed here. */
    (void)Py_InitModule("shparse", Methods);
}

It could be quite useful, and would most definitely be faster than equivalent Python code. By the way, the Python/C API reference is quite good.

No comments: