Cython __cinit__

cdef classes are awesome if you want lightweight data structures, for example, when you need millions of them. These cython classes have some special methods, the most basic of which is the __cinit__ method which is the analog of the __init__ method of regular Python classes.

The __cinit__ method is practical because it lets you initialize your Cython class transparently from plain Python. The downside is that we are now back to type checking and converting when we initialize, since we will accept Python variables. This can add perceptible overhead to object creation.

Consider the following class definition

# Extension type with five public C int fields, filled in by __cinit__.
cdef class A:
  cdef public:
    # Stored as C ints; `public` also exposes them as Python attributes.
    int a, b, c, d, e

  # Takes five arguments typed as C int and stores each one in the field
  # of the same name.
  def __cinit__(self, int a, int b, int c, int d, int e):
    self.a, self.b, self.c, self.d, self.e = a, b, c, d, e

  # Formats only the first two fields, as '(a, b)'; c, d and e are not shown.
  def __repr__(self):
    return '({:d}, {:d})'.format(self.a, self.b)

If we run the following function

def one_million_objects():
  """Return a list of 1,000,000 A instances built through A's constructor."""
  cdef:
    int n
    list x = []
  for n in xrange(1000000):
    # `a` is not declared in the cdef block above. The five C int
    # expressions are handed to A(...) as ordinary call arguments.
    a = A(n, n + 1, n + 2, n + 3, n + 4)
    x += [a]
  return x

and profile it, we obtain:

ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1    0.235    0.235    0.235    0.235 {v1.one_million_objects}

If you peek at the translated C code (which, admittedly, is pretty ugly) you will find that the relevant part of the code goes:

 for (__pyx_t_2 = 0; __pyx_t_2 < 1000000; __pyx_t_2+=1) {
    __pyx_v_n = __pyx_t_2;

    /* "v1.pyx":17
 *     list x = []
 *   for n in xrange(1000000):
 *     a = A(n, n + 1, n + 2, n + 3, n + 4)             # <<<<<<<<<<<<<<
 *     x += [a]
 * 
 */
    __pyx_t_1 = __Pyx_PyInt_From_unsigned_long(__pyx_v_n); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __pyx_t_3 = __Pyx_PyInt_From_unsigned_long((__pyx_v_n + 1)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_4 = __Pyx_PyInt_From_unsigned_long((__pyx_v_n + 2)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_5 = __Pyx_PyInt_From_unsigned_long((__pyx_v_n + 3)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_5);
    __pyx_t_6 = __Pyx_PyInt_From_unsigned_long((__pyx_v_n + 4)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_6);
    __pyx_t_7 = PyTuple_New(5); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_7);
    PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1);
    __Pyx_GIVEREF(__pyx_t_1);
    PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_3);
    __Pyx_GIVEREF(__pyx_t_3);
    PyTuple_SET_ITEM(__pyx_t_7, 2, __pyx_t_4);
    __Pyx_GIVEREF(__pyx_t_4);
    PyTuple_SET_ITEM(__pyx_t_7, 3, __pyx_t_5);
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_7, 4, __pyx_t_6);
    __Pyx_GIVEREF(__pyx_t_6);
    __pyx_t_1 = 0;
    __pyx_t_3 = 0;
    __pyx_t_4 = 0;
    __pyx_t_5 = 0;
    __pyx_t_6 = 0;
    __pyx_t_6 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_2v1_A)), __pyx_t_7, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_6);
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_XDECREF_SET(__pyx_v_a, ((struct __pyx_obj_2v1_A *)__pyx_t_6));
    __pyx_t_6 = 0;

Our simple integers are being converted into Python objects (to build the argument tuple for the constructor call) and then back again into C ints inside __cinit__.

If we omit the __cinit__ definition and manually initialize the elements of the structure:

# Extension type with five public C int fields and no __cinit__; the
# fields are assigned by the caller after construction.
cdef class A:
  cdef public:
    # Stored as C ints; `public` also exposes them as Python attributes.
    int a, b, c, d, e

  # Formats only the first two fields, as '(a, b)'; c, d and e are not shown.
  def __repr__(self):
    return '({:d}, {:d})'.format(self.a, self.b)

With our million objects function being written as:

def one_million_objects():
  """Return a list of 1,000,000 A instances whose fields are set after construction."""
  cdef:
    int n
    list x = []
    # Declaring `a` with type A lets the field assignments below be
    # written against the typed object.
    A a
  for n in xrange(1000000):
    # Construct with no arguments, then fill in the five fields directly.
    a = A()
    a.a, a.b, a.c, a.d, a.e = n, n + 1, n + 2, n + 3, n + 4
    x += [a]
  return x

and profile it, we obtain:

ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1    0.146    0.146    0.146    0.146 {v2.one_million_objects}

That is a pretty big saving: 0.146 s instead of 0.235 s, roughly 38% less time. As you will have guessed, the saving comes from the fact that we no longer make an expensive round trip through Python objects:

for (__pyx_t_2 = 0; __pyx_t_2 < 1000000; __pyx_t_2+=1) {
    __pyx_v_n = __pyx_t_2;

    /* "v2.pyx":15
 *     A a
 *   for n in xrange(1000000):
 *     a = A()             # <<<<<<<<<<<<<<
 *     a.a, a.b, a.c, a.d, a.e = n, n + 1, n + 2, n + 3, n + 4
 *     x += [a]
 */
    __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_2v2_A)), __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_XDECREF_SET(__pyx_v_a, ((struct __pyx_obj_2v2_A *)__pyx_t_1));
    __pyx_t_1 = 0;

    /* "v2.pyx":16
 *   for n in xrange(1000000):
 *     a = A()
 *     a.a, a.b, a.c, a.d, a.e = n, n + 1, n + 2, n + 3, n + 4             # <<<<<<<<<<<<<<
 *     x += [a]
 * 
 */
    __pyx_t_3 = __pyx_v_n;
    __pyx_t_4 = (__pyx_v_n + 1);
    __pyx_t_5 = (__pyx_v_n + 2);
    __pyx_t_6 = (__pyx_v_n + 3);
    __pyx_t_7 = (__pyx_v_n + 4);
    __pyx_v_a->a = __pyx_t_3;
    __pyx_v_a->b = __pyx_t_4;
    __pyx_v_a->c = __pyx_t_5;
    __pyx_v_a->d = __pyx_t_6;
    __pyx_v_a->e = __pyx_t_7;

Note that we cannot declare the __cinit__ function as a cpdef function: Cython requires special methods such as __cinit__ to be declared with def.

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s