25% better performance than CPython
"maybe like 10% runtime", hot attr
— Kevin Modzelewski, Pyston talk, November 24, 2015
LST = list(map(''.join, product('abc', repeat=17)))
def foo():
    """Upper-case every string in LST by handing ``str.upper`` to ``map``."""
    to_upper = str.upper
    return map(to_upper, LST)
def bar():
    """Upper-case every string in LST with an explicit loop and ``append``."""
    upper_cased = []
    for text in LST:
        upper_cased.append(text.upper())
    return upper_cased
def baz():
    """Upper-case every string in LST with a list comprehension."""
    upper_cased = [text.upper() for text in LST]
    return upper_cased
# Typed variant of add_two: the decorator receives the explicit signature
# int32(int32, int32) together with nopython=True and nogil=True.
# NOTE(review): `jit` and `int32` are presumably numba's -- confirm against the imports.
@jit(int32(int32, int32), nopython=True, nogil=True)
def add_two(a, b):
acc = 0
i = 0
# Adds a + b to acc on each of 1000 iterations, so (ignoring any integer
# wrap-around) the returned value is 1000 * (a + b).
while i < 1000:
acc += a + b
i += 1
return acc
12 SETUP_LOOP 40 (to 55)
15 LOAD_FAST 3 (i)
18 LOAD_CONST 2 (1000)
21 COMPARE_OP 0 (<)
24 POP_JUMP_IF_FALSE 54
27 LOAD_FAST 2 (acc)
30 LOAD_FAST 0 (a)
33 LOAD_FAST 1 (b)
36 BINARY_ADD
37 INPLACE_ADD
38 STORE_FAST 2 (acc)
41 LOAD_FAST 3 (i)
44 LOAD_CONST 3 (1)
47 INPLACE_ADD
48 STORE_FAST 3 (i)
51 JUMP_ABSOLUTE 15
54 POP_BLOCK
___main__.add_two$1.int32.int32:
addl %r8d, %ecx
imull $1000, %ecx, %eax
movl %eax, (%rdi)
xorl %eax, %eax
retq
add_two.inspect_asm().values()[0].decode('string_escape')
_wrapper.__main__.add_two$1.int32.int32:
movq %rdi, %r14
movabsq $_.const.add_two, %r10
movabsq $_PyArg_UnpackTuple, %r11
...
movabsq $_PyNumber_Long, %r15
callq *%r15
movq %rax, %rbx
xorl %r14d, %r14d
testq %rbx, %rbx
je LBB1_8
movabsq $_PyLong_AsLongLong, %rax
...
+ ~77 Python C API instructions
Rich Wareham, "Creating a toy language with the Python, LLVM and the IPython web notebook" https://www.youtube.com/watch?v=G78cTmgeUxI
// sample.c
void initsample(void)
{
Py_InitModule("sample", NULL);
}
/* Forward declaration so the method table below can reference add_two. */
static PyObject * add_two(PyObject * self, PyObject * args);
/* Method table handed to Py_InitModule(): one entry per exported function. */
static PyMethodDef SampleMethods[] = {
/* Python-visible name, C implementation, calling-convention flags, doc string (empty here). */
{"add_two", add_two, METH_VARARGS, ""},
/* All-NULL/zero entry closing the table. */
{NULL, NULL, 0, NULL}
};
void initsample(void) {
Py_InitModule("sample", SampleMethods);
}
/*
 * add_two(a, b): parse two C ints from the argument tuple, add (a + b) to an
 * accumulator 1000 times and return the accumulator as a Python int.
 *
 * Returns NULL with an exception set when the arguments cannot be parsed.
 */
PyObject * add_two(PyObject * self, PyObject * args) {
    int a, b, acc = 0;
    /*
     * On failure PyArg_ParseTuple has already set a descriptive exception
     * (wrong argument count, wrong type, ...). Just propagate it; setting a
     * bare ValueError here would discard that message.
     */
    if (!PyArg_ParseTuple(args, "ii", &a, &b)) {
        return NULL;
    }
    for (int i = 0; i < 1000; i++)
        acc += a + b;
    return Py_BuildValue("i", acc);
}
import sample
IMPORT_NAME 0 (sample)
STORE_FAST 0 (sample)
// ceval.c
...
w = GETITEM(names, oparg);
v = PyDict_GetItemString(f->f_builtins, "__import__");
...
w = PyTuple_Pack(4, w,
f->f_globals,
f->f_locals == NULL ? Py_None : f->f_locals,
v);
...
x = PyEval_CallObject(v, w);
...
SET_TOP(x); if (x != NULL) DISPATCH();
...
/*
 * Look up the init function of a dynamically loaded extension module.
 *
 * Formats the symbol name "init<shortname>" (the format caps shortname at
 * 200 characters) into a 258-byte buffer and passes it, together with the
 * program name and the module's pathname, to dl_loadmod(), whose result is
 * returned. fqname and fp are not used in this excerpt.
 */
dl_funcptr _PyImport_GetDynLoadFunc(const char *fqname,
const char *shortname,
const char *pathname, FILE *fp)
{
char funcname[258];
/* This is why the extension's entry point must be called init<module>. */
PyOS_snprintf(funcname, sizeof(funcname),
"init%.200s", shortname);
return dl_loadmod(Py_GetProgramName(),
pathname, funcname);
}
def add_two(a, b):
    """Return the result of adding ``a + b`` to an accumulator 1000 times.

    For numeric arguments this equals ``1000 * (a + b)``.
    """
    i = acc = 0
    while i < 1000:
        acc += a + b
        # Advance the counter; without this the loop condition never changes
        # and the function spins forever.
        i += 1
    return acc
$ cat sample.c | wc -l
1906
__pyx_t_2 = PyNumber_Add(__pyx_v_a, __pyx_v_b);
if (unlikely(!__pyx_t_2)) {
__pyx_filename = __pyx_f[0];
__pyx_lineno = 14; __pyx_clineno = __LINE__;
goto __pyx_L1_error;
}
__Pyx_GOTREF(__pyx_t_2);
__pyx_t_3 = PyNumber_InPlaceAdd(__pyx_v_acc, __pyx_t_2);
if (unlikely(!__pyx_t_3)) {
__pyx_filename = __pyx_f[0]; __pyx_lineno = 14;
__pyx_clineno = __LINE__; goto __pyx_L1_error;
}
/* Add (a + b) to a 32-bit accumulator 1000 times and return the total. */
int cadd_two(int a, int b) {
    int32_t total = 0;
    int remaining = 1000;
    while (remaining-- > 0) {
        total += a + b;
    }
    return total;
}
# Declare the C helper from sample_func.h so Cython can call it directly.
cdef extern from "sample_func.h":
    int cadd_two(int a, int b)


def add_two(a, b):
    """Python-visible wrapper that forwards both arguments to the C ``cadd_two``."""
    return cadd_two(a, b)
cythonize("sample.pyx", sources=[ 'sample_func.c' ])
__pyx_t_1 = __Pyx_PyInt_As_int32_t(__pyx_v_a); if (unlikely((__pyx_t_1 == (int32_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_
__pyx_t_2 = __Pyx_PyInt_As_int32_t(__pyx_v_b); if (unlikely((__pyx_t_2 == (int32_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_
__pyx_t_3 = __Pyx_PyInt_From_int32_t(cadd_two(__pyx_t_1, __pyx_t_2)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; _
#[no_mangle]
pub extern fn initsample() {
unsafe {
Py_InitModule4_64(&SAMPLE[0] as *const _,
&METHODS[0] as *const _,
0 as *const _,
0,
PYTHON_API_VERSION);
};
}
/// Signature of a C-ABI function that can be stored in `PyMethodDef::ml_meth`:
/// it receives two raw pointers (`slf`, `args`) and returns a raw pointer.
/// NOTE(review): `*mut isize` appears to stand in for `PyObject *` here.
type PyCFunction = unsafe extern "C"
fn (slf: *mut isize, args: *mut isize) -> *mut isize;
/// One entry of a module's method table, laid out with `#[repr(C)]` so it can
/// be handed to the C API as `*const PyMethodDef`.
#[repr(C)]
struct PyMethodDef {
/// Pointer to the method name as a C string.
pub ml_name: *const i8,
/// The function implementing the method; `None` where no function is set.
pub ml_meth: Option<PyCFunction>,
/// Calling-convention flags.
pub ml_flags: i32,
/// Pointer to the doc string as a C string.
pub ml_doc: *const i8,
}
// Raw-pointer fields make the struct `!Sync` by default; this impl is what
// allows `PyMethodDef` values to be stored in a shared static table.
// NOTE(review): soundness presumably relies on the entries never being
// mutated after construction -- confirm.
unsafe impl Sync for PyMethodDef { }
lazy_static! {
    /// Method table for the `sample` module, mirroring the C `SampleMethods`
    /// array: one entry for `add_two` followed by an all-null sentinel.
    static ref METHODS: Vec<PyMethodDef> = {
        vec![
            PyMethodDef {
                ml_name: &ADD_TWO[0] as *const _,
                ml_meth: Some(add_two),
                // METH_VARARGS: the function receives its arguments as a tuple.
                ml_flags: 0x0001,
                // No doc string.
                ml_doc: std::ptr::null(),
            },
            // Sentinel entry: the C API walks the table until it finds a
            // NULL `ml_name`, so the table must end with one.
            PyMethodDef {
                ml_name: std::ptr::null(),
                ml_meth: None,
                ml_flags: 0,
                ml_doc: std::ptr::null(),
            },
        ]
    };
}
#[link(name="python2.7")]
extern {
fn Py_InitModule4_64(name: *const i8,
methods: *const PyMethodDef,
doc: *const i8, s: isize, apiver: usize) -> *mut isize;
fn PyArg_ParseTuple(arg1: *mut isize,
arg2: *const i8, ...) -> isize;
fn Py_BuildValue(arg1: *const i8, ...) -> *mut isize;
}
/// `add_two(a, b)` as a raw C-API method: parses two 32-bit integers from
/// `args`, adds `a + b` to an accumulator 1000 times and returns the result
/// built with `Py_BuildValue`.
///
/// Returns a null pointer when argument parsing fails.
///
/// # Safety
///
/// Intended to be called by the Python runtime through the method table.
/// NOTE(review): presumably `args` must point to a valid argument tuple --
/// confirm against the C API contract.
// `slf` is required by the `PyCFunction` signature but is never read.
#[allow(unused_variables)]
unsafe extern "C" fn add_two(slf: *mut isize,
                             args: *mut isize) -> *mut isize {
    let mut a: i32 = 0;
    let mut b: i32 = 0;
    // PyArg_ParseTuple writes the parsed values through these pointers, so
    // they must be derived from mutable borrows; writing through a pointer
    // obtained from `&a` is undefined behaviour.
    if PyArg_ParseTuple(args,
                        &II_ARGS[0] as *const _,
                        &mut a as *mut i32, &mut b as *mut i32) == 0 {
        return std::ptr::null_mut();
    }
    // Wrapping arithmetic: an overflow panic must not unwind out of an
    // `extern "C"` function, and wrapping matches what the release build did.
    let step = a.wrapping_add(b);
    let mut acc: i32 = 0;
    for _ in 0..1000 {
        acc = acc.wrapping_add(step);
    }
    Py_BuildValue(&I_ARGS[0] as *const _, acc)
}
// Iterator form of the accumulation: 1000 copies of `a + b`, folded with `+`
// starting from zero.
let acc: i32 = std::iter::repeat(a + b)
    .take(1000)
    .fold(0, |total, term| total + term);
__ZN7add_two20h391818698d43ab0ffcaE:
...
callq 0x7a002 ## symbol stub for: _PyArg_ParseTuple
testq %rax, %rax
je 0x14e3
movl -0x8(%rbp), %eax
addl -0x4(%rbp), %eax
imull $0x3e8, %eax, %esi ## imm = 0x3E8
leaq _ref5540(%rip), %rdi ## literal pool for: "h"
...
https://github.com/dgrunwald/rust-cpython
#![feature(slice_patterns)] #[macro_use] extern crate cpython;
use cpython::{PyObject, PyResult, Python, PyTuple, PyDict};
// Module initialiser for `sample`, generated by the `py_module_initializer!`
// macro; the closure receives `py` and the module object `m`.
py_module_initializer!(sample, |py, m| {
// Expose the Rust `add_two` function under the Python name "add_two",
// bailing out early via `try!` if registration fails.
try!(m.add(py, "add_two", py_fn!(add_two))); Ok(())
});
/// `add_two(a, b)` on top of rust-cpython: when called with exactly two
/// positional arguments, adds `a + b` to an accumulator 1000 times and returns
/// it; with any other argument count it returns Python `None`.
fn add_two(p: Python, args: &PyTuple, kw: Option<&PyDict>) -> PyResult<PyObject> {
    match args.as_slice() {
        [a, b] => {
            // Must be `mut`: the loop below updates it in place.
            let mut acc: i32 = 0;
            // NOTE(review): `value(p)` / `to_py_object()` are kept as written on
            // the slide; check them against the rust-cpython version in use.
            for _ in 0..1000 {
                acc += a.value(p) + b.value(p);
            }
            Ok(acc.to_py_object())
        },
        // The interpreter token is the parameter `p`; there is no `py` in scope here.
        _ => Ok(p.None()),
    }
}