Mercurial > python-compiler.rs
changeset 0:211b0df72e64
Hello world!
author | Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> |
---|---|
date | Sun, 29 May 2016 19:15:02 +0100 |
parents | |
children | b90e49ab734b |
files | .hgignore Cargo.lock Cargo.toml example/global.py example/simple_arith.py example/test.py src/ast_convert.rs src/ast_dump.rs src/ast_rewrite.rs src/ast_type.rs src/main.rs src/python_ast.rs src/python_dump.rs src/python_parse.rs src/python_tb.rs |
diffstat | 15 files changed, 1416 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,210 @@ +[root] +name = "python-compiler" +version = "0.0.1" +dependencies = [ + "cpython 0.0.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "abort_on_panic" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "aho-corasick" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cpython" +version = "0.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "abort_on_panic 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "interpolate_idents 0.0.10 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", + "num 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "python3-sys 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "interpolate_idents" +version = "0.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-bigint 
0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-complex 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-iter 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-rational 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-bigint" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-complex" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-integer" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-iter" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-rational" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-bigint 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-integer 0.1.32 
(registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "python3-sys" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.66 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.1.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rustc-serialize" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "thread-id" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unreachable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +
new file mode 100644 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "python-compiler" +version = "0.0.1" +authors = ["Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>"] + +[dependencies] +cpython = "0.0"
# example/global.py
# Compiler input fixture: rebinding a module-level name from inside a
# function through the `global` statement.

value = 2


def function():
    """Rebind the module-level `value` to 3."""
    # Without this declaration the assignment below would create a local.
    global value
    value = 3


if __name__ == '__main__':
    print(value)  # prints 2
    function()
    print(value)  # prints 3
# example/simple_arith.py
# Compiler input fixture: plain assignments, a call to the `int` builtin and
# one addition.

a = 3
b = a
c = b + int(a)
print(c)  # prints 6
# example/test.py
# Compiler input fixture: a two-argument function definition, a `__main__`
# guard and a call whose second argument is itself an addition.

value = 2


def function(a, b):
    """Return the sum of `a` and `b`."""
    return a + b


if __name__ == '__main__':
    print(function(value, 2 + 2))  # prints 6
new file mode 100644 --- /dev/null +++ b/src/ast_convert.rs @@ -0,0 +1,366 @@ +use python_ast::{Module, Statement, Expr, BinOp}; + +use cpython::{Python, PyObject}; +use cpython::ObjectProtocol; //for call method + +fn parse_expr_vec(py: Python, ast: PyObject) -> Vec<Expr> { + let builtins_module = py.import("builtins").unwrap(); + let isinstance = builtins_module.get(py, "isinstance").unwrap(); + + let is_instance = |object: &PyObject, type_: &PyObject| { + return isinstance.call(py, (object, type_), None).unwrap().is_true(py).unwrap(); + }; + + let ast_module = py.import("ast").unwrap(); + let ast_type = ast_module.get(py, "AST").unwrap(); + let arguments_type = ast_module.get(py, "arguments").unwrap(); + + assert!(is_instance(&ast, &ast_type)); + + if is_instance(&ast, &arguments_type) { + let args = ast.getattr(py, "args").unwrap(); + let mut arguments = vec!(); + for arg in args.iter(py).unwrap() { + let arg = parse_expr(py, arg.unwrap()); + arguments.push(arg); + } + arguments + } else { + vec!(Expr::Error) + } +} + +fn parse_binop(py: Python, ast: PyObject) -> BinOp { + let builtins_module = py.import("builtins").unwrap(); + let isinstance = builtins_module.get(py, "isinstance").unwrap(); + + let is_instance = |object: &PyObject, type_: &PyObject| { + return isinstance.call(py, (object, type_), None).unwrap().is_true(py).unwrap(); + }; + + let ast_module = py.import("ast").unwrap(); + let ast_type = ast_module.get(py, "AST").unwrap(); + let add_type = ast_module.get(py, "Add").unwrap(); + let mult_type = ast_module.get(py, "Mult").unwrap(); + let eq_type = ast_module.get(py, "Eq").unwrap(); + let lt_type = ast_module.get(py, "Lt").unwrap(); + + assert!(is_instance(&ast, &ast_type)); + + if is_instance(&ast, &add_type) { + BinOp::BinAdd + } else if is_instance(&ast, &mult_type) { + BinOp::BinMult + } else if is_instance(&ast, &eq_type) { + BinOp::BinEq + } else if is_instance(&ast, <_type) { + BinOp::BinLt + } else { + println!("BinOp {}", ast); + 
BinOp::Error + } +} + +fn parse_expr(py: Python, ast: PyObject) -> Expr { + let builtins_module = py.import("builtins").unwrap(); + let isinstance = builtins_module.get(py, "isinstance").unwrap(); + + let is_instance = |object: &PyObject, type_: &PyObject| { + return isinstance.call(py, (object, type_), None).unwrap().is_true(py).unwrap(); + }; + + let ast_module = py.import("ast").unwrap(); + let ast_type = ast_module.get(py, "AST").unwrap(); + let arg_type = ast_module.get(py, "arg").unwrap(); + let bin_op_type = ast_module.get(py, "BinOp").unwrap(); + let name_constant_type = ast_module.get(py, "NameConstant").unwrap(); + let name_type = ast_module.get(py, "Name").unwrap(); + let num_type = ast_module.get(py, "Num").unwrap(); + let str_type = ast_module.get(py, "Str").unwrap(); + let compare_type = ast_module.get(py, "Compare").unwrap(); + let call_type = ast_module.get(py, "Call").unwrap(); + let alias_type = ast_module.get(py, "alias").unwrap(); + + assert!(is_instance(&ast, &ast_type)); + + if is_instance(&ast, &arg_type) { + let arg = { + let arg = ast.getattr(py, "arg").unwrap(); + let arg = arg.str(py).unwrap(); + let mut arg = arg.to_string(py).unwrap(); + arg.to_mut().to_string() + }; + Expr::Name(arg) + } else if is_instance(&ast, &name_type) { + let id = { + let id = ast.getattr(py, "id").unwrap(); + let id = id.str(py).unwrap(); + let mut id = id.to_string(py).unwrap(); + id.to_mut().to_string() + }; + Expr::Name(id) + } else if is_instance(&ast, &name_constant_type) { + let value = { + let value = ast.getattr(py, "value").unwrap(); + let value = value.str(py).unwrap(); + let mut value = value.to_string(py).unwrap(); + value.to_mut().to_string() + }; + Expr::NameConstant(value) + } else if is_instance(&ast, &num_type) { + let n = { + let n = ast.getattr(py, "n").unwrap(); + let n = n.str(py).unwrap(); + let mut n = n.to_string(py).unwrap(); + n.to_mut().to_string() + }; + Expr::Num(n) + } else if is_instance(&ast, &str_type) { + let s = { + let s = 
ast.getattr(py, "s").unwrap(); + let s = s.str(py).unwrap(); + let mut s = s.to_string(py).unwrap(); + s.to_mut().to_string() + }; + Expr::Str(s) + } else if is_instance(&ast, &bin_op_type) { + let left = ast.getattr(py, "left").unwrap(); + let op = ast.getattr(py, "op").unwrap(); + let right = ast.getattr(py, "right").unwrap(); + + let left = parse_expr(py, left); + let op = parse_binop(py, op); + let right = parse_expr(py, right); + + Expr::BinOp(Box::new(left), op, Box::new(right)) + } else if is_instance(&ast, &call_type) { + let func = ast.getattr(py, "func").unwrap(); + let args = ast.getattr(py, "args").unwrap(); + //let keywords = ast.getattr(py, "keywords").unwrap(); + + let func = parse_expr(py, func); + + let mut arguments = vec!(); + for arg in args.iter(py).unwrap() { + let arg = arg.unwrap(); + arguments.push(parse_expr(py, arg)); + } + + Expr::Call(Box::new(func), arguments) + } else if is_instance(&ast, &alias_type) { + let name = ast.getattr(py, "name").unwrap(); + let asname = ast.getattr(py, "asname").unwrap(); + + let name = { + let name = name.str(py).unwrap(); + let mut name = name.to_string(py).unwrap(); + name.to_mut().to_string() + }; + + let asname = { + let asname = asname.str(py).unwrap(); + let mut asname = asname.to_string(py).unwrap(); + let asname = asname.to_mut().to_string(); + if asname == "None" { + "".to_string() + } else { + asname + } + }; + + Expr::Alias(name, asname) + } else if is_instance(&ast, &compare_type) { + let left = ast.getattr(py, "left").unwrap(); + let ops = ast.getattr(py, "ops").unwrap(); + let comparators = ast.getattr(py, "comparators").unwrap(); + + let left = parse_expr(py, left); + let ops = ops.iter(py).unwrap(); + let comparators = comparators.iter(py).unwrap(); + + let mut new_ops = vec!(); + for op in ops { + let op = op.unwrap(); + let op = parse_binop(py, op); + new_ops.push(op); + } + + let mut new_comparators = vec!(); + for comparator in comparators { + let comparator = comparator.unwrap(); + let 
comparator = parse_expr(py, comparator); + new_comparators.push(comparator); + } + + Expr::Compare(Box::new(left), new_ops, new_comparators) + } else { + println!("Expr {}", ast); + Expr::Error + } +} + +fn parse_statement(py: Python, ast: PyObject) -> Statement { + //Statement::FunctionDef(Expr::Name("function".to_string()), vec!(Expr::Name("a".to_string()), Expr::Name("b".to_string())), vec!()) + //Statement::If(Expr::BinOp(BinOp::BinEq, Box::new(Expr::Name("__name__".to_string())), Box::new(Expr::Str("__main__".to_string()))), vec!(Statement::Expr(Expr::Call(Box::new(Expr::Name("function".to_string())), vec!(Expr::Num(1), Expr::Num(2)))))) + + let builtins_module = py.import("builtins").unwrap(); + let isinstance = builtins_module.get(py, "isinstance").unwrap(); + + let is_instance = |object: &PyObject, type_: &PyObject| { + return isinstance.call(py, (object, type_), None).unwrap().is_true(py).unwrap(); + }; + + let ast_module = py.import("ast").unwrap(); + let ast_type = ast_module.get(py, "AST").unwrap(); + let function_def_type = ast_module.get(py, "FunctionDef").unwrap(); + let global_type = ast_module.get(py, "Global").unwrap(); + let assign_type = ast_module.get(py, "Assign").unwrap(); + let return_type = ast_module.get(py, "Return").unwrap(); + let import_from_type = ast_module.get(py, "ImportFrom").unwrap(); + let if_type = ast_module.get(py, "If").unwrap(); + let expr_type = ast_module.get(py, "Expr").unwrap(); + + assert!(is_instance(&ast, &ast_type)); + + /* + // TODO: implement Hash for PyObject. 
(trivial) + let map = { + let fields = ast.getattr(py, "_fields").unwrap(); + let mut map = HashMap::new(); + for field in fields.iter(py).unwrap() { + let field = field.unwrap(); + let value = ast.getattr(py, field).unwrap(); + map.insert(field, value); + } + map + }; + */ + + if is_instance(&ast, &function_def_type) { + let name = ast.getattr(py, "name").unwrap(); + let args = ast.getattr(py, "args").unwrap(); + let body = ast.getattr(py, "body").unwrap(); + + let name = { + let name = name.str(py).unwrap(); + let mut name = name.to_string(py).unwrap(); + name.to_mut().to_string() + }; + + let args = parse_expr_vec(py, args); + /* + let mut arguments = vec!(); + for arg in args.iter(py).unwrap() { + let arg = parse_expr(py, arg.unwrap()); + arguments.push(arg); + } + */ + + let mut statements = vec!(); + for statement in body.iter(py).unwrap() { + let statement = parse_statement(py, statement.unwrap()); + statements.push(statement); + } + + Statement::FunctionDef(Expr::Name(name), args, statements) + } else if is_instance(&ast, &global_type) { + let names = ast.getattr(py, "names").unwrap(); + + let mut globals = vec!(); + for name in names.iter(py).unwrap() { + let name = { + let name = name.unwrap().str(py).unwrap(); + let mut name = name.to_string(py).unwrap(); + name.to_mut().to_string() + }; + globals.push(name); + } + + Statement::Global(globals) + } else if is_instance(&ast, &if_type) { + let test = ast.getattr(py, "test").unwrap(); + let body = ast.getattr(py, "body").unwrap(); + let orelse = ast.getattr(py, "orelse").unwrap(); + + let test = parse_expr(py, test); + + let mut statements = vec!(); + for statement in body.iter(py).unwrap() { + let statement = parse_statement(py, statement.unwrap()); + statements.push(statement); + } + + let mut orelse_ = vec!(); + for statement in orelse.iter(py).unwrap() { + let statement = parse_statement(py, statement.unwrap()); + orelse_.push(statement); + } + + Statement::If(test, statements, orelse_) + } else if 
is_instance(&ast, &assign_type) { + let targets = ast.getattr(py, "targets").unwrap(); + let value = ast.getattr(py, "value").unwrap(); + + let mut arguments = vec!(); + for target in targets.iter(py).unwrap() { + let target = parse_expr(py, target.unwrap()); + arguments.push(target); + } + + let value = parse_expr(py, value); + + Statement::Assign(arguments, value) + } else if is_instance(&ast, &import_from_type) { + let module = ast.getattr(py, "module").unwrap(); + let names = ast.getattr(py, "names").unwrap(); + //let level = ast.getattr(py, "level").unwrap(); + + let module = { + let module = module.str(py).unwrap(); + let mut module = module.to_string(py).unwrap(); + module.to_mut().to_string() + }; + + let mut names_ = vec!(); + for alias in names.iter(py).unwrap() { + let alias = alias.unwrap(); + let alias = parse_expr(py, alias); + names_.push(alias); + } + + Statement::ImportFrom(module, names_) + } else if is_instance(&ast, &return_type) { + let value = ast.getattr(py, "value").unwrap(); + let value = parse_expr(py, value); + Statement::Return(value) + } else if is_instance(&ast, &expr_type) { + let value = ast.getattr(py, "value").unwrap(); + let value = parse_expr(py, value); + Statement::Expr(value) + } else { + println!("Statement {}", ast); + Statement::Error + } +} + +#[allow(dead_code)] +pub fn convert_ast(name: String, module: &PyObject) -> Module { + let gil = Python::acquire_gil(); + let py = gil.python(); + + let builtins_module = py.import("builtins").unwrap(); + let isinstance = builtins_module.get(py, "isinstance").unwrap(); + + let ast_module = py.import("ast").unwrap(); + let module_type = ast_module.get(py, "Module").unwrap(); + + assert!(isinstance.call(py, (module, module_type), None).unwrap().is_true(py).unwrap()); + + let body = module.getattr(py, "body").unwrap(); + let mut statements = vec!(); + for statement in body.iter(py).unwrap() { + let statement = parse_statement(py, statement.unwrap()); + statements.push(statement) + } + 
Module{name: name, statements: statements} +}
new file mode 100644 --- /dev/null +++ b/src/ast_dump.rs @@ -0,0 +1,131 @@ +use python_ast::{Module, Statement, Expr, BinOp}; + +use std::iter; + +impl BinOp { + fn to_string(&self) -> &'static str { + match *self { + BinOp::BinAdd => "+", + BinOp::BinMult => "*", + BinOp::BinEq => "==", + BinOp::BinLt => "<", + BinOp::Error => "BinOp::Error" + } + } +} + +impl Expr { + fn to_string(&self) -> String { + match self.clone() { + Expr::BinOp(a, op, b) => format!("{} {} {}", a.to_string(), op.to_string(), b.to_string()), + Expr::Compare(left, ops, comparators) => format!("{} {}", left.to_string(), { + let mut arguments = vec!(); + + // XXX: wrong order! + for op in ops { + arguments.push(op.to_string().to_string()) + } + for comparator in comparators { + arguments.push(comparator.to_string()) + } + /* + for (op, comparator) in ops.zip(comparators) { + let op = op.unwrap(); + let comparator = comparator.unwrap(); + arguments.push(format!("{} {}", op.to_string(), comparator.to_string())) + } + */ + + arguments.join(" ") + }), + Expr::Call(func, args) => format!("{}({})", func.to_string(), { + let mut arguments = vec!(); + for arg in args { + arguments.push(arg.to_string()); + } + arguments.join(", ") + }), + Expr::Alias(name, asname) => { + if asname.is_empty() { + format!("{}", name) + } else { + format!("{} as {}", name, asname) + } + } + Expr::Name(name) => format!("{}", name), + Expr::NameConstant(name) => format!("{}", name), + Expr::Str(s) => format!("\"{}\"", s), + Expr::Num(n) => format!("{}", n), + Expr::Error => "Expr::Error".to_string() + } + } +} + +fn make_indent(indent: usize) -> String { + let indent: String = iter::repeat(" ").take(indent).collect(); + indent +} + +impl Statement { + fn to_string(&self, indent: usize) -> String { + match self.clone() { + Statement::FunctionDef(Expr::Name(name), arguments, body) => format!("{}def {}({}):\n{}", make_indent(indent), name, { + let mut args = vec!(); + for arg in arguments { + args.push(arg.to_string()); + } + 
args.join(", ") + }, { + let mut statements = vec!(); + for statement in body { + statements.push(statement.to_string(indent + 1)); + } + statements.join("\n") + }), + Statement::FunctionDef(_, _, _) => format!("error!"), + Statement::Global(names) => format!("{}global {}", make_indent(indent), names.join(", ")), + Statement::If(test, body, orelse) => format!("{}if {}:\n{}", make_indent(indent), test.to_string(), { + let mut statements = vec!(); + for arg in body { + statements.push(arg.to_string(indent + 1)); + } + + let mut orelse_ = vec!(); + for arg in orelse { + orelse_.push(arg.to_string(indent + 1)); + } + + if orelse_.is_empty() { + statements.join("\n") + } else { + format!("{}\n{}else:\n{}", statements.join("\n"), make_indent(indent), orelse_.join("\n")) + } + }), + Statement::Assign(targets, value) => format!("{}{} = {}", make_indent(indent), { + let mut exprs = vec!(); + for target in targets { + exprs.push(target.to_string()); + } + exprs.join(", ") + }, value.to_string()), + Statement::Return(expr) => format!("{}return {}", make_indent(indent), expr.to_string()), + Statement::ImportFrom(module, names) => format!("{}from {} import {}", make_indent(indent), module.to_string(), { + let mut exprs = vec!(); + for alias in names.iter() { + let alias = alias.to_string(); + exprs.push(alias); + } + exprs.join(", ") + }), + Statement::Expr(expr) => format!("{}{}", make_indent(indent), expr.to_string()), + Statement::Error => format!("{}Statement::Error", make_indent(indent)) + } + } +} + +#[allow(dead_code)] +pub fn dump_ast(ast: &Module) { + for statement in &ast.statements { + println!("{}", statement.to_string(0)); + } +}
new file mode 100644 --- /dev/null +++ b/src/ast_rewrite.rs @@ -0,0 +1,101 @@ +use python_ast::{Module, Statement, Expr}; + +trait Visitor<T> { + fn visit_module(&mut self, module: Module) -> T; + fn visit_statement(&mut self, statement: Statement) -> T; + fn visit_expr(&mut self, expr: Expr) -> T; +} + +struct Rewrite { +} + +impl Visitor<()> for Rewrite { + fn visit_module(&mut self, module: Module) -> () { + for statement in module.statements { + println!("{:?}", statement); + self.visit_statement(statement); + } + } + + fn visit_statement(&mut self, statement: Statement) -> () { + match statement { + Statement::FunctionDef(Expr::Name(name), arguments, body) => { + for expr in arguments { + self.visit_expr(expr); + } + for statement in body { + self.visit_statement(statement); + } + }, + Statement::FunctionDef(_, _, _) => { + println!("Statement:FunctionDef Error"); + panic!() + }, + Statement::If(test, body, orelse) => { + self.visit_expr(test); + for statement in body { + self.visit_statement(statement); + } + for statement in orelse { + self.visit_statement(statement); + } + }, + Statement::Assign(targets, value) => { + self.visit_expr(value); + for target in targets { + self.visit_expr(target); + } + }, + Statement::Return(expr) => { + self.visit_expr(expr); + }, + Statement::ImportFrom(module, names) => { + //self.visit_expr(module); + }, + Statement::Expr(expr) => { + self.visit_expr(expr); + }, + Statement::Error => { + println!("Statement::Error"); + panic!() + }, + } + } + + fn visit_expr(&mut self, expr: Expr) -> () { + match expr { + Expr::BinOp(left, op, right) => { + self.visit_expr(*left); + self.visit_expr(*right); + }, + Expr::Compare(left, ops, comparators) => { + self.visit_expr(*left); + }, + Expr::Call(func, args) => { + let func = *func; + let func = match func { + Expr::Name(arg) => arg, + _ => panic!() + }; + for expr in args { + self.visit_expr(expr); + } + }, + Expr::Alias(_, _) => (), + Expr::Name(_) => (), + Expr::NameConstant(value) 
=> (), + Expr::Str(_) => (), + Expr::Num(_) => (), + Expr::Error => { + println!("Expr::Error"); + panic!() + } + } + } +} + +#[allow(dead_code)] +pub fn rewrite_ast(ast: Module) { + let mut typing = Rewrite{}; + typing.visit_module(ast); +}
new file mode 100644 --- /dev/null +++ b/src/ast_type.rs @@ -0,0 +1,225 @@ +use python_ast::{Module, Statement, Expr, BinOp}; + +use std::collections::HashMap; + +#[derive(Clone, Debug, PartialEq, Eq)] +enum Type { + Top(usize), + Unit, + Bool, + Int, + Str, + Function(Vec<Type>, Vec<Type>), + Bottom +} + +struct TypeVariable { + id: usize, + instance: Box<TypeVariable>, +} + +trait Visitor<T> { + fn visit_module(&mut self, module: Module) -> T; + fn visit_statement(&mut self, statement: Statement) -> T; + fn visit_expr(&mut self, expr: Expr) -> T; +} + +struct Typing { + environment: Vec<HashMap<String, Type>>, + next_id: usize, +} + +impl Visitor<Type> for Typing { + fn visit_module(&mut self, module: Module) -> Type { + println!("{:?}", self.environment); + for statement in module.statements { + self.visit_statement(statement); + } + println!("{:?}", self.environment); + Type::Bottom + } + + fn visit_statement(&mut self, statement: Statement) -> Type { + match statement { + Statement::FunctionDef(Expr::Name(name), arguments, body) => { + let mut env = self.environment.pop().unwrap(); + self.environment.push(env.clone()); + + let nb_args = arguments.len(); + for expr in arguments { + let type_ = self.visit_expr(expr.clone()); + let name = match expr { + Expr::Name(name) => name, + _ => panic!() + }; + env.insert(name.clone(), type_.clone()); + } + self.environment.push(env); + + for statement in body { + self.visit_statement(statement); + } + self.environment.pop(); + + let mut types = Vec::with_capacity(nb_args); + for _ in 0..nb_args { + self.next_id += 1; + types.push(Type::Top(self.next_id)); + } + Type::Bottom + }, + Statement::FunctionDef(_, _, _) => { + println!("Statement:FunctionDef Error"); + panic!() + }, + Statement::Global(_) => { + Type::Bottom + }, + Statement::If(test, body, orelse) => { + self.visit_expr(test); + for statement in body { + self.visit_statement(statement); + } + for statement in orelse { + self.visit_statement(statement); + } + 
Type::Bottom + }, + Statement::Assign(targets, value) => { + let type_ = self.visit_expr(value); + if targets.len() != 1 { + panic!(); + } + println!("{:?}", self.environment.clone()); + let mut env = self.environment.pop().unwrap(); + for target in targets { + let name = match target { + Expr::Name(name) => name, + _ => panic!() + }; + println!("{} => {:?}", name, type_); + env.insert(name, type_.clone()); + } + self.environment.push(env); + println!("{:?}", self.environment.clone()); + Type::Bottom + }, + Statement::Return(expr) => { + self.visit_expr(expr) + }, + Statement::ImportFrom(module, names) => { + for alias in names { + self.visit_expr(alias); + } + Type::Bottom + }, + Statement::Expr(expr) => { + self.visit_expr(expr) + }, + Statement::Error => { + println!("Statement::Error"); + panic!() + }, + } + } + + fn visit_expr(&mut self, expr: Expr) -> Type { + let expr_str = format!("{:?}", expr); + let type_ = match expr { + Expr::BinOp(left, op, right) => { + let type_left = self.visit_expr(*left); + let type_right = self.visit_expr(*right); + + if op == BinOp::BinMult && type_left == Type::Str && type_right == Type::Int { + return Type::Str; + } + + if type_left != type_right { + return Type::Bottom; + } + if op == BinOp::BinAdd || op == BinOp::BinMult { + if type_left == Type::Int { + Type::Int + } else if type_left == Type::Str { + Type::Str + } else { + Type::Bottom + } + } else if op == BinOp::BinEq || op == BinOp::BinLt { + Type::Bool + } else { + Type::Bottom + } + }, + Expr::Compare(_, _, _) => Type::Bool, + Expr::Call(func, args) => { + let func = *func; + let func = match func { + Expr::Name(arg) => arg, + _ => panic!() + }; + let mut types = vec!(); + for expr in args { + let type_ = self.visit_expr(expr); + types.push(type_); + } + println!("types: {:?}", types); + let env = match self.environment.pop() { + Some(env) => env, + None => return Type::Bottom + }; + if env.contains_key(&func) { + let value = env.get(&func).unwrap().clone(); + 
self.environment.push(env); + value + } else { + self.environment.push(env); + Type::Bottom + } + }, + Expr::Alias(_, _) => Type::Bottom, + Expr::Name(id) => { + let env = match self.environment.pop() { + Some(env) => env, + None => return Type::Bottom + }; + if env.contains_key(&id) { + let value = env.get(&id).unwrap().clone(); + self.environment.push(env); + value + } else { + self.environment.push(env); + self.next_id += 1; + Type::Top(self.next_id) + } + }, + Expr::NameConstant(value) => { + if value == "True" || value == "False" { + Type::Bool + } else if value == "None" { + Type::Unit + } else { + Type::Bottom + } + }, + Expr::Str(_) => Type::Str, + Expr::Num(_) => Type::Int, + Expr::Error => { + println!("Expr::Error"); + panic!() + } + }; + println!("{} => {:?}", expr_str, type_); + type_ + } +} + +#[allow(dead_code)] +pub fn type_ast(ast: Module) { + let mut environment = HashMap::new(); + environment.insert("int".to_string(), Type::Int); + environment.insert("__name__".to_string(), Type::Str); + environment.insert("print".to_string(), Type::Unit); + let mut typing = Typing{environment: vec!(environment), next_id: 0}; + typing.visit_module(ast); +}
new file mode 100644 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,54 @@ +extern crate cpython; + +mod python_tb; +mod python_parse; +mod python_dump; +mod python_ast; +mod ast_convert; +mod ast_dump; +//mod ast_rewrite; +mod ast_type; + +use std::fs::File; +use std::io::Read; + +fn main() { + let filename = match std::env::args().nth(1) { + Some(filename) => filename, + None => { + // TODO: use stderr instead. + println!("USAGE: {} <filename> [filename…]", std::env::args().nth(0).unwrap()); + std::process::exit(1); + } + }; + + let code = { + let mut file = match File::open(&filename) { + Ok(file) => file, + Err(err) => { + // TODO: use stderr instead. + println!("Error while opening file “{}”: {}", filename, err); + std::process::exit(2); + } + }; + let mut code = String::new(); + file.read_to_string(&mut code).unwrap(); + code + }; + + let module = match python_parse::parse_ast(code) { + Ok(module) => module, + Err(err) => { + // TODO: use stderr instead. + println!("Error while parsing file “{}”:", filename); + python_tb::traceback(err); + std::process::exit(3); + } + }; + + //python_dump::dump_module(&module); + let module_ast = ast_convert::convert_ast("__main__".to_string(), &module); + ast_dump::dump_ast(&module_ast); + //ast_rewrite::rewrite_ast(module_ast); + ast_type::type_ast(module_ast); +}
// File: src/python_ast.rs (new file in this changeset)

/// A whole module: its name plus its top-level statements in source order.
#[derive(Clone, Debug)]
pub struct Module {
    pub name: String,
    pub statements: Vec<Statement>,
}

/// A statement of the internal AST.
///
/// The variants are positional tuples; the field meanings noted below are
/// presumed from the Python `ast` node of the same name — confirm against the
/// converter before relying on them.
#[derive(Clone, Debug)]
pub enum Statement {
    /// Presumably (name, arguments, body).
    FunctionDef(Expr, Vec<Expr>, Vec<Statement>),
    /// Names listed by the statement.
    Global(Vec<String>),
    /// Presumably (test, body, else-body).
    If(Expr, Vec<Statement>, Vec<Statement>),
    /// Presumably (targets, value).
    Assign(Vec<Expr>, Expr),
    /// The returned expression.
    Return(Expr),
    /// Presumably (module, imported aliases).
    ImportFrom(String, Vec<Expr>),
    /// A bare expression used as a statement.
    Expr(Expr),
    /// Placeholder variant for a statement that could not be represented.
    Error,
}

/// An expression of the internal AST.
///
/// Unlike [`Statement`], expressions also derive `PartialEq`, `Eq` and `Hash`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Expr {
    /// Left operand, operator, right operand.
    BinOp(Box<Expr>, BinOp, Box<Expr>),
    /// Left operand, then a list of operators and a list of operands.
    Compare(Box<Expr>, Vec<BinOp>, Vec<Expr>),
    /// Callee and positional arguments.
    Call(Box<Expr>, Vec<Expr>),
    /// Presumably (name, asname) of an import alias.
    Alias(String, String),
    /// An identifier.
    Name(String),
    /// A named constant, kept as text (the typing pass compares it with
    /// "True", "False" and "None").
    NameConstant(String),
    /// A string literal.
    Str(String),
    /// A numeric literal, kept as text.
    Num(String),
    /// Placeholder variant for an expression that could not be represented.
    Error,
}

/// Operators usable in [`Expr::BinOp`] and [`Expr::Compare`].
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum BinOp {
    BinAdd,
    BinMult,
    BinEq,
    BinLt,
    /// Placeholder variant for an operator that could not be represented.
    Error,
}
new file mode 100644 --- /dev/null +++ b/src/python_dump.rs @@ -0,0 +1,237 @@ +extern crate cpython; + +use std::iter; + +use cpython::{Python, PyObject}; +use cpython::ObjectProtocol; //for call method + +fn dump(py: Python, indent: usize, ast: PyObject) -> String { + let builtins_module = py.import("builtins").unwrap(); + let isinstance = builtins_module.get(py, "isinstance").unwrap(); + + let is_instance = |object: &PyObject, type_: &PyObject| { + return isinstance.call(py, (object, type_), None).unwrap().is_true(py).unwrap(); + }; + + let ast_module = py.import("ast").unwrap(); + let ast_type = ast_module.get(py, "AST").unwrap(); + let function_def_type = ast_module.get(py, "FunctionDef").unwrap(); + let arguments_type = ast_module.get(py, "arguments").unwrap(); + let arg_type = ast_module.get(py, "arg").unwrap(); + let assign_type = ast_module.get(py, "Assign").unwrap(); + let return_type = ast_module.get(py, "Return").unwrap(); + let bin_op_type = ast_module.get(py, "BinOp").unwrap(); + let name_constant_type = ast_module.get(py, "NameConstant").unwrap(); + let name_type = ast_module.get(py, "Name").unwrap(); + let num_type = ast_module.get(py, "Num").unwrap(); + let str_type = ast_module.get(py, "Str").unwrap(); + let add_type = ast_module.get(py, "Add").unwrap(); + let mult_type = ast_module.get(py, "Mult").unwrap(); + let eq_type = ast_module.get(py, "Eq").unwrap(); + let lt_type = ast_module.get(py, "Lt").unwrap(); + let if_type = ast_module.get(py, "If").unwrap(); + let compare_type = ast_module.get(py, "Compare").unwrap(); + let expr_type = ast_module.get(py, "Expr").unwrap(); + let call_type = ast_module.get(py, "Call").unwrap(); + let import_from_type = ast_module.get(py, "ImportFrom").unwrap(); + let alias_type = ast_module.get(py, "alias").unwrap(); + + assert!(is_instance(&ast, &ast_type)); + + /* + // TODO: implement Hash for PyObject. 
(trivial) + let map = { + let fields = ast.getattr(py, "_fields").unwrap(); + let mut map = HashMap::new(); + for field in fields.iter(py).unwrap() { + let field = field.unwrap(); + let value = ast.getattr(py, field).unwrap(); + map.insert(field, value); + } + map + }; + */ + + if is_instance(&ast, &function_def_type) { + let name = ast.getattr(py, "name").unwrap(); + let args = ast.getattr(py, "args").unwrap(); + let body = ast.getattr(py, "body").unwrap(); + let args = dump(py, indent, args); + let declaration = format!("def {}({}):", name, args); + let mut statements = vec!("".to_string()); + for statement in body.iter(py).unwrap() { + let statement = dump(py, indent + 1, statement.unwrap()); + statements.push(statement); + } + let indent: String = iter::repeat(" ").take(indent + 1).collect(); + let indent = format!("\n{}", indent); + let indent = indent.as_str(); + let body = statements.join(indent); + format!("{}{}", declaration, body) + } else if is_instance(&ast, &if_type) { + let test = ast.getattr(py, "test").unwrap(); + let body = ast.getattr(py, "body").unwrap(); + + let test = dump(py, indent, test); + + let declaration = format!("if {}:", test); + let mut statements = vec!("".to_string()); + for statement in body.iter(py).unwrap() { + let statement = dump(py, indent + 1, statement.unwrap()); + statements.push(statement); + } + let indent: String = iter::repeat(" ").take(indent + 1).collect(); + let indent = format!("\n{}", indent); + let indent = indent.as_str(); + let body = statements.join(indent); + format!("{}{}", declaration, body) + } else if is_instance(&ast, &arguments_type) { + let args_list = ast.getattr(py, "args").unwrap(); + let mut arguments = vec!(); + for arg in args_list.iter(py).unwrap() { + let arg = arg.unwrap(); + arguments.push(dump(py, indent, arg)); + } + format!("{}", arguments.join(", ")) + } else if is_instance(&ast, &arg_type) { + let arg = ast.getattr(py, "arg").unwrap(); + format!("{}", arg) + } else if is_instance(&ast, 
&compare_type) { + let left = ast.getattr(py, "left").unwrap(); + let ops = ast.getattr(py, "ops").unwrap(); + let comparators = ast.getattr(py, "comparators").unwrap(); + + let left = dump(py, indent, left); + let ops = ops.iter(py).unwrap(); + let comparators = comparators.iter(py).unwrap(); + + let mut comparisons = vec!(); + for (op, comparator) in ops.zip(comparators) { + let op = op.unwrap(); + let comparator = comparator.unwrap(); + + let op = dump(py, indent, op); + let comparator = dump(py, indent, comparator); + + comparisons.push(format!("{} {}", op, comparator)); + } + format!("{} {}", left, comparisons.join(" ")) + } else if is_instance(&ast, &assign_type) { + let targets = ast.getattr(py, "targets").unwrap(); + let value = ast.getattr(py, "value").unwrap(); + let mut cibles = vec!(); + for target in targets.iter(py).unwrap() { + let target = target.unwrap(); + let target = dump(py, indent, target); + cibles.push(target.to_string()); + } + let value = dump(py, indent, value); + format!("{} = {}", cibles.join(", "), value) + } else if is_instance(&ast, &return_type) { + let value = ast.getattr(py, "value").unwrap(); + let value = dump(py, indent, value); + format!("return {}", value) + } else if is_instance(&ast, &bin_op_type) { + let left = ast.getattr(py, "left").unwrap(); + let op = ast.getattr(py, "op").unwrap(); + let right = ast.getattr(py, "right").unwrap(); + + let left = dump(py, indent, left); + let op = dump(py, indent, op); + let right = dump(py, indent, right); + + format!("{} {} {}", left, op, right) + } else if is_instance(&ast, &name_type) { + let id = ast.getattr(py, "id").unwrap(); + format!("{}", id) + } else if is_instance(&ast, &name_constant_type) { + let value = ast.getattr(py, "value").unwrap(); + format!("{}", value) + } else if is_instance(&ast, &expr_type) { + let value = ast.getattr(py, "value").unwrap(); + let value = dump(py, indent, value); + format!("{}", value) + } else if is_instance(&ast, &call_type) { + let func = 
ast.getattr(py, "func").unwrap(); + let args = ast.getattr(py, "args").unwrap(); + //let keywords = ast.getattr(py, "keywords").unwrap(); + + let func = dump(py, indent, func); + + let mut arguments = vec!(); + for arg in args.iter(py).unwrap() { + let arg = arg.unwrap(); + arguments.push(dump(py, indent, arg)); + } + + format!("{}({})", func, arguments.join(", ")) + } else if is_instance(&ast, &import_from_type) { + let module = ast.getattr(py, "module").unwrap(); + let names = ast.getattr(py, "names").unwrap(); + //let level = ast.getattr(py, "level").unwrap(); + + let mut arguments = vec!(); + for name in names.iter(py).unwrap() { + let name = name.unwrap(); + arguments.push(dump(py, indent, name)); + } + + format!("from {} import {}", module, arguments.join(", ")) + } else if is_instance(&ast, &alias_type) { + let name = ast.getattr(py, "name").unwrap(); + let asname = ast.getattr(py, "asname").unwrap(); + + let name = { + let name = name.str(py).unwrap(); + let mut name = name.to_string(py).unwrap(); + name.to_mut().to_string() + }; + + let asname = { + let asname = asname.str(py).unwrap(); + let mut asname = asname.to_string(py).unwrap(); + asname.to_mut().to_string() + }; + + if asname == "None" { + format!("{}", name) + } else { + format!("{} as {}", name, asname) + } + } else if is_instance(&ast, &num_type) { + let n = ast.getattr(py, "n").unwrap(); + format!("{}", n) + } else if is_instance(&ast, &str_type) { + let s = ast.getattr(py, "s").unwrap(); + format!("\"{}\"", s) + } else if is_instance(&ast, &add_type) { + format!("+") + } else if is_instance(&ast, &mult_type) { + format!("*") + } else if is_instance(&ast, &eq_type) { + format!("==") + } else if is_instance(&ast, <_type) { + format!("<") + } else { + format!("unknown {}", ast) + } +} + +#[allow(dead_code)] +pub fn dump_module(module: &PyObject) { + let gil = Python::acquire_gil(); + let py = gil.python(); + + let builtins_module = py.import("builtins").unwrap(); + let isinstance = 
builtins_module.get(py, "isinstance").unwrap(); + + let ast_module = py.import("ast").unwrap(); + let module_type = ast_module.get(py, "Module").unwrap(); + + assert!(isinstance.call(py, (module, module_type), None).unwrap().is_true(py).unwrap()); + + let body = module.getattr(py, "body").unwrap(); + for statement in body.iter(py).unwrap() { + println!("{}", dump(py, 0, statement.unwrap())); + } +}
new file mode 100644 --- /dev/null +++ b/src/python_parse.rs @@ -0,0 +1,15 @@ +extern crate cpython; + +use cpython::{Python, PyObject, PyErr}; +use cpython::ObjectProtocol; //for call method + +pub fn parse_ast(code: String) -> Result<PyObject, PyErr> { + let gil = Python::acquire_gil(); + let py = gil.python(); + + let ast_module = py.import("ast").unwrap(); + let ast_parse = ast_module.get(py, "parse").unwrap(); + + let real_ast = try!(ast_parse.call(py, (code,), None)); + Ok(real_ast) +}