changeset 91:859d44f143b8

Implement Bytes in the AST.
author Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date Sat, 25 Jun 2016 02:08:50 +0100
parents 4e62a8927dcc
children 7977a52c3202
files src/ast_convert.rs src/ast_dump.rs src/python_ast.rs tests/test_parse_files/test_bytes.py
diffstat 4 files changed, 32 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/src/ast_convert.rs
+++ b/src/ast_convert.rs
@@ -1,6 +1,6 @@
 use python_ast::{Module, stmt, expr, expr_context, cmpop, boolop, operator, unaryop, arguments, arg, alias, comprehension, keyword, withitem, excepthandler, slice, name_constant};
 
-use cpython::{Python, PyObject, PyBool, PyResult};
+use cpython::{Python, PyObject, PyBool, PyResult, PyBytes};
 use cpython::ObjectProtocol; //for call method
 
 fn get_str(py: Python, object: PyObject) -> String {
@@ -338,6 +338,7 @@ fn parse_expr(py: Python, ast: PyObject)
     let name_type = ast_module.get(py, "Name").unwrap();
     let num_type = ast_module.get(py, "Num").unwrap();
     let str_type = ast_module.get(py, "Str").unwrap();
+    let bytes_type = ast_module.get(py, "Bytes").unwrap();
     let list_type = ast_module.get(py, "List").unwrap();
     let compare_type = ast_module.get(py, "Compare").unwrap();
     let call_type = ast_module.get(py, "Call").unwrap();
@@ -396,6 +397,11 @@ fn parse_expr(py: Python, ast: PyObject)
         let s = ast.getattr(py, "s").unwrap();
         let s = get_str(py, s);
         expr::Str(s)
+    } else if is_instance(&ast, &bytes_type) {
+        let s = ast.getattr(py, "s").unwrap();
+        let s: PyBytes = s.extract(py).unwrap();
+        let s = s.as_slice(py).to_vec();
+        expr::Bytes(s)
     } else if is_instance(&ast, &list_type) {
         let elts = ast.getattr(py, "elts").unwrap();
         let elements = parse_list(py, elts, parse_expr);
--- a/src/ast_dump.rs
+++ b/src/ast_dump.rs
@@ -196,6 +196,28 @@ impl ToStringable for expr {
             }),
             expr::Num(n) => format!("{}", n),
             expr::Str(s) => format!("\"{}\"", s),
+            expr::Bytes(s) => format!("b\"{}\"", { // Render the byte vector as a b-prefixed, double-quoted literal.
+                let mut string = String::with_capacity(s.len()); // Lower bound only: escaped bytes take more than one char.
+                for ascii_code in s {
+                    let c = ascii_code as char; // Byte value becomes the char with the same code point.
+                    if c >= ' ' && c <= '~' { // Printable ASCII range is emitted as-is, apart from the two escapes below.
+                        if c == '"' || c == '\\' {
+                            string.push('\\');
+                        }
+                        string.push(c);
+                    } else if c == '\t' {
+                        string.push_str("\\t");
+                    } else if c == '\n' {
+                        string.push_str("\\n");
+                    } else if c == '\r' {
+                        string.push_str("\\r");
+                    } else /* any other byte: control codes below 0x20 not handled above, plus 0x7f..=0xff */ {
+                        let value = format!("\\x{:02x}", ascii_code); // Two-digit lowercase hex escape.
+                        string.push_str(value.as_str());
+                    }
+                }
+                string
+            }),
             expr::NameConstant(constant) => match constant {
                 name_constant::True => String::from("True"),
                 name_constant::False => String::from("False"),
--- a/src/python_ast.rs
+++ b/src/python_ast.rs
@@ -102,7 +102,7 @@ pub enum expr {
     Call(Box<expr>, Vec<expr>, Vec<keyword>),
     Num(String),
     Str(String),
-    //Bytes(String),
+    Bytes(Vec<u8>),
     NameConstant(name_constant),
     Ellipsis,
 
new file mode 100644
--- /dev/null
+++ b/tests/test_parse_files/test_bytes.py
@@ -0,0 +1,2 @@
+b"test bytes"
+b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"