//--------------------------------------
//--- 010 Editor v3.0.4 Binary Template
//
// File: PYCTemplate.bt
// Author: Kuang-che Wu
// Revision: 2009/04/02
// Purpose: parse python bytecode .pyc & .pyo
//          support python 2.4 to 2.7
//--------------------------------------

// Magic numbers recognised by this template.
// Stored as a 16-bit value: it is immediately followed by the two bytes
// "\r\n" (see struct Magic), and enums default to a 4-byte int, so the
// storage type must be given explicitly.
enum <uint16> MagicValue {
    PY_24a0   = 62041,
    PY_24a3   = 62051,
    PY_24b1   = 62061,
    PY_25a0_1 = 62071,
    PY_25a0_2 = 62081,
    PY_25a0_3 = 62091,
    PY_25a0_4 = 62092,
    PY_25b3_1 = 62101,
    PY_25b3_2 = 62111,
    PY_25c1   = 62121,
    PY_25c2   = 62131,
    PY_26a0   = 62151,
    PY_26a1   = 62161,
    PY_27a0_1 = 62171,
    PY_27a0_2 = 62181,
};

// marshal obj type of version 2
// version 2 is backward compatible to version 1 (for read)
// Every tag is a single character, so the enum is one byte wide.
enum <char> ObjType {
    TYPE_NULL           = '0',
    TYPE_NONE           = 'N',
    TYPE_FALSE          = 'F',
    TYPE_TRUE           = 'T',
    TYPE_STOPITER       = 'S',
    TYPE_ELLIPSIS       = '.',
    TYPE_INT            = 'i',
    TYPE_INT64          = 'I',
    TYPE_FLOAT          = 'f',
    TYPE_BINARY_FLOAT   = 'g',
    TYPE_COMPLEX        = 'x',
    TYPE_BINARY_COMPLEX = 'y',
    TYPE_LONG           = 'l',
    TYPE_STRING         = 's',
    TYPE_INTERNED       = 't',
    TYPE_STRINGREF      = 'R',
    TYPE_TUPLE          = '(',
    TYPE_LIST           = '[',
    TYPE_DICT           = '{',
    TYPE_CODE           = 'c',
    TYPE_UNICODE        = 'u',
    TYPE_UNKNOWN        = '?',
    TYPE_SET            = '<',
    TYPE_FROZENSET      = '>',
};

// Python/import.c
// File signature: a 16-bit magic value followed by "\r\n".
// Warns and returns if the trailer is wrong or the magic value is not one
// of the MagicValue constants.
struct Magic {
    MagicValue magic1;
    char magic2[2];
    if (magic2 != "\x0d\x0a") {
        Warning("bad magic");
        return 0;
    }
    if (EnumToString(magic1) == "") {
        Warning("Unknown magic version");
        return 0;
    }
};

// opcode.h
// this is opname of python 2.4
// please add new opcode in ReadInstruction()
// Opcodes are read as single bytes (see struct Instruction), hence <ubyte>.
enum <ubyte> OpCode {
    STOP_CODE = 0,
    POP_TOP = 1,
    ROT_TWO = 2,
    ROT_THREE = 3,
    DUP_TOP = 4,
    ROT_FOUR = 5,

    UNARY_POSITIVE = 10,
    UNARY_NEGATIVE = 11,
    UNARY_NOT = 12,
    UNARY_CONVERT = 13,

    UNARY_INVERT = 15,

    LIST_APPEND = 18,
    BINARY_POWER = 19,

    BINARY_MULTIPLY = 20,
    BINARY_DIVIDE = 21,
    BINARY_MODULO = 22,
    BINARY_ADD = 23,
    BINARY_SUBTRACT = 24,
    BINARY_SUBSCR = 25,
    BINARY_FLOOR_DIVIDE = 26,
    BINARY_TRUE_DIVIDE = 27,
    INPLACE_FLOOR_DIVIDE = 28,
    INPLACE_TRUE_DIVIDE = 29,

    SLICE = 30,
    /* Also uses 31-33 */
    SLICE_a = 31,
    SLICE_b = 32,
    SLICE_c = 33,

    STORE_SLICE = 40,
    /* Also uses 41-43 */
    STORE_SLICE_a = 41,
    STORE_SLICE_b = 42,
    STORE_SLICE_c = 43,

    DELETE_SLICE = 50,
    /* Also uses 51-53 */
    DELETE_SLICE_a = 51,
    DELETE_SLICE_b = 52,
    DELETE_SLICE_c = 53,

    INPLACE_ADD = 55,
    INPLACE_SUBTRACT = 56,
    INPLACE_MULTIPLY = 57,
    INPLACE_DIVIDE = 58,
    INPLACE_MODULO = 59,
    STORE_SUBSCR = 60,
    DELETE_SUBSCR = 61,

    BINARY_LSHIFT = 62,
    BINARY_RSHIFT = 63,
    BINARY_AND = 64,
    BINARY_XOR = 65,
    BINARY_OR = 66,
    INPLACE_POWER = 67,
    GET_ITER = 68,

    PRINT_EXPR = 70,
    PRINT_ITEM = 71,
    PRINT_NEWLINE = 72,
    PRINT_ITEM_TO = 73,
    PRINT_NEWLINE_TO = 74,
    INPLACE_LSHIFT = 75,
    INPLACE_RSHIFT = 76,
    INPLACE_AND = 77,
    INPLACE_XOR = 78,
    INPLACE_OR = 79,
    BREAK_LOOP = 80,
    WITH_CLEANUP = 81,
    LOAD_LOCALS = 82,
    RETURN_VALUE = 83,
    IMPORT_STAR = 84,
    EXEC_STMT = 85,
    YIELD_VALUE = 86,
    POP_BLOCK = 87,
    END_FINALLY = 88,
    BUILD_CLASS = 89,

    STORE_NAME = 90,        /* Index in name list */
    DELETE_NAME = 91,       /* "" */
    UNPACK_SEQUENCE = 92,   /* Number of sequence items */
    FOR_ITER = 93,

    STORE_ATTR = 95,        /* Index in name list */
    DELETE_ATTR = 96,       /* "" */
    STORE_GLOBAL = 97,      /* "" */
    DELETE_GLOBAL = 98,     /* "" */
    DUP_TOPX = 99,          /* number of items to duplicate */
    LOAD_CONST = 100,       /* Index in const list */
    LOAD_NAME = 101,        /* Index in name list */
    BUILD_TUPLE = 102,      /* Number of tuple items */
    BUILD_LIST = 103,       /* Number of list items */
    BUILD_MAP = 104,        /* Always zero for now */
    LOAD_ATTR = 105,        /* Index in name list */
    COMPARE_OP = 106,       /* Comparison operator */
    IMPORT_NAME = 107,      /* Index in name list */
    IMPORT_FROM = 108,      /* Index in name list */

    JUMP_FORWARD = 110,     /* Number of bytes to skip */
    JUMP_IF_FALSE = 111,    /* "" */
    JUMP_IF_TRUE = 112,     /* "" */
    JUMP_ABSOLUTE = 113,    /* Target byte offset from beginning of code */

    LOAD_GLOBAL = 116,      /* Index in name list */

    CONTINUE_LOOP = 119,    /* Start of loop (absolute) */
    SETUP_LOOP = 120,       /* Target address (relative) */
    SETUP_EXCEPT = 121,     /* "" */
    SETUP_FINALLY = 122,    /* "" */

    LOAD_FAST = 124,        /* Local variable number */
    STORE_FAST = 125,       /* Local variable number */
    DELETE_FAST = 126,      /* Local variable number */

    RAISE_VARARGS = 130,    /* Number of raise arguments (1, 2 or 3) */
    /* CALL_FUNCTION_XXX opcodes defined below depend on this definition */
    CALL_FUNCTION = 131,    /* #args + (#kwargs<<8) */
    MAKE_FUNCTION = 132,    /* #defaults */
    BUILD_SLICE = 133,      /* Number of items */

    MAKE_CLOSURE = 134,     /* #free vars */
    LOAD_CLOSURE = 135,     /* Load free variable from closure */
    LOAD_DEREF = 136,       /* Load and dereference from closure cell */
    STORE_DEREF = 137,      /* Store into cell */

    /* The next 3 opcodes must be contiguous and satisfy
       (CALL_FUNCTION_VAR - CALL_FUNCTION) & 3 == 1 */
    CALL_FUNCTION_VAR = 140,    /* #args + (#kwargs<<8) */
    CALL_FUNCTION_KW = 141,     /* #args + (#kwargs<<8) */
    CALL_FUNCTION_VAR_KW = 142, /* #args + (#kwargs<<8) */

    /* Support for opargs more than 16 bits long */
    EXTENDED_ARG = 143,
};

// ceval.c
const int HAVE_ARGUMENT = 90;   // opcodes >= this value carry a 16-bit oparg
const int EXTENDED_ARG = 143;

// One bytecode instruction.
// Plain form:    opcode [oparg]                                  (1 or 3 bytes)
// Extended form: EXTENDED_ARG prefix byte, oparg_hi, opcode [oparg]
//                                                           (up to 6 bytes)
// ReadInstruction() combines oparg_hi (upper 16 bits) with oparg.
struct Instruction {
    if (ReadUByte(FTell()) == EXTENDED_ARG) {
        ubyte opcode_extended_arg;
        uint16 oparg_hi;
        ubyte opcode;
        if (opcode >= HAVE_ARGUMENT)
            uint16 oparg;
    } else {
        ubyte opcode;
        if (opcode >= HAVE_ARGUMENT)
            uint16 oparg;
    }
};

// Primitive readers named after their marshal.c counterparts.
typedef int32 r_long;
typedef int64 r_long64;
typedef int16 r_short;
typedef ubyte r_byte;

// The bytecode of a code object: a TYPE_STRING tag, a byte count n, then
// n bytes decoded as a sequence of Instruction records.
// Exits the template if the tag is not TYPE_STRING.
struct Code {
    ObjType type;
    if (type != TYPE_STRING) {
        Warning("code not in string type");
        Exit(1);
    }
    r_long n;
    local int remain = n;
    local int end = FTell() + n;

    /* trick to optimize parse speed */
    // An Instruction is at most 6 bytes, so remain/6 of them always fit in
    // the remaining bytes; declare them as one array per pass.
    // Instructions differ in size, so the array must not be optimized.
    while (remain >= 6) {
        Instruction inst[remain/6] <read=ReadInstruction, optimize=false>;
        remain = end - FTell();
    }
    // Fewer than 6 bytes left: read the tail one instruction at a time.
    remain = end - FTell();
    while (remain > 0) {
        Instruction inst <read=ReadInstruction>;
        remain -= sizeof(inst);
    }
};

// Map an opcode number to its mnemonic for the Python version identified by
// the file's magic value.
// Starts from the enum name (python 2.4 numbering) and then replays the
// opcode renames/removals/additions in revision order; later assignments
// override earlier ones. Returns "" for an opcode with no name.
string Opcode2Opname(OpCode opcode)
{
    uint16 magic = file.magic.magic1;
    local string opname = EnumToString(opcode);

    if (magic >= 0) { // history between python 2.0 and 2.4
        // r27197
        if (opcode == 114) opname = "";
        // r28249
        if (opcode == 81) opname = "RETURN_NONE";
        // r28494
        if (opcode == 81) opname = "";
        // r32346
        if (opcode == 9) opname = "NOP";
        // r32389
        // (was "opcode = \"\"", which assigned a string to the enum
        //  parameter instead of clearing the name)
        if (opcode == 9) opname = "";
        // r35378
        if (opcode == 18) opname = "LIST_APPEND";
        // r36216
        if (opcode == 9) opname = "NOP";
    }
    // magic 62041 r36242 marshal version 1
    // magic 62051 r37112
    // magic 62061 r37403
    // magic 62071 r38931 marshal version 2
    // magic 62081 r39773
    if (magic >= 62091) { // r42624
        // r42624
        if (opcode == 81) opname = "WITH_CLEANUP";
    }
    // magic 62092 r42952
    // magic 62101 r50600
    // magic 62111 r50968
    // magic 62121 r51082
    // magic 62131 r51729
    if (magic >= 62151) { // r59548
        // r59548
        if (opcode == 54) opname = "STORE_MAP";
    }
    // magic 62161 r61290
    if (magic >= 62171) { // r67818
        // r67818
        if (opcode == 18) opname = "";
        if (opcode == 94) opname = "LIST_APPEND";
    }
    if (magic >= 62181) { // r70071
        // r70071
        if (opcode == 111) opname = "JUMP_IF_FALSE_OR_POP";
        if (opcode == 112) opname = "JUMP_IF_TRUE_OR_POP";
        if (opcode == 114) opname = "POP_JUMP_IF_FALSE";
        if (opcode == 115) opname = "POP_JUMP_IF_TRUE";
    }

    return opname;
}

// Display string for an Instruction (used as its <read> function).
// Without an argument: the mnemonic alone.
// With an argument:    "<mnemonic> <oparg>", where oparg includes oparg_hi
//                      as its upper 16 bits when the extended form was read.
// COMPARE_OP shows the comparison operator instead of the number.
string ReadInstruction(Instruction &ins)
{
    string s;
    OpCode opcode = (OpCode)ins.opcode;
    string opname = Opcode2Opname(opcode);

    if (exists(ins.oparg)) {
        uint32 oparg = ins.oparg;
        if (exists(ins.oparg_hi))
            oparg += (uint32)ins.oparg_hi << 16;

        // Note, COMPARE_OP oparg change name in r24970
        if (opname == "COMPARE_OP") {
            string cmp_op;
            switch (oparg) {
                case 0:  cmp_op = "<"; break;
                case 1:  cmp_op = "<="; break;
                case 2:  cmp_op = "=="; break;
                case 3:  cmp_op = "!="; break;
                case 4:  cmp_op = ">"; break;
                case 5:  cmp_op = ">="; break;
                case 6:  cmp_op = "in"; break;
                case 7:  cmp_op = "not in"; break;
                case 8:  cmp_op = "is"; break;
                case 9:  cmp_op = "is not"; break;
                case 10: cmp_op = "exception match"; break;
                case 11: cmp_op = "BAD"; break;
            }
            SPrintf(s, "%s (%s)", opname, cmp_op);
        } else {
            SPrintf(s, "%s %d", opname, oparg);
        }
    } else {
        s = opname;
    }
    return s;
}

// Line-number table of a code object: a TYPE_STRING tag, a byte count n,
// then n/2 (bytecode offset delta, line delta) byte pairs.
// Exits the template if the tag is not TYPE_STRING.
struct LnoTab {
    ObjType type;
    if (type != TYPE_STRING) {
        Warning("lnotab not in string type");
        Exit(1);
    }
    r_long n;
    // Skip the declaration for an empty table, matching how zero-length
    // strings are handled elsewhere in this template.
    if (n >= 2)
        struct {
            uchar bytecode_offset_diff;
            uchar line_diff;
        } pair[n/2];
};
// Python/marshal.c
// One marshalled object: a one-field type tag followed by a payload whose
// layout depends on the tag. Containers and code objects nest further
// r_object records, so records differ in size and arrays of them must not
// be optimized.
// Exits the template on a tag that has no case below.
typedef struct r_object {
    ObjType type;
    switch (type) {
        // Singletons: the tag is the whole object.
        case TYPE_NULL:
        case TYPE_NONE:
        case TYPE_STOPITER:
        case TYPE_ELLIPSIS:
        case TYPE_FALSE:
        case TYPE_TRUE:
            break;
        case TYPE_INT:
            r_long value;
            break;
        case TYPE_INT64:
            r_long64 value;
            break;
        case TYPE_LONG:
            // The sign of n is the sign of the number; |n| is the count of
            // 16-bit digits that follow.
            r_long n;
            local int size = n<0?-n:n;
            if (size)
                r_short digit[size];
            break;
        case TYPE_FLOAT:
            // Length-prefixed text representation.
            r_byte n;
            char value[n];
            break;
        case TYPE_BINARY_FLOAT:
            double value;
            break;
        case TYPE_COMPLEX:
            // Two length-prefixed text representations: real, then imaginary.
            r_byte nr;
            char real[nr];
            r_byte ni;
            char imag[ni];
            break;
        case TYPE_BINARY_COMPLEX:
            double real;
            double imag;
            break;
        // Length-prefixed byte strings. TYPE_UNICODE shares this layout
        // (a byte count followed by that many bytes of encoded text).
        case TYPE_INTERNED:
        case TYPE_STRING:
        case TYPE_UNICODE:
            r_long n;
            if (n)
                char str[n];
            break;
        case TYPE_STRINGREF:
            r_long n;
            break;
        case TYPE_TUPLE:
            r_long n;
            if (n)
                struct r_object elements[n] <optimize=false>;
            break;
        case TYPE_LIST:
            r_long n;
            if (n)
                struct r_object elements[n] <optimize=false>;
            break;
        case TYPE_DICT:
            // Key/value pairs until a TYPE_NULL key terminates the dict.
            while (1) {
                struct r_object key;
                if (key.type == TYPE_NULL)
                    break;
                struct r_object val;
            }
            break;
        case TYPE_SET:
        case TYPE_FROZENSET:
            r_long n;
            if (n)
                struct r_object elements[n] <optimize=false>;
            break;
        case TYPE_CODE:
            r_long argcount;
            r_long nlocals;
            r_long stacksize;
            r_long flags;
            //struct r_object code;
            // Parsed with the dedicated Code struct instead of a generic
            // r_object so the bytecode is broken into instructions.
            Code code;
            struct r_object consts;
            struct r_object names;
            struct r_object varnames;
            struct r_object freevars;
            struct r_object cellvars;
            struct r_object filename;
            struct r_object name;
            r_long firstlineno;
            //struct r_object lnotab;
            // Parsed with the dedicated LnoTab struct instead of a generic
            // r_object so the table is broken into pairs.
            LnoTab lnotab;
            break;
        default:
            Warning("unknown type code");
            Exit(1);
    }
} r_object;

// Top-level file layout: magic, 4 bytes of mtime, then one marshalled object.
struct {
    Magic magic;
    char mtime[4];
    r_object data;
} file;