btparser/cparser/tests/PYCTemplate.bt

472 lines
10 KiB
Plaintext

//--------------------------------------
//--- 010 Editor v3.0.4 Binary Template
//
// File: PYCTemplate.bt
// Author: Kuang-che Wu
// Revision: 2009/04/02
// Purpose: parse python bytecode .pyc & .pyo
// support python 2.4 to 2.7
//--------------------------------------
// Recognised values of the 16-bit magic word at the start of the file.
// Member names spell out the Python release each value is listed under
// (PY_25c1 -> 2.5c1); a trailing _N distinguishes several values that share
// one release name.  struct Magic rejects any value that is not listed here.
enum <uint16> MagicValue {
    // 2.4 series
    PY_24a0   = 62041,
    PY_24a3   = 62051,
    PY_24b1   = 62061,

    // 2.5 series
    PY_25a0_1 = 62071,
    PY_25a0_2 = 62081,
    PY_25a0_3 = 62091,
    PY_25a0_4 = 62092,
    PY_25b3_1 = 62101,
    PY_25b3_2 = 62111,
    PY_25c1   = 62121,
    PY_25c2   = 62131,

    // 2.6 series
    PY_26a0   = 62151,
    PY_26a1   = 62161,

    // 2.7 series
    PY_27a0_1 = 62171,
    PY_27a0_2 = 62181,
};
// Marshal object type codes (marshal format version 2).
// Version 2 is backward compatible with version 1 for reading.
// One-character tag that precedes every marshalled object.
// The tag decides which payload layout follows (see r_object).
enum <char> ObjType {
    // tags without any payload
    TYPE_NULL           = '0',
    TYPE_NONE           = 'N',
    TYPE_FALSE          = 'F',
    TYPE_TRUE           = 'T',
    TYPE_STOPITER       = 'S',
    TYPE_ELLIPSIS       = '.',

    // numbers
    TYPE_INT            = 'i',
    TYPE_INT64          = 'I',
    TYPE_FLOAT          = 'f',
    TYPE_BINARY_FLOAT   = 'g',
    TYPE_COMPLEX        = 'x',
    TYPE_BINARY_COMPLEX = 'y',
    TYPE_LONG           = 'l',

    // byte strings and references to them
    TYPE_STRING         = 's',
    TYPE_INTERNED       = 't',
    TYPE_STRINGREF      = 'R',

    // containers
    TYPE_TUPLE          = '(',
    TYPE_LIST           = '[',
    TYPE_DICT           = '{',

    // code objects
    TYPE_CODE           = 'c',

    // remaining tags
    TYPE_UNICODE        = 'u',
    TYPE_UNKNOWN        = '?',
    TYPE_SET            = '<',
    TYPE_FROZENSET      = '>',
};
// Python/import.c
// File signature: a 16-bit version word followed by the two bytes 0x0D 0x0A.
// Both parts are validated right after they are read; each failed check logs
// a warning and executes `return 0`.
// NOTE(review): `return` inside a struct body presumably stops the template
// run at this point -- confirm against the 010 Editor manual.
struct Magic {
    MagicValue magic1;      // version word, must be a known MagicValue member
    char       magic2[2];   // expected to be "\r\n"

    // Fixed trailer of the signature.
    if (magic2 != "\x0d\x0a") {
        Warning("bad magic");
        return 0;
    }

    // EnumToString() yields "" for a value that is not an enum member.
    if (EnumToString(magic1) == "") {
        Warning("Unknown magic version");
        return 0;
    }
};
// opcode.h
// These are the opcode names of Python 2.4.
// Opcodes added, removed or renamed in other versions are handled in Opcode2Opname(); please add new ones there.
// Opcode numbers and their base mnemonics.
// Opcode2Opname() starts from EnumToString() on this enum and then applies
// per-version corrections, so a value missing here simply starts out as "".
// Values >= 90 (HAVE_ARGUMENT) are followed by a 16-bit argument; the trailing
// comment on those members says what the argument means.
enum <ubyte> OpCode {
    // ---- opcodes without an argument ----
    STOP_CODE            = 0,
    POP_TOP              = 1,
    ROT_TWO              = 2,
    ROT_THREE            = 3,
    DUP_TOP              = 4,
    ROT_FOUR             = 5,

    UNARY_POSITIVE       = 10,
    UNARY_NEGATIVE       = 11,
    UNARY_NOT            = 12,
    UNARY_CONVERT        = 13,
    UNARY_INVERT         = 15,

    LIST_APPEND          = 18,
    BINARY_POWER         = 19,
    BINARY_MULTIPLY      = 20,
    BINARY_DIVIDE        = 21,
    BINARY_MODULO        = 22,
    BINARY_ADD           = 23,
    BINARY_SUBTRACT      = 24,
    BINARY_SUBSCR        = 25,
    BINARY_FLOOR_DIVIDE  = 26,
    BINARY_TRUE_DIVIDE   = 27,
    INPLACE_FLOOR_DIVIDE = 28,
    INPLACE_TRUE_DIVIDE  = 29,

    // SLICE occupies 30 and the three values after it (31-33).
    SLICE                = 30,
    SLICE_a              = 31,
    SLICE_b              = 32,
    SLICE_c              = 33,

    // STORE_SLICE occupies 40 and the three values after it (41-43).
    STORE_SLICE          = 40,
    STORE_SLICE_a        = 41,
    STORE_SLICE_b        = 42,
    STORE_SLICE_c        = 43,

    // DELETE_SLICE occupies 50 and the three values after it (51-53).
    DELETE_SLICE         = 50,
    DELETE_SLICE_a       = 51,
    DELETE_SLICE_b       = 52,
    DELETE_SLICE_c       = 53,

    INPLACE_ADD          = 55,
    INPLACE_SUBTRACT     = 56,
    INPLACE_MULTIPLY     = 57,
    INPLACE_DIVIDE       = 58,
    INPLACE_MODULO       = 59,
    STORE_SUBSCR         = 60,
    DELETE_SUBSCR        = 61,
    BINARY_LSHIFT        = 62,
    BINARY_RSHIFT        = 63,
    BINARY_AND           = 64,
    BINARY_XOR           = 65,
    BINARY_OR            = 66,
    INPLACE_POWER        = 67,
    GET_ITER             = 68,

    PRINT_EXPR           = 70,
    PRINT_ITEM           = 71,
    PRINT_NEWLINE        = 72,
    PRINT_ITEM_TO        = 73,
    PRINT_NEWLINE_TO     = 74,
    INPLACE_LSHIFT       = 75,
    INPLACE_RSHIFT       = 76,
    INPLACE_AND          = 77,
    INPLACE_XOR          = 78,
    INPLACE_OR           = 79,
    BREAK_LOOP           = 80,
    WITH_CLEANUP         = 81,
    LOAD_LOCALS          = 82,
    RETURN_VALUE         = 83,
    IMPORT_STAR          = 84,
    EXEC_STMT            = 85,
    YIELD_VALUE          = 86,
    POP_BLOCK            = 87,
    END_FINALLY          = 88,
    BUILD_CLASS          = 89,

    // ---- opcodes with an argument ----
    STORE_NAME           = 90,  /* Index in name list */
    DELETE_NAME          = 91,  /* Index in name list */
    UNPACK_SEQUENCE      = 92,  /* Number of sequence items */
    FOR_ITER             = 93,

    STORE_ATTR           = 95,  /* Index in name list */
    DELETE_ATTR          = 96,  /* Index in name list */
    STORE_GLOBAL         = 97,  /* Index in name list */
    DELETE_GLOBAL        = 98,  /* Index in name list */
    DUP_TOPX             = 99,  /* number of items to duplicate */
    LOAD_CONST           = 100, /* Index in const list */
    LOAD_NAME            = 101, /* Index in name list */
    BUILD_TUPLE          = 102, /* Number of tuple items */
    BUILD_LIST           = 103, /* Number of list items */
    BUILD_MAP            = 104, /* Always zero for now */
    LOAD_ATTR            = 105, /* Index in name list */
    COMPARE_OP           = 106, /* Comparison operator */
    IMPORT_NAME          = 107, /* Index in name list */
    IMPORT_FROM          = 108, /* Index in name list */

    JUMP_FORWARD         = 110, /* Number of bytes to skip */
    JUMP_IF_FALSE        = 111, /* Number of bytes to skip */
    JUMP_IF_TRUE         = 112, /* Number of bytes to skip */
    JUMP_ABSOLUTE        = 113, /* Target byte offset from beginning of code */

    LOAD_GLOBAL          = 116, /* Index in name list */

    CONTINUE_LOOP        = 119, /* Start of loop (absolute) */
    SETUP_LOOP           = 120, /* Target address (relative) */
    SETUP_EXCEPT         = 121, /* Target address (relative) */
    SETUP_FINALLY        = 122, /* Target address (relative) */

    LOAD_FAST            = 124, /* Local variable number */
    STORE_FAST           = 125, /* Local variable number */
    DELETE_FAST          = 126, /* Local variable number */

    RAISE_VARARGS        = 130, /* Number of raise arguments (1, 2 or 3) */
    /* CALL_FUNCTION_XXX opcodes defined below depend on this definition */
    CALL_FUNCTION        = 131, /* #args + (#kwargs<<8) */
    MAKE_FUNCTION        = 132, /* #defaults */
    BUILD_SLICE          = 133, /* Number of items */

    MAKE_CLOSURE         = 134, /* #free vars */
    LOAD_CLOSURE         = 135, /* Load free variable from closure */
    LOAD_DEREF           = 136, /* Load and dereference from closure cell */
    STORE_DEREF          = 137, /* Store into cell */

    /* The next 3 opcodes must be contiguous and satisfy
       (CALL_FUNCTION_VAR - CALL_FUNCTION) & 3 == 1 */
    CALL_FUNCTION_VAR    = 140, /* #args + (#kwargs<<8) */
    CALL_FUNCTION_KW     = 141, /* #args + (#kwargs<<8) */
    CALL_FUNCTION_VAR_KW = 142, /* #args + (#kwargs<<8) */

    /* Support for opargs more than 16 bits long */
    EXTENDED_ARG         = 143,
};
// ceval.c
// Threshold used by struct Instruction: an opcode byte >= HAVE_ARGUMENT is
// followed by a 16-bit oparg, a smaller one stands alone.
const int HAVE_ARGUMENT = 90;
// Opcode byte that struct Instruction treats as a prefix: when it is the next
// byte, a 16-bit oparg_hi is read before the real opcode.
const int EXTENDED_ARG = 143;
// One bytecode instruction.  Layout, in file order:
//   opcode_extended_arg (1 byte) + oparg_hi (2 bytes)  -- only when the next
//                                                         byte is EXTENDED_ARG
//   opcode (1 byte)                                    -- always
//   oparg (2 bytes)                                    -- only when
//                                                         opcode >= HAVE_ARGUMENT
// The largest possible instruction is therefore 6 bytes.
struct Instruction {
    // Peek at the next byte without consuming it, so the prefix fields are
    // declared only when the prefix is really there.
    if (ReadUByte(FTell()) == EXTENDED_ARG) {
        ubyte  opcode_extended_arg;
        uint16 oparg_hi;
    }

    // The instruction proper is laid out the same way with or without prefix.
    ubyte opcode;
    if (opcode >= HAVE_ARGUMENT)
        uint16 oparg;
};
// Fixed-width aliases used for the raw fields of marshalled objects.
// NOTE(review): the r_* names presumably mirror the reader helpers in
// Python/marshal.c -- confirm.
// 32-bit signed: lengths, counts and TYPE_INT values.
typedef int32 r_long;
// 64-bit signed: TYPE_INT64 values.
typedef int64 r_long64;
// 16-bit signed: one digit of a TYPE_LONG.
typedef int16 r_short;
// 8-bit unsigned: length prefix of the text forms of float/complex.
typedef ubyte r_byte;
// Bytecode of a code object: a type tag, a 32-bit byte count n, then n bytes
// decoded as consecutive Instruction structs.
struct Code {
ObjType type;
// The bytecode must be stored as a plain string object; anything else aborts
// the template.
if (type != TYPE_STRING) {
Warning("code not in string type");
Exit(1);
}
r_long n;
// remain: bytes of bytecode not yet decoded; end: file offset just past it.
local int remain = n;
local int end = FTell() + n;
/* trick to optimize parse speed */
// An Instruction is at most 6 bytes, so remain/6 instructions are guaranteed
// to fit in the bytes that are left.  Declaring them as one array per pass is
// the speed trick; the loop repeats on whatever is left after each pass.
while (remain >= 6) {
Instruction inst[remain/6] <read=ReadInstruction,optimize=false>;
remain = end - FTell();
}
remain = end - FTell();
// Fewer than 6 bytes left: decode the tail one instruction at a time.
// NOTE(review): this relies on sizeof(inst) reporting the size of the
// instruction just declared; the loop above recomputes from FTell() instead --
// confirm both agree.
while (remain > 0) {
Instruction inst <read=ReadInstruction>;
remain -= sizeof(inst);
}
};
// Return the mnemonic of `opcode` for the Python version of the file being
// parsed.
//
// The base name comes from the OpCode enum ("" when the value is not a
// member).  The opcode history is then replayed in chronological order: every
// step that applies to the file's magic value overrides the name chosen so
// far.  The rNNNNN tags are the revision numbers given for each change.
//
//   opcode  - raw opcode value taken from an Instruction
//   returns - the mnemonic, or "" when the opcode is unassigned in that version
string Opcode2Opname(OpCode opcode)
{
    uint16 magic = file.magic.magic1;
    local string opname = EnumToString(opcode);

    // History between Python 2.0 and 2.4.  The condition is always true for
    // an unsigned magic; it only keeps this section shaped like the
    // version-gated sections below.
    if (magic >= 0) {
        // r27197
        if (opcode == 114) opname = "";
        // r28249
        if (opcode == 81) opname = "RETURN_NONE";
        // r28494
        if (opcode == 81) opname = "";
        // r32346
        if (opcode == 9) opname = "NOP";
        // r32389
        // Fixed: this step used to assign "" to `opcode` instead of `opname`.
        if (opcode == 9) opname = "";
        // r35378
        if (opcode == 18) opname = "LIST_APPEND";
        // r36216
        if (opcode == 9) opname = "NOP";
    }

    // magic 62041 r36242 marshal version 1
    // magic 62051 r37112
    // magic 62061 r37403
    // magic 62071 r38931 marshal version 2
    // magic 62081 r39773
    if (magic >= 62091) {
        // r42624
        if (opcode == 81) opname = "WITH_CLEANUP";
    }

    // magic 62092 r42952
    // magic 62101 r50600
    // magic 62111 r50968
    // magic 62121 r51082
    // magic 62131 r51729
    if (magic >= 62151) {
        // r59548
        if (opcode == 54) opname = "STORE_MAP";
    }

    // magic 62161 r61290
    if (magic >= 62171) {
        // r67818: LIST_APPEND moves from 18 to 94
        if (opcode == 18) opname = "";
        if (opcode == 94) opname = "LIST_APPEND";
    }

    if (magic >= 62181) {
        // r70071: the conditional jumps are redefined
        if (opcode == 111) opname = "JUMP_IF_FALSE_OR_POP";
        if (opcode == 112) opname = "JUMP_IF_TRUE_OR_POP";
        if (opcode == 114) opname = "POP_JUMP_IF_FALSE";
        if (opcode == 115) opname = "POP_JUMP_IF_TRUE";
    }

    return opname;
}
// <read=...> callback for Instruction: build the one-line text shown for an
// instruction.
//
//   ins     - the parsed instruction
//   returns - "OPNAME" when there is no argument,
//             "OPNAME <n>" when there is one,
//             "COMPARE_OP (<operator>)" for comparisons.
//
// When an EXTENDED_ARG prefix is present its oparg_hi supplies bits 16-31 of
// the argument.
string ReadInstruction(Instruction &ins)
{
    string s;
    OpCode opcode = (OpCode)ins.opcode;
    string opname = Opcode2Opname(opcode);

    if (exists(ins.oparg)) {
        uint32 oparg = ins.oparg;
        if (exists(ins.oparg_hi))
            oparg += (uint32)ins.oparg_hi << 16;

        // Note, COMPARE_OP oparg change name in r24970
        if (opname == "COMPARE_OP") {
            string cmp_op;
            switch (oparg) {
                case 0: cmp_op = "<"; break;
                case 1: cmp_op = "<="; break;
                case 2: cmp_op = "=="; break;
                case 3: cmp_op = "!="; break;
                case 4: cmp_op = ">"; break;
                case 5: cmp_op = ">="; break;
                case 6: cmp_op = "in"; break;
                case 7: cmp_op = "not in"; break;
                case 8: cmp_op = "is"; break;
                case 9: cmp_op = "is not"; break;
                case 10: cmp_op = "exception match"; break;
                case 11: cmp_op = "BAD"; break;
                default:
                    // Out-of-table index: show the raw number rather than
                    // empty parentheses.
                    SPrintf(cmp_op, "%u", oparg);
                    break;
            }
            SPrintf(s, "%s (%s)", opname, cmp_op);
        } else {
            // oparg is unsigned, so print it as such.
            SPrintf(s, "%s %u", opname, oparg);
        }
    } else {
        s = opname;
    }
    return s;
}
// Line-number table of a code object: a type tag, a 32-bit byte count n, then
// n/2 two-byte pairs (bytecode offset delta, line delta).
struct LnoTab {
    ObjType type;
    // The table must be stored as a plain string object; anything else aborts
    // the template.
    if (type != TYPE_STRING) {
        Warning("lnotab not in string type");
        Exit(1);
    }
    r_long n;
    // Declare the pair array only when there is at least one pair, the same
    // way r_object guards its arrays with `if (n)`.  This keeps an empty table
    // from producing a zero-length array declaration.
    if (n >= 2) {
        struct {
            uchar bytecode_offset_diff;
            uchar line_diff;
        } pair[n/2];
    }
};
// Python/marshal.c
// One marshalled object: a one-byte type tag followed by a payload whose
// layout depends on the tag.  Containers and code objects nest further
// r_object values recursively.  An unhandled tag aborts the template.
typedef struct r_object {
    ObjType type;
    switch (type) {
    // Tags that carry no payload.
    case TYPE_NULL:
    case TYPE_NONE:
    case TYPE_STOPITER:
    case TYPE_ELLIPSIS:
    case TYPE_FALSE:
    case TYPE_TRUE:
        break;

    case TYPE_INT:
        r_long value;
        break;

    case TYPE_INT64:
        r_long64 value;
        break;

    // Arbitrary-precision integer: |n| 16-bit digits follow.
    // NOTE(review): the sign of n presumably carries the sign of the number,
    // as in Python/marshal.c -- confirm.
    case TYPE_LONG:
        r_long n;
        local int size = n<0?-n:n;
        // A long equal to zero has no digits; skip the array like the other
        // cases do for n == 0.
        if (size)
            r_short digit[size];
        break;

    // Float stored as text with a one-byte length prefix.
    case TYPE_FLOAT:
        r_byte n;
        char value[n];
        break;

    case TYPE_BINARY_FLOAT:
        double value;
        break;

    // Complex stored as two length-prefixed text parts.
    case TYPE_COMPLEX:
        r_byte nr;
        char real[nr];
        r_byte ni;
        char imag[ni];
        break;

    case TYPE_BINARY_COMPLEX:
        double real;
        double imag;
        break;

    // Length-prefixed byte data.  TYPE_UNICODE uses the same layout in
    // Python/marshal.c (the bytes are the UTF-8 encoding); it previously fell
    // through to the "unknown type code" branch.
    case TYPE_INTERNED:
    case TYPE_STRING:
    case TYPE_UNICODE:
        r_long n;
        if (n)
            char str[n];
        break;

    // Reference to an earlier string; only a 32-bit number is stored.
    case TYPE_STRINGREF:
        r_long n;
        break;

    // Sequences: an element count followed by that many objects.
    case TYPE_TUPLE:
    case TYPE_LIST:
    case TYPE_SET:
    case TYPE_FROZENSET:
        r_long n;
        if (n)
            struct r_object elements[n] <optimize=false>;
        break;

    // Dict: key/value pairs until a key with tag TYPE_NULL is read.
    case TYPE_DICT:
        while (1) {
            struct r_object key;
            if (key.type == TYPE_NULL)
                break;
            struct r_object val;
        }
        break;

    // Code object.  The bytecode and the line table get dedicated structs
    // (Code, LnoTab) instead of being read as generic string objects.
    case TYPE_CODE:
        r_long argcount;
        r_long nlocals;
        r_long stacksize;
        r_long flags;
        //struct r_object code;
        Code code;
        struct r_object consts;
        struct r_object names;
        struct r_object varnames;
        struct r_object freevars;
        struct r_object cellvars;
        struct r_object filename;
        struct r_object name;
        r_long firstlineno;
        //struct r_object lnotab;
        LnoTab lnotab;
        break;

    default:
        Warning("unknown type code");
        Exit(1);
    }
} r_object;
// Template entry point: the layout of the whole .pyc/.pyo file.
// Other parts of the template read the version through file.magic.magic1.
struct {
    Magic    magic;     // validated signature and version word
    char     mtime[4];  // four raw bytes; presumably the source file's
                        // modification time, going by the name -- TODO confirm
    r_object data;      // the marshalled top-level object
} file;