// avoiding using r2 internals asserts
#define if_true_return(cond,ret) if (cond) { return (ret); }
-// TODO: kill globals
-static R_TH_LOCAL ut32 Gmagic = 0;
-static R_TH_LOCAL ut32 Gscount = 0;
-static R_TH_LOCAL RList *Grefs = NULL; // If you don't have a good reason, do not change this. And also checkout !refs in get_code_object()
+// All mutable parse state is carried in PycUnmarshalCtx (see marshal.h).
-/* interned_table is used to handle TYPE_INTERNED object */
-extern R_TH_LOCAL RList *interned_table;
-
-static pyc_object *get_object(RBuffer *buffer, int wtype);
+static pyc_object *get_object(PycUnmarshalCtx *ctx, RBuffer *buffer, int wtype);
static pyc_object *copy_object(pyc_object *object);
static void free_object(pyc_object *object);
return ret;
}
-static pyc_object *get_stringref_object(RBuffer *buffer) {
+static pyc_object *get_stringref_object(PycUnmarshalCtx *ctx, RBuffer *buffer) {
pyc_object *ret = NULL;
bool error = false;
ut32 n = get_st32 (buffer, &error);
- if (n >= r_list_length (interned_table)) {
+ if (n >= r_list_length (ctx->interned_table)) {
R_LOG_DEBUG ("bad marshal data (string ref out of range)");
return NULL;
}
return NULL;
}
ret->type = TYPE_STRINGREF;
- ret->data = r_list_get_n (interned_table, n);
+ ret->data = r_list_get_n (ctx->interned_table, n);
if (!ret->data) {
R_FREE (ret);
}
return ret;
}
-static pyc_object *get_complex_object(RBuffer *buffer) {
+static pyc_object *get_complex_object(PycUnmarshalCtx *ctx, RBuffer *buffer) {
bool error = false;
ut32 size = 0;
st32 n1 = 0;
return NULL;
}
- if ((Gmagic & 0xffff) <= 62061) {
+ if ((ctx->magic & 0xffff) <= 62061) {
n1 = get_ut8 (buffer, &error);
} else {
n1 = get_st32 (buffer, &error);
}
s1[n1] = '\0';
- if ((Gmagic & 0xffff) <= 62061) {
+ if ((ctx->magic & 0xffff) <= 62061) {
n2 = get_ut8 (buffer, &error);
} else {
n2 = get_st32 (buffer, &error);
return ret;
}
-static pyc_object *get_interned_object(RBuffer *buffer) {
+static pyc_object *get_interned_object(PycUnmarshalCtx *ctx, RBuffer *buffer) {
pyc_object *ret = NULL;
bool error = false;
ut32 n = get_ut32 (buffer, &error);
ret->type = TYPE_INTERNED;
ret->data = get_bytes (buffer, n);
/* add data pointer to interned table */
- r_list_append (interned_table, ret->data);
+ r_list_append (ctx->interned_table, ret->data);
if (!ret->data) {
R_FREE (ret);
}
return ret;
}
-static pyc_object *get_array_object_generic(RBuffer *buffer, ut32 size) {
+static pyc_object *get_array_object_generic(PycUnmarshalCtx *ctx, RBuffer *buffer, ut32 size) {
pyc_object *ret = R_NEW0 (pyc_object);
if (!ret) {
return NULL;
}
ut32 i;
for (i = 0; i < size; i++) {
- pyc_object *tmp = get_object (buffer, 0);
+ pyc_object *tmp = get_object (ctx, buffer, 0);
if (!tmp || !r_list_append (ret->data, tmp)) {
free_object (tmp);
((RList*)ret->data)->free = NULL;
}
/* small TYPE_SMALL_TUPLE doesn't exist in python2 */
-static pyc_object *get_small_tuple_object(RBuffer *buffer) {
+static pyc_object *get_small_tuple_object(PycUnmarshalCtx *ctx, RBuffer *buffer) {
bool error = false;
ut8 n = get_ut8 (buffer, &error);
if (error) {
return NULL;
}
- pyc_object *ret = get_array_object_generic (buffer, n);
+ pyc_object *ret = get_array_object_generic (ctx, buffer, n);
if (ret) {
ret->type = TYPE_SMALL_TUPLE;
return ret;
return NULL;
}
-static pyc_object *get_tuple_object(RBuffer *buffer) {
+static pyc_object *get_tuple_object(PycUnmarshalCtx *ctx, RBuffer *buffer) {
bool error = false;
ut32 n = get_ut32 (buffer, &error);
if (n > ST32_MAX) {
if (error) {
return NULL;
}
- pyc_object *ret = get_array_object_generic (buffer, n);
+ pyc_object *ret = get_array_object_generic (ctx, buffer, n);
if (ret) {
ret->type = TYPE_TUPLE;
}
return ret;
}
-static pyc_object *get_list_object(RBuffer *buffer) {
+static pyc_object *get_list_object(PycUnmarshalCtx *ctx, RBuffer *buffer) {
pyc_object *ret = NULL;
bool error = false;
ut32 n = get_ut32 (buffer, &error);
if (error) {
return NULL;
}
- ret = get_array_object_generic (buffer, n);
+ ret = get_array_object_generic (ctx, buffer, n);
if (ret) {
ret->type = TYPE_LIST;
return ret;
return NULL;
}
-static pyc_object *get_dict_object(RBuffer *buffer) {
+static pyc_object *get_dict_object(PycUnmarshalCtx *ctx, RBuffer *buffer) {
pyc_object *key = NULL,
*val = NULL;
return NULL;
}
for (;;) {
- key = get_object (buffer, 0);
+ key = get_object (ctx, buffer, 0);
if (!key) {
break;
}
free_object (key);
return NULL;
}
- val = get_object (buffer, 0);
+ val = get_object (ctx, buffer, 0);
if (!val) {
break;
}
return ret;
}
-static pyc_object *get_set_object(RBuffer *buffer) {
+static pyc_object *get_set_object(PycUnmarshalCtx *ctx, RBuffer *buffer) {
bool error = false;
ut32 n = get_ut32 (buffer, &error);
if (n > ST32_MAX) {
if (error) {
return NULL;
}
- pyc_object *ret = get_array_object_generic (buffer, n);
+ pyc_object *ret = get_array_object_generic (ctx, buffer, n);
if (ret) {
ret->type = TYPE_SET;
}
return error? NULL: get_ascii_object_generic (buffer, n, true);
}
// Resolve a back-reference: read an index from `buffer` with get_ut32 and
// return a copy (made by copy_object) of the object stored at that position
// in ctx->refs.
// Returns NULL when the read reports an error, when the index is not below
// the length of ctx->refs, or when the slot holds no object.
-static pyc_object *get_ref_object(RBuffer *buffer) {
+static pyc_object *get_ref_object(PycUnmarshalCtx *ctx, RBuffer *buffer) {
bool error = false;
ut32 index = get_ut32 (buffer, &error);
if (error) {
return NULL;
}
- if (index >= r_list_length (Grefs)) {
+ if (index >= r_list_length (ctx->refs)) {
return NULL;
}
- pyc_object *obj = r_list_get_n (Grefs, index);
+ pyc_object *obj = r_list_get_n (ctx->refs, index);
return obj? copy_object (obj): NULL;
}
return copy;
}
-static pyc_object *get_code_object(RBuffer *buffer) {
+static pyc_object *get_code_object(PycUnmarshalCtx *ctx, RBuffer *buffer) {
bool error = false;
pyc_object *ret = R_NEW0 (pyc_object);
// support start from v1.0
ret->data = cobj;
- const char *ver = get_pyc_version (Gmagic).version;
+ const char *ver = get_pyc_version (ctx->magic).version;
bool v10_to_12 = magic_int_within (ver, "1.0.1", "1.2", &error);
bool v13_to_22 = magic_int_within (ver, "1.3b1", "2.2a1", &error);
bool v11_to_14 = magic_int_within (ver, "1.0.1", "1.4", &error);
cobj->posonlyargcount = 0; // None
}
- if (((3020 < (Gmagic & 0xffff)) && ((Gmagic & 0xffff) < 20121)) && (!v11_to_14)) {
+ if (((3020 < (ctx->magic & 0xffff)) && ((ctx->magic & 0xffff) < 20121)) && (!v11_to_14)) {
cobj->kwonlyargcount = get_ut32 (buffer, &error); // Not included in argcount
} else {
cobj->kwonlyargcount = 0;
// to help disassemble the code
// 1 from get_object() and 4 from get_string_object()
cobj->start_offset = r_buf_tell (buffer) + 5;
- if (!Grefs) {
+ if (!ctx->refs) {
return ret; //return for entried part to get the root object of this file
}
- cobj->code = get_object (buffer, 0);
+ cobj->code = get_object (ctx, buffer, 0);
cobj->end_offset = r_buf_tell (buffer);
- cobj->consts = get_object (buffer, 0);
- cobj->names = get_object (buffer, 0);
+ cobj->consts = get_object (ctx, buffer, 0);
+ cobj->names = get_object (ctx, buffer, 0);
if (v10_to_12) {
cobj->varnames = NULL;
} else {
- cobj->varnames = get_object (buffer, 0);
+ cobj->varnames = get_object (ctx, buffer, 0);
}
if (!(v10_to_12 || v13_to_20)) {
- cobj->freevars = get_object (buffer, 0);
- cobj->cellvars = get_object (buffer, 0);
+ cobj->freevars = get_object (ctx, buffer, 0);
+ cobj->cellvars = get_object (ctx, buffer, 0);
} else {
cobj->freevars = NULL;
cobj->cellvars = NULL;
}
- cobj->filename = get_object (buffer, 0);
- cobj->name = get_object (buffer, 0);
+ cobj->filename = get_object (ctx, buffer, 0);
+ cobj->name = get_object (ctx, buffer, 0);
if (v15_to_22) {
cobj->firstlineno = get_ut16 (buffer, &error);
if (v11_to_14) {
cobj->lnotab = NULL;
} else {
- cobj->lnotab = get_object (buffer, 0);
+ cobj->lnotab = get_object (ctx, buffer, 0);
}
if (error) {
}
ut64 get_code_object_addr(RBuffer *buffer, ut32 magic) {
- Gmagic = magic;
- pyc_object *co = get_code_object (buffer);
+ // Create a temporary context for entrypoint detection
+ // refs is NULL so get_code_object will return early with just start_offset
+ PycUnmarshalCtx ctx = {0};
+ ctx.magic = magic;
+ ctx.refs = NULL;
+ pyc_object *co = get_code_object (&ctx, buffer);
if (co) {
pyc_code_object *cobj = co->data;
ut64 result = cobj->start_offset;
return 0;
}
-static pyc_object *get_object(RBuffer *buffer, int wanted_type) {
+static pyc_object *get_object(PycUnmarshalCtx *ctx, RBuffer *buffer, int wanted_type) {
bool error = false;
pyc_object *ret = NULL;
ut8 code = get_ut8 (buffer, &error);
if (flag) {
pyc_object *noneret = get_none_object ();
if (noneret) {
- ref_idx = r_list_append (Grefs, noneret);
+ ref_idx = r_list_append (ctx->refs, noneret);
}
}
if (wanted_type != 0) {
return get_none_object ();
case TYPE_REF:
free_object (ret);
- return get_ref_object (buffer);
+ return get_ref_object (ctx, buffer);
case TYPE_SMALL_TUPLE:
- ret = get_small_tuple_object (buffer);
+ ret = get_small_tuple_object (ctx, buffer);
break;
case TYPE_TUPLE:
- ret = get_tuple_object (buffer);
+ ret = get_tuple_object (ctx, buffer);
break;
case TYPE_STRING:
ret = get_string_object (buffer);
break;
case TYPE_CODE_v0:
- ret = get_code_object (buffer);
+ ret = get_code_object (ctx, buffer);
if (ret) {
ret->type = TYPE_CODE_v0;
}
break;
case TYPE_CODE_v1:
- ret = get_code_object (buffer);
+ ret = get_code_object (ctx, buffer);
if (ret) {
ret->type = TYPE_CODE_v1;
}
ret = get_int64_object (buffer);
break;
case TYPE_INTERNED:
- ret = get_interned_object (buffer);
+ ret = get_interned_object (ctx, buffer);
break;
case TYPE_STRINGREF:
- ret = get_stringref_object (buffer);
+ ret = get_stringref_object (ctx, buffer);
break;
case TYPE_FLOAT:
ret = get_float_object (buffer);
ret = get_binary_float_object (buffer);
break;
case TYPE_COMPLEX:
- ret = get_complex_object (buffer); // behaviour depends on Python version
+ ret = get_complex_object (ctx, buffer); // behaviour depends on Python version
break;
case TYPE_BINARY_COMPLEX:
ret = get_binary_complex_object (buffer);
break;
case TYPE_LIST:
- ret = get_list_object (buffer);
+ ret = get_list_object (ctx, buffer);
break;
case TYPE_LONG:
ret = get_long_object (buffer);
ret = get_unicode_object (buffer);
break;
case TYPE_DICT:
- ret = get_dict_object (buffer);
+ ret = get_dict_object (ctx, buffer);
break;
case TYPE_FROZENSET:
case TYPE_SET:
- ret = get_set_object (buffer);
+ ret = get_set_object (ctx, buffer);
break;
case TYPE_STOPITER:
case TYPE_ELLIPSIS:
break;
case TYPE_UNKNOWN:
R_LOG_DEBUG ("Get not implemented for type 0x%x", type);
- // r_list_pop (Grefs);
free_object (ret);
return NULL;
case 0:
break;
default:
R_LOG_DEBUG ("Undefined type in get_object (0x%x)", type);
- // r_list_pop (Grefs);
return NULL;
}
if (!ret) {
ret = get_none_object ();
if (ret) {
- r_list_append (Grefs, ret);
+ r_list_append (ctx->refs, ret);
}
}
return ret;
}
-static bool extract_sections_symbols(pyc_object *obj, RList *sections, RList *symbols, RList *cobjs, char *prefix) {
+static bool extract_sections_symbols(PycUnmarshalCtx *ctx, pyc_object *obj, RList *sections, RList *symbols, RList *cobjs, char *prefix) {
RListIter *i = NULL;
// each code object is a section
symbol->size = cobj->end_offset - cobj->start_offset;
symbol->vaddr = cobj->start_offset;
symbol->paddr = cobj->start_offset;
- symbol->ordinal = Gscount++;
+ symbol->ordinal = ctx->scount++;
if (cobj->consts->type != TYPE_TUPLE && cobj->consts->type != TYPE_SMALL_TUPLE) {
goto fail2;
}
goto fail2;
}
r_list_foreach (((RList *)(cobj->consts->data)), i, obj) {
- extract_sections_symbols (obj, sections, symbols, cobjs, prefix);
+ extract_sections_symbols (ctx, obj, sections, symbols, cobjs, prefix);
}
free (prefix);
return true;
return false;
}
// Top-level driver for section/symbol extraction.
// Allocates ctx->refs (a list created with a NULL element-free callback),
// unmarshals one object from `buffer` with get_object, passes it to
// extract_sections_symbols together with the caller's lists, then frees
// ctx->refs and resets it to NULL.
// Returns false when ctx is NULL or the refs list cannot be allocated;
// otherwise returns whatever extract_sections_symbols returned.
// NOTE(review): the magic is no longer a parameter; this presumably relies on
// the caller having filled ctx->magic (and ctx->interned_table) — confirm.
// NOTE(review): pobj is forwarded without a NULL check and is not freed here;
// presumably extract_sections_symbols tolerates NULL — verify.
-bool get_sections_symbols_from_code_objects(RBuffer *buffer, RList *sections, RList *symbols, RList *cobjs, ut32 magic) {
- Gmagic = magic;
- Grefs = r_list_newf (NULL); // (RListFree)free_object);
+bool get_sections_symbols_from_code_objects(PycUnmarshalCtx *ctx, RBuffer *buffer, RList *sections, RList *symbols, RList *cobjs) {
+ if (!ctx) {
+ return false;
+ }
+ ctx->refs = r_list_newf (NULL); // (RListFree)free_object);
bool ret = false;
- if (Grefs) {
- pyc_object *pobj = get_object (buffer, 0);
- ret = extract_sections_symbols (pobj, sections, symbols, cobjs, NULL);
- r_list_free (Grefs);
- Grefs = NULL;
+ if (ctx->refs) {
+ pyc_object *pobj = get_object (ctx, buffer, 0);
+ ret = extract_sections_symbols (ctx, pobj, sections, symbols, cobjs, NULL);
+ r_list_free (ctx->refs);
+ ctx->refs = NULL;
}
return ret;
}
-/* radare - LGPL3 - Copyright 2016-2023 - c0riolis, x0urc3 */
+/* radare - LGPL3 - Copyright 2016-2025 - c0riolis, x0urc3 */
#include <r_bin.h>
#include "../format/pyc/pyc.h"
-static R_TH_LOCAL ut64 code_start_offset = 0;
-static R_TH_LOCAL struct pyc_version version;
-static R_TH_LOCAL RList *sections_cache = NULL;
-RList R_TH_LOCAL *interned_table = NULL; // used from marshall.c
-
// Format probe: the buffer is accepted when it is larger than 4 bytes and
// get_pyc_version, fed with its first 4 bytes, does not report magic == -1.
// The detected version lives only in a local; nothing is stored outside this
// function.
// NOTE(review): the result of r_buf_read_at is not checked here.
static bool check(RBinFile *bf, RBuffer *b) {
if (r_buf_size (b) > 4) {
ut32 buf;
r_buf_read_at (b, 0, (ut8 *)&buf, sizeof (buf));
- version = get_pyc_version (buf);
- return version.magic != -1;
+ struct pyc_version v = get_pyc_version (buf);
+ return v.magic != -1;
}
return false;
}
static bool load(RBinFile *bf, RBuffer *buf, ut64 loadaddr) {
- return check (bf, buf);
+ if (!check (bf, buf)) {
+ return false;
+ }
+ ut32 m;
+ r_buf_read_at (buf, 0, (ut8 *)&m, sizeof (m));
+ RBinPycObj *obj = R_NEW0 (RBinPycObj);
+ if (!obj) {
+ return false;
+ }
+ obj->version = get_pyc_version (m);
+ bf->bo->bin_obj = obj;
+ return true;
}
-static ut64 get_entrypoint(RBuffer *buf) {
+static ut64 get_entrypoint(RBuffer *buf, ut32 magic, ut64 *out_code_start_offset) {
ut8 b;
ut64 result;
int addr;
for (addr = 0x8; addr <= 0x10; addr += 0x4) {
r_buf_read_at (buf, addr, &b, sizeof (b));
- if (pyc_is_code (b, version.magic)) {
- code_start_offset = addr;
+ if (pyc_is_code (b, magic)) {
+ if (out_code_start_offset) {
+ *out_code_start_offset = addr;
+ }
r_buf_seek (buf, addr + 1, R_BUF_SET);
- if ((result = get_code_object_addr (buf, version.magic)) == 0) {
+ if ((result = get_code_object_addr (buf, magic)) == 0) {
return addr;
}
return result;
}
// Build the RBinInfo describing the file.
// The Python version and revision strings are taken from the per-file
// RBinPycObj found in arch->bo->bin_obj; when that object is missing, empty
// strings are used in their place. Returns NULL if the RBinInfo allocation
// fails.
// NOTE(review): the obj lookup tolerates arch == NULL, yet arch->file is
// dereferenced unconditionally below — confirm arch can never be NULL here.
static RBinInfo *info(RBinFile *arch) {
+ RBinPycObj *obj = arch && arch->bo ? (RBinPycObj *)arch->bo->bin_obj : NULL;
RBinInfo *ret = R_NEW0 (RBinInfo);
if (!ret) {
return NULL;
}
ret->file = strdup (arch->file);
- ret->type = r_str_newf ("Python %s byte-compiled file", version.version);
+ ret->type = r_str_newf ("Python %s byte-compiled file", obj ? obj->version.version : "");
ret->bclass = strdup ("Python byte-compiled file");
ret->rclass = strdup ("pyc");
ret->arch = strdup ("pyc");
- ret->machine = r_str_newf ("Python %s VM (rev %s)", version.version,
- version.revision);
+ ret->machine = r_str_newf ("Python %s VM (rev %s)", obj ? obj->version.version : "",
+ obj ? obj->version.revision : "");
ret->os = strdup ("any");
ret->bits = 32; // TODO py_version_cmp (version.version, "3.6") >= 0? 32: 16;????
- ret->cpu = strdup (version.version); // pass version info in cpu, Asm plugin will get it
+ ret->cpu = strdup (obj ? obj->version.version : ""); // pass version info in cpu, Asm plugin will get it
return ret;
}
// Return the section list cached in the per-file RBinPycObj, or NULL when
// arch, arch->bo or the per-file object is missing.
// No parsing happens here; within the visible code the cache is only filled
// by symbols(), so it is NULL until that has run.
static RList *sections(RBinFile *arch) {
- return sections_cache;
+ RBinPycObj *obj = arch && arch->bo ? (RBinPycObj *)arch->bo->bin_obj : NULL;
+ return obj ? obj->sections_cache : NULL;
}
static RList *entries(RBinFile *arch) {
+ RBinPycObj *obj = arch && arch->bo ? (RBinPycObj *)arch->bo->bin_obj : NULL;
RList *entries = r_list_newf ((RListFree)free);
if (!entries) {
return NULL;
r_list_free (entries);
return NULL;
}
- ut64 entrypoint = get_entrypoint (arch->buf);
+ ut64 entrypoint = get_entrypoint (arch->buf, obj ? obj->version.magic : 0, obj ? &obj->code_start_offset : NULL);
addr->paddr = entrypoint;
addr->vaddr = entrypoint;
r_buf_seek (arch->buf, entrypoint, R_IO_SEEK_SET);
}
// Produce the symbol list for the file and, as a by-product, the section
// list that sections() later returns from obj->sections_cache.
// Steps: fetch the per-file RBinPycObj, lazily create obj->cobjs and
// obj->interned_table, make sure obj->code_start_offset is set (running
// get_entrypoint when it is still 0), seek the buffer there and call
// pyc_get_sections_symbols.
// Returns NULL when the per-file object is missing or a list allocation
// fails; otherwise returns the newly allocated symbol list.
// NOTE(review): the result of pyc_get_sections_symbols is ignored.
// NOTE(review): a second call would overwrite obj->sections_cache without
// releasing the previous list and would reuse the already populated cobjs and
// interned_table — presumably this runs once per file; verify.
static RList *symbols(RBinFile *arch) {
- RList *shared = r_list_newf ((RListFree)r_list_free);
- if (!shared) {
+ RBinPycObj *obj = arch && arch->bo ? (RBinPycObj *)arch->bo->bin_obj : NULL;
+ if (!obj) {
return NULL;
}
- RList *cobjs = r_list_newf ((RListFree)free);
- if (!cobjs) {
- r_list_free (shared);
- return NULL;
+ if (!obj->cobjs) {
+ obj->cobjs = r_list_newf ((RListFree)free);
+ if (!obj->cobjs) {
+ return NULL;
+ }
}
- interned_table = r_list_newf ((RListFree)free);
- if (!interned_table) {
- r_list_free (shared);
- r_list_free (cobjs);
- return NULL;
+ if (!obj->interned_table) {
+ obj->interned_table = r_list_newf ((RListFree)free);
+ if (!obj->interned_table) {
+ return NULL;
+ }
}
- r_list_append (shared, cobjs);
- r_list_append (shared, interned_table);
- arch->bo->bin_obj = shared;
- RList *sections = r_list_newf (NULL); // (RListFree)free);
+ RList *sections = r_list_newf (NULL); // keep old behavior; free on destroy if needed
if (!sections) {
- r_list_free (shared);
- arch->bo->bin_obj = NULL;
return NULL;
}
RList *symbols = r_list_newf ((RListFree)free);
if (!symbols) {
- r_list_free (shared);
- arch->bo->bin_obj = NULL;
r_list_free (sections);
return NULL;
}
RBuffer *buffer = arch->buf;
- r_buf_seek (buffer, code_start_offset, R_BUF_SET);
- pyc_get_sections_symbols (sections, symbols, cobjs, buffer, version.magic);
- sections_cache = sections;
+ if (!obj->code_start_offset) {
+ // ensure code_start_offset is initialized
+ (void) get_entrypoint (buffer, obj->version.magic, &obj->code_start_offset);
+ }
+ r_buf_seek (buffer, obj->code_start_offset, R_BUF_SET);
+ pyc_get_sections_symbols (sections, symbols, obj->cobjs, buffer, obj->version.magic, obj->interned_table);
+ obj->sections_cache = sections;
return symbols;
}
+// Destroy callback: release the per-file RBinPycObj attached to
+// bf->bo->bin_obj. Frees the interned table and the code-object list, then
+// the object itself, and clears the bin_obj pointer. Does nothing when bf,
+// bf->bo or the attached object is missing.
+static void destroy(RBinFile *bf) {
+ RBinPycObj *pyc = (bf && bf->bo)? (RBinPycObj *)bf->bo->bin_obj: NULL;
+ if (pyc) {
+ // detach first, then tear the object down
+ bf->bo->bin_obj = NULL;
+ r_list_free (pyc->interned_table);
+ r_list_free (pyc->cobjs);
+ // sections_cache is deliberately not released here (left to RBin core)
+ free (pyc);
+ }
+}
+
RBinPlugin r_bin_plugin_pyc = {
.meta = {
.name = "pyc",
.sections = §ions,
.baddr = &baddr,
.symbols = &symbols,
+ .destroy = &destroy,
};
#ifndef R2_PLUGIN_INCORE