working buildsystem; still cannot open PE32+ binaries with zipr

This commit is contained in:
Ludwig Lehnert 2025-03-04 19:08:59 +01:00
parent 30ea9163c9
commit 3f1cf080f2
19 changed files with 239 additions and 511 deletions

7
.gitignore vendored
View File

@ -1,6 +1,11 @@
*.o *.o
*.so *.so
*.a *.a
*.test
build/
plugins_install/ plugins_install/
.sconsign.dblite
.sconsign.dblite
peasoup_executable*/

View File

@ -1,36 +0,0 @@
import os
#
# import the environment and clone it so we can make changes.
#
Import('env')
irdb_env = env.Clone()
#
# These settings are recommended, but you can choose what you like
#
# be strict about syntax/warnings
irdb_env.Append(CXXFLAGS=" -Wall -Werror -fmax-errors=2 ")
# link against core and transform libraries
irdb_env.Append(LIBS=Split("irdb-core irdb-transform"))
# be able to include the SDK files
irdb_env.Append(CPPPATH=" $IRDB_SDK/include ")
# this is where the libraries are.
irdb_env.Append(LIBPATH=Split(" $IRDB_LIBS "))
# this is where to place plugins.
irdb_env.Replace(INSTALL_PATH=os.environ['PWD']+"/plugins_install")
#
# export the new environment for children sub-conscripts
#
Export('irdb_env')
#
# include the children sconscript files.
#
libs = irdb_env.SConscript("./transforms/SConscript")
#
# And we are done
#
Return('libs')

View File

@ -1,33 +0,0 @@
import os
env = Environment()
pwd = os.getcwd()
env.Replace(TRANSFORMS_HOME=os.path.join(pwd, "transforms"))
env.Replace(IRDB_SDK=os.path.join(pwd, "irdb-sdk"))
env.Replace(IRDB_LIBS=os.environ['IRDB_LIBS'])
env.Replace(debug=ARGUMENTS.get("debug", 0))
env.Append(CXXFLAGS=" -std=c++11 ")
env.Append(LINKFLAGS=" -Wl,-unresolved-symbols=ignore-in-shared-libs ")
if int(env['debug']) == 1:
env.Append(CFLAGS=" -g ")
env.Append(CXXFLAGS=" -g ")
env.Append(LINKFLAGS=" -g ")
env.Append(SHLINKFLAGS=" -g ")
else:
env.Append(CFLAGS=" -O ")
env.Append(CXXFLAGS=" -O ")
env.Append(LINKFLAGS=" -O ")
env.Append(SHLINKFLAGS=" -O ")
Export('env')
SConscript("SConscript", variant_dir='build')

View File

@ -1,36 +0,0 @@
import os
#
# import the environment and clone it so we can make changes.
#
Import('env')
irdb_env = env.Clone()
#
# These settings are recommended, but you can choose what you like
#
# be strict about syntax/warnings
irdb_env.Append(CXXFLAGS=" -Wall -Werror -fmax-errors=2 ")
# link against core and transform libraries
irdb_env.Append(LIBS=Split("irdb-core irdb-transform"))
# be able to include the SDK files
irdb_env.Append(CPPPATH=" $IRDB_SDK/include ")
# this is where the libraries are.
irdb_env.Append(LIBPATH=Split(" $IRDB_LIBS "))
# this is where to place plugins.
irdb_env.Replace(INSTALL_PATH=os.environ['PWD']+"/plugins_install")
#
# export the new environment for children sub-conscripts
#
Export('irdb_env')
#
# include the children sconscript files.
#
libs = irdb_env.SConscript("./transforms/SConscript")
#
# And we are done
#
Return('libs')

View File

@ -1,23 +0,0 @@
# import and create a copy of the environment so we don't change
# anyone else's env.
Import('irdb_env')
myenv = irdb_env.Clone()
#
# set input files and output program name
#
files = Glob(Dir('.').srcnode().abspath+"/*.cpp")
pgm_name = "initialize_stack.exe"
#
# build, install and return the program by default.
#
pgm = irdb_env.Program(pgm_name, files)
install = myenv.Install("$INSTALL_PATH/", pgm)
Default(install)
#
# and we're done
#
Return('install')

View File

@ -1,3 +1,7 @@
#!/bin/bash #!/bin/bash
podman run --rm -it -v $(pwd):/work:z -w /work git.zephyr-software.com:4567/opensrc/zipr/zipr-bin iagree sudo chmod -R 777 transforms/
podman run --rm -it -v $(pwd):/work:Z -w /work git.zephyr-software.com:4567/opensrc/zipr/zipr-bin iagree
# ./do-build
# pszr programs/compiled/JustOpen.exe JustOpen.test

145
do-build Executable file
View File

@ -0,0 +1,145 @@
#!/bin/env python3
import os
import hashlib
import base64
import argparse
# Absolute directory containing this script, with symlinks resolved by
# realpath(). The SDK include path, transforms/ and plugins_install/ used
# elsewhere in this script are all located relative to it.
DIR = os.path.dirname(os.path.realpath(__file__))
def parse_args():
    """Parse the build script's command line.

    Returns:
        The argparse namespace; its boolean ``force`` attribute is True when
        ``--force`` was given and False otherwise.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--force', action='store_true')
    options = cli.parse_args()
    return options
# Command-line options, parsed once when the script is loaded; referenced by
# needs_rebuild() for the --force switch.
args = parse_args()
# Compiler driver used for both the compile and the link commands.
CXX = "g++"
# Linker flags: tolerate unresolved symbols inside shared libraries and search
# /opt/ps_zipr/irdb-libs/lib for the libraries named with -l.
LDFLAGS = "-Wl,-unresolved-symbols=ignore-in-shared-libs -L/opt/ps_zipr/irdb-libs/lib"
# Compiler flags: debug info, the bundled irdb-sdk headers, C++11, warnings as
# errors (stop after two errors) and position-independent code for the .so.
CXXFLAGS = f"-g -I{DIR}/irdb-sdk/include -std=c++11 -Wall -Werror -fmax-errors=2 -fPIC"
def list_hash(list: list[str]):
    """Return the hex MD5 digest of the strings in *list*, taken in order.

    Every item is encoded with ``str.encode()`` (UTF-8) and the encoded items
    are hashed back to back with no separator, so an empty list gives the MD5
    of empty input.
    """
    payload = b"".join(entry.encode() for entry in list)
    return hashlib.md5(payload).hexdigest()
def file_hash(file: str):
    """Return the hex MD5 digest of the contents of the file at path *file*.

    The file is opened in binary mode and consumed in 64 KiB chunks, so it is
    never held in memory as a whole. Errors from ``open()`` propagate to the
    caller.
    """
    CHUNK_SIZE = 65536
    digest = hashlib.md5()
    with open(file, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: stream.read(CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()
# Master switch for the hash-based incremental build cache. It is off, which
# keeps the script's current behaviour: every source file is recompiled on
# every run and no stamp files are written. Before turning it on, note that
# the fingerprint only covers the source file's bytes and the library names,
# so header changes and a deleted object file would go unnoticed.
_INCREMENTAL_BUILDS = False


def _build_stamp(file: str, libs: "list[str] | None") -> "tuple[str, str]":
    """Return ``(stamp_path, fingerprint)`` for *file* built against *libs*.

    The fingerprint is the MD5 of the file's contents followed by the MD5 of
    the library names. The stamp lives in /tmp and is named after the file's
    absolute path.
    """
    fingerprint = file_hash(file) + list_hash(libs or [])
    abspath = os.path.abspath(file)
    # URL-safe alphabet: standard base64 may emit '/', which would turn the
    # stamp name into a path below a directory that does not exist.
    stamp_name = base64.urlsafe_b64encode(abspath.encode()).decode()
    return os.path.join('/tmp', stamp_name), fingerprint


def needs_rebuild(file: str, libs: "list[str] | None" = None) -> bool:
    """Tell whether *file* has to be recompiled.

    Args:
        file: Path of the source file.
        libs: Library names the build depends on; ``None`` means none.

    Returns:
        Always True while ``_INCREMENTAL_BUILDS`` is off. With the cache on:
        True when ``--force`` was given, when no stamp exists for *file*, or
        when the stored fingerprint differs from the current one.
    """
    if not _INCREMENTAL_BUILDS:
        return True
    stamp_path, fingerprint = _build_stamp(file, libs)
    if args.force or not os.path.exists(stamp_path):
        return True
    with open(stamp_path, 'r') as f:
        return f.read() != fingerprint


def signal_built(file: str, libs: "list[str] | None" = None) -> None:
    """Record that *file* was built successfully against *libs*.

    Does nothing while ``_INCREMENTAL_BUILDS`` is off; otherwise writes the
    current fingerprint to the stamp file that needs_rebuild() compares with.
    """
    if not _INCREMENTAL_BUILDS:
        return
    stamp_path, fingerprint = _build_stamp(file, libs)
    with open(stamp_path, 'w') as f:
        f.write(fingerprint)
def build(targetObject: str, cppFile: str):
    """Compile *cppFile* into the object file *targetObject*.

    The command line is ``CXX -c <source> -o <object> CXXFLAGS`` and is echoed
    to stdout before it runs.

    Returns:
        True when needs_rebuild() reports the source as up to date, or when
        the compiler exits with status 0 (the build is then recorded with
        signal_built()); False when the compiler fails.
    """
    if not needs_rebuild(cppFile):
        # Up to date counts as success: the caller negates this result to
        # detect errors, so returning None here would flag a compile error.
        return True

    cmd = f'{CXX} -c "{cppFile}" -o "{targetObject}" {CXXFLAGS}'
    print(cmd)
    if os.system(cmd) != 0:
        return False

    signal_built(cppFile)
    return True
def link(target: str, objects: list[str], libs: list[str]):
    """Link *objects* into the shared library *target*.

    Args:
        target: Path of the shared object to create.
        objects: Object files to link, in command-line order.
        libs: Library names; each is passed to the linker as ``-l<name>``.

    Returns:
        True when the link command exits with status 0, False otherwise.
    """
    # Paths are double-quoted the same way build() quotes its paths, so a
    # checkout located under a directory with spaces links as well as it
    # compiles.
    parts = [f'{CXX} -shared -o "{target}"']
    parts += [f'"{obj}"' for obj in objects]
    parts += [f'-l{lib}' for lib in libs]
    parts.append(LDFLAGS)
    cmd = ' '.join(parts)
    print(cmd)
    return os.system(cmd) == 0
def main():
    """Build every plugin found under ``transforms/``.

    Each sub-directory of ``<DIR>/transforms`` is one plugin: all of its
    ``*.cpp`` files are compiled next to their sources and linked into
    ``<DIR>/plugins_install/lib<dirname>.so``. An optional ``.libs`` file in
    the plugin directory lists, whitespace-separated, the libraries to link
    against.

    Returns:
        0 when every plugin compiled and linked, 1 when at least one compile
        or link step failed. A failing plugin does not stop the others from
        being built.
    """
    transforms_root = os.path.join(DIR, 'transforms')
    install_dir = os.path.join(DIR, 'plugins_install')
    failed = False

    # os.listdir() order is arbitrary; sorting keeps the build order and the
    # object order on the link line reproducible between runs.
    for plugin in sorted(os.listdir(transforms_root)):
        plugin_dir = os.path.join(transforms_root, plugin)
        if not os.path.isdir(plugin_dir):
            continue

        libs: list[str] = []
        libs_file = os.path.join(plugin_dir, '.libs')
        if os.path.exists(libs_file):
            with open(libs_file, 'r') as f:
                # split() without arguments never yields empty strings.
                libs = f.read().split()

        objects: list[str] = []
        compiled_ok = True
        for source in sorted(os.listdir(plugin_dir)):
            if not source.endswith('.cpp'):
                continue
            source_path = os.path.join(plugin_dir, source)
            object_path = os.path.splitext(source_path)[0] + '.o'
            # Keep compiling after a failure so one run reports every error.
            if not build(object_path, source_path):
                compiled_ok = False
            objects.append(object_path)

        if not compiled_ok:
            failed = True
            continue
        if not objects:
            # No sources in this directory: there is nothing to link.
            continue

        os.makedirs(install_dir, exist_ok=True)
        target = os.path.join(install_dir, f'lib{plugin}.so')
        if not link(target, objects, libs):
            failed = True

    return 1 if failed else 0


if __name__ == '__main__':
    # Propagate build failures to the shell as a non-zero exit status.
    raise SystemExit(main())

Binary file not shown.

View File

@ -0,0 +1,22 @@
#include <windows.h>
#include <commdlg.h>
#include <stdio.h>
/*
 * Show the common "open file" dialog and print the selected path.
 *
 * Exit status follows the shell convention: 0 when a file was selected (its
 * path is written to stdout followed by a newline), 1 when GetOpenFileName()
 * returned zero. NOTE(review): per the Win32 documentation a zero return
 * covers both user cancellation and errors - confirm if they must be told
 * apart.
 */
int main(void) {
    char file[MAX_PATH] = {0};
    OPENFILENAME ofn = {
        .lStructSize = sizeof(ofn),
        .lpstrFilter = "All Files\0*.*\0",
        .lpstrFile = file,
        .nMaxFile = MAX_PATH,
        .lpstrTitle = "Select File",
        .Flags = OFN_FILEMUSTEXIST | OFN_PATHMUSTEXIST
    };

    if (!GetOpenFileName(&ofn))
        return 1;

    printf("%s\n", file);
    return 0;
}

View File

@ -1 +1 @@
export PSPATH=$PSPATH:$TRANSFORMS_HOME/plugins_install export PSPATH=$PSPATH:$(pwd)/plugins_install

View File

@ -1,23 +0,0 @@
# import and create a copy of the environment so we don't change
# anyone else's env.
Import('irdb_env')
myenv = irdb_env.Clone()
#
# set input files and output program name
#
files = Glob(Dir('.').srcnode().abspath+"/*.cpp")
pgm_name = "initialize_stack.exe"
#
# build, install and return the program by default.
#
pgm = irdb_env.Program(pgm_name, files)
install = myenv.Install("$INSTALL_PATH/", pgm)
Default(install)
#
# and we're done
#
Return('install')

View File

@ -0,0 +1 @@
irdb-core irdb-transform

View File

@ -0,0 +1,23 @@
#include "irdb-core"
#include "logic.hpp"
#include <memory>
#include <vector>
// Transform step that runs ForwardFileOpen on the main file's IR.
class ForwardFileOpenDriver : public TransformStep_t {
public:
  // The arguments are ignored; this always returns 0.
  int parseArgs(const vector<string> args) override { return 0; }

  // Runs the transform and maps its boolean result to a status code:
  // 0 when execute() returns true, 2 otherwise.
  int executeStep() override {
    auto mainIR = getMainFileIR();
    if (ForwardFileOpen(mainIR).execute())
      return 0;
    return 2;
  }

  // Name under which this step identifies itself.
  string getStepName() const override { return "forward_file_open"; }
};
// Factory with C linkage: returns a newly created ForwardFileOpenDriver as a
// shared_ptr to its TransformStep_t base.
// NOTE(review): presumably looked up by name when the plugin is loaded -
// confirm against the SDK.
extern "C" shared_ptr<TransformStep_t> getTransformStep(void) {
  return make_shared<ForwardFileOpenDriver>();
}

View File

@ -0,0 +1,17 @@
#include "logic.hpp"
#include "irdb-transform"
// Hands the FileIR to the Transform_t base class; no other state is set up.
ForwardFileOpen::ForwardFileOpen(FileIR_t *p_variantIR)
    : Transform_t(p_variantIR)
{
}
bool ForwardFileOpen::execute() {
const auto insts = getFileIR()->getInstructions();
cout << "Hello World!" << std::endl;
for (auto &inst : insts) {
cout << inst->getDisassembly() << std::endl;
}
return true;
}

View File

@ -0,0 +1,19 @@
#pragma once
#include <irdb-core>
#include <irdb-deep>
#include <irdb-transform>
#include <iostream>
#include <memory>
using namespace std;
using namespace IRDB_SDK;
// Transform over a FileIR. Inherits protectedly from Transform_t, so the base
// class helpers are usable inside the class but are not part of its public
// interface.
class ForwardFileOpen : protected Transform_t {
public:
  // Wraps the given FileIR. `explicit` prevents a bare FileIR_t pointer from
  // converting to a ForwardFileOpen implicitly.
  explicit ForwardFileOpen(FileIR_t *p_variantIR);

  // Runs the transform; returns true on success.
  bool execute();
};

View File

@ -1,147 +0,0 @@
#include "initialize_stack.hpp"
#include <algorithm>
#include <fstream>
#include <math.h>
#define ALLOF(a) begin(a), end(a)
using namespace std;
using namespace IRDB_SDK;
using namespace InitStack;
//
// constructor
//
InitStack_t::InitStack_t(FileIR_t *p_variantIR,
const string &p_functions_filename, int p_init_value,
bool p_verbose)
: Transform_t(p_variantIR), // initialize the Transform class so things like
// insertAssembly and getFileIR() can be used
m_init_value(p_init_value), // member variable inits, these will vary
// depending on your transform's objectives
m_verbose(p_verbose), m_num_transformed(0) {
// check whether to read in a list of functions to transform
if (p_functions_filename == "") {
cout << "Auto-initialize all functions" << endl;
m_funcs_to_init =
getFileIR()->getFunctions(); // use all functions from the IR
} else {
cout << "Auto-initialize functions specified in: " << p_functions_filename
<< endl;
readFunctionsFromFile(p_functions_filename); // read functions from file
}
}
//
// read list of functions to auto-initialize
//
// post conditions: set of functions to auto-initialize
//
void InitStack_t::readFunctionsFromFile(const string &p_filename) {
// get all functions for readability of the rest of the code
const auto &all_funcs = getFileIR()->getFunctions();
// open input file and check for successful open
ifstream functionsFile(
p_filename); // can't use auto decl here because of lack of copy
// constructor in ifstream class
if (!functionsFile.is_open())
throw runtime_error("Cannot open " + p_filename);
// read each line of the input file.
auto line = string();
while (functionsFile >> line) {
// locate a function with the name read from the file.
const auto func_it = find_if(ALLOF(all_funcs), [&](const Function_t *f) {
return f->getName() == line;
});
// if found, log and insert it into the set to transform
if (func_it != end(all_funcs)) {
auto f = *func_it;
cout << "Adding " << f->getName() << " to function list" << endl;
m_funcs_to_init.insert(f);
}
}
}
//
// Execute the transform by transforming all to-transform functions
//
// preconditions: the FileIR is read as from the IRDB. valid file listing
// functions to auto-initialize postcondition: instructions added to
// auto-initialize stack for each specified function
//
//
bool InitStack_t::execute() {
// transform all functions
for (auto f : m_funcs_to_init)
initStack(f);
// #ATTRIBUTE is a convention used to help find useful information in log
// files
cout << "#ATTRIBUTE InitStack::num_transformed=" << m_num_transformed << endl;
return m_num_transformed > 0; // true means success
}
//
// preconditions : f is not NULL
// postconditions: stack auto-initialized if stack frame size > 0
//
void InitStack_t::initStack(Function_t *f) {
// preconditions
assert(f != nullptr);
// check that the frame size is in the area we care about
const auto frame_size = f->getStackFrameSize();
// nothing to init.
if (frame_size == 0)
return;
const auto num_locs = static_cast<uint64_t>(ceil(frame_size / 4.0));
// debug output
cout << "Function: " << f->getName()
<< " frame size: " << f->getStackFrameSize() << endl;
// not all functions have an entry point
const auto entry = f->getEntryPoint();
if (!entry)
return;
// log what we are doing
cout << "Function: " << f->getName() << " auto-initialize " << dec << num_locs
<< " stack memory locations (4 bytes at a time) with value = " << hex
<< m_init_value << endl;
// determine the registers to use on x86-32 or x86-64
const auto sp_reg =
getFileIR()->getArchitectureBitWidth() == 64 ? "rsp" : "esp";
const auto scratch_reg =
getFileIR()->getArchitectureBitWidth() == 64 ? "r11" : "ecx";
// Now, do the dirty work of inserting new assembly to initialize the stack.
// Insert these instructions at the start of the function (to initialize the
// stack frame before the function runs) Assume: flags are dead at function
// entry. Future work: Verify this is true using dead register list. Note: we
// spill a scratch register into the red zone at 120 bytes passed the end of
// the frame
const auto newInsns = insertAssemblyInstructionsBefore(
entry,
string() + " mov [%%1 + %%2], %%3\n"
" mov %%3, -%%4\n"
"L1: mov dword [%%1 + %%3 * 4 - 4], %%5\n"
" inc %%3\n"
" jnz 0\n"
" mov %%3, [%%1 + %%2]\n",
{sp_reg, to_string(-f->getStackFrameSize() - 120), scratch_reg,
to_string(num_locs), to_string(m_init_value)});
newInsns[4]->setTarget(newInsns[2]); // Link jnz to L1.
// bump stats
m_num_transformed++;
}

View File

@ -1,61 +0,0 @@
#pragma once
#include <irdb-core>
#include <irdb-transform>
//
// Put the transform in its own namespace
// just to keep the header files easy to read.
// This is not an IRDB transform requirement, just good coding practice.
//
namespace InitStack {
using namespace std;
using namespace IRDB_SDK;
//
// This class handles initializing stack frames to a constant value
//
// Note: Using private inheritence here for "principle of minimum access",
// but you can choose what's best for your needs.
//
class InitStack_t : private Transform_t {
public:
// construct an object
InitStack_t(
FileIR_t *p_variantIR, // the FileIR object to transform
const string &p_function_filename, // the name of a file with functions to
// transform. "" -> no file and
// transform all functions
int init_value = 0, // the value to write when initializing the stack
bool p_verbose = false // use verbose logging?
);
// execute the transform
// input: m_funcs_to_init the set of functions to transform, the fileIR to
// transform output: the transformed fileIR, with extra instructions to init
// stack frames return value: true -> success, false -> fail
bool execute();
private:
// methods
// read in the given file full of function names to transform (called from
// constructor) input: the filename and FileIR to transform output:
// m_funcs_to_init with the functions listed in the file
void readFunctionsFromFile(const string &p_filename);
// initialize the stack for a given function
// input: the fileIR to transform
// output: the transformed fileIR
void initStack(Function_t *f);
// data
set<Function_t *> m_funcs_to_init; // the functions whose stacks this object
// should initialize
int m_init_value; // the value with which to init the stack.
bool m_verbose; // do verbose logging
int m_num_transformed; // stats about how many functions that this object has
// transformed
};
} // namespace InitStack

View File

@ -1,149 +0,0 @@
#include "initialize_stack.hpp"
#include <getopt.h>
using namespace std;
using namespace IRDB_SDK;
using namespace InitStack;
//
// Print usage info
//
void usage(char *p_name) {
cerr << "Usage: " << p_name << " <variant_id>\n";
cerr << "\t[--functions <file> | -f <file>] Read in the functions to "
"auto-initialize "
<< endl;
cerr << "\t[--initvalue <value> | -i <value>] Specify stack "
"initialization value (default=0)"
<< endl;
cerr << "\t[--verbose | -v] Verbose mode "
" "
<< endl;
cerr << "\t[--help,--usage,-?,-h] Display this message "
" "
<< endl;
}
//
// The entry point for a stand-alone executable transform.
// Note: Thanos-enabled transforms are easier to write, faster to execute, and
// generally preferred. Stand-alone transforms may be useful if the transform
// has issues with memory leaks and/or memory errors. Memory issues in a stand
// alone transform cannot affect correctness of other transforms.
//
int main(int argc, char **argv) {
//
// Sanity check that the command line has at least a variant ID, otherwise we
// won't know what variant to operate on.
//
if (argc < 2) {
usage(argv[0]);
exit(1);
}
// constant parameters read from argv
const auto program_name = string(argv[0]);
const auto variantID = atoi(argv[1]);
// initial values of parameters to parse
auto verbose = false;
auto funcs_filename = string();
auto init_value = 0;
// declare some options for the transform
const char *short_opts = "f:i:v?h";
struct option long_options[] = {{"functions", required_argument, 0, 'f'},
{"initvalue", required_argument, 0, 'i'},
{"verbose", no_argument, 0, 'v'},
{"help", no_argument, 0, 'h'},
{"usage", no_argument, 0, '?'},
{0, 0, 0, 0}};
// parse the options in a standard getopts_long loop
while (true) {
int c = getopt_long(argc, argv, short_opts, long_options, nullptr);
if (c == -1)
break;
switch (c) {
case 'f':
funcs_filename = optarg;
cout << "Reading file with function specifiers: " << funcs_filename
<< endl;
break;
case 'i':
init_value = strtoll(optarg, NULL, 0);
cout << " Stack initialization value: " << hex << init_value << endl;
break;
case 'v':
verbose = true;
break;
case '?':
case 'h':
usage(argv[0]);
exit(1);
break;
default:
break;
}
}
// stand alone transforms must setup the interface to the sql server
auto pqxx_interface = pqxxDB_t::factory();
BaseObj_t::setInterface(pqxx_interface.get());
// stand alone transforms must create and read a variant ID from the database
auto pidp = VariantID_t::factory(variantID);
assert(pidp->isRegistered() == true);
// stand alone transforms must create and read the main file's IR from the
// datatbase
auto this_file = pidp->getMainFile();
auto url = this_file->getURL();
// declare for later so we can return the right value
auto success = false;
// now try to load the IR and execute a transform
try {
// Create and download the file's IR.
// Note: this is achieved differently with thanos-enabled plugins
auto firp = FileIR_t::factory(pidp.get(), this_file);
// sanity
assert(firp && pidp);
// log
cout << "Transforming " << this_file->getURL() << endl;
// create and invoke the transform
InitStack_t is(firp.get(), funcs_filename, init_value, verbose);
success = is.execute();
// conditionally write the IR back to the database on success
if (success) {
cout << "Writing changes for " << url << endl;
// Stand alone trnasforms must manually write the IR back to the IRDB and
// commit the transactions
firp->writeToDB();
// and commit the the transaction to postgres
pqxx_interface->commit();
} else {
cout << "Skipping write back on failure. " << url << endl;
}
} catch (const DatabaseError_t &db_error) {
// log any databse errors that might come up in the transform process
cerr << program_name << ": Unexpected database error: " << db_error
<< "file url: " << url << endl;
} catch (...) {
// log any other errors
cerr << program_name << ": Unexpected error file url: " << url << endl;
}
//
// return success code to driver (as a shell-style return value). 0=success,
// 1=warnings, 2=errors
//
return success ? 0 : 2;
}