diff --git a/.clangd b/.clangd new file mode 100755 index 0000000..695f34f --- /dev/null +++ b/.clangd @@ -0,0 +1,3 @@ +CompileFlags: + Add: + - "-I/var/home/ludwig/git/windows-binary-fuzzing/irdb-sdk/include" diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..0b113c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.o +*.so +*.a + +plugins_install/ +.sconsign.dblite \ No newline at end of file diff --git a/.gitmodules b/.gitmodules old mode 100644 new mode 100755 diff --git a/SConscript b/SConscript new file mode 100755 index 0000000..21dc388 --- /dev/null +++ b/SConscript @@ -0,0 +1,36 @@ +import os + +# +# import the environment and clone it so we can make changes. +# +Import('env') +irdb_env = env.Clone() + +# +# These settings are recommended, but you can choose what you like +# +# be strict about syntax/warnings: warnings are errors, stop after 2 errors +irdb_env.Append(CXXFLAGS=" -Wall -Werror -fmax-errors=2 ") +# link against core and transform libraries +irdb_env.Append(LIBS=Split("irdb-core irdb-transform")) +# be able to include the SDK files +irdb_env.Append(CPPPATH=" $IRDB_SDK/include ") +# this is where the libraries are. +irdb_env.Append(LIBPATH=Split(" $IRDB_LIBS ")) +# this is where to place plugins: plugins_install/ under the shell's $PWD. +irdb_env.Replace(INSTALL_PATH=os.environ['PWD']+"/plugins_install") + +# +# export the new environment for child SConscripts +# +Export('irdb_env') + +# +# include the child SConscript files. 
+# +libs = irdb_env.SConscript("./transforms/SConscript") + +# +# And we are done +# +Return('libs') diff --git a/SConstruct b/SConstruct new file mode 100755 index 0000000..a4ff8be --- /dev/null +++ b/SConstruct @@ -0,0 +1,33 @@ +import os + + +env = Environment() + +pwd = os.getcwd() + + +env.Replace(TRANSFORMS_HOME=os.path.join(pwd, "transforms")) +env.Replace(IRDB_SDK=os.path.join(pwd, "irdb-sdk")) +env.Replace(IRDB_LIBS=os.environ['IRDB_LIBS']) + + +env.Replace(debug=ARGUMENTS.get("debug", 0)) + +env.Append(CXXFLAGS=" -std=c++11 ") +env.Append(LINKFLAGS=" -Wl,-unresolved-symbols=ignore-in-shared-libs ") + + +if int(env['debug']) == 1: + env.Append(CFLAGS=" -g ") + env.Append(CXXFLAGS=" -g ") + env.Append(LINKFLAGS=" -g ") + env.Append(SHLINKFLAGS=" -g ") +else: + env.Append(CFLAGS=" -O ") + env.Append(CXXFLAGS=" -O ") + env.Append(LINKFLAGS=" -O ") + env.Append(SHLINKFLAGS=" -O ") + + +Export('env') +SConscript("SConscript", variant_dir='build') diff --git a/build/SConscript b/build/SConscript new file mode 100755 index 0000000..21dc388 --- /dev/null +++ b/build/SConscript @@ -0,0 +1,36 @@ +import os + +# +# import the environment and clone it so we can make changes. +# +Import('env') +irdb_env = env.Clone() + +# +# These settings are recommended, but you can choose what you like +# +# be strict about syntax/warnings: warnings are errors, stop after 2 errors +irdb_env.Append(CXXFLAGS=" -Wall -Werror -fmax-errors=2 ") +# link against core and transform libraries +irdb_env.Append(LIBS=Split("irdb-core irdb-transform")) +# be able to include the SDK files +irdb_env.Append(CPPPATH=" $IRDB_SDK/include ") +# this is where the libraries are. +irdb_env.Append(LIBPATH=Split(" $IRDB_LIBS ")) +# this is where to place plugins: plugins_install/ under the shell's $PWD. +irdb_env.Replace(INSTALL_PATH=os.environ['PWD']+"/plugins_install") + +# +# export the new environment for child SConscripts +# +Export('irdb_env') + +# +# include the child SConscript files. 
+# +libs = irdb_env.SConscript("./transforms/SConscript") + +# +# And we are done +# +Return('libs') diff --git a/build/transforms/SConscript b/build/transforms/SConscript new file mode 100755 index 0000000..6ed2177 --- /dev/null +++ b/build/transforms/SConscript @@ -0,0 +1,23 @@ +# import and create a copy of the environment so we don't change +# anyone else's env. +Import('irdb_env') +myenv = irdb_env.Clone() + +# +# set input files and output program name +# +files = Glob(Dir('.').srcnode().abspath+"/*.cpp") +pgm_name = "initialize_stack.exe" + +# +# build, install and return the program by default. +# +pgm = myenv.Program(pgm_name, files) +install = myenv.Install("$INSTALL_PATH/", pgm) +Default(install) + + +# +# and we're done +# +Return('install') diff --git a/build/transforms/initialize_stack.exe b/build/transforms/initialize_stack.exe new file mode 100755 index 0000000..a4e8cb6 Binary files /dev/null and b/build/transforms/initialize_stack.exe differ diff --git a/container b/container new file mode 100755 index 0000000..779b732 --- /dev/null +++ b/container @@ -0,0 +1,4 @@ +#!/bin/bash + +# open an interactive zipr-bin container with the current directory mounted at /work (quoted so paths with spaces survive) +podman run --rm -it -v "$(pwd)":/work:z -w /work git.zephyr-software.com:4567/opensrc/zipr/zipr-bin iagree diff --git a/set_env_vars b/set_env_vars new file mode 100755 index 0000000..636f5f2 --- /dev/null +++ b/set_env_vars @@ -0,0 +1 @@ +export PSPATH=$PSPATH:$TRANSFORMS_HOME/plugins_install diff --git a/transforms/SConscript b/transforms/SConscript new file mode 100755 index 0000000..6ed2177 --- /dev/null +++ b/transforms/SConscript @@ -0,0 +1,23 @@ +# import and create a copy of the environment so we don't change +# anyone else's env. +Import('irdb_env') +myenv = irdb_env.Clone() + +# +# set input files and output program name +# +files = Glob(Dir('.').srcnode().abspath+"/*.cpp") +pgm_name = "initialize_stack.exe" + +# +# build, install and return the program by default. 
+# +pgm = myenv.Program(pgm_name, files) +install = myenv.Install("$INSTALL_PATH/", pgm) +Default(install) + + +# +# and we're done +# +Return('install') diff --git a/transforms/initialize_stack.cpp b/transforms/initialize_stack.cpp new file mode 100755 index 0000000..34da995 --- /dev/null +++ b/transforms/initialize_stack.cpp @@ -0,0 +1,147 @@ +#include "initialize_stack.hpp" +#include +#include +#include + +#define ALLOF(a) begin(a), end(a) + +using namespace std; +using namespace IRDB_SDK; +using namespace InitStack; + +// +// constructor +// +InitStack_t::InitStack_t(FileIR_t *p_variantIR, + const string &p_functions_filename, int p_init_value, + bool p_verbose) + : Transform_t(p_variantIR), // initialize the Transform class so things like + // insertAssembly and getFileIR() can be used + m_init_value(p_init_value), // member variable inits, these will vary + // depending on your transform's objectives + m_verbose(p_verbose), m_num_transformed(0) { + // check whether to read in a list of functions to transform + if (p_functions_filename == "") { + cout << "Auto-initialize all functions" << endl; + m_funcs_to_init = + getFileIR()->getFunctions(); // use all functions from the IR + } else { + cout << "Auto-initialize functions specified in: " << p_functions_filename + << endl; + readFunctionsFromFile(p_functions_filename); // read functions from file + } +} + +// +// read list of functions to auto-initialize +// +// post conditions: set of functions to auto-initialize +// +void InitStack_t::readFunctionsFromFile(const string &p_filename) { + // get all functions for readability of the rest of the code + const auto &all_funcs = getFileIR()->getFunctions(); + + // open input file and check for successful open + ifstream functionsFile( + p_filename); // can't use auto decl here because of lack of copy + // constructor in ifstream class + if (!functionsFile.is_open()) + throw runtime_error("Cannot open " + p_filename); + + // read each whitespace-delimited function name from the input file. 
+ auto line = string(); + while (functionsFile >> line) { + // locate a function with the name read from the file. + const auto func_it = find_if(ALLOF(all_funcs), [&](const Function_t *f) { + return f->getName() == line; + }); + + // if found, log and insert it into the set to transform (unknown names are skipped silently) + if (func_it != end(all_funcs)) { + auto f = *func_it; + cout << "Adding " << f->getName() << " to function list" << endl; + m_funcs_to_init.insert(f); + } + } +} + +// +// Execute the transform by transforming all to-transform functions +// +// preconditions: the FileIR is read as from the IRDB; valid file listing +// functions to auto-initialize. postcondition: instructions added to +// auto-initialize stack for each specified function +// +// +bool InitStack_t::execute() { + // transform all functions + for (auto f : m_funcs_to_init) + initStack(f); + + // #ATTRIBUTE is a convention used to help find useful information in log + // files; force decimal because a std::hex set by earlier logging is sticky + cout << "#ATTRIBUTE InitStack::num_transformed=" << dec << m_num_transformed << endl; + + return m_num_transformed > 0; // true means success +} + +// +// preconditions : f is not NULL +// postconditions: stack auto-initialized if stack frame size > 0 +// +void InitStack_t::initStack(Function_t *f) { + // preconditions + assert(f != nullptr); + + // check that the frame size is in the area we care about + const auto frame_size = f->getStackFrameSize(); + + // nothing to init. 
+ if (frame_size == 0) + return; + + const auto num_locs = static_cast(ceil(frame_size / 4.0)); + + // debug output (force decimal: an earlier std::hex on cout is sticky) + cout << "Function: " << f->getName() + << " frame size: " << dec << f->getStackFrameSize() << endl; + + // not all functions have an entry point + const auto entry = f->getEntryPoint(); + if (!entry) + return; + + // log what we are doing, then restore decimal so later output is not hex + cout << "Function: " << f->getName() << " auto-initialize " << dec << num_locs + << " stack memory locations (4 bytes at a time) with value = " << hex + << m_init_value << dec << endl; + + // determine the registers to use on x86-32 or x86-64 + const auto sp_reg = + getFileIR()->getArchitectureBitWidth() == 64 ? "rsp" : "esp"; + const auto scratch_reg = + getFileIR()->getArchitectureBitWidth() == 64 ? "r11" : "ecx"; + + // Now, do the dirty work of inserting new assembly to initialize the stack. + // Insert these instructions at the start of the function (to initialize the + // stack frame before the function runs) Assume: flags are dead at function + // entry. Future work: Verify this is true using dead register list. Note: we + // spill a scratch register into the red zone at 120 bytes past the end of + // the frame + + const auto newInsns = insertAssemblyInstructionsBefore( + entry, + string() + " mov [%%1 + %%2], %%3\n" + " mov %%3, -%%4\n" + "L1: mov dword [%%1 + %%3 * 4 - 4], %%5\n" + " inc %%3\n" + " jnz 0\n" + " mov %%3, [%%1 + %%2]\n", + {sp_reg, to_string(-f->getStackFrameSize() - 120), scratch_reg, + to_string(num_locs), to_string(m_init_value)}); + + newInsns[4]->setTarget(newInsns[2]); // Link jnz to L1. + + // bump stats + m_num_transformed++; +} diff --git a/transforms/initialize_stack.hpp b/transforms/initialize_stack.hpp new file mode 100755 index 0000000..03f0628 --- /dev/null +++ b/transforms/initialize_stack.hpp @@ -0,0 +1,61 @@ +#pragma once + +#include +#include + +// +// Put the transform in its own namespace +// just to keep the header files easy to read. 
+// This is not an IRDB transform requirement, just good coding practice. +// +namespace InitStack { +using namespace std; +using namespace IRDB_SDK; + +// +// This class handles initializing stack frames to a constant value +// +// Note: Using private inheritance here for "principle of minimum access", +// but you can choose what's best for your needs. +// +class InitStack_t : private Transform_t { +public: + // construct an object + InitStack_t( + FileIR_t *p_variantIR, // the FileIR object to transform + const string &p_function_filename, // the name of a file with functions to + // transform. "" -> no file and + // transform all functions + int init_value = 0, // the value to write when initializing the stack + bool p_verbose = false // use verbose logging? + ); + + // execute the transform + // input: m_funcs_to_init the set of functions to transform, the fileIR to + // transform. output: the transformed fileIR, with extra instructions to init + // stack frames. return value: true -> success, false -> fail + bool execute(); + +private: + // methods + + // read in the given file full of function names to transform (called from + // constructor). input: the filename and FileIR to transform. output: + // m_funcs_to_init with the functions listed in the file + void readFunctionsFromFile(const string &p_filename); + + // initialize the stack for a given function + // input: the fileIR to transform + // output: the transformed fileIR + void initStack(Function_t *f); + + // data + set m_funcs_to_init; // the functions whose stacks this object + // should initialize + int m_init_value; // the value with which to init the stack. 
+ bool m_verbose; // do verbose logging + int m_num_transformed; // count of the functions that this object has + // transformed +}; + +} // namespace InitStack diff --git a/transforms/initialize_stack_driver.cpp b/transforms/initialize_stack_driver.cpp new file mode 100755 index 0000000..5cbe0d2 --- /dev/null +++ b/transforms/initialize_stack_driver.cpp @@ -0,0 +1,153 @@ +#include "initialize_stack.hpp" +#include + +using namespace std; +using namespace IRDB_SDK; +using namespace InitStack; + +// +// Print usage info +// +void usage(char *p_name) { + cerr << "Usage: " << p_name << " <variant_id>\n"; + cerr << "\t[--functions | -f <file>] Read in the functions to " + "auto-initialize " + << endl; + cerr << "\t[--initvalue | -i <value>] Specify stack " + "initialization value (default=0)" + << endl; + cerr << "\t[--verbose | -v] Verbose mode " + " " + << endl; + cerr << "\t[--help,--usage,-?,-h] Display this message " + " " + << endl; +} + +// +// The entry point for a stand-alone executable transform. +// Note: Thanos-enabled transforms are easier to write, faster to execute, and +// generally preferred. Stand-alone transforms may be useful if the transform +// has issues with memory leaks and/or memory errors. Memory issues in a stand +// alone transform cannot affect correctness of other transforms. +// +int main(int argc, char **argv) { + // + // Sanity check that the command line has at least a variant ID, otherwise we + // won't know what variant to operate on. 
+ // + if (argc < 2) { + usage(argv[0]); + exit(1); + } + + // constant parameters read from argv + const auto program_name = string(argv[0]); + const auto variantID = atoi(argv[1]); + + // initial values of parameters to parse + auto verbose = false; + auto funcs_filename = string(); + auto init_value = 0; + + // declare some options for the transform + const char *short_opts = "f:i:v?h"; + struct option long_options[] = {{"functions", required_argument, 0, 'f'}, + {"initvalue", required_argument, 0, 'i'}, + {"verbose", no_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {"usage", no_argument, 0, '?'}, + {0, 0, 0, 0}}; + + // parse the options in a standard getopt_long loop + while (true) { + int c = getopt_long(argc, argv, short_opts, long_options, nullptr); + if (c == -1) + break; + switch (c) { + case 'f': + funcs_filename = optarg; + cout << "Reading file with function specifiers: " << funcs_filename + << endl; + break; + case 'i': + init_value = strtoll(optarg, NULL, 0); + cout << " Stack initialization value: " << hex << init_value << dec << endl; + break; + case 'v': + verbose = true; + break; + case '?': + case 'h': + usage(argv[0]); + exit(1); + break; + default: + break; + } + } + + // stand alone transforms must setup the interface to the sql server + auto pqxx_interface = pqxxDB_t::factory(); + BaseObj_t::setInterface(pqxx_interface.get()); + + // stand alone transforms must create and read a variant ID from the database + auto pidp = VariantID_t::factory(variantID); + assert(pidp->isRegistered() == true); + + // stand alone transforms must create and read the main file's IR from the + // database + auto this_file = pidp->getMainFile(); + auto url = this_file->getURL(); + + // declare for later so we can return the right value + auto success = false; + + // now try to load the IR and execute a transform + try { + // Create and download the file's IR. 
+ // Note: this is achieved differently with thanos-enabled plugins + auto firp = FileIR_t::factory(pidp.get(), this_file); + + // sanity + assert(firp && pidp); + + // log + cout << "Transforming " << this_file->getURL() << endl; + + // create and invoke the transform + InitStack_t is(firp.get(), funcs_filename, init_value, verbose); + success = is.execute(); + + // conditionally write the IR back to the database on success + if (success) { + cout << "Writing changes for " << url << endl; + + // Stand alone transforms must manually write the IR back to the IRDB and + // commit the transactions + firp->writeToDB(); + + // and commit the transaction to postgres + pqxx_interface->commit(); + } else { + cout << "Skipping write back on failure. " << url << endl; + } + } catch (const DatabaseError_t &db_error) { + // log any database errors that might come up in the transform process + cerr << program_name << ": Unexpected database error: " << db_error + << "file url: " << url << endl; + // execute() may have succeeded before the write-back threw; report failure + success = false; + } catch (...) { + // log any other errors + cerr << program_name << ": Unexpected error file url: " << url << endl; + // same reason: never exit 0 when the changes were not committed + success = false; + } + + // + // return success code to driver (as a shell-style return value). 0=success, + // 1=warnings, 2=errors + // + return success ? 0 : 2; +}