Source code for androlyze.model.script.AndroScript


# encoding: utf-8

__author__ = "Nils Tobias Schmidt"
__email__ = "schmidt89 at informatik.uni-marburg.de"

from datetime import datetime

from androlyze.analyze.exception import AndroScriptError
from androlyze.log.Log import log
from androlyze.model.Resetable import Resetable
from androlyze.model.analysis.result.ResultObject import ResultObject
from androlyze.model.analysis.result.StaticResultKeys import *
from androlyze.model.script import ScriptUtil
from androlyze.storage.Constants import JSON_FILE_EXT
from androlyze.util.Util import timeit, sha256

[docs]class AndroScript(object, Resetable, Hashable):
    '''
    Base class for `androguard` scripts which offers a consistent way of logging the analysis results
    with the help of the :py:class:`~ResultObject`

    If you don't want json data as output you can use a different object for result logging.
    See :py:meth:`.AndroScript.custom_result_object`.

    Overwrite the `_analyze` function to write your custom script!
    Also set the options your script needs.

    See the methods prefixed with `needs`.

    Be sure to specify the script version with the `VERSION` variable!

    You can test your script with the `test` method.
    This helps to find errors and unregistered keys very fast.
    '''

    # Set your script version!
    VERSION = None

    def __init__(self):
        Hashable.__init__(self)

        self.__name = self.__class__.__name__

        self.reset()

    def __str__(self):
        if self.VERSION:
            return '%s %s' % (self.name, self.VERSION)
        return self.name

    def __repr__(self):
        return "%s" % (self.name)

    def __cmp__(self, other):
        if isinstance(other, AndroScript):
            return cmp(self.name, other.name)
        return 1

[docs]    def get_cres(self):
        return self.__cres

[docs]    def set_cres(self, value):
        self.__cres = value

[docs]    def del_cres(self):
        del self.__cres

[docs]    def get_res(self):
        return self.__res

[docs]    def set_res(self, value):
        self.__res = value

[docs]    def del_res(self):
        del self.__res

[docs]    def get_name(self):
        return self.__name

[docs]    def set_name(self, value):
        self.__name = value

[docs]    def del_name(self):
        del self.__name

[docs]    def get_file_name_ext(self):
        return self.__file_name_ext

[docs]    def set_file_name_ext(self, value):
        self.__file_name_ext = value

[docs]    def del_file_name_ext(self):
        del self.__file_name_ext

    file_name_ext = property(get_file_name_ext, set_file_name_ext, del_file_name_ext, "str, optional (default is `JSON_FILE_EXT`) : The file name extension.")
    res = property(get_res, set_res, del_res, "ResultObject : keeps the analysis results")
    cres = property(get_cres, set_cres, del_cres, "object, optional (default is None) : Custom result object for logging")
    name = property(get_name, set_name, del_name, "str : the name of the script (class name)")

[docs]    def analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
        '''
        Analyze the given `EAndroApk` and return the `ResultObject`.

        Parameters
        ----------
        apk: EAndroApk
        dalvik_vm_format: DalvikVMFormat
            Parsed .dex file.
            Only available if `needs_dalvik_vm_format` returns True.
        vm_analysis: VMAnalysis
            Dex analyzer.
            Only available if `needs_vmanalysis` returns True.
        gvm_analysis : GVMAnalysis

        Other Parameters
        ----------------
        log_script_meta : bool, optional (default is True)
            Can be used to disable logging of script meta infos at all.
            Otherwise they will be logged only once.

        Returns
        -------
        ResultObject

        Raises
        ------
        NotImplementedError
            If `AndroScript.VERSION` not specified
        '''
        res = self.res
        log_script_meta = kwargs.get("log_script_meta", True)

        if log_script_meta:
            self._log_script_meta_before_act_run(res)

        # analyze and measure time
        time_s = timeit(self._analyze,
                        *((apk, dalvik_vm_format, vm_analysis, gvm_analysis) +  args),
                         **kwargs)

        if log_script_meta and self.create_script_stats():
            self._log_script_meta_after_act_run(res, time_s)

        return self.res

    ############################################################
    #---Implement these functions in a subclass
    ############################################################

    def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
        '''
        Overwrite this function in apk subclass to build your own script!
        Use the `ResultObject` for logging.

        Parameters
        ----------
        apk: EAndroApk
        dalvik_vm_format: DalvikVMFormat
            Parsed .dex file.
            Only available if `needs_dalvik_vm_format` returns True.
        vm_analysis: VMAnalysis
            Dex analyzer.
            Only available if `needs_vmanalysis` returns True.
        gvm_analysis : GVMAnalysis
        '''
        raise NotImplementedError

[docs]    def custom_result_object(self):
        '''
        Overwrite this method, if you want to use your own result logging framework/object,
        You can supply it here and access it via `self.cres`.

        E.g. you could return ("", "txt") for simply logging with a string to a .txt file.

        The str representation of it will be stored!
        Automatically stores your data (str() of `self.cres`) in mongodb's gridfs.

        The `ResultObject` in `self.res` is still existing and internally used to log some meta information.

        Returns
        -------
        tuple<object, str>
            First argument is the result object you want to use,
            the second is the file name extension used for storage (without a leading point)
        '''
        raise NotImplementedError

[docs]    def reset(self):
        '''
        Reset the `AndroScript` so that it can be used for a new analysis.
        If you do a custom initialization in your script,
        you probably want do put the init code inside this method.

        Don't forget to call the super `reset` !
        '''
        # we need to (re)init the result object
        self.__res, self.__file_name_ext = ResultObject(None), JSON_FILE_EXT

        # custom result object
        try:
            self.__cres, self.__file_name_ext = self.custom_result_object()
        except NotImplementedError:
            self.__cres = None

    ############################################################
    #---Script requirements
    ############################################################

[docs]    def needs_dalvik_vm_format(self):
        ''' Gives access to the `DalvikVMFormat` object which is a parser for the classes.dex file '''
        return False

[docs]    def needs_vmanalysis(self):
        ''' Gives access to the `VMAnalysis` object which is a analyzer for the `DalvikVMFormat` object '''
        return False

[docs]    def needs_gvmanalysis(self):
        ''' Gives access to the `GVMAnalysis` object.
        Creates a graph which you can use for export (gexf etc) or do your custom stuff
        '''
        return False

[docs]    def needs_xref(self):
        ''' Create cross references. Automatically implies `needs_dalvik_vm_format`, `needs_vmanalysis` and `needs_gvmanalysis` '''
        return False

[docs]    def needs_dref(self):
        ''' Create data references. Automatically implies `needs_dalvik_vm_format`, `needs_vmanalysis` and `needs_gvmanalysis` '''
        return False

    ############################################################
    #---Options
    ############################################################

[docs]    def create_script_stats(self):
        ''' If true, create some script statistics and
        write them into the `ResultObject` '''
        return False

[docs]    def is_big_res(self):
        ''' Return true, if your result may exceed 16mb.
        This will store your data (str() of `self.cres`) in mongodb's gridfs.

        You don't need to return true, if you're using a different result object! (see :py:meth:`.custom_result_object`)
        This will be done automatically.
        '''
        return False

    ############################################################
    #---Testing stuff
    ############################################################

    @staticmethod
[docs]    def test(script, apk_paths):
        '''
        Use this function to develop and test your script.

        E.g. find unregistered keys and other errors.

        Parameters
        ----------
        script : type
            The reference to the script which shall be tested (not instantiated!)
        apk_paths : iterable<str>
            Paths to apks

        Examples
        --------
        >>> for res in AndroScript.test(ClassDetails, ["../../../testenv/apks/a2dp.Vol.apk"]):
        ...     # get result object
        ...     print res
        ...     # get json
        ...     print res.write_to_json()

        Returns
        -------
        list<ResultObject>
            The `ResultObject` for every analyzed apk
        '''
        # no circular import
        from androlyze.analyze.Analyzer import Analyzer

        res = []
        try:
            # init scripts to get options
            inst_script_list = ScriptUtil.instantiate_scripts([script])
            script_options = ScriptUtil.get_minimum_script_options(inst_script_list)

            script_list = [script]
            # options: storage, script_list, script_hashes, min_script_needs, apks_or_paths
            # but the analyzer needs the scripts uninitialized!
            ana = Analyzer(None, script_list, None, script_options, apk_paths)
            res = ana.analyze(test = True)
        except AndroScriptError as e:
            log.exception(e)

        return res

    ############################################################
    #---Script meta logging
    ############################################################

    def _log_script_meta_before_act_run(self, res, *args, **kwargs):
        ''' Log script meta infos before actual script run '''
        if self.VERSION is None:
            raise NotImplementedError("You need to define the version of your script!")

        res.register_keys([RESOBJ_SCRIPT_META_NAME, RESOBJ_SCRIPT_META_HASH, RESOBJ_SCRIPT_META_ANALYSIS_DATE, RESOBJ_SCRIPT_META_VERSION], RESOBJ_SCRIPT_META)
        res.log(RESOBJ_SCRIPT_META_NAME, self.name, RESOBJ_SCRIPT_META)
        res.log(self.KEY_HASH, self.hash, RESOBJ_SCRIPT_META)
        res.log(RESOBJ_SCRIPT_META_VERSION, self.VERSION, RESOBJ_SCRIPT_META)

        # add analysis date
        res.log(RESOBJ_SCRIPT_META_ANALYSIS_DATE, datetime.utcnow(), RESOBJ_SCRIPT_META)

    def _log_script_meta_after_act_run(self, res, time_s, *args, **kwargs):
        ''' Log script meta infos after actual script run '''
        # log time
        res.register_keys([RESOBJ_SCRIPT_META_TIME_SCRIPT], RESOBJ_SCRIPT_META)
        res.log(RESOBJ_SCRIPT_META_TIME_SCRIPT, time_s, RESOBJ_SCRIPT_META)

    ############################################################
    #---Other
    ############################################################

[docs]    def uses_custom_result_object(self):
        ''' Check if the script uses a custom result object for logging '''
        return self.cres is not None

[docs]    def add_apk_androguard_analyze_time(self, seconds):
        ''' Add the androguard analyze time to the `ResultObject`.
        This is also a good moment to calculate the complete time. '''
        if self.create_script_stats():
            res = self.res

            # log androguard open time
            res.register_keys([RESOBJ_SCRIPT_META_ANALYZE_TIME, RESOBJ_SCRIPT_META_TIME_TOTAL], RESOBJ_SCRIPT_META)
            res.log(RESOBJ_SCRIPT_META_ANALYZE_TIME, seconds, RESOBJ_SCRIPT_META)

            # log total time
            total_time = seconds + res[RESOBJ_SCRIPT_META][RESOBJ_SCRIPT_META_TIME_SCRIPT]
            res.log(RESOBJ_SCRIPT_META_TIME_TOTAL, total_time, RESOBJ_SCRIPT_META)

[docs]    def result_dict(self, gen_id = False):
        ''' Returns an `OrderedDict` holding information about the analyzed `Apk` as well as the script,
        as well as eventually user logged infos.

        Parameters
        ----------
        gen_id : bool, optional (default is False)
            Generate an id = sha256(apk hash + script name)
            and store it under the "_id" key.

        Returns
        -------
        OrderedDict
        '''
        res_dict = self.res.description_dict()
        if gen_id:
            res_dict[RESOBJ_ID] = self.gen_unique_id()
        return res_dict

[docs]    def gen_unique_id(self):
        ''' Generate an unique id = sha256(apk hash + script name) '''
        try:
            return sha256(self.res.apk.hash + self.name)
        except AttributeError:
            log.warn('Could not calculate unique id for %s', self)
            raise

[docs]    def get_file_name(self):
        ''' Get the file name used for storage '''
        apk = self.res.apk
        return '%s_%s_%s.%s' % (apk.package_name, apk.version_name, self.name, self.file_name_ext)

    @staticmethod
[docs]    def load_from_result_dict(res_dict, apk = None):
        '''
        Load an `AndroScript` from the `res_dict`.

        Parameters
        ----------
        res_dict : dict
            See `ResultObject.description_dict`
        apk : Apk, optional (default is None)
            Link to `ResultObject` to `apk`
        '''
        ascript = AndroScript()

        ascript.name = res_dict[RESOBJ_SCRIPT_META][RESOBJ_SCRIPT_META_NAME]
        ascript.hash = res_dict[RESOBJ_SCRIPT_META][RESOBJ_SCRIPT_META_HASH]
        result_object = ResultObject()
        result_object.results = res_dict
        ascript.res = result_object

        # link to apk
        ascript.res.apk = apk

        return ascript