Source code for androlyze.storage.resultdb.MongoUtil


# encoding: utf-8

__author__ = "Nils Tobias Schmidt"
__email__ = "schmidt89 at informatik.uni-marburg.de"

'''
Utility functions for mongodb.
'''

from androlyze.log.Log import log
from androlyze.model.script.ScriptUtil import dict2json
from androlyze.storage import Util
from gridfs.grid_file import GridOutCursor
from pymongo.cursor import Cursor

# in operator
MONGODB_IN_OPERATOR = "$in"

############################################################
#---MongoDB key escaping
############################################################

def escape_key(k):
    '''
    Escape key `k` so that it conforms to mongodb's key restrictions.

    Every dot is replaced with an underscore and a leading dollar sign
    gets an underscore prepended. Both replacements are applied to the same
    key, so a key like ``$a.b`` has its dots escaped, too.

    Parameters
    ----------
    k : str
        The key to escape.

    Returns
    -------
    str
        The escaped key.

    Examples
    --------
    >>> escape_key('a.b')
    'a_b'
    >>> escape_key('$a.b')
    '_$a_b'

    See Also
    --------
    http://docs.mongodb.org/manual/faq/developers/#dollar-sign-operator-escaping
    '''
    DOT = '.'
    DOT_REPL = '_'
    DOLLAR = '$'
    DOLLAR_REPL = '_$'

    # replace dot (no-op if the key does not contain any)
    replaced_key = k.replace(DOT, DOT_REPL)

    # replace starting dollar; operate on the already dot-escaped key so that
    # the dot replacement is not thrown away when both rules apply
    if replaced_key.startswith(DOLLAR):
        replaced_key = replaced_key.replace(DOLLAR, DOLLAR_REPL, 1)

    return replaced_key
def escape_keys(_dict):
    '''
    Build a variant of `_dict` whose keys can be stored in mongodb.

    The work is delegated to `Util.escape_dict` with `escape_key` as the
    escaping function; only the keys are escaped, the values are left alone.
    The escaping is done on a deepcopy of `_dict`, not in-place!

    Parameters
    ----------
    _dict : dict
        The dictionary whose keys shall be escaped.

    Returns
    -------
    dict
        The dictionary with escaped keys.
    '''
    escaped = Util.escape_dict(
        _dict,
        escape_key,
        escape_keys = True,
        escape_values = False
    )
    return escaped
############################################################
#---MongoDB query builder helper functions
############################################################
def get_attr_str(key, attr, gridfs = False):
    '''
    Build the dotted attribute string for `attr` below `key`.

    Parameters
    ----------
    key : str
        The parent key.
    attr : str
        The attribute below `key`.
    gridfs : bool, optional (default is False)
        If true, the string is additionally prefixed with
        `GRIDFS_FILES_METADATA`.

    Returns
    -------
    str
        "<key>.<attr>" or "<GRIDFS_FILES_METADATA>.<key>.<attr>" if `gridfs`.
    '''
    from androlyze.storage.resultdb.ResultDatabaseStorage import GRIDFS_FILES_METADATA

    if not gridfs:
        return '%s.%s' % (key, attr)

    # gridfs: the attribute is addressed through the gridfs metadata prefix
    return '%s.%s.%s' % (GRIDFS_FILES_METADATA, key, attr)
def build_apk_meta_where(kwargs, gridfs = False):
    '''
    Create the where clause from `kwargs` for the apk meta key.

    Parameters
    ----------
    kwargs : dict
        The keys "package_name", "apk_hash", "version_name" and "tag" are
        looked up. Missing keys and values that are None are ignored.
    gridfs : bool, optional (default is False)
        Passed on to `get_attr_str` to build the attribute strings.

    Returns
    -------
    list<tuple<str, object>>
        One (attribute string, value) pair for each value that is set,
        in the order package name, hash, version name, tag.
    '''
    from androlyze.model.analysis.result.StaticResultKeys import RESOBJ_APK_META, \
        RESOBJ_APK_META_PACKAGE_NAME, RESOBJ_APK_META_HASH, \
        RESOBJ_APK_META_VERSION_NAME, RESOBJ_APK_META_TAG

    # (name in kwargs, apk meta attribute), order defines the clause order
    lookups = (
        ("package_name", RESOBJ_APK_META_PACKAGE_NAME),
        ("apk_hash", RESOBJ_APK_META_HASH),
        ("version_name", RESOBJ_APK_META_VERSION_NAME),
        ("tag", RESOBJ_APK_META_TAG),
    )

    clauses = []
    for kwarg_name, meta_attr in lookups:
        value = kwargs.get(kwarg_name, None)
        if value is None:
            continue
        clauses.append((get_attr_str(RESOBJ_APK_META, meta_attr, gridfs), value))

    return clauses
def build_script_meta_where(kwargs, gridfs = False):
    '''
    Create the where clause from `kwargs` for the script meta key.

    Parameters
    ----------
    kwargs : dict
        The keys "script_hash", "script_name" and "script_version" are
        looked up. Missing keys and values that are None are ignored.
    gridfs : bool, optional (default is False)
        Passed on to `get_attr_str` to build the attribute strings.

    Returns
    -------
    list<tuple<str, object>>
        One (attribute string, value) pair for each value that is set,
        in the order hash, name, version.
    '''
    from androlyze.model.analysis.result.StaticResultKeys import RESOBJ_SCRIPT_META, \
        RESOBJ_SCRIPT_META_HASH, RESOBJ_SCRIPT_META_NAME, RESOBJ_SCRIPT_META_VERSION

    # (name in kwargs, script meta attribute), order defines the clause order
    lookups = (
        ("script_hash", RESOBJ_SCRIPT_META_HASH),
        ("script_name", RESOBJ_SCRIPT_META_NAME),
        ("script_version", RESOBJ_SCRIPT_META_VERSION),
    )

    clauses = []
    for kwarg_name, meta_attr in lookups:
        value = kwargs.get(kwarg_name, None)
        if value is None:
            continue
        clauses.append((get_attr_str(RESOBJ_SCRIPT_META, meta_attr, gridfs), value))

    return clauses
def build_checks_filter(
        checks_non_empty_list = None, checks_empty_list = None,
        checks_true = None, checks_false = None,
        checks_not_null = None, checks_null = None,
        conjunction = 'or'
        ):
    '''
    Helper function to easily check if some value has been set.
    E.g. == [], != [], != null, == null, == true, == false.

    Parameters
    ----------
    checks_non_empty_list : iterable<str>, optional (default is None)
        Check the keys against a non empty list.
    checks_empty_list : iterable<str>, optional (default is None)
        Check the keys against an empty list.
    checks_true : iterable<str>, optional (default is None)
        Check if the values of the given keys are true.
    checks_false : iterable<str>, optional (default is None)
        Check if the values of the given keys are false.
    checks_not_null : iterable<str>, optional (default is None)
        Check if the values of the given keys are not null (python None).
    checks_null : iterable<str>, optional (default is None)
        Check if the values of the given keys are null (python None).
    conjunction : str, optional (default is 'or')
        Specifies how to link the filter arguments if there is more than one.
        'or' (case-insensitive) links them with '$or',
        every other value links them with '$and'.

    For all `checks_*` arguments None is treated like an empty iterable.

    Examples
    --------
    >>> build_checks_filter(checks_non_empty_list = ['logged.enum'], checks_true = ['logged.bool'])
    {'$or': [{'logged.enum': {'$ne': []}}, {'logged.bool': True}]}

    >>> build_checks_filter(checks_empty_list = ["foo"])
    {'foo': []}

    Returns
    -------
    dict
        Dictionary describing the checks. Can be used for mongodb.
        Empty if no check has been given.
    '''
    OPERATOR_NON_EQ = '$ne'

    def keys_of(keys):
        ''' Treat a check argument that has not been supplied as empty '''
        return () if keys is None else keys

    # the order of the clauses is:
    # non empty list, empty list, true, false, null, not null
    clauses = []
    clauses.extend({key : {OPERATOR_NON_EQ : []}} for key in keys_of(checks_non_empty_list))
    clauses.extend({key : []} for key in keys_of(checks_empty_list))
    clauses.extend({key : True} for key in keys_of(checks_true))
    clauses.extend({key : False} for key in keys_of(checks_false))
    clauses.extend({key : None} for key in keys_of(checks_null))
    clauses.extend({key : {OPERATOR_NON_EQ : None}} for key in keys_of(checks_not_null))

    if not clauses:
        return {}

    if len(clauses) == 1:
        # a single clause does not need a conjunction, hand out a copy of it
        return dict(clauses[0])

    # apply conjunction (n-digit operator, n > 1)
    operator = '$or' if conjunction.lower() == 'or' else '$and'
    return {operator : clauses}
############################################################
#---Results
############################################################
def split_result_ids(results):
    '''
    Split the id's into non-gridfs and gridfs id's.

    `results` is traversed exactly once, so one-shot iterables
    (e.g. generators) are split correctly, too.

    Parameters
    ----------
    results : iterable<tuple<str, bool>>
        First component is the id of the entry
        and the second a boolean indication if the result has been stored in gridfs.
        Entries whose second component is neither `True` nor `False` are ignored.
        See e.g. output of :py:meth:`.ResultDatabaseStorage.store_result_for_apk`

    Returns
    -------
    tuple<list<str>, list<str>>
        First component holds the non-gridfs id's, the second the gridfs id's
    '''
    non_gridfs_ids = []
    gridfs_ids = []

    for entry in results:
        stored_in_gridfs = entry[1]
        # identity checks on purpose: only real booleans are accepted
        if stored_in_gridfs is True:
            gridfs_ids.append(entry[0])
        elif stored_in_gridfs is False:
            non_gridfs_ids.append(entry[0])

    return non_gridfs_ids, gridfs_ids
def format_query_result_db(res_cursor, distict_generator = False, count = False, raw = False, html = False):
    '''
    Format the results from the result db (mongodb).

    Parameters
    ----------
    res_cursor : gridfs.grid_file.GridOutCursor or generator<object> or pymongo.cursor.Cursor
        First if non_document and non_document_raw.
        Second if distinct values wanted.
        Third otherwise.
    distict_generator : bool, optional (default is False)
        Res is generator<object> created from the distinct(...) method of mongodb.
        Each dict is converted to json, strings are taken as they are.
        Values of any other type are skipped.
    count : bool, optional (default is False)
        Only print count, not results.
        The count is 0 if `res_cursor` is neither a mongodb cursor
        nor `distict_generator` is set.
    raw : bool, optional (default is False)
        Print raw data from gridfs
        Otherwise print json.
        If `raw` will not be converted to html!
    html : bool, optional (default is False)
        Format as html.
        Uses `json2html` if it can be imported, otherwise `pygments`.

    Returns
    -------
    str
        The formatted results, or the count if `count`.
    None
        If a `PyMongoError` has been raised. The error is logged, not re-raised.
    '''
    from pymongo.errors import PyMongoError
    from androlyze.ui.util import HtmlUtil

    # if html enabled convert to table view if `json2html` is present
    # otherwise use pygmentize
    json_convert = lambda json : json
    if html:
        try:
            from json2html import json2html
            json_convert = lambda j : json2html.convert(json = j)
        except ImportError:
            from pygments import highlight
            from pygments.formatters import HtmlFormatter
            from pygments.lexers import get_lexer_by_name
            json_convert = lambda json: highlight(json, get_lexer_by_name('json'), HtmlFormatter())

    # collect results as list<str>
    resl = []

    def anl(text):
        ''' Append a newline '''
        # dont format raw data as html
        return '%s\n' % text if not html or raw else HtmlUtil.newline(HtmlUtil.prefy(text))

    try:
        # return count
        if count:
            cnt = 0

            if is_pymongo_cursor(res_cursor):
                cnt = res_cursor.count()
            elif distict_generator:
                cnt = len(list(res_cursor))

            return '%d' % cnt

        if distict_generator:
            for r in sorted(res_cursor):
                if isinstance(r, dict):
                    # convert the current item, not the whole generator
                    resl.append(dict2json(r))
                elif isinstance(r, (str, unicode)):
                    resl.append(r)
        else:
            for i, res in enumerate(res_cursor, 1):
                # every result is preceded by a numbered delimiter
                delimiter = '/* %d */' % i
                text = HtmlUtil.newline(delimiter) if html else delimiter
                if html:
                    text = HtmlUtil.redify(text)
                resl.append(text)

                # return raw data
                if raw:
                    # gridfs.grid_file.GridOut
                    for gridout_obj in res:
                        resl.append(gridout_obj)
                # return json
                else:
                    j = dict2json(res)
                    # convert json (if enabled)
                    j = json_convert(j)
                    resl.append(j)

        # return result by joining single strings
        return ''.join([anl(res_str) for res_str in resl])

    except PyMongoError as e:
        # best-effort: the error is only logged, the caller gets None
        log.exception(e)
############################################################
#---Cursor stuff
############################################################
def is_pymongo_cursor(cursor):
    '''
    Check if `cursor` is a mongodb cursor.

    Returns
    -------
    bool
        True if `cursor` is an instance of `GridOutCursor` or `Cursor`.
    '''
    cursor_types = (GridOutCursor, Cursor)
    return isinstance(cursor, cursor_types)
if __name__ == '__main__':
    # manual smoke test: print a simple checks filter
    # and a result structure after key escaping
    import json
    from collections import OrderedDict

    print(build_checks_filter(checks_empty_list = ["foo"]))

    script_meta = OrderedDict([
        ('name', 'CodePermissions'),
        ('sha256', None),
        ('analysis date', 'time'),
        ('version', '0.1')
    ])
    # the nested key contains dots and a dollar sign that have to be escaped
    code_permissions = (
        'code',
        OrderedDict([('BLUETOOTH', [{'La2dp.Vol.service$1.onReceive': ''}])])
    )
    test = OrderedDict([
        ('script meta', script_meta),
        ('code permissions', code_permissions)
    ])

    escaped = escape_keys(test)
    print(json.dumps(escaped, indent = 4))