Optimizer's skeleton: use advisor to optimize config options (#4169)
Summary: In https://github.com/facebook/rocksdb/pull/3934 we introduced advisor scripts that suggest changes to the config options based on the log file and statistics from a run of RocksDB. The optimizer runs the advisor on a benchmark application in a loop and automatically applies the suggested changes until the config options are optimized. This is a work in progress; this patch is the initial skeleton for the optimizer. The sample application currently run in the loop is db_bench.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4169
Reviewed By: maysamyabandeh
Differential Revision: D9023671
Pulled By: poojam23
fbshipit-source-id: a6192d475c462cf6eb2b316716f97cb400fcb64d
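The loop described above is, at its core: run the benchmark, ask the advisor which rules triggered, apply one rule's suggestions, and keep the change only if the chosen metric improves. A minimal illustrative sketch of that loop follows; the names (runner, advisor and their methods) are placeholders, not the exact classes added in this patch.

    # Illustrative sketch only -- 'runner' and 'advisor' are placeholders.
    def optimize(runner, advisor, options, max_iter=10):
        data_sources, best_metric = runner.run_experiment(options)
        for _ in range(max_iter):
            rules = advisor.get_triggered_rules(data_sources)
            if not rules:
                break  # nothing left to suggest
            candidate = advisor.apply_suggestions(rules, options)
            new_sources, new_metric = runner.run_experiment(candidate)
            if runner.is_metric_better(new_metric, best_metric):
                # keep the improved configuration
                options, data_sources, best_metric = candidate, new_sources, new_metric
            # else: discard the candidate (backtrack) and try another rule
        return options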
parent bdc6abd0b4
commit 134a52e144

39 tools/advisor/advisor/bench_runner.py (new file)
@@ -0,0 +1,39 @@
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

from abc import ABC, abstractmethod
import re


class BenchmarkRunner(ABC):
    @staticmethod
    @abstractmethod
    def is_metric_better(new_metric, old_metric):
        pass

    @abstractmethod
    def run_experiment(self):
        # should return a list of DataSource objects
        pass

    @staticmethod
    def get_info_log_file_name(log_dir, db_path):
        # Example: DB Path = /dev/shm and OPTIONS file has option
        # db_log_dir=/tmp/rocks/, then the name of the log file will be
        # 'dev_shm_LOG' and its location will be /tmp/rocks. If db_log_dir is
        # not specified in the OPTIONS file, then the location of the log file
        # will be /dev/shm and the name of the file will be 'LOG'
        file_name = ''
        if log_dir:
            # refer GetInfoLogPrefix() in rocksdb/util/filename.cc
            # example db_path: /dev/shm/dbbench
            file_name = db_path[1:]  # to ignore the leading '/' character
            to_be_replaced = re.compile('[^0-9a-zA-Z\-_\.]')
            for character in to_be_replaced.findall(db_path):
                file_name = file_name.replace(character, '_')
            if not file_name.endswith('_'):
                file_name += '_'
        file_name += 'LOG'
        return file_name
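Given the behaviour described in the comments of get_info_log_file_name(), a quick illustration (paths are examples only):

    # BenchmarkRunner.get_info_log_file_name('/tmp/rocks/', '/dev/shm')
    #     -> 'dev_shm_LOG'   (the file lives under db_log_dir, i.e. /tmp/rocks/)
    # BenchmarkRunner.get_info_log_file_name(None, '/dev/shm')
    #     -> 'LOG'           (no db_log_dir, so the file lives in the DB path)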
134 tools/advisor/advisor/config_optimizer_example.py (new file)
@@ -0,0 +1,134 @@
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

import argparse
from advisor.db_config_optimizer import ConfigOptimizer
from advisor.db_log_parser import NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import RulesSpec


CONFIG_OPT_NUM_ITER = 10


def main(args):
    # initialise the RulesSpec parser
    rule_spec_parser = RulesSpec(args.rules_spec)
    # initialise the benchmark runner
    bench_runner_module = __import__(
        args.benchrunner_module, fromlist=[args.benchrunner_class]
    )
    bench_runner_class = getattr(bench_runner_module, args.benchrunner_class)
    ods_args = {}
    if args.ods_client and args.ods_entity:
        ods_args['client_script'] = args.ods_client
        ods_args['entity'] = args.ods_entity
        if args.ods_key_prefix:
            ods_args['key_prefix'] = args.ods_key_prefix
    db_bench_runner = bench_runner_class(args.benchrunner_pos_args, ods_args)
    # initialise the database configuration
    db_options = DatabaseOptions(args.rocksdb_options, args.misc_options)
    # set the frequency at which stats are dumped in the LOG file and the
    # location of the LOG file.
    db_log_dump_settings = {
        "DBOptions.stats_dump_period_sec": {
            NO_COL_FAMILY: args.stats_dump_period_sec
        }
    }
    db_options.update_options(db_log_dump_settings)
    # initialise the configuration optimizer
    config_optimizer = ConfigOptimizer(
        db_bench_runner,
        db_options,
        rule_spec_parser,
        args.base_db_path
    )
    # run the optimiser to improve the database configuration for given
    # benchmarks, with the help of expert-specified rules
    final_db_options = config_optimizer.run()
    # generate the final rocksdb options file
    print(
        'Final configuration in: ' +
        final_db_options.generate_options_config('final')
    )
    print(
        'Final miscellaneous options: ' +
        repr(final_db_options.get_misc_options())
    )


if __name__ == '__main__':
    '''
    An example run of this tool from the command-line would look like:
    python3 -m advisor.config_optimizer_example
    --base_db_path=/tmp/rocksdbtest-155919/dbbench
    --rocksdb_options=temp/OPTIONS_boot.tmp --misc_options bloom_bits=2
    --rules_spec=advisor/rules.ini --stats_dump_period_sec=20
    --benchrunner_module=advisor.db_bench_runner
    --benchrunner_class=DBBenchRunner --benchrunner_pos_args ./../../db_bench
    readwhilewriting use_existing_db=true duration=90
    '''
    parser = argparse.ArgumentParser(description='This script is used for\
        searching for a better database configuration')
    parser.add_argument(
        '--rocksdb_options', required=True, type=str,
        help='path of the starting Rocksdb OPTIONS file'
    )
    # these are options that are column-family agnostic and are not yet
    # supported by the Rocksdb Options file: eg. bloom_bits=2
    parser.add_argument(
        '--base_db_path', required=True, type=str,
        help='path for the Rocksdb database'
    )
    parser.add_argument(
        '--misc_options', nargs='*',
        help='whitespace-separated list of options that are not supported ' +
        'by the Rocksdb OPTIONS file, given in the ' +
        '<option_name>=<option_value> format eg. "bloom_bits=2 ' +
        'rate_limiter_bytes_per_sec=128000000"')
    parser.add_argument(
        '--rules_spec', required=True, type=str,
        help='path of the file containing the expert-specified Rules'
    )
    parser.add_argument(
        '--stats_dump_period_sec', required=True, type=int,
        help='the frequency (in seconds) at which STATISTICS are printed to ' +
        'the Rocksdb LOG file'
    )
    # ODS arguments
    parser.add_argument(
        '--ods_client', type=str, help='the ODS client binary'
    )
    parser.add_argument(
        '--ods_entity', type=str,
        help='the servers for which the ODS stats need to be fetched'
    )
    parser.add_argument(
        '--ods_key_prefix', type=str,
        help='the prefix that needs to be attached to the keys of time ' +
        'series to be fetched from ODS'
    )
    # benchrunner_module example: advisor.db_benchmark_client
    parser.add_argument(
        '--benchrunner_module', required=True, type=str,
        help='the module containing the BenchmarkRunner class to be used by ' +
        'the Optimizer, example: advisor.db_bench_runner'
    )
    # benchrunner_class example: DBBenchRunner
    parser.add_argument(
        '--benchrunner_class', required=True, type=str,
        help='the name of the BenchmarkRunner class to be used by the ' +
        'Optimizer, should be present in the module provided in the ' +
        'benchrunner_module argument, example: DBBenchRunner'
    )
    parser.add_argument(
        '--benchrunner_pos_args', nargs='*',
        help='whitespace-separated positional arguments that are passed on ' +
        'to the constructor of the BenchmarkRunner class provided in the ' +
        'benchrunner_class argument, example: "use_existing_db=true ' +
        'duration=900"'
    )
    args = parser.parse_args()
    main(args)
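Because --benchrunner_module and --benchrunner_class are resolved dynamically with __import__()/getattr(), any class that implements the BenchmarkRunner interface can be plugged into the optimizer. A hypothetical sketch (the module name my_runner and class MyRunner are made up for illustration):

    # Hypothetical runner that could be passed via
    # --benchrunner_module=my_runner --benchrunner_class=MyRunner
    from advisor.bench_runner import BenchmarkRunner

    class MyRunner(BenchmarkRunner):
        def __init__(self, positional_args, ods_args=None):
            self.args = positional_args

        @staticmethod
        def is_metric_better(new_metric, old_metric):
            # e.g. if the metric were latency, lower would be better
            return new_metric <= old_metric

        def run_experiment(self, db_options, db_path):
            # run the workload and return (data_sources, metric); omitted here
            raise NotImplementedError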
312 tools/advisor/advisor/db_bench_runner.py (new file)
@@ -0,0 +1,312 @@
|
||||
from advisor.bench_runner import BenchmarkRunner
|
||||
from advisor.db_log_parser import DataSource, DatabaseLogs, NO_COL_FAMILY
|
||||
from advisor.db_options_parser import DatabaseOptions
|
||||
from advisor.db_stats_fetcher import (
|
||||
LogStatsParser, OdsStatsFetcher, DatabasePerfContext
|
||||
)
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
|
||||
'''
|
||||
NOTE: This is not thread-safe, because the output file is simply overwritten.
|
||||
'''
|
||||
|
||||
|
||||
class DBBenchRunner(BenchmarkRunner):
|
||||
OUTPUT_FILE = "temp/dbbench_out.tmp"
|
||||
ERROR_FILE = "temp/dbbench_err.tmp"
|
||||
DB_PATH = "DB path"
|
||||
THROUGHPUT = "ops/sec"
|
||||
PERF_CON = " PERF_CONTEXT:"
|
||||
|
||||
@staticmethod
|
||||
def is_metric_better(new_metric, old_metric):
|
||||
# for db_bench 'throughput' is the metric returned by run_experiment
|
||||
return new_metric >= old_metric
|
||||
|
||||
@staticmethod
|
||||
def get_opt_args_str(misc_options_dict):
|
||||
optional_args_str = ""
|
||||
for option_name, option_value in misc_options_dict.items():
|
||||
if option_value:
|
||||
optional_args_str += (
|
||||
" --" + option_name + "=" + str(option_value)
|
||||
)
|
||||
return optional_args_str
|
||||
|
||||
def __init__(self, positional_args, ods_args=None):
|
||||
# parse positional_args list appropriately
|
||||
self.db_bench_binary = positional_args[0]
|
||||
self.benchmark = positional_args[1]
|
||||
self.db_bench_args = None
|
||||
# TODO(poojam23): move to unittest with method get_available_workloads
|
||||
self.supported_benchmarks = None
|
||||
if len(positional_args) > 2:
|
||||
# options list with each option given as "<option>=<value>"
|
||||
self.db_bench_args = positional_args[2:]
|
||||
# save ods_args if provided
|
||||
self.ods_args = ods_args
|
||||
|
||||
def _parse_output(self, get_perf_context=False):
|
||||
'''
|
||||
Sample db_bench output after running 'readwhilewriting' benchmark:
|
||||
DB path: [/tmp/rocksdbtest-155919/dbbench]\n
|
||||
readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s (3433828\
|
||||
of 5427999 found)\n
|
||||
PERF_CONTEXT:\n
|
||||
user_key_comparison_count = 500466712, block_cache_hit_count = ...\n
|
||||
'''
|
||||
output = {
|
||||
self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None
|
||||
}
|
||||
perf_context_begins = False
|
||||
with open(self.OUTPUT_FILE, 'r') as fp:
|
||||
for line in fp:
|
||||
if line.startswith(self.benchmark):
|
||||
print(line) # print output of db_bench run
|
||||
token_list = line.strip().split()
|
||||
for ix, token in enumerate(token_list):
|
||||
if token.startswith(self.THROUGHPUT):
|
||||
output[self.THROUGHPUT] = (
|
||||
float(token_list[ix - 1])
|
||||
)
|
||||
break
|
||||
elif line.startswith(self.PERF_CON):
|
||||
perf_context_begins = True
|
||||
elif get_perf_context and perf_context_begins:
|
||||
# Sample perf_context output:
|
||||
# user_key_comparison_count = 500, block_cache_hit_count =\
|
||||
# 468, block_read_count = 580, block_read_byte = 445, ...
|
||||
token_list = line.strip().split(',')
|
||||
perf_context = {
|
||||
tk.split('=')[0].strip(): tk.split('=')[1].strip()
|
||||
for tk in token_list
|
||||
if tk
|
||||
}
|
||||
# TODO(poojam23): this is a hack and should be replaced
|
||||
# with the timestamp that db_bench will provide per printed
|
||||
# perf_context
|
||||
timestamp = int(time.time())
|
||||
perf_context_ts = {}
|
||||
for stat in perf_context.keys():
|
||||
perf_context_ts[stat] = {
|
||||
timestamp: int(perf_context[stat])
|
||||
}
|
||||
output[self.PERF_CON] = perf_context_ts
|
||||
perf_context_begins = False
|
||||
elif line.startswith(self.DB_PATH):
|
||||
output[self.DB_PATH] = (
|
||||
line.split('[')[1].split(']')[0]
|
||||
)
|
||||
return output
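Given the sample db_bench output shown in the docstring of _parse_output(), the returned dictionary would look roughly like this (timestamp and perf_context entries abridged; the keys are the class constants defined above):

    # {
    #     'ops/sec': 60305.0,
    #     'DB path': '/tmp/rocksdbtest-155919/dbbench',
    #     ' PERF_CONTEXT:': {
    #         'user_key_comparison_count': {<unix_timestamp>: 500466712},
    #         'block_cache_hit_count': {<unix_timestamp>: ...},
    #         ...
    #     }
    # }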
|
||||
|
||||
def get_log_options(self, db_options, db_path):
|
||||
# get the location of the LOG file and the frequency at which stats are
|
||||
# dumped in the LOG file
|
||||
log_dir_path = None
|
||||
stats_freq_sec = None
|
||||
logs_file_prefix = None
|
||||
|
||||
# fetch the options
|
||||
dump_period = 'DBOptions.stats_dump_period_sec'
|
||||
log_dir = 'DBOptions.db_log_dir'
|
||||
log_options = db_options.get_options([dump_period, log_dir])
|
||||
if dump_period in log_options:
|
||||
stats_freq_sec = int(log_options[dump_period][NO_COL_FAMILY])
|
||||
if log_dir in log_options:
|
||||
log_dir_path = log_options[log_dir][NO_COL_FAMILY]
|
||||
|
||||
log_file_name = DBBenchRunner.get_info_log_file_name(
|
||||
log_dir_path, db_path
|
||||
)
|
||||
|
||||
if not log_dir_path:
|
||||
log_dir_path = db_path
|
||||
if not log_dir_path.endswith('/'):
|
||||
log_dir_path += '/'
|
||||
|
||||
logs_file_prefix = log_dir_path + log_file_name
|
||||
return (logs_file_prefix, stats_freq_sec)
|
||||
|
||||
def _get_options_command_line_args_str(self, curr_options):
|
||||
'''
|
||||
This method uses the provided Rocksdb OPTIONS to create a string of
|
||||
command-line arguments for db_bench.
|
||||
The --options_file argument is always given and the options that are
|
||||
not supported by the OPTIONS file are given as separate arguments.
|
||||
'''
|
||||
optional_args_str = DBBenchRunner.get_opt_args_str(
|
||||
curr_options.get_misc_options()
|
||||
)
|
||||
# generate an options configuration file
|
||||
options_file = curr_options.generate_options_config(nonce='12345')
|
||||
optional_args_str += " --options_file=" + options_file
|
||||
return optional_args_str
|
||||
|
||||
def _setup_db_before_experiment(self, curr_options, db_path):
|
||||
# remove destination directory if it already exists
|
||||
try:
|
||||
shutil.rmtree(db_path, ignore_errors=True)
|
||||
except OSError as e:
|
||||
print('Error: rmdir ' + e.filename + ' ' + e.strerror)
|
||||
command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % (
|
||||
self.db_bench_binary, db_path
|
||||
)
|
||||
args_str = self._get_options_command_line_args_str(curr_options)
|
||||
command += args_str
|
||||
self._run_command(command)
|
||||
|
||||
def _build_experiment_command(self, curr_options, db_path):
|
||||
command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % (
|
||||
self.db_bench_binary, self.benchmark, db_path
|
||||
)
|
||||
args_str = self._get_options_command_line_args_str(curr_options)
|
||||
# handle the command-line args passed in the constructor
|
||||
for cmd_line_arg in self.db_bench_args:
|
||||
args_str += (" --" + cmd_line_arg)
|
||||
command += args_str
|
||||
return command
|
||||
|
||||
def _run_command(self, command):
|
||||
# run db_bench in a subprocess, redirecting its stdout and stderr to files
|
||||
out_file = open(self.OUTPUT_FILE, "w+")
|
||||
err_file = open(self.ERROR_FILE, "w+")
|
||||
print('executing... - ' + command)
|
||||
subprocess.call(command, shell=True, stdout=out_file, stderr=err_file)
|
||||
out_file.close()
|
||||
err_file.close()
|
||||
|
||||
def run_experiment(self, db_options, db_path):
|
||||
# type: (DatabaseOptions, str) -> Tuple[Dict[DataSource.Type, List[DataSource]], float]
|
||||
self._setup_db_before_experiment(db_options, db_path)
|
||||
command = self._build_experiment_command(db_options, db_path)
|
||||
self._run_command(command)
|
||||
|
||||
parsed_output = self._parse_output(get_perf_context=True)
|
||||
|
||||
# Create the LOGS object
|
||||
# get the log options from the OPTIONS file
|
||||
logs_file_prefix, stats_freq_sec = self.get_log_options(
|
||||
db_options, parsed_output[self.DB_PATH]
|
||||
)
|
||||
db_logs = DatabaseLogs(
|
||||
logs_file_prefix, db_options.get_column_families()
|
||||
)
|
||||
# Create the Log STATS object
|
||||
db_log_stats = LogStatsParser(logs_file_prefix, stats_freq_sec)
|
||||
# Create the PerfContext STATS object
|
||||
db_perf_context = DatabasePerfContext(
|
||||
parsed_output[self.PERF_CON], 0, False
|
||||
)
|
||||
data_sources = {
|
||||
DataSource.Type.DB_OPTIONS: [db_options],
|
||||
DataSource.Type.LOG: [db_logs],
|
||||
DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context]
|
||||
}
|
||||
# Create the ODS STATS object
|
||||
if self.ods_args:
|
||||
data_sources[DataSource.Type.TIME_SERIES].append(OdsStatsFetcher(
|
||||
self.ods_args['client_script'],
|
||||
self.ods_args['entity'],
|
||||
self.ods_args['key_prefix']
|
||||
))
|
||||
return data_sources, parsed_output[self.THROUGHPUT]
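A note on how this return value is consumed: ConfigOptimizer.run() (added later in this patch) passes the data_sources straight to the rule parser and compares the throughput against the previous iteration. Roughly, with the names referring to the objects built in config_optimizer_example.py above and old_metric being the previous iteration's throughput:

    # sketch, mirroring ConfigOptimizer.run() further below
    data_sources, new_metric = db_bench_runner.run_experiment(db_options, db_path)
    triggered_rules = rule_spec_parser.get_triggered_rules(
        data_sources, db_options.get_column_families()
    )
    backtrack = not db_bench_runner.is_metric_better(new_metric, old_metric)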
|
||||
|
||||
# TODO: this method is for testing, shift it out to unit-tests when ready
|
||||
def get_available_workloads(self):
|
||||
if not self.supported_benchmarks:
|
||||
self.supported_benchmarks = []
|
||||
command = '%s --help' % self.db_bench_binary
|
||||
self._run_command(command)
|
||||
with open(self.OUTPUT_FILE, 'r') as fp:
|
||||
start = False
|
||||
for line in fp:
|
||||
if re.search('available benchmarks', line, re.IGNORECASE):
|
||||
start = True
|
||||
continue
|
||||
elif start:
|
||||
if re.search('meta operations', line, re.IGNORECASE):
|
||||
break
|
||||
benchmark_info = line.strip()
|
||||
if benchmark_info:
|
||||
token_list = benchmark_info.split()
|
||||
if len(token_list) > 2 and token_list[1] == '--':
|
||||
self.supported_benchmarks.append(token_list[0])
|
||||
else:
|
||||
continue
|
||||
self.supported_benchmarks = sorted(self.supported_benchmarks)
|
||||
return self.supported_benchmarks
|
||||
|
||||
|
||||
# TODO: remove this method, used only for testing
|
||||
def main():
|
||||
pos_args = [
|
||||
'/home/poojamalik/workspace/rocksdb/db_bench',
|
||||
'readwhilewriting',
|
||||
'use_existing_db=true',
|
||||
'duration=10'
|
||||
]
|
||||
db_bench_helper = DBBenchRunner(pos_args)
|
||||
# populate benchmarks with the available ones in the db_bench tool
|
||||
benchmarks = db_bench_helper.get_available_workloads()
|
||||
print(benchmarks)
|
||||
print()
|
||||
options_file = (
|
||||
'/home/poojamalik/workspace/rocksdb/tools/advisor/temp/' +
|
||||
'OPTIONS_temp.tmp'
|
||||
)
|
||||
misc_options = ["rate_limiter_bytes_per_sec=1024000", "bloom_bits=2"]
|
||||
db_options = DatabaseOptions(options_file, misc_options)
|
||||
data_sources, _ = db_bench_helper.run_experiment(db_options)
|
||||
print(data_sources[DataSource.Type.DB_OPTIONS][0].options_dict)
|
||||
print()
|
||||
print(data_sources[DataSource.Type.LOG][0].logs_path_prefix)
|
||||
if os.path.isfile(data_sources[DataSource.Type.LOG][0].logs_path_prefix):
|
||||
print('log file exists!')
|
||||
else:
|
||||
print('error: log file does not exist!')
|
||||
print(data_sources[DataSource.Type.LOG][0].column_families)
|
||||
print()
|
||||
print(data_sources[DataSource.Type.TIME_SERIES][0].logs_file_prefix)
|
||||
if (
|
||||
os.path.isfile(
|
||||
data_sources[DataSource.Type.TIME_SERIES][0].logs_file_prefix
|
||||
)
|
||||
):
|
||||
print('log file exists!')
|
||||
else:
|
||||
print('error: log file does not exist!')
|
||||
print(data_sources[DataSource.Type.TIME_SERIES][0].stats_freq_sec)
|
||||
print(data_sources[DataSource.Type.TIME_SERIES][1].keys_ts)
|
||||
|
||||
db_options = DatabaseOptions(options_file, None)
|
||||
data_sources, _ = db_bench_helper.run_experiment(db_options)
|
||||
print(data_sources[DataSource.Type.DB_OPTIONS][0].options_dict)
|
||||
print()
|
||||
print(data_sources[DataSource.Type.LOG][0].logs_path_prefix)
|
||||
if os.path.isfile(data_sources[DataSource.Type.LOG][0].logs_path_prefix):
|
||||
print('log file exists!')
|
||||
else:
|
||||
print('error: log file does not exist!')
|
||||
print(data_sources[DataSource.Type.LOG][0].column_families)
|
||||
print()
|
||||
print(data_sources[DataSource.Type.TIME_SERIES][0].logs_file_prefix)
|
||||
if (
|
||||
os.path.isfile(
|
||||
data_sources[DataSource.Type.TIME_SERIES][0].logs_file_prefix
|
||||
)
|
||||
):
|
||||
print('log file exists!')
|
||||
else:
|
||||
print('error: log file does not exist!')
|
||||
print(data_sources[DataSource.Type.TIME_SERIES][0].stats_freq_sec)
|
||||
print(data_sources[DataSource.Type.TIME_SERIES][1].keys_ts)
|
||||
print(data_sources[DataSource.Type.TIME_SERIES][1].stats_freq_sec)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
282 tools/advisor/advisor/db_config_optimizer.py (new file)
@@ -0,0 +1,282 @@
|
||||
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
# This source code is licensed under both the GPLv2 (found in the
|
||||
# COPYING file in the root directory) and Apache 2.0 License
|
||||
# (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
from advisor.db_log_parser import NO_COL_FAMILY
|
||||
from advisor.db_options_parser import DatabaseOptions
|
||||
from advisor.rule_parser import Suggestion
|
||||
import copy
|
||||
import random
|
||||
|
||||
|
||||
class ConfigOptimizer:
|
||||
SCOPE = 'scope'
|
||||
SUGG_VAL = 'suggested values'
|
||||
|
||||
@staticmethod
|
||||
def apply_action_on_value(old_value, action, suggested_values):
|
||||
chosen_sugg_val = None
|
||||
if suggested_values:
|
||||
chosen_sugg_val = random.choice(list(suggested_values))
|
||||
new_value = None
|
||||
if action is Suggestion.Action.set or not old_value:
|
||||
assert(chosen_sugg_val)
|
||||
new_value = chosen_sugg_val
|
||||
else:
|
||||
# For increase/decrease actions, currently the code tries to make
|
||||
# a 30% change in the option's value per iteration. An addend is
|
||||
# also present (+1 or -1) to handle the cases when the option's
|
||||
# old value was 0 or the final int() conversion suppressed the 30%
|
||||
# change made to the option
|
||||
old_value = float(old_value)
|
||||
mul = 0
|
||||
add = 0
|
||||
if action is Suggestion.Action.increase:
|
||||
if old_value < 0:
|
||||
mul = 0.7
|
||||
add = 2
|
||||
else:
|
||||
mul = 1.3
|
||||
add = 2
|
||||
elif action is Suggestion.Action.decrease:
|
||||
if old_value < 0:
|
||||
mul = 1.3
|
||||
add = -2
|
||||
else:
|
||||
mul = 0.7
|
||||
add = -2
|
||||
new_value = int(old_value * mul + add)
|
||||
return new_value
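To make the 30%-change rule above concrete, a few worked examples (input values are illustrative; option values arrive as strings from the OPTIONS file):

    # apply_action_on_value('64', Suggestion.Action.increase, None)
    #     -> int(64 * 1.3 + 2) == 85
    # apply_action_on_value('10', Suggestion.Action.decrease, None)
    #     -> int(10 * 0.7 - 2) == 5
    # apply_action_on_value('0', Suggestion.Action.increase, None)
    #     -> int(0 * 1.3 + 2)  == 2   (the +/-2 addend handles a zero old value)
    # apply_action_on_value(None, Suggestion.Action.set, {'16'})
    #     -> '16'  (a random choice from suggested_values; required when the
    #               old value is missing or the action is 'set')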
|
||||
|
||||
@staticmethod
|
||||
def improve_db_config(options, rule, suggestions_dict):
|
||||
# this method takes ONE 'rule' and applies all its suggestions on the
|
||||
# appropriate options
|
||||
required_options = []
|
||||
rule_suggestions = []
|
||||
for sugg_name in rule.get_suggestions():
|
||||
option = suggestions_dict[sugg_name].option
|
||||
action = suggestions_dict[sugg_name].action
|
||||
# A Suggestion in the rules spec must have the 'option' and
|
||||
# 'action' fields defined, always call perform_checks() method
|
||||
# after parsing the rules file using RulesSpec
|
||||
assert(option)
|
||||
assert(action)
|
||||
required_options.append(option)
|
||||
rule_suggestions.append(suggestions_dict[sugg_name])
|
||||
current_config = options.get_options(required_options)
|
||||
# Create the updated configuration from the rule's suggestions
|
||||
updated_config = {}
|
||||
for sugg in rule_suggestions:
|
||||
# case: when the option is not present in the current configuration
|
||||
if sugg.option not in current_config:
|
||||
try:
|
||||
new_value = ConfigOptimizer.apply_action_on_value(
|
||||
None, sugg.action, sugg.suggested_values
|
||||
)
|
||||
if sugg.option not in updated_config:
|
||||
updated_config[sugg.option] = {}
|
||||
if DatabaseOptions.is_misc_option(sugg.option):
|
||||
# this suggestion is on an option that is not yet
|
||||
# supported by the Rocksdb OPTIONS file and so it is
|
||||
# not prefixed by a section type.
|
||||
updated_config[sugg.option][NO_COL_FAMILY] = new_value
|
||||
else:
|
||||
for col_fam in rule.get_trigger_column_families():
|
||||
updated_config[sugg.option][col_fam] = new_value
|
||||
except AssertionError:
|
||||
print(
|
||||
'WARNING(ConfigOptimizer): provide suggested_values ' +
|
||||
'for ' + sugg.option
|
||||
)
|
||||
continue
|
||||
# case: when the option is present in the current configuration
|
||||
if NO_COL_FAMILY in current_config[sugg.option]:
|
||||
old_value = current_config[sugg.option][NO_COL_FAMILY]
|
||||
try:
|
||||
new_value = ConfigOptimizer.apply_action_on_value(
|
||||
old_value, sugg.action, sugg.suggested_values
|
||||
)
|
||||
if sugg.option not in updated_config:
|
||||
updated_config[sugg.option] = {}
|
||||
updated_config[sugg.option][NO_COL_FAMILY] = new_value
|
||||
except AssertionError:
|
||||
print(
|
||||
'WARNING(ConfigOptimizer): provide suggested_values ' +
|
||||
'for ' + sugg.option
|
||||
)
|
||||
else:
|
||||
for col_fam in rule.get_trigger_column_families():
|
||||
old_value = None
|
||||
if col_fam in current_config[sugg.option]:
|
||||
old_value = current_config[sugg.option][col_fam]
|
||||
try:
|
||||
new_value = ConfigOptimizer.apply_action_on_value(
|
||||
old_value, sugg.action, sugg.suggested_values
|
||||
)
|
||||
if sugg.option not in updated_config:
|
||||
updated_config[sugg.option] = {}
|
||||
updated_config[sugg.option][col_fam] = new_value
|
||||
except AssertionError:
|
||||
print(
|
||||
'WARNING(ConfigOptimizer): provide ' +
|
||||
'suggested_values for ' + sugg.option
|
||||
)
|
||||
return current_config, updated_config
|
||||
|
||||
@staticmethod
|
||||
def pick_rule_to_apply(rules, last_rule_name, rules_tried, backtrack):
|
||||
if not rules:
|
||||
print('\nNo more rules triggered!')
|
||||
return None
|
||||
# if the last rule provided an improvement in the database performance,
|
||||
# and it was triggered again (i.e. it is present in 'rules'), then pick
|
||||
# the same rule for this iteration too.
|
||||
if last_rule_name and not backtrack:
|
||||
for rule in rules:
|
||||
if rule.name == last_rule_name:
|
||||
return rule
|
||||
# there was no previous rule OR the previous rule did not improve db
|
||||
# performance OR it was not triggered for this iteration,
|
||||
# then pick another rule that has not been tried yet
|
||||
for rule in rules:
|
||||
if rule.name not in rules_tried:
|
||||
return rule
|
||||
print('\nAll rules have been exhausted')
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def apply_suggestions(
|
||||
triggered_rules,
|
||||
current_rule_name,
|
||||
rules_tried,
|
||||
backtrack,
|
||||
curr_options,
|
||||
suggestions_dict
|
||||
):
|
||||
curr_rule = ConfigOptimizer.pick_rule_to_apply(
|
||||
triggered_rules, current_rule_name, rules_tried, backtrack
|
||||
)
|
||||
if not curr_rule:
|
||||
return tuple([None]*4)
|
||||
# if a rule has been picked for improving db_config, update rules_tried
|
||||
rules_tried.add(curr_rule.name)
|
||||
# get updated config based on the picked rule
|
||||
curr_conf, updated_conf = ConfigOptimizer.improve_db_config(
|
||||
curr_options, curr_rule, suggestions_dict
|
||||
)
|
||||
conf_diff = DatabaseOptions.get_options_diff(curr_conf, updated_conf)
|
||||
if not conf_diff: # the current and updated configs are the same
|
||||
curr_rule, rules_tried, curr_conf, updated_conf = (
|
||||
ConfigOptimizer.apply_suggestions(
|
||||
triggered_rules,
|
||||
None,
|
||||
rules_tried,
|
||||
backtrack,
|
||||
curr_options,
|
||||
suggestions_dict
|
||||
)
|
||||
)
|
||||
print('returning from apply_suggestions')
|
||||
return (curr_rule, rules_tried, curr_conf, updated_conf)
|
||||
|
||||
# TODO(poojam23): check if this method is required or can we directly set
|
||||
# the config equal to the curr_config
|
||||
@staticmethod
|
||||
def get_backtrack_config(curr_config, updated_config):
|
||||
diff = DatabaseOptions.get_options_diff(curr_config, updated_config)
|
||||
bt_config = {}
|
||||
for option in diff:
|
||||
bt_config[option] = {}
|
||||
for col_fam in diff[option]:
|
||||
bt_config[option][col_fam] = diff[option][col_fam][0]
|
||||
print(bt_config)
|
||||
return bt_config
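An illustrative example of what get_backtrack_config() produces, given the (old, new) tuples returned by DatabaseOptions.get_options_diff() (values are made up):

    # If the last iteration changed write_buffer_size for the 'default' column
    # family from 1048576 to 2097152, then:
    #   diff      == {'CFOptions.write_buffer_size': {'default': (1048576, 2097152)}}
    #   bt_config == {'CFOptions.write_buffer_size': {'default': 1048576}}
    # i.e. the backtrack config restores the pre-update (index 0) value.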
|
||||
|
||||
def __init__(self, bench_runner, db_options, rule_parser, base_db):
|
||||
self.bench_runner = bench_runner
|
||||
self.db_options = db_options
|
||||
self.rule_parser = rule_parser
|
||||
self.base_db_path = base_db
|
||||
|
||||
def run(self):
|
||||
# In every iteration of this method's optimization loop we pick ONE
|
||||
# RULE from all the triggered rules and apply all its suggestions to
|
||||
# the appropriate options.
|
||||
# bootstrapping the optimizer
|
||||
print('Bootstrapping optimizer:')
|
||||
options = copy.deepcopy(self.db_options)
|
||||
old_data_sources, old_metric = (
|
||||
self.bench_runner.run_experiment(options, self.base_db_path)
|
||||
)
|
||||
print('Initial metric: ' + str(old_metric))
|
||||
self.rule_parser.load_rules_from_spec()
|
||||
self.rule_parser.perform_section_checks()
|
||||
triggered_rules = self.rule_parser.get_triggered_rules(
|
||||
old_data_sources, options.get_column_families()
|
||||
)
|
||||
print('\nTriggered:')
|
||||
self.rule_parser.print_rules(triggered_rules)
|
||||
backtrack = False
|
||||
rules_tried = set()
|
||||
curr_rule, rules_tried, curr_conf, updated_conf = (
|
||||
ConfigOptimizer.apply_suggestions(
|
||||
triggered_rules,
|
||||
None,
|
||||
rules_tried,
|
||||
backtrack,
|
||||
options,
|
||||
self.rule_parser.get_suggestions_dict()
|
||||
)
|
||||
)
|
||||
# the optimizer loop
|
||||
while curr_rule:
|
||||
print('\nRule picked for next iteration:')
|
||||
print(curr_rule.name)
|
||||
print('\ncurrent config:')
|
||||
print(curr_conf)
|
||||
print('updated config:')
|
||||
print(updated_conf)
|
||||
options.update_options(updated_conf)
|
||||
# run bench_runner with updated config
|
||||
new_data_sources, new_metric = (
|
||||
self.bench_runner.run_experiment(options, self.base_db_path)
|
||||
)
|
||||
print('\nnew metric: ' + str(new_metric))
|
||||
backtrack = not self.bench_runner.is_metric_better(
|
||||
new_metric, old_metric
|
||||
)
|
||||
# update triggered_rules, metric, data_sources, if required
|
||||
if backtrack:
|
||||
# revert changes to options config
|
||||
print('\nBacktracking to previous configuration')
|
||||
backtrack_conf = ConfigOptimizer.get_backtrack_config(
|
||||
curr_conf, updated_conf
|
||||
)
|
||||
options.update_options(backtrack_conf)
|
||||
else:
|
||||
# run advisor on new data sources
|
||||
self.rule_parser.load_rules_from_spec() # reboot the advisor
|
||||
self.rule_parser.perform_section_checks()
|
||||
triggered_rules = self.rule_parser.get_triggered_rules(
|
||||
new_data_sources, options.get_column_families()
|
||||
)
|
||||
print('\nTriggered:')
|
||||
self.rule_parser.print_rules(triggered_rules)
|
||||
old_metric = new_metric
|
||||
old_data_sources = new_data_sources
|
||||
rules_tried = set()
|
||||
# pick rule to work on and set curr_rule to that
|
||||
curr_rule, rules_tried, curr_conf, updated_conf = (
|
||||
ConfigOptimizer.apply_suggestions(
|
||||
triggered_rules,
|
||||
curr_rule.name,
|
||||
rules_tried,
|
||||
backtrack,
|
||||
options,
|
||||
self.rule_parser.get_suggestions_dict()
|
||||
)
|
||||
)
|
||||
# return the final database options configuration
|
||||
return options
|
tools/advisor/advisor/db_log_parser.py
@@ -4,18 +4,21 @@
|
||||
# (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from calendar import timegm
|
||||
from enum import Enum
|
||||
import glob
|
||||
import re
|
||||
from enum import Enum
|
||||
import time
|
||||
|
||||
|
||||
NO_COL_FAMILY = 'DB_WIDE'
|
||||
|
||||
|
||||
class DataSource(ABC):
|
||||
class Type(Enum):
|
||||
LOG = 1
|
||||
DB_OPTIONS = 2
|
||||
STATS = 3
|
||||
PERF_CONTEXT = 4
|
||||
ODS = 5
|
||||
TIME_SERIES = 3
|
||||
|
||||
def __init__(self, type):
|
||||
self.type = type
|
||||
@@ -33,15 +36,30 @@ class Log:
|
||||
date_regex = '\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}'
|
||||
return re.match(date_regex, log_line)
|
||||
|
||||
def __init__(self, log_line):
|
||||
def __init__(self, log_line, column_families):
|
||||
token_list = log_line.strip().split()
|
||||
self.time = token_list[0]
|
||||
self.context = token_list[1]
|
||||
self.message = " ".join(token_list[2:])
|
||||
self.column_family = None
|
||||
# example log for 'default' column family:
|
||||
# "2018/07/25-17:29:05.176080 7f969de68700 [db/compaction_job.cc:1634]
|
||||
# [default] [JOB 3] Compacting 24@0 + 16@1 files to L1, score 6.00\n"
|
||||
for col_fam in column_families:
|
||||
search_for_str = '\[' + col_fam + '\]'
|
||||
if re.search(search_for_str, self.message):
|
||||
self.column_family = col_fam
|
||||
break
|
||||
if not self.column_family:
|
||||
self.column_family = NO_COL_FAMILY
|
||||
|
||||
def get_time(self):
|
||||
def get_human_readable_time(self):
|
||||
# example from a log line: '2018/07/25-11:25:45.782710'
|
||||
return self.time
|
||||
|
||||
def get_column_family(self):
|
||||
return self.column_family
|
||||
|
||||
def get_context(self):
|
||||
return self.context
|
||||
|
||||
@@ -49,48 +67,65 @@ class Log:
|
||||
return self.message
|
||||
|
||||
def append_message(self, remaining_log):
|
||||
self.message = self.message + remaining_log
|
||||
self.message = self.message + '\n' + remaining_log.strip()
|
||||
|
||||
def get_timestamp(self):
|
||||
# example: '2018/07/25-11:25:45.782710' will be converted to the GMT
|
||||
# Unix timestamp 1532517945 (note: this method assumes that self.time
|
||||
# is in GMT)
|
||||
hr_time = self.time + 'GMT'
|
||||
timestamp = timegm(time.strptime(hr_time, "%Y/%m/%d-%H:%M:%S.%f%Z"))
|
||||
return timestamp
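For reference, the conversion performed in get_timestamp() can be reproduced standalone; this mirrors the code above and, as noted in the comment, assumes the log time is GMT:

    from calendar import timegm
    import time

    ts = timegm(time.strptime('2018/07/25-11:25:45.782710' + 'GMT',
                              "%Y/%m/%d-%H:%M:%S.%f%Z"))
    # ts == 1532517945 (the microseconds are parsed but dropped, since
    # struct_time has no sub-second field)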
|
||||
|
||||
def __repr__(self):
|
||||
return 'time: ' + self.time + ', context: ' + self.context +\
|
||||
', message: ' + self.message
|
||||
return (
|
||||
'time: ' + self.time + '; context: ' + self.context +
|
||||
'; col_fam: ' + self.column_family +
|
||||
'; message: ' + self.message
|
||||
)
|
||||
|
||||
|
||||
class DatabaseLogs(DataSource):
|
||||
def __init__(self, logs_path_prefix):
|
||||
def __init__(self, logs_path_prefix, column_families):
|
||||
super().__init__(DataSource.Type.LOG)
|
||||
self.logs_path_prefix = logs_path_prefix
|
||||
self.column_families = column_families
|
||||
|
||||
def trigger_appropriate_conditions(self, conditions, log):
|
||||
conditions_to_be_removed = []
|
||||
def trigger_conditions_for_log(self, conditions, log):
|
||||
# For a LogCondition object, trigger is:
|
||||
# Dict[column_family_name, List[Log]]. This explains why the condition
|
||||
# was triggered and for which column families.
|
||||
for cond in conditions:
|
||||
if re.search(cond.regex, log.get_message(), re.IGNORECASE):
|
||||
cond.set_trigger(log)
|
||||
conditions_to_be_removed.append(cond)
|
||||
for remove_cond in conditions_to_be_removed:
|
||||
conditions.remove(remove_cond)
|
||||
return conditions
|
||||
trigger = cond.get_trigger()
|
||||
if not trigger:
|
||||
trigger = {}
|
||||
if log.get_column_family() not in trigger:
|
||||
trigger[log.get_column_family()] = []
|
||||
trigger[log.get_column_family()].append(log)
|
||||
cond.set_trigger(trigger)
|
||||
|
||||
def check_and_trigger_conditions(self, conditions):
|
||||
for file_name in glob.glob(self.logs_path_prefix + '*'):
|
||||
# TODO(poojam23): find a way to distinguish between log files
|
||||
# - generated in the current experiment but are labeled 'old'
|
||||
# because the LOGs exceeded the file size limit AND
|
||||
# - generated in some previous experiment that are also labeled
|
||||
# 'old' and were not deleted for some reason
|
||||
if re.search('old', file_name, re.IGNORECASE):
|
||||
continue
|
||||
with open(file_name, 'r') as db_logs:
|
||||
new_log = None
|
||||
for line in db_logs:
|
||||
if not conditions:
|
||||
break
|
||||
if Log.is_new_log(line):
|
||||
if new_log:
|
||||
conditions = self.trigger_appropriate_conditions(
|
||||
conditions,
|
||||
new_log
|
||||
self.trigger_conditions_for_log(
|
||||
conditions, new_log
|
||||
)
|
||||
new_log = Log(line)
|
||||
new_log = Log(line, self.column_families)
|
||||
else:
|
||||
# To account for logs split into multiple lines
|
||||
new_log.append_message(line)
|
||||
# Check for the last log in the file.
|
||||
if new_log and conditions:
|
||||
conditions = self.trigger_appropriate_conditions(
|
||||
conditions,
|
||||
new_log
|
||||
)
|
||||
if new_log:
|
||||
self.trigger_conditions_for_log(conditions, new_log)
|
||||
|
tools/advisor/advisor/db_options_parser.py
@@ -3,8 +3,11 @@
|
||||
# COPYING file in the root directory) and Apache 2.0 License
|
||||
# (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
from advisor.db_log_parser import DataSource
|
||||
import copy
|
||||
from advisor.db_log_parser import DataSource, NO_COL_FAMILY
|
||||
from advisor.ini_parser import IniParser
|
||||
from advisor.rule_parser import Condition, OptionCondition
|
||||
import os
|
||||
|
||||
|
||||
class OptionsSpecParser(IniParser):
|
||||
@@ -16,7 +19,8 @@ class OptionsSpecParser:
|
||||
def get_section_type(line):
|
||||
'''
|
||||
Example section header: [TableOptions/BlockBasedTable "default"]
|
||||
Here section_type returned would be 'TableOptions.BlockBasedTable'
|
||||
Here ConfigurationOptimizer returned would be
|
||||
'TableOptions.BlockBasedTable'
|
||||
'''
|
||||
section_path = line.strip()[1:-1].split()[0]
|
||||
section_type = '.'.join(section_path.split('/'))
|
||||
@@ -29,79 +33,407 @@ class OptionsSpecParser:
|
||||
return None
|
||||
return token_list[1]
|
||||
|
||||
@staticmethod
|
||||
def get_section_str(section_type, section_name):
|
||||
# Example:
|
||||
# Case 1: get_section_str('DBOptions', NO_COL_FAMILY)
|
||||
# Case 2: get_section_str('TableOptions.BlockBasedTable', 'default')
|
||||
section_type = '/'.join(section_type.strip().split('.'))
|
||||
# Case 1: section_type = 'DBOptions'
|
||||
# Case 2: section_type = 'TableOptions/BlockBasedTable'
|
||||
section_str = '[' + section_type
|
||||
if section_name == NO_COL_FAMILY:
|
||||
# Case 1: '[DBOptions]'
|
||||
return (section_str + ']')
|
||||
else:
|
||||
# Case 2: '[TableOptions/BlockBasedTable "default"]'
|
||||
return section_str + ' "' + section_name + '"]'
|
||||
|
||||
@staticmethod
|
||||
def get_option_str(key, values):
|
||||
option_str = key + '='
|
||||
# get_option_str('db_log_dir', None), returns 'db_log_dir='
|
||||
if values:
|
||||
# example:
|
||||
# get_option_str('max_bytes_for_level_multiplier_additional',
|
||||
# [1,1,1,1,1,1,1]), returned string:
|
||||
# 'max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1'
|
||||
if isinstance(values, list):
|
||||
for value in values:
|
||||
option_str += (str(value) + ':')
|
||||
option_str = option_str[:-1]
|
||||
else:
|
||||
# example: get_option_str('write_buffer_size', 1048576)
|
||||
# returned string: 'write_buffer_size=1048576'
|
||||
option_str += str(values)
|
||||
return option_str
|
||||
|
||||
|
||||
class DatabaseOptions(DataSource):
|
||||
def __init__(self, rocksdb_options):
|
||||
|
||||
@staticmethod
|
||||
def is_misc_option(option_name):
|
||||
return '.' not in option_name
|
||||
|
||||
@staticmethod
|
||||
def get_options_diff(opt_old, opt_new):
|
||||
# type: Dict[option, Dict[col_fam, value]] X 2 ->
|
||||
# Dict[option, Dict[col_fam, Tuple(old_value, new_value)]]
|
||||
# note: diff should contain a tuple of values only if they are
|
||||
# different from each other
|
||||
options_union = set(opt_old.keys()).union(set(opt_new.keys()))
|
||||
diff = {}
|
||||
for opt in options_union:
|
||||
diff[opt] = {}
|
||||
# if option in options_union, then it must be in one of the configs
|
||||
if opt not in opt_old:
|
||||
for col_fam in opt_new[opt]:
|
||||
diff[opt][col_fam] = (None, opt_new[opt][col_fam])
|
||||
elif opt not in opt_new:
|
||||
for col_fam in opt_old[opt]:
|
||||
diff[opt][col_fam] = (opt_old[opt][col_fam], None)
|
||||
else:
|
||||
for col_fam in opt_old[opt]:
|
||||
if col_fam in opt_new[opt]:
|
||||
if opt_old[opt][col_fam] != opt_new[opt][col_fam]:
|
||||
diff[opt][col_fam] = (
|
||||
opt_old[opt][col_fam],
|
||||
opt_new[opt][col_fam]
|
||||
)
|
||||
else:
|
||||
diff[opt][col_fam] = (opt_old[opt][col_fam], None)
|
||||
for col_fam in opt_new[opt]:
|
||||
if col_fam in opt_old[opt]:
|
||||
if opt_old[opt][col_fam] != opt_new[opt][col_fam]:
|
||||
diff[opt][col_fam] = (
|
||||
opt_old[opt][col_fam],
|
||||
opt_new[opt][col_fam]
|
||||
)
|
||||
else:
|
||||
diff[opt][col_fam] = (None, opt_new[opt][col_fam])
|
||||
if not diff[opt]:
|
||||
diff.pop(opt)
|
||||
return diff
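A small illustrative example of the diff shape described by the type comment above ('DB_WIDE' is the NO_COL_FAMILY sentinel; the option values are made up):

    # opt_old = {'DBOptions.max_background_jobs': {'DB_WIDE': '2'}}
    # opt_new = {'DBOptions.max_background_jobs': {'DB_WIDE': '4'},
    #            'bloom_bits': {'DB_WIDE': '2'}}
    # DatabaseOptions.get_options_diff(opt_old, opt_new) ==
    #     {'DBOptions.max_background_jobs': {'DB_WIDE': ('2', '4')},
    #      'bloom_bits': {'DB_WIDE': (None, '2')}}
    # Options whose values did not change are dropped from the diff entirely.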
|
||||
|
||||
def __init__(self, rocksdb_options, misc_options=None):
|
||||
super().__init__(DataSource.Type.DB_OPTIONS)
|
||||
self.options_path = rocksdb_options
|
||||
# Load the options from the given file to a dictionary.
|
||||
self.load_from_source()
|
||||
# The options are stored in the following data structure:
|
||||
# Dict[section_type, Dict[section_name, Dict[option_name, value]]]
|
||||
self.options_dict = None
|
||||
self.column_families = None
|
||||
# Load the options from the given file to a dictionary.
|
||||
self.load_from_source(rocksdb_options)
|
||||
# Setup the miscellaneous options expected to be List[str], where each
|
||||
# element in the List has the format "<option_name>=<option_value>"
|
||||
# These options are the ones that are not yet supported by the Rocksdb
|
||||
# OPTIONS file, so they are provided separately
|
||||
self.setup_misc_options(misc_options)
|
||||
|
||||
def load_from_source(self):
|
||||
def setup_misc_options(self, misc_options):
|
||||
self.misc_options = {}
|
||||
if misc_options:
|
||||
for option_pair_str in misc_options:
|
||||
option_name = option_pair_str.split('=')[0].strip()
|
||||
option_value = option_pair_str.split('=')[1].strip()
|
||||
self.misc_options[option_name] = option_value
|
||||
|
||||
def load_from_source(self, options_path):
|
||||
self.options_dict = {}
|
||||
with open(self.options_path, 'r') as db_options:
|
||||
with open(options_path, 'r') as db_options:
|
||||
for line in db_options:
|
||||
line = OptionsSpecParser.remove_trailing_comment(line)
|
||||
if not line:
|
||||
continue
|
||||
if OptionsSpecParser.is_section_header(line):
|
||||
curr_sec_type = OptionsSpecParser.get_section_type(line)
|
||||
curr_sec_type = (
|
||||
OptionsSpecParser.get_section_type(line)
|
||||
)
|
||||
curr_sec_name = OptionsSpecParser.get_section_name(line)
|
||||
if curr_sec_name:
|
||||
option_prefix = curr_sec_name + '.' + curr_sec_type
|
||||
if curr_sec_type == 'CFOptions':
|
||||
if not self.column_families:
|
||||
self.column_families = []
|
||||
self.column_families.append(curr_sec_name)
|
||||
else:
|
||||
option_prefix = curr_sec_type
|
||||
if curr_sec_type not in self.options_dict:
|
||||
self.options_dict[curr_sec_type] = {}
|
||||
if not curr_sec_name:
|
||||
curr_sec_name = NO_COL_FAMILY
|
||||
self.options_dict[curr_sec_type][curr_sec_name] = {}
|
||||
# example: if the line read from the Rocksdb OPTIONS file
|
||||
# is [CFOptions "default"], then the section type is
|
||||
# CFOptions and 'default' is the name of a column family
|
||||
# that for this database, so it's added to the list of
|
||||
# column families stored in this object
|
||||
if curr_sec_type == 'CFOptions':
|
||||
if not self.column_families:
|
||||
self.column_families = []
|
||||
self.column_families.append(curr_sec_name)
|
||||
elif OptionsSpecParser.is_new_option(line):
|
||||
key, value = OptionsSpecParser.get_key_value_pair(line)
|
||||
if not self.options_dict:
|
||||
self.options_dict = {}
|
||||
self.options_dict[option_prefix + '.' + key] = value
|
||||
self.options_dict[curr_sec_type][curr_sec_name][key] = (
|
||||
value
|
||||
)
|
||||
else:
|
||||
error = 'Not able to parse line in Options file.'
|
||||
OptionsSpecParser.exit_with_parse_error(line, error)
|
||||
|
||||
def check_and_trigger_conditions(self, conditions):
|
||||
'''
|
||||
For every condition, if the fields are not present set_trigger will
|
||||
not be called for it. Or if all the fields are present, then the
|
||||
trigger will be set to whatever the expression evaluates to.
|
||||
'''
|
||||
for cond in conditions:
|
||||
# This contains the indices of options to whose name the column
|
||||
# family name needs to be prepended in order to create the full
|
||||
# option name as parsed from the options file.
|
||||
incomplete_option_ix = []
|
||||
ix = 0
|
||||
options = []
|
||||
for option in cond.options:
|
||||
if option in self.options_dict.keys():
|
||||
options.append(self.options_dict[option])
|
||||
else:
|
||||
incomplete_option_ix.append(ix)
|
||||
options.append(0)
|
||||
ix += 1
|
||||
def get_misc_options(self):
|
||||
# these are options that are not yet supported by the Rocksdb OPTIONS
|
||||
# file, hence they are provided and stored separately
|
||||
return self.misc_options
|
||||
|
||||
# if all the options were present as is:
|
||||
if not incomplete_option_ix:
|
||||
if not eval(cond.eval_expr):
|
||||
cond.set_trigger(cond.eval_expr)
|
||||
def get_column_families(self):
|
||||
return self.column_families
|
||||
|
||||
def get_all_options(self):
|
||||
# This method returns all the options that are stored in this object as
|
||||
# a: Dict[<sec_type>.<option_name>: Dict[col_fam, option_value]]
|
||||
all_options = []
|
||||
# Example: in the section header '[CFOptions "default"]' read from the
|
||||
# OPTIONS file, sec_type='CFOptions'
|
||||
for sec_type in self.options_dict:
|
||||
for col_fam in self.options_dict[sec_type]:
|
||||
for opt_name in self.options_dict[sec_type][col_fam]:
|
||||
option = sec_type + '.' + opt_name
|
||||
all_options.append(option)
|
||||
all_options.extend(list(self.misc_options.keys()))
|
||||
return self.get_options(all_options)
|
||||
|
||||
def get_options(self, reqd_options):
|
||||
# type: List[str] -> Dict[str, Dict[str, Any]]
|
||||
# List[option] -> Dict[option, Dict[col_fam, value]]
|
||||
reqd_options_dict = {}
|
||||
for option in reqd_options:
|
||||
if DatabaseOptions.is_misc_option(option):
|
||||
# the option is not prefixed by '<section_type>.' because it is
|
||||
# not yet supported by the Rocksdb OPTIONS file; so it has to
|
||||
# be fetched from the misc_options dictionary
|
||||
if option not in self.misc_options:
|
||||
continue
|
||||
if option not in reqd_options_dict:
|
||||
reqd_options_dict[option] = {}
|
||||
reqd_options_dict[option][NO_COL_FAMILY] = (
|
||||
self.misc_options[option]
|
||||
)
|
||||
else:
|
||||
# Example: option = 'TableOptions.BlockBasedTable.block_align'
|
||||
# then, sec_type = 'TableOptions.BlockBasedTable'
|
||||
sec_type = '.'.join(option.split('.')[:-1])
|
||||
# opt_name = 'block_align'
|
||||
opt_name = option.split('.')[-1]
|
||||
if sec_type not in self.options_dict:
|
||||
continue
|
||||
for col_fam in self.options_dict[sec_type]:
|
||||
if opt_name in self.options_dict[sec_type][col_fam]:
|
||||
if option not in reqd_options_dict:
|
||||
reqd_options_dict[option] = {}
|
||||
reqd_options_dict[option][col_fam] = (
|
||||
self.options_dict[sec_type][col_fam][opt_name]
|
||||
)
|
||||
return reqd_options_dict
|
||||
|
||||
def update_options(self, options):
|
||||
# An example 'options' object looks like:
|
||||
# {'DBOptions.max_background_jobs': {NO_COL_FAMILY: 2},
|
||||
# 'CFOptions.write_buffer_size': {'default': 1048576, 'cf_A': 128000},
|
||||
# 'bloom_bits': {NO_COL_FAMILY: 4}}
|
||||
for option in options:
|
||||
if DatabaseOptions.is_misc_option(option):
|
||||
# this is a misc_option i.e. an option that is not yet
|
||||
# supported by the Rocksdb OPTIONS file, so it is not prefixed
|
||||
# by '<section_type>.' and must be stored in the separate
|
||||
# misc_options dictionary
|
||||
if NO_COL_FAMILY not in options[option]:
|
||||
print(
|
||||
'WARNING(DatabaseOptions.update_options): not ' +
|
||||
'updating option ' + option + ' because it is in ' +
|
||||
'misc_option format but its scope is not ' +
|
||||
NO_COL_FAMILY + '. Check format of option.'
|
||||
)
|
||||
continue
|
||||
self.misc_options[option] = options[option][NO_COL_FAMILY]
|
||||
else:
|
||||
sec_name = '.'.join(option.split('.')[:-1])
|
||||
opt_name = option.split('.')[-1]
|
||||
if sec_name not in self.options_dict:
|
||||
self.options_dict[sec_name] = {}
|
||||
for col_fam in options[option]:
|
||||
# if the option is not already present in the dictionary,
|
||||
# it will be inserted, else it will be updated to the new
|
||||
# value
|
||||
if col_fam not in self.options_dict[sec_name]:
|
||||
self.options_dict[sec_name][col_fam] = {}
|
||||
self.options_dict[sec_name][col_fam][opt_name] = (
|
||||
copy.deepcopy(options[option][col_fam])
|
||||
)
|
||||
|
||||
def generate_options_config(self, nonce):
|
||||
# this method generates a Rocksdb OPTIONS file in the INI format from
|
||||
# the options stored in self.options_dict
|
||||
this_path = os.path.abspath(os.path.dirname(__file__))
|
||||
file_name = '../temp/OPTIONS_' + str(nonce) + '.tmp'
|
||||
file_path = os.path.join(this_path, file_name)
|
||||
with open(file_path, 'w') as fp:
|
||||
for section in self.options_dict:
|
||||
for col_fam in self.options_dict[section]:
|
||||
fp.write(
|
||||
OptionsSpecParser.get_section_str(section, col_fam) +
|
||||
'\n'
|
||||
)
|
||||
for option in self.options_dict[section][col_fam]:
|
||||
values = self.options_dict[section][col_fam][option]
|
||||
fp.write(
|
||||
OptionsSpecParser.get_option_str(option, values) +
|
||||
'\n'
|
||||
)
|
||||
fp.write('\n')
|
||||
return file_path
|
||||
|
||||
def check_and_trigger_conditions(self, conditions):
|
||||
for cond in conditions:
|
||||
reqd_options_dict = self.get_options(cond.options)
|
||||
# This contains the indices of options that are specific to some
|
||||
# column family and are not database-wide options.
|
||||
incomplete_option_ix = []
|
||||
options = []
|
||||
missing_reqd_option = False
|
||||
for ix, option in enumerate(cond.options):
|
||||
if option not in reqd_options_dict:
|
||||
print(
|
||||
'WARNING(DatabaseOptions.check_and_trigger): ' +
|
||||
'skipping condition ' + cond.name + ' because it '
|
||||
'requires option ' + option + ' but this option is' +
|
||||
' not available'
|
||||
)
|
||||
missing_reqd_option = True
|
||||
break # required option is absent
|
||||
if NO_COL_FAMILY in reqd_options_dict[option]:
|
||||
options.append(reqd_options_dict[option][NO_COL_FAMILY])
|
||||
else:
|
||||
options.append(None)
|
||||
incomplete_option_ix.append(ix)
|
||||
|
||||
if missing_reqd_option:
|
||||
continue
|
||||
|
||||
# for all the options that were not present as is, we prepend them
|
||||
# their names with every column family found in options file.
|
||||
# if all the options are database-wide options
|
||||
if not incomplete_option_ix:
|
||||
try:
|
||||
if eval(cond.eval_expr):
|
||||
cond.set_trigger({NO_COL_FAMILY: options})
|
||||
except Exception as e:
|
||||
print(
|
||||
'WARNING(DatabaseOptions) check_and_trigger:' + str(e)
|
||||
)
|
||||
continue
|
||||
|
||||
# for all the options that are not database-wide, we look for their
|
||||
# values specific to column families
|
||||
col_fam_options_dict = {}
|
||||
for col_fam in self.column_families:
|
||||
present = True
|
||||
for ix in incomplete_option_ix:
|
||||
full_option = col_fam + '.' + cond.options[ix]
|
||||
if full_option not in self.options_dict.keys():
|
||||
option = cond.options[ix]
|
||||
if col_fam not in reqd_options_dict[option]:
|
||||
present = False
|
||||
break
|
||||
options[ix] = self.options_dict[full_option]
|
||||
if present and not eval(cond.eval_expr):
|
||||
cond.set_trigger(cond.eval_expr)
|
||||
options[ix] = reqd_options_dict[option][col_fam]
|
||||
if present:
|
||||
try:
|
||||
if eval(cond.eval_expr):
|
||||
col_fam_options_dict[col_fam] = (
|
||||
copy.deepcopy(options)
|
||||
)
|
||||
except Exception as e:
|
||||
print(
|
||||
'WARNING(DatabaseOptions) check_and_trigger: ' +
|
||||
str(e)
|
||||
)
|
||||
# Trigger for an OptionCondition object is of the form:
|
||||
# Dict[col_fam_name: List[option_value]]
|
||||
# where col_fam_name is the name of a column family for which
|
||||
# 'eval_expr' evaluated to True and List[option_value] is the list
|
||||
# of values of the options specified in the condition's 'options'
|
||||
# field
|
||||
if col_fam_options_dict:
|
||||
cond.set_trigger(col_fam_options_dict)
|
||||
|
||||
|
||||
# TODO(poojam23): remove these methods once the unit tests for this class are
|
||||
# in place
|
||||
def main():
|
||||
options_file = 'temp/OPTIONS_default.tmp'
|
||||
misc_options = ["misc_opt1=10", "misc_opt2=100", "misc_opt3=1000"]
|
||||
db_options = DatabaseOptions(options_file, misc_options)
|
||||
print(db_options.get_column_families())
|
||||
get_op = db_options.get_options([
|
||||
'DBOptions.db_log_dir',
|
||||
'DBOptions.is_fd_close_on_exec',
|
||||
'CFOptions.memtable_prefix_bloom_size_ratio',
|
||||
'TableOptions.BlockBasedTable.verify_compression',
|
||||
'misc_opt1',
|
||||
'misc_opt3'
|
||||
])
|
||||
print(get_op)
|
||||
get_op['DBOptions.db_log_dir'][NO_COL_FAMILY] = 'some_random_path'
|
||||
get_op['CFOptions.memtable_prefix_bloom_size_ratio']['default'] = 2.31
|
||||
get_op['TableOptions.BlockBasedTable.verify_compression']['default'] = 4.4
|
||||
get_op['misc_opt2'] = {}
|
||||
get_op['misc_opt2'][NO_COL_FAMILY] = 2
|
||||
db_options.update_options(get_op)
|
||||
print('options updated in ' + db_options.generate_options_config(123))
|
||||
print('misc options ' + repr(db_options.get_misc_options()))
|
||||
|
||||
options_file = 'temp/OPTIONS_123.tmp'
|
||||
db_options = DatabaseOptions(options_file, misc_options)
|
||||
# only CFOptions
|
||||
cond1 = Condition('opt-cond-1')
|
||||
cond1 = OptionCondition.create(cond1)
|
||||
cond1.set_parameter(
|
||||
'options', [
|
||||
'CFOptions.level0_file_num_compaction_trigger',
|
||||
'CFOptions.write_buffer_size',
|
||||
'CFOptions.max_bytes_for_level_base'
|
||||
]
|
||||
)
|
||||
cond1.set_parameter(
|
||||
'evaluate',
|
||||
'int(options[0])*int(options[1])-int(options[2])>=0'
|
||||
)
|
||||
# only DBOptions
|
||||
cond2 = Condition('opt-cond-2')
|
||||
cond2 = OptionCondition.create(cond2)
|
||||
cond2.set_parameter(
|
||||
'options', [
|
||||
'DBOptions.max_file_opening_threads',
|
||||
'DBOptions.table_cache_numshardbits',
|
||||
'misc_opt2',
|
||||
'misc_opt3'
|
||||
]
|
||||
)
|
||||
cond2_expr = (
|
||||
'(int(options[0])*int(options[2]))-' +
|
||||
'((4*int(options[1])*int(options[3]))/10)==0'
|
||||
)
|
||||
cond2.set_parameter('evaluate', cond2_expr)
|
||||
# mix of CFOptions and DBOptions
|
||||
cond3 = Condition('opt-cond-3')
|
||||
cond3 = OptionCondition.create(cond3)
|
||||
cond3.set_parameter(
|
||||
'options', [
|
||||
'DBOptions.max_background_jobs', # 2
|
||||
'DBOptions.write_thread_slow_yield_usec', # 3
|
||||
'CFOptions.num_levels', # 7
|
||||
'misc_opt1' # 10
|
||||
]
|
||||
)
|
||||
cond3_expr = (
|
||||
'(int(options[3])*int(options[2]))-' +
|
||||
'(int(options[1])*int(options[0]))==64'
|
||||
)
|
||||
cond3.set_parameter('evaluate', cond3_expr)
|
||||
|
||||
db_options.check_and_trigger_conditions([cond1, cond2, cond3])
|
||||
print(cond1.get_trigger()) # {'col-fam-B': ['4', '10', '10']}
|
||||
print(cond2.get_trigger()) # {'DB_WIDE': ['16', '4']}
|
||||
# {'col-fam-B': ['2', '3', '10'], 'col-fam-A': ['2', '3', '7']}
|
||||
print(cond3.get_trigger())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
421 tools/advisor/advisor/db_stats_fetcher.py (new executable file)
@@ -0,0 +1,421 @@
|
||||
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
# This source code is licensed under both the GPLv2 (found in the
|
||||
# COPYING file in the root directory) and Apache 2.0 License
|
||||
# (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
from advisor.db_log_parser import Log
|
||||
from advisor.db_timeseries_parser import TimeSeriesData, NO_ENTITY
|
||||
from advisor.rule_parser import Condition, TimeSeriesCondition
|
||||
import copy
|
||||
import glob
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
|
||||
class LogStatsParser(TimeSeriesData):
|
||||
STATS = 'STATISTICS:'
|
||||
|
||||
@staticmethod
|
||||
def parse_log_line_for_stats(log_line):
|
||||
# Example stat line (from LOG file):
|
||||
# "rocksdb.db.get.micros P50 : 8.4 P95 : 21.8 P99 : 33.9 P100 : 92.0\n"
|
||||
token_list = log_line.strip().split()
|
||||
# token_list = ['rocksdb.db.get.micros', 'P50', ':', '8.4', 'P95', ':',
|
||||
# '21.8', 'P99', ':', '33.9', 'P100', ':', '92.0']
|
||||
stat_prefix = token_list[0] + '.' # 'rocksdb.db.get.micros.'
|
||||
stat_values = [
|
||||
token
|
||||
for token in token_list[1:]
|
||||
if token != ':'
|
||||
]
|
||||
# stat_values = ['P50', '8.4', 'P95', '21.8', 'P99', '33.9', 'P100',
|
||||
# '92.0']
|
||||
stat_dict = {}
|
||||
for ix, metric in enumerate(stat_values):
|
||||
if ix % 2 == 0:
|
||||
stat_name = stat_prefix + metric
|
||||
stat_name = stat_name.lower() # Note: case insensitive names
|
||||
else:
|
||||
stat_dict[stat_name] = float(metric)
|
||||
# stat_dict = {'rocksdb.db.get.micros.p50': 8.4,
|
||||
# 'rocksdb.db.get.micros.p95': 21.8, 'rocksdb.db.get.micros.p99': 33.9,
|
||||
# 'rocksdb.db.get.micros.p100': 92.0}
|
||||
return stat_dict
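# Illustrative sketch (assumed doctest-style usage, not part of the
# original patch): parsing the example line above would yield
# >>> LogStatsParser.parse_log_line_for_stats(
# ...     "rocksdb.db.get.micros P50 : 8.4 P95 : 21.8 P99 : 33.9 P100 : 92.0"
# ... )
# {'rocksdb.db.get.micros.p50': 8.4, 'rocksdb.db.get.micros.p95': 21.8,
#  'rocksdb.db.get.micros.p99': 33.9, 'rocksdb.db.get.micros.p100': 92.0}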
|
||||
|
||||
def __init__(self, logs_path_prefix, stats_freq_sec):
|
||||
super().__init__()
|
||||
self.logs_file_prefix = logs_path_prefix
|
||||
self.stats_freq_sec = stats_freq_sec
|
||||
self.duration_sec = 60
|
||||
|
||||
def get_keys_from_conditions(self, conditions):
|
||||
# Note: case insensitive stat names
|
||||
reqd_stats = []
|
||||
for cond in conditions:
|
||||
for key in cond.keys:
|
||||
key = key.lower()
|
||||
# some keys are prepended with '[]'; OdsStatsFetcher replaces this
# marker with the appropriate key_prefix, but the LogStatsParser does
# not need a prefix, so strip these characters here
|
||||
if key.startswith('[]'):
|
||||
reqd_stats.append(key[2:])
|
||||
else:
|
||||
reqd_stats.append(key)
|
||||
return reqd_stats
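# Illustrative sketch (assumed example keys, not part of the original
# patch): a condition with keys
# ['[]rocksdb.db.get.micros.p99', 'RocksDB.Block.Cache.Hit.Count']
# contributes ['rocksdb.db.get.micros.p99', 'rocksdb.block.cache.hit.count']
# to reqd_stats: the '[]' marker is stripped and names are lowercased.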
|
||||
|
||||
def add_to_timeseries(self, log, reqd_stats):
|
||||
# this method takes in the Log object that contains the Rocksdb stats
|
||||
# and a list of required stats, then it parses the stats line by line
|
||||
# to fetch required stats and add them to the keys_ts object
|
||||
# Example: reqd_stats = ['rocksdb.block.cache.hit.count',
|
||||
# 'rocksdb.db.get.micros.p99']
|
||||
# Suppose log.get_message() returns the following string:
|
||||
# "[WARN] [db/db_impl.cc:485] STATISTICS:\n
|
||||
# rocksdb.block.cache.miss COUNT : 1459\n
|
||||
# rocksdb.block.cache.hit COUNT : 37\n
|
||||
# ...
|
||||
# rocksdb.db.get.micros P50 : 15.6 P95 : 39.7 P99 : 62.6 P100 : 148.0\n
|
||||
# ..."
|
||||
new_lines = log.get_message().split('\n')
|
||||
# let log_ts = 1532518219
|
||||
log_ts = log.get_timestamp()
|
||||
# example updates to keys_ts:
|
||||
# keys_ts[NO_ENTITY]['rocksdb.db.get.micros.p99'][1532518219] = 62.6
|
||||
# keys_ts[NO_ENTITY]['rocksdb.block.cache.hit.count'][1532518219] = 37
|
||||
for line in new_lines[1:]: # new_lines[0] does not contain any stats
|
||||
stats_on_line = self.parse_log_line_for_stats(line)
|
||||
for stat in stats_on_line:
|
||||
if stat in reqd_stats:
|
||||
if stat not in self.keys_ts[NO_ENTITY]:
|
||||
self.keys_ts[NO_ENTITY][stat] = {}
|
||||
self.keys_ts[NO_ENTITY][stat][log_ts] = stats_on_line[stat]
|
||||
|
||||
def fetch_timeseries(self, reqd_stats):
|
||||
# this method parses the Rocksdb LOG file and generates timeseries for
|
||||
# each of the statistics in the list reqd_stats
|
||||
self.keys_ts = {NO_ENTITY: {}}
|
||||
for file_name in glob.glob(self.logs_file_prefix + '*'):
|
||||
# TODO(poojam23): find a way to distinguish the 'old' log files of
# previous experiments from those of the current experiment when they
# are present in the same directory
|
||||
if re.search('old', file_name, re.IGNORECASE):
|
||||
continue
|
||||
with open(file_name, 'r') as db_logs:
|
||||
new_log = None
|
||||
for line in db_logs:
|
||||
if Log.is_new_log(line):
|
||||
if (
|
||||
new_log and
|
||||
re.search(self.STATS, new_log.get_message())
|
||||
):
|
||||
self.add_to_timeseries(new_log, reqd_stats)
|
||||
new_log = Log(line, column_families=[])
|
||||
else:
|
||||
# To account for logs split into multiple lines
|
||||
new_log.append_message(line)
|
||||
# Check for the last log in the file.
|
||||
if new_log and re.search(self.STATS, new_log.get_message()):
|
||||
self.add_to_timeseries(new_log, reqd_stats)
|
||||
|
||||
|
||||
class DatabasePerfContext(TimeSeriesData):
|
||||
# TODO(poojam23): check if any benchrunner provides PerfContext sampled at
|
||||
# regular intervals
|
||||
def __init__(self, perf_context_ts, stats_freq_sec=0, cumulative=True):
|
||||
'''
|
||||
perf_context_ts is expected to be in the following format:
|
||||
Dict[metric, Dict[timestamp, value]], where for
|
||||
each (metric, timestamp) pair, the value is database-wide (i.e.
|
||||
summed over all the threads involved)
|
||||
if stats_freq_sec == 0, per-metric only one value is reported
|
||||
'''
|
||||
super().__init__()
|
||||
self.stats_freq_sec = stats_freq_sec
|
||||
self.keys_ts = {NO_ENTITY: perf_context_ts}
|
||||
if cumulative:
|
||||
self.unaccumulate_metrics()
|
||||
|
||||
def unaccumulate_metrics(self):
|
||||
# if the perf context metrics provided are cumulative in nature, this
|
||||
# method can be used to convert them to a disjoint format
|
||||
epoch_ts = copy.deepcopy(self.keys_ts)
|
||||
for stat in self.keys_ts[NO_ENTITY]:
|
||||
timeseries = sorted(
|
||||
list(self.keys_ts[NO_ENTITY][stat].keys()), reverse=True
|
||||
)
|
||||
if len(timeseries) < 2:
|
||||
continue
|
||||
for ix, ts in enumerate(timeseries[:-1]):
|
||||
epoch_ts[NO_ENTITY][stat][ts] = (
|
||||
epoch_ts[NO_ENTITY][stat][ts] -
|
||||
epoch_ts[NO_ENTITY][stat][timeseries[ix+1]]
|
||||
)
|
||||
if epoch_ts[NO_ENTITY][stat][ts] < 0:
|
||||
raise ValueError('DBPerfContext: really cumulative?')
|
||||
# drop the smallest timestamp in the timeseries for this metric
|
||||
epoch_ts[NO_ENTITY][stat].pop(timeseries[-1])
|
||||
self.keys_ts = epoch_ts
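# Illustrative sketch (assumed numbers, not part of the original patch):
# a cumulative series {10: 100, 20: 104, 30: 110} for some stat becomes
# {20: 4, 30: 6}; each value is the increase since the previous sample
# and the smallest timestamp (10) is dropped.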
|
||||
|
||||
def get_keys_from_conditions(self, conditions):
|
||||
reqd_stats = []
|
||||
for cond in conditions:
|
||||
reqd_stats.extend([key.lower() for key in cond.keys])
|
||||
return reqd_stats
|
||||
|
||||
def fetch_timeseries(self, statistics):
|
||||
# this method is redundant for DatabasePerfContext because the __init__
|
||||
# does the job of populating 'keys_ts'
|
||||
pass
|
||||
|
||||
|
||||
class OdsStatsFetcher(TimeSeriesData):
|
||||
# class constants
|
||||
OUTPUT_FILE = 'temp/stats_out.tmp'
|
||||
ERROR_FILE = 'temp/stats_err.tmp'
|
||||
RAPIDO_COMMAND = "%s --entity=%s --key=%s --tstart=%s --tend=%s --showtime"
|
||||
ODS_COMMAND = '%s %s %s' # client, entities, keys
|
||||
|
||||
# static methods
|
||||
@staticmethod
|
||||
def _get_string_in_quotes(value):
|
||||
return '"' + str(value) + '"'
|
||||
|
||||
@staticmethod
|
||||
def _get_time_value_pair(pair_string):
|
||||
# example pair_string: '[1532544591, 97.3653601828]'
|
||||
pair_string = pair_string.replace('[', '')
|
||||
pair_string = pair_string.replace(']', '')
|
||||
pair = pair_string.split(',')
|
||||
first = int(pair[0].strip())
|
||||
second = float(pair[1].strip())
|
||||
return [first, second]
|
||||
|
||||
def __init__(self, client, entities, key_prefix=None):
|
||||
super().__init__()
|
||||
self.client = client
|
||||
self.entities = entities
|
||||
self.key_prefix = key_prefix
|
||||
self.stats_freq_sec = 60
|
||||
self.duration_sec = 60
|
||||
# Fetch last 3 hours data by default
|
||||
self.end_time = int(time.time())
|
||||
self.start_time = self.end_time - (3 * 60 * 60)
|
||||
|
||||
def execute_script(self, command):
|
||||
print('executing...')
|
||||
print(command)
|
||||
with open(self.OUTPUT_FILE, "w+") as out_file, \
        open(self.ERROR_FILE, "w+") as err_file:
    subprocess.call(
        command, shell=True, stdout=out_file, stderr=err_file
    )
|
||||
|
||||
def parse_rapido_output(self):
|
||||
# Output looks like the following:
|
||||
# <entity_name>\t<key_name>\t[[ts, value], [ts, value], ...]
|
||||
# ts = timestamp; value = value of key_name in entity_name at time ts
|
||||
self.keys_ts = {}
|
||||
with open(self.OUTPUT_FILE, 'r') as fp:
|
||||
for line in fp:
|
||||
token_list = line.strip().split('\t')
|
||||
entity = token_list[0]
|
||||
key = token_list[1]
|
||||
if entity not in self.keys_ts:
|
||||
self.keys_ts[entity] = {}
|
||||
if key not in self.keys_ts[entity]:
|
||||
self.keys_ts[entity][key] = {}
|
||||
list_of_lists = [
|
||||
self._get_time_value_pair(pair_string)
|
||||
for pair_string in token_list[2].split('],')
|
||||
]
|
||||
value = {pair[0]: pair[1] for pair in list_of_lists}
|
||||
self.keys_ts[entity][key] = value
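# Illustrative sketch (assumed output line, not part of the original
# patch): the line
# "entity1\tkey1\t[[1532544591, 97.3], [1532544651, 98.5]]"
# would be parsed into
# self.keys_ts = {'entity1': {'key1': {1532544591: 97.3, 1532544651: 98.5}}}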
|
||||
|
||||
def parse_ods_output(self):
|
||||
# Output looks like the following:
|
||||
# <entity_name>\t<key_name>\t<timestamp>\t<value>
|
||||
# there is one line per (entity_name, key_name, timestamp)
|
||||
self.keys_ts = {}
|
||||
with open(self.OUTPUT_FILE, 'r') as fp:
|
||||
for line in fp:
|
||||
token_list = line.split()
|
||||
entity = token_list[0]
|
||||
if entity not in self.keys_ts:
|
||||
self.keys_ts[entity] = {}
|
||||
key = token_list[1]
|
||||
if key not in self.keys_ts[entity]:
|
||||
self.keys_ts[entity][key] = {}
|
||||
self.keys_ts[entity][key][token_list[2]] = token_list[3]
|
||||
|
||||
def fetch_timeseries(self, statistics):
|
||||
# this method fetches the timeseries of required stats from the ODS
|
||||
# service and populates the 'keys_ts' object appropriately
|
||||
print('OdsStatsFetcher: fetching ' + str(statistics))
|
||||
if re.search('rapido', self.client, re.IGNORECASE):
|
||||
command = self.RAPIDO_COMMAND % (
|
||||
self.client,
|
||||
self._get_string_in_quotes(self.entities),
|
||||
self._get_string_in_quotes(','.join(statistics)),
|
||||
self._get_string_in_quotes(self.start_time),
|
||||
self._get_string_in_quotes(self.end_time)
|
||||
)
|
||||
# Run the tool and fetch the time-series data
|
||||
self.execute_script(command)
|
||||
# Parse output and populate the 'keys_ts' map
|
||||
self.parse_rapido_output()
|
||||
elif re.search('ods', self.client, re.IGNORECASE):
|
||||
command = self.ODS_COMMAND % (
|
||||
self.client,
|
||||
self._get_string_in_quotes(self.entities),
|
||||
self._get_string_in_quotes(','.join(statistics))
|
||||
)
|
||||
# Run the tool and fetch the time-series data
|
||||
self.execute_script(command)
|
||||
# Parse output and populate the 'keys_ts' map
|
||||
self.parse_ods_output()
|
||||
|
||||
def get_keys_from_conditions(self, conditions):
|
||||
reqd_stats = []
|
||||
for cond in conditions:
|
||||
for key in cond.keys:
|
||||
use_prefix = False
|
||||
if key.startswith('[]'):
|
||||
use_prefix = True
|
||||
key = key[2:]
|
||||
# TODO(poojam23): this is very hacky and needs to be improved
|
||||
if key.startswith("rocksdb"):
|
||||
key += ".60"
|
||||
if use_prefix:
|
||||
if not self.key_prefix:
|
||||
print('Warning: OdsStatsFetcher might need key prefix')
|
||||
print('for the key: ' + key)
|
||||
else:
|
||||
key = self.key_prefix + "." + key
|
||||
reqd_stats.append(key)
|
||||
return reqd_stats
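# Illustrative sketch (the key_prefix value is hypothetical, not part of
# the original patch): with key_prefix='host.pool', the condition key
# '[]rocksdb.db.get.micros.p99' is expanded to
# 'host.pool.rocksdb.db.get.micros.p99.60' ('.60' is appended to
# 'rocksdb*' keys and the '[]' marker is replaced by the prefix).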
|
||||
|
||||
def fetch_rate_url(self, entities, keys, window_len, percent, display):
|
||||
# type: (List[str], List[str], str, str, bool) -> str
|
||||
transform_desc = (
|
||||
"rate(" + str(window_len) + ",duration=" + str(self.duration_sec)
|
||||
)
|
||||
if percent:
|
||||
transform_desc = transform_desc + ",%)"
|
||||
else:
|
||||
transform_desc = transform_desc + ")"
|
||||
|
||||
command = self.RAPIDO_COMMAND + " --transform=%s --url=%s"
|
||||
command = command % (
|
||||
self.client,
|
||||
self._get_string_in_quotes(','.join(entities)),
|
||||
self._get_string_in_quotes(','.join(keys)),
|
||||
self._get_string_in_quotes(self.start_time),
|
||||
self._get_string_in_quotes(self.end_time),
|
||||
self._get_string_in_quotes(transform_desc),
|
||||
self._get_string_in_quotes(display)
|
||||
)
|
||||
self.execute_script(command)
|
||||
url = ""
|
||||
with open(self.OUTPUT_FILE, 'r') as fp:
|
||||
url = fp.readline()
|
||||
return url
|
||||
|
||||
|
||||
# TODO(poojam23): remove these blocks once the unittests for LogStatsParser are
|
||||
# in place
|
||||
def main():
|
||||
# populating the statistics
|
||||
log_stats = LogStatsParser('temp/db_stats_fetcher_main_LOG.tmp', 20)
|
||||
print(log_stats.type)
|
||||
print(log_stats.keys_ts)
|
||||
print(log_stats.logs_file_prefix)
|
||||
print(log_stats.stats_freq_sec)
|
||||
print(log_stats.duration_sec)
|
||||
statistics = [
|
||||
'rocksdb.number.rate_limiter.drains.count',
|
||||
'rocksdb.number.block.decompressed.count',
|
||||
'rocksdb.db.get.micros.p50',
|
||||
'rocksdb.manifest.file.sync.micros.p99',
|
||||
'rocksdb.db.get.micros.p99'
|
||||
]
|
||||
log_stats.fetch_timeseries(statistics)
|
||||
print()
|
||||
print(log_stats.keys_ts)
|
||||
# aggregated statistics
|
||||
print()
|
||||
print(log_stats.fetch_aggregated_values(
|
||||
NO_ENTITY, statistics, TimeSeriesData.AggregationOperator.latest
|
||||
))
|
||||
print(log_stats.fetch_aggregated_values(
|
||||
NO_ENTITY, statistics, TimeSeriesData.AggregationOperator.oldest
|
||||
))
|
||||
print(log_stats.fetch_aggregated_values(
|
||||
NO_ENTITY, statistics, TimeSeriesData.AggregationOperator.max
|
||||
))
|
||||
print(log_stats.fetch_aggregated_values(
|
||||
NO_ENTITY, statistics, TimeSeriesData.AggregationOperator.min
|
||||
))
|
||||
print(log_stats.fetch_aggregated_values(
|
||||
NO_ENTITY, statistics, TimeSeriesData.AggregationOperator.avg
|
||||
))
|
||||
# condition 'evaluate_expression' that evaluates to true
|
||||
cond1 = Condition('cond-1')
|
||||
cond1 = TimeSeriesCondition.create(cond1)
|
||||
cond1.set_parameter('keys', statistics)
|
||||
cond1.set_parameter('behavior', 'evaluate_expression')
|
||||
cond1.set_parameter('evaluate', 'keys[3]-keys[2]>=0')
|
||||
cond1.set_parameter('aggregation_op', 'avg')
|
||||
# condition 'evaluate_expression' that evaluates to false
|
||||
cond2 = Condition('cond-2')
|
||||
cond2 = TimeSeriesCondition.create(cond2)
|
||||
cond2.set_parameter('keys', statistics)
|
||||
cond2.set_parameter('behavior', 'evaluate_expression')
|
||||
cond2.set_parameter('evaluate', '((keys[1]-(2*keys[0]))/100)<3000')
|
||||
cond2.set_parameter('aggregation_op', 'latest')
|
||||
# condition 'evaluate_expression' that evaluates to true; no aggregation_op
|
||||
cond3 = Condition('cond-3')
|
||||
cond3 = TimeSeriesCondition.create(cond3)
|
||||
cond3.set_parameter('keys', [statistics[2], statistics[3]])
|
||||
cond3.set_parameter('behavior', 'evaluate_expression')
|
||||
cond3.set_parameter('evaluate', '(keys[1]/keys[0])>23')
|
||||
# check remaining methods
|
||||
conditions = [cond1, cond2, cond3]
|
||||
print()
|
||||
print(log_stats.get_keys_from_conditions(conditions))
|
||||
log_stats.check_and_trigger_conditions(conditions)
|
||||
print()
|
||||
print(cond1.get_trigger())
|
||||
print(cond2.get_trigger())
|
||||
print(cond3.get_trigger())
|
||||
|
||||
|
||||
# TODO(poojam23): shift this code to the unit tests for DatabasePerfContext
|
||||
def check_perf_context_code():
|
||||
string = (
|
||||
" user_key_comparison_count = 675903942, " +
|
||||
"block_cache_hit_count = 830086, " +
|
||||
"get_from_output_files_time = 85088293818, " +
|
||||
"seek_on_memtable_time = 0,"
|
||||
)
|
||||
token_list = string.split(',')
|
||||
perf_context = {
|
||||
token.split('=')[0].strip(): int(token.split('=')[1].strip())
|
||||
for token in token_list
|
||||
if token
|
||||
}
|
||||
timestamp = int(time.time())
|
||||
perf_ts = {}
|
||||
for key in perf_context:
|
||||
perf_ts[key] = {}
|
||||
start_val = perf_context[key]
|
||||
for ix in range(5):
|
||||
perf_ts[key][timestamp+(ix*10)] = start_val + (2 * ix)
|
||||
db_perf_context = DatabasePerfContext(perf_ts, 10, True)
|
||||
print(db_perf_context.keys_ts)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
check_perf_context_code()
|
208
tools/advisor/advisor/db_timeseries_parser.py
Normal file
@ -0,0 +1,208 @@
|
||||
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
# This source code is licensed under both the GPLv2 (found in the
|
||||
# COPYING file in the root directory) and Apache 2.0 License
|
||||
# (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
from abc import abstractmethod
|
||||
from advisor.db_log_parser import DataSource
|
||||
from enum import Enum
|
||||
import math
|
||||
|
||||
|
||||
NO_ENTITY = 'ENTITY_PLACEHOLDER'
|
||||
|
||||
|
||||
class TimeSeriesData(DataSource):
|
||||
class Behavior(Enum):
|
||||
bursty = 1
|
||||
evaluate_expression = 2
|
||||
|
||||
class AggregationOperator(Enum):
|
||||
avg = 1
|
||||
max = 2
|
||||
min = 3
|
||||
latest = 4
|
||||
oldest = 5
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(DataSource.Type.TIME_SERIES)
|
||||
self.keys_ts = None # Dict[entity, Dict[key, Dict[timestamp, value]]]
|
||||
self.stats_freq_sec = None
|
||||
|
||||
@abstractmethod
|
||||
def get_keys_from_conditions(self, conditions):
|
||||
# This method takes in a list of time-series conditions; for each
|
||||
# condition it manipulates the 'keys' in the way that is supported by
|
||||
# the subclass implementing this method
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def fetch_timeseries(self, required_statistics):
|
||||
# this method takes in a list of statistics and fetches the timeseries
|
||||
# for each of them and populates the 'keys_ts' dictionary
|
||||
pass
|
||||
|
||||
def fetch_burst_epochs(
|
||||
self, entities, statistic, window_sec, threshold, percent
|
||||
):
|
||||
# type: (List[str], str, int, float, bool) -> Dict[str, Dict[int, float]]
|
||||
# this method calculates the (percent) rate change in the 'statistic'
|
||||
# for each entity (over 'window_sec' seconds) and returns the epochs
|
||||
# where this rate change is greater than or equal to the 'threshold'
|
||||
# value
|
||||
if self.stats_freq_sec == 0:
|
||||
# not time series data, cannot check for bursty behavior
|
||||
return
|
||||
if window_sec < self.stats_freq_sec:
|
||||
window_sec = self.stats_freq_sec
|
||||
# 'window_samples' is the number of windows to go back to
|
||||
# compare the current window with, while calculating rate change.
|
||||
window_samples = math.ceil(window_sec / self.stats_freq_sec)
|
||||
burst_epochs = {}
|
||||
# if percent = False:
|
||||
# curr_val = value at window for which rate change is being calculated
|
||||
# prev_val = value at window that is window_samples behind curr_window
|
||||
# Then rate_without_percent =
|
||||
# ((curr_val-prev_val)*duration_sec)/(curr_timestamp-prev_timestamp)
|
||||
# if percent = True:
|
||||
# rate_with_percent = (rate_without_percent * 100) / prev_val
|
||||
# These calculations are in line with the rate() transform supported
|
||||
# by ODS
|
||||
for entity in entities:
|
||||
if statistic not in self.keys_ts[entity]:
|
||||
continue
|
||||
timestamps = sorted(list(self.keys_ts[entity][statistic].keys()))
|
||||
for ix in range(window_samples, len(timestamps), 1):
|
||||
first_ts = timestamps[ix - window_samples]
|
||||
last_ts = timestamps[ix]
|
||||
first_val = self.keys_ts[entity][statistic][first_ts]
|
||||
last_val = self.keys_ts[entity][statistic][last_ts]
|
||||
diff = last_val - first_val
|
||||
if percent:
|
||||
diff = diff * 100 / first_val
|
||||
rate = (diff * self.duration_sec) / (last_ts - first_ts)
|
||||
# if the rate change is greater than the provided threshold,
|
||||
# then the condition is triggered for entity at time 'last_ts'
|
||||
if rate >= threshold:
|
||||
if entity not in burst_epochs:
|
||||
burst_epochs[entity] = {}
|
||||
burst_epochs[entity][last_ts] = rate
|
||||
return burst_epochs
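# Illustrative sketch (assumed numbers, not part of the original patch):
# with stats_freq_sec=60, duration_sec=60, window_sec=300 and percent=True,
# window_samples = ceil(300/60) = 5; if the statistic rises from 1000 to
# 1600 over those 5 samples (300 seconds apart), then
# diff = (600 * 100) / 1000 = 60 and rate = (60 * 60) / 300 = 12,
# so a threshold of 10 records a burst of 12 at the later timestamp.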
|
||||
|
||||
def fetch_aggregated_values(self, entity, statistics, aggregation_op):
|
||||
# type: (str, List[str], AggregationOperator) -> Dict[str, float]
|
||||
# this method performs the aggregation specified by 'aggregation_op'
|
||||
# on the timeseries of 'statistics' for 'entity' and returns:
|
||||
# Dict[statistic, aggregated_value]
|
||||
result = {}
|
||||
for stat in statistics:
|
||||
if stat not in self.keys_ts[entity]:
|
||||
continue
|
||||
agg_val = None
|
||||
if aggregation_op is self.AggregationOperator.latest:
|
||||
latest_timestamp = max(list(self.keys_ts[entity][stat].keys()))
|
||||
agg_val = self.keys_ts[entity][stat][latest_timestamp]
|
||||
elif aggregation_op is self.AggregationOperator.oldest:
|
||||
oldest_timestamp = min(list(self.keys_ts[entity][stat].keys()))
|
||||
agg_val = self.keys_ts[entity][stat][oldest_timestamp]
|
||||
elif aggregation_op is self.AggregationOperator.max:
|
||||
agg_val = max(list(self.keys_ts[entity][stat].values()))
|
||||
elif aggregation_op is self.AggregationOperator.min:
|
||||
agg_val = min(list(self.keys_ts[entity][stat].values()))
|
||||
elif aggregation_op is self.AggregationOperator.avg:
|
||||
values = list(self.keys_ts[entity][stat].values())
|
||||
agg_val = sum(values) / len(values)
|
||||
result[stat] = agg_val
|
||||
return result
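# Illustrative sketch (assumed numbers, not part of the original patch):
# for a series {10: 1.0, 20: 3.0, 30: 2.0}, the aggregations return
# latest -> 2.0 (value at the max timestamp), oldest -> 1.0, max -> 3.0,
# min -> 1.0 and avg -> 2.0.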
|
||||
|
||||
def check_and_trigger_conditions(self, conditions):
|
||||
# get the list of statistics that need to be fetched
|
||||
reqd_keys = self.get_keys_from_conditions(conditions)
|
||||
# fetch the required statistics and populate the map 'keys_ts'
|
||||
self.fetch_timeseries(reqd_keys)
|
||||
# Trigger the appropriate conditions
|
||||
for cond in conditions:
|
||||
complete_keys = self.get_keys_from_conditions([cond])
|
||||
# Get the entities that have all statistics required by 'cond':
|
||||
# an entity is checked for a given condition only if we possess all
|
||||
# of the condition's 'keys' for that entity
|
||||
entities_with_stats = []
|
||||
for entity in self.keys_ts:
|
||||
stat_missing = False
|
||||
for stat in complete_keys:
|
||||
if stat not in self.keys_ts[entity]:
|
||||
stat_missing = True
|
||||
break
|
||||
if not stat_missing:
|
||||
entities_with_stats.append(entity)
|
||||
if not entities_with_stats:
|
||||
continue
|
||||
if cond.behavior is self.Behavior.bursty:
|
||||
# for a condition that checks for bursty behavior, only one key
|
||||
# should be present in the condition's 'keys' field
|
||||
result = self.fetch_burst_epochs(
|
||||
entities_with_stats,
|
||||
complete_keys[0], # there should be only one key
|
||||
cond.window_sec,
|
||||
cond.rate_threshold,
|
||||
True
|
||||
)
|
||||
# Trigger in this case is:
|
||||
# Dict[entity_name, Dict[timestamp, rate_change]]
|
||||
# where the inner dictionary contains rate_change values when
|
||||
# the rate_change >= threshold provided, with the
|
||||
# corresponding timestamps
|
||||
if result:
|
||||
cond.set_trigger(result)
|
||||
elif cond.behavior is self.Behavior.evaluate_expression:
|
||||
self.handle_evaluate_expression(
|
||||
cond,
|
||||
complete_keys,
|
||||
entities_with_stats
|
||||
)
|
||||
|
||||
def handle_evaluate_expression(self, condition, statistics, entities):
|
||||
trigger = {}
|
||||
# check 'condition' for each of these entities
|
||||
for entity in entities:
|
||||
if hasattr(condition, 'aggregation_op'):
|
||||
# in this case, the aggregation operation is performed on each
# of the condition's 'keys' and the condition's 'expression' is
# then evaluated with the aggregated values; if it evaluates to
# True, the list of aggregated key values is added to the
# condition's trigger: Dict[entity_name, List[stats]]
|
||||
result = self.fetch_aggregated_values(
|
||||
entity, statistics, condition.aggregation_op
|
||||
)
|
||||
keys = [result[key] for key in statistics]
|
||||
try:
|
||||
if eval(condition.expression):
|
||||
trigger[entity] = keys
|
||||
except Exception as e:
|
||||
print(
|
||||
'WARNING(TimeSeriesData) check_and_trigger: ' + str(e)
|
||||
)
|
||||
else:
|
||||
# assumption: all stats have the same series of timestamps
# this is similar to the above, but since there is no aggregation,
# 'expression' is evaluated at each timestamp; every epoch at which
# the condition's 'expression' evaluates to true is added to the
# trigger, so the trigger is:
# Dict[entity, Dict[timestamp, List[stats]]]
|
||||
for epoch in self.keys_ts[entity][statistics[0]].keys():
|
||||
keys = [
|
||||
self.keys_ts[entity][key][epoch]
|
||||
for key in statistics
|
||||
]
|
||||
try:
|
||||
if eval(condition.expression):
|
||||
if entity not in trigger:
|
||||
trigger[entity] = {}
|
||||
trigger[entity][epoch] = keys
|
||||
except Exception as e:
|
||||
print(
|
||||
'WARNING(TimeSeriesData) check_and_trigger: ' +
|
||||
str(e)
|
||||
)
|
||||
if trigger:
|
||||
condition.set_trigger(trigger)
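# Illustrative sketch (assumed data, not part of the original patch):
# without an aggregation_op, keys_ts = {'e1': {'k1': {5: 10}, 'k2': {5: 3}}}
# and expression 'keys[0]-keys[1]>5' produce the trigger {'e1': {5: [10, 3]}}
# because 10 - 3 > 5 at epoch 5; with an aggregation_op the trigger would
# instead map 'e1' to the list of aggregated values.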
|
@ -62,8 +62,8 @@ class IniParser:
|
||||
def get_key_value_pair(line):
|
||||
line = line.strip()
|
||||
key = line.split('=')[0].strip()
|
||||
value = line.split('=')[1].strip()
|
||||
if not value:
|
||||
value = "=".join(line.split('=')[1:])
|
||||
if value == "": # if the option has no value
|
||||
return (key, None)
|
||||
values = IniParser.get_list_from_value(value)
|
||||
if len(values) == 1:
|
||||
|
@ -4,11 +4,11 @@
|
||||
# (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
import argparse
|
||||
from advisor.db_log_parser import DatabaseLogs, DataSource
|
||||
from advisor.db_options_parser import DatabaseOptions
|
||||
from advisor.db_log_parser import DataSource, NO_COL_FAMILY
|
||||
from advisor.db_timeseries_parser import TimeSeriesData
|
||||
from enum import Enum
|
||||
from advisor.ini_parser import IniParser
|
||||
import re
|
||||
|
||||
|
||||
class Section(ABC):
|
||||
@ -29,6 +29,9 @@ class Rule(Section):
|
||||
super().__init__(name)
|
||||
self.conditions = None
|
||||
self.suggestions = None
|
||||
self.overlap_time_seconds = None
|
||||
self.trigger_entities = None
|
||||
self.trigger_column_families = None
|
||||
|
||||
def set_parameter(self, key, value):
|
||||
# If the Rule is associated with a single suggestion/condition, then
|
||||
@ -45,6 +48,8 @@ class Rule(Section):
|
||||
self.suggestions = [value]
|
||||
else:
|
||||
self.suggestions = value
|
||||
elif key == 'overlap_time_period':
|
||||
self.overlap_time_seconds = value
|
||||
|
||||
def get_suggestions(self):
|
||||
return self.suggestions
|
||||
@ -58,12 +63,133 @@ class Rule(Section):
|
||||
raise ValueError(
|
||||
self.name + ': rule must have at least one suggestion'
|
||||
)
|
||||
if self.overlap_time_seconds:
|
||||
if len(self.conditions) != 2:
|
||||
raise ValueError(
|
||||
self.name + ": rule must be associated with 2 conditions\
|
||||
in order to check for a time dependency between them"
|
||||
)
|
||||
time_format = r'^\d+[s|m|h|d]$'
|
||||
if (
|
||||
not
|
||||
re.match(time_format, self.overlap_time_seconds, re.IGNORECASE)
|
||||
):
|
||||
raise ValueError(
|
||||
self.name + ": overlap_time_seconds format: \d+[s|m|h|d]"
|
||||
)
|
||||
else: # convert to seconds
|
||||
in_seconds = int(self.overlap_time_seconds[:-1])
|
||||
if self.overlap_time_seconds[-1] == 'm':
|
||||
in_seconds *= 60
|
||||
elif self.overlap_time_seconds[-1] == 'h':
|
||||
in_seconds *= (60 * 60)
|
||||
elif self.overlap_time_seconds[-1] == 'd':
|
||||
in_seconds *= (24 * 60 * 60)
|
||||
self.overlap_time_seconds = in_seconds
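# Illustrative sketch (assumed value, not part of the original patch):
# overlap_time_period='10m' passes the format check and is converted to
# self.overlap_time_seconds = 10 * 60 = 600.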
|
||||
|
||||
def is_triggered(self, conditions_dict):
|
||||
condition_triggers = []
|
||||
for cond in self.conditions:
|
||||
condition_triggers.append(conditions_dict[cond].is_triggered())
|
||||
return all(condition_triggers)
|
||||
def get_overlap_timestamps(self, key1_trigger_epochs, key2_trigger_epochs):
|
||||
# this method takes in 2 timeseries i.e. timestamps at which the
|
||||
# rule's 2 TIME_SERIES conditions were triggered and it finds
|
||||
# (if present) the first pair of timestamps at which the 2 conditions
|
||||
# were triggered within 'overlap_time_seconds' of each other
|
||||
key1_lower_bounds = [
|
||||
epoch - self.overlap_time_seconds
|
||||
for epoch in key1_trigger_epochs
|
||||
]
|
||||
key1_lower_bounds.sort()
|
||||
key2_trigger_epochs.sort()
|
||||
trigger_ix = 0
|
||||
overlap_pair = None
|
||||
for key1_lb in key1_lower_bounds:
|
||||
# check the index bound before subscripting to avoid an IndexError
while (
trigger_ix < len(key2_trigger_epochs) and
key2_trigger_epochs[trigger_ix] < key1_lb
):
|
||||
trigger_ix += 1
|
||||
if trigger_ix >= len(key2_trigger_epochs):
|
||||
break
|
||||
if (
|
||||
key2_trigger_epochs[trigger_ix] <=
|
||||
key1_lb + (2 * self.overlap_time_seconds)
|
||||
):
|
||||
overlap_pair = (
|
||||
key2_trigger_epochs[trigger_ix],
|
||||
key1_lb + self.overlap_time_seconds
|
||||
)
|
||||
break
|
||||
return overlap_pair
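# Illustrative sketch (assumed numbers, not part of the original patch):
# with overlap_time_seconds=60, key1_trigger_epochs=[300] and
# key2_trigger_epochs=[270]: key1_lower_bounds=[240] and 270 <= 240 + 120,
# so the method returns (270, 300), i.e. the two conditions triggered
# within 60 seconds of each other.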
|
||||
|
||||
def get_trigger_entities(self):
|
||||
return self.trigger_entities
|
||||
|
||||
def get_trigger_column_families(self):
|
||||
return self.trigger_column_families
|
||||
|
||||
def is_triggered(self, conditions_dict, column_families):
|
||||
if self.overlap_time_seconds:
|
||||
condition1 = conditions_dict[self.conditions[0]]
|
||||
condition2 = conditions_dict[self.conditions[1]]
|
||||
if not (
|
||||
condition1.get_data_source() is DataSource.Type.TIME_SERIES and
|
||||
condition2.get_data_source() is DataSource.Type.TIME_SERIES
|
||||
):
|
||||
raise ValueError(self.name + ': need 2 timeseries conditions')
|
||||
|
||||
map1 = condition1.get_trigger()
|
||||
map2 = condition2.get_trigger()
|
||||
if not (map1 and map2):
|
||||
return False
|
||||
|
||||
self.trigger_entities = {}
|
||||
is_triggered = False
|
||||
entity_intersection = (
|
||||
set(map1.keys()).intersection(set(map2.keys()))
|
||||
)
|
||||
for entity in entity_intersection:
|
||||
overlap_timestamps_pair = (
|
||||
self.get_overlap_timestamps(
|
||||
list(map1[entity].keys()), list(map2[entity].keys())
|
||||
)
|
||||
)
|
||||
if overlap_timestamps_pair:
|
||||
self.trigger_entities[entity] = overlap_timestamps_pair
|
||||
is_triggered = True
|
||||
if is_triggered:
|
||||
self.trigger_column_families = set(column_families)
|
||||
return is_triggered
|
||||
else:
|
||||
all_conditions_triggered = True
|
||||
self.trigger_column_families = set(column_families)
|
||||
for cond_name in self.conditions:
|
||||
cond = conditions_dict[cond_name]
|
||||
if not cond.get_trigger():
|
||||
all_conditions_triggered = False
|
||||
break
|
||||
if (
|
||||
cond.get_data_source() is DataSource.Type.LOG or
|
||||
cond.get_data_source() is DataSource.Type.DB_OPTIONS
|
||||
):
|
||||
cond_col_fam = set(cond.get_trigger().keys())
|
||||
if NO_COL_FAMILY in cond_col_fam:
|
||||
cond_col_fam = set(column_families)
|
||||
self.trigger_column_families = (
|
||||
self.trigger_column_families.intersection(cond_col_fam)
|
||||
)
|
||||
elif cond.get_data_source() is DataSource.Type.TIME_SERIES:
|
||||
cond_entities = set(cond.get_trigger().keys())
|
||||
if self.trigger_entities is None:
|
||||
self.trigger_entities = cond_entities
|
||||
else:
|
||||
self.trigger_entities = (
|
||||
self.trigger_entities.intersection(cond_entities)
|
||||
)
|
||||
if not (self.trigger_entities or self.trigger_column_families):
|
||||
all_conditions_triggered = False
|
||||
break
|
||||
if not all_conditions_triggered: # clean up if rule not triggered
|
||||
self.trigger_column_families = None
|
||||
self.trigger_entities = None
|
||||
return all_conditions_triggered
|
||||
|
||||
def __repr__(self):
|
||||
# Append conditions
|
||||
@ -84,6 +210,10 @@ class Rule(Section):
|
||||
is_first = False
|
||||
else:
|
||||
rule_string += (", " + sugg)
|
||||
if self.trigger_entities:
|
||||
rule_string += (', entities:: ' + str(self.trigger_entities))
|
||||
if self.trigger_column_families:
|
||||
rule_string += (', col_fam:: ' + str(self.trigger_column_families))
|
||||
# Return constructed string
|
||||
return rule_string
|
||||
|
||||
@ -98,18 +228,27 @@ class Suggestion(Section):
|
||||
super().__init__(name)
|
||||
self.option = None
|
||||
self.action = None
|
||||
self.suggested_value = None
|
||||
self.suggested_values = None
|
||||
self.description = None
|
||||
|
||||
def set_parameter(self, key, value):
|
||||
if key == 'option':
|
||||
# Note:
|
||||
# case 1: 'option' is supported by Rocksdb OPTIONS file; in this
|
||||
# case the option belongs to one of the sections in the config
|
||||
# file and its name is prefixed by "<section_type>."
|
||||
# case 2: 'option' is not supported by Rocksdb OPTIONS file; the
|
||||
# option is not expected to have the character '.' in its name
|
||||
self.option = value
|
||||
elif key == 'action':
|
||||
if self.option and not value:
|
||||
raise ValueError(self.name + ': provide action for option')
|
||||
self.action = self.Action[value]
|
||||
elif key == 'suggested_value':
|
||||
self.suggested_value = value
|
||||
elif key == 'suggested_values':
|
||||
if isinstance(value, str):
|
||||
self.suggested_values = [value]
|
||||
else:
|
||||
self.suggested_values = value
|
||||
elif key == 'description':
|
||||
self.description = value
|
||||
|
||||
@ -119,33 +258,28 @@ class Suggestion(Section):
|
||||
raise ValueError(self.name + ': provide option or description')
|
||||
if not self.action:
|
||||
raise ValueError(self.name + ': provide action for option')
|
||||
if self.action is self.Action.set and not self.suggested_value:
|
||||
if self.action is self.Action.set and not self.suggested_values:
|
||||
raise ValueError(
|
||||
self.name + ': provide suggested value for option'
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
sugg_string = "Suggestion: " + self.name
|
||||
if self.description:
|
||||
return self.description
|
||||
sugg_string = ""
|
||||
if self.action is self.Action.set:
|
||||
sugg_string = (
|
||||
self.name + ' suggests setting ' + self.option +
|
||||
' to ' + self.suggested_value
|
||||
)
|
||||
sugg_string += (' description : ' + self.description)
|
||||
else:
|
||||
sugg_string = self.name + ' suggests ' + self.action.name + ' in '
|
||||
sugg_string += (self.option + '.')
|
||||
if self.suggested_value:
|
||||
sugg_string += (
|
||||
' option : ' + self.option + ' action : ' + self.action.name
|
||||
)
|
||||
if self.suggested_values:
|
||||
sugg_string += (
|
||||
' The suggested value is ' + self.suggested_value
|
||||
' suggested_values : ' + str(self.suggested_values)
|
||||
)
|
||||
return sugg_string
|
||||
|
||||
|
||||
class Condition(Section):
|
||||
def __init__(self, name):
|
||||
# a rule is identified by its name, so there should be no duplicates
|
||||
super().__init__(name)
|
||||
self.data_source = None
|
||||
self.trigger = None
|
||||
@ -166,6 +300,9 @@ class Condition(Section):
|
||||
def set_trigger(self, condition_trigger):
|
||||
self.trigger = condition_trigger
|
||||
|
||||
def get_trigger(self):
|
||||
return self.trigger
|
||||
|
||||
def is_triggered(self):
|
||||
if self.trigger:
|
||||
return True
|
||||
@ -173,7 +310,7 @@ class Condition(Section):
|
||||
|
||||
def set_parameter(self, key, value):
|
||||
# must be defined by the subclass
|
||||
raise ValueError(self.name + ': provide source for condition')
|
||||
raise NotImplementedError(self.name + ': provide source for condition')
|
||||
|
||||
|
||||
class LogCondition(Condition):
|
||||
@ -183,15 +320,9 @@ class LogCondition(Condition):
|
||||
base_condition.__class__ = cls
|
||||
return base_condition
|
||||
|
||||
class Scope(Enum):
|
||||
database = 1
|
||||
column_family = 2
|
||||
|
||||
def set_parameter(self, key, value):
|
||||
if key == 'regex':
|
||||
self.regex = value
|
||||
elif key == 'scope':
|
||||
self.scope = self.Scope[value]
|
||||
|
||||
def perform_checks(self):
|
||||
super().perform_checks()
|
||||
@ -199,10 +330,10 @@ class LogCondition(Condition):
|
||||
raise ValueError(self.name + ': provide regex for log condition')
|
||||
|
||||
def __repr__(self):
|
||||
log_cond_str = (
|
||||
self.name + ' checks if the regex ' + self.regex + ' is found ' +
|
||||
' in the LOG file in the scope of ' + self.scope.name
|
||||
)
|
||||
log_cond_str = "LogCondition: " + self.name
|
||||
log_cond_str += (" regex: " + self.regex)
|
||||
# if self.trigger:
|
||||
# log_cond_str += (" trigger: " + str(self.trigger))
|
||||
return log_cond_str
|
||||
|
||||
|
||||
@ -215,8 +346,11 @@ class OptionCondition(Condition):
|
||||
|
||||
def set_parameter(self, key, value):
|
||||
if key == 'options':
|
||||
self.options = value
|
||||
if key == 'evaluate':
|
||||
if isinstance(value, str):
|
||||
self.options = [value]
|
||||
else:
|
||||
self.options = value
|
||||
elif key == 'evaluate':
|
||||
self.eval_expr = value
|
||||
|
||||
def perform_checks(self):
|
||||
@ -227,15 +361,77 @@ class OptionCondition(Condition):
|
||||
raise ValueError(self.name + ': expression missing in condition')
|
||||
|
||||
def __repr__(self):
|
||||
log_cond_str = (
|
||||
self.name + ' checks if the given expression evaluates to true'
|
||||
)
|
||||
return log_cond_str
|
||||
opt_cond_str = "OptionCondition: " + self.name
|
||||
opt_cond_str += (" options: " + str(self.options))
|
||||
opt_cond_str += (" expression: " + self.eval_expr)
|
||||
if self.trigger:
|
||||
opt_cond_str += (" trigger: " + str(self.trigger))
|
||||
return opt_cond_str
|
||||
|
||||
|
||||
class TimeSeriesCondition(Condition):
|
||||
@classmethod
|
||||
def create(cls, base_condition):
|
||||
base_condition.set_data_source(DataSource.Type['TIME_SERIES'])
|
||||
base_condition.__class__ = cls
|
||||
return base_condition
|
||||
|
||||
def set_parameter(self, key, value):
|
||||
if key == 'keys':
|
||||
if isinstance(value, str):
|
||||
self.keys = [value]
|
||||
else:
|
||||
self.keys = value
|
||||
elif key == 'behavior':
|
||||
self.behavior = TimeSeriesData.Behavior[value]
|
||||
elif key == 'rate_threshold':
|
||||
self.rate_threshold = float(value)
|
||||
elif key == 'window_sec':
|
||||
self.window_sec = int(value)
|
||||
elif key == 'evaluate':
|
||||
self.expression = value
|
||||
elif key == 'aggregation_op':
|
||||
self.aggregation_op = TimeSeriesData.AggregationOperator[value]
|
||||
|
||||
def perform_checks(self):
|
||||
if not self.keys:
|
||||
raise ValueError(self.name + ': specify timeseries key')
|
||||
if not self.behavior:
|
||||
raise ValueError(self.name + ': specify triggering behavior')
|
||||
if self.behavior is TimeSeriesData.Behavior.bursty:
|
||||
if not self.rate_threshold:
|
||||
raise ValueError(self.name + ': specify rate burst threshold')
|
||||
if not self.window_sec:
|
||||
self.window_sec = 300 # default window length is 5 minutes
|
||||
if len(self.keys) > 1:
|
||||
raise ValueError(self.name + ': specify only one key')
|
||||
elif self.behavior is TimeSeriesData.Behavior.evaluate_expression:
|
||||
if not (self.expression):
|
||||
raise ValueError(self.name + ': specify evaluation expression')
|
||||
else:
|
||||
raise ValueError(self.name + ': trigger behavior not supported')
|
||||
|
||||
def __repr__(self):
|
||||
ts_cond_str = "TimeSeriesCondition: " + self.name
|
||||
ts_cond_str += (" statistics: " + str(self.keys))
|
||||
ts_cond_str += (" behavior: " + self.behavior.name)
|
||||
if self.behavior is TimeSeriesData.Behavior.bursty:
|
||||
ts_cond_str += (" rate_threshold: " + str(self.rate_threshold))
|
||||
ts_cond_str += (" window_sec: " + str(self.window_sec))
|
||||
if self.behavior is TimeSeriesData.Behavior.evaluate_expression:
|
||||
ts_cond_str += (" expression: " + self.expression)
|
||||
if hasattr(self, 'aggregation_op'):
|
||||
ts_cond_str += (" aggregation_op: " + self.aggregation_op.name)
|
||||
if self.trigger:
|
||||
ts_cond_str += (" trigger: " + str(self.trigger))
|
||||
return ts_cond_str
|
||||
|
||||
|
||||
class RulesSpec:
|
||||
def __init__(self, rules_path):
|
||||
self.file_path = rules_path
|
||||
|
||||
def initialise_fields(self):
|
||||
self.rules_dict = {}
|
||||
self.conditions_dict = {}
|
||||
self.suggestions_dict = {}
|
||||
@ -249,9 +445,13 @@ class RulesSpec:
|
||||
sugg.perform_checks()
|
||||
|
||||
def load_rules_from_spec(self):
|
||||
self.initialise_fields()
|
||||
with open(self.file_path, 'r') as db_rules:
|
||||
curr_section = None
|
||||
for line in db_rules:
|
||||
line = IniParser.remove_trailing_comment(line)
|
||||
if not line:
|
||||
continue
|
||||
element = IniParser.get_element(line)
|
||||
if element is IniParser.Element.comment:
|
||||
continue
|
||||
@ -277,6 +477,8 @@ class RulesSpec:
|
||||
new_cond = LogCondition.create(new_cond)
|
||||
elif value == 'OPTIONS':
|
||||
new_cond = OptionCondition.create(new_cond)
|
||||
elif value == 'TIME_SERIES':
|
||||
new_cond = TimeSeriesCondition.create(new_cond)
|
||||
else:
|
||||
new_cond.set_parameter(key, value)
|
||||
elif curr_section is IniParser.Element.sugg:
|
||||
@ -291,75 +493,36 @@ class RulesSpec:
|
||||
def get_suggestions_dict(self):
|
||||
return self.suggestions_dict
|
||||
|
||||
def get_triggered_rules(self, data_sources, column_families):
|
||||
self.trigger_conditions(data_sources)
|
||||
triggered_rules = []
|
||||
for rule in self.rules_dict.values():
|
||||
if rule.is_triggered(self.conditions_dict, column_families):
|
||||
triggered_rules.append(rule)
|
||||
return triggered_rules
|
||||
|
||||
def trigger_conditions(data_sources, conditions_dict):
|
||||
for source in data_sources:
|
||||
cond_subset = [
|
||||
cond
|
||||
for cond in conditions_dict.values()
|
||||
if cond.get_data_source() is source.type
|
||||
]
|
||||
if not cond_subset:
|
||||
continue
|
||||
source.check_and_trigger_conditions(cond_subset)
|
||||
def trigger_conditions(self, data_sources):
|
||||
for source_type in data_sources:
|
||||
cond_subset = [
|
||||
cond
|
||||
for cond in self.conditions_dict.values()
|
||||
if cond.get_data_source() is source_type
|
||||
]
|
||||
if not cond_subset:
|
||||
continue
|
||||
for source in data_sources[source_type]:
|
||||
source.check_and_trigger_conditions(cond_subset)
|
||||
|
||||
|
||||
def get_triggered_rules(rules_dict, conditions_dict):
|
||||
triggered_rules = []
|
||||
for rule in rules_dict.values():
|
||||
if rule.is_triggered(conditions_dict):
|
||||
triggered_rules.append(rule)
|
||||
return triggered_rules
|
||||
|
||||
|
||||
def main(args):
|
||||
# Load the rules with their conditions and suggestions.
|
||||
db_rules = RulesSpec(args.rules_spec)
|
||||
db_rules.load_rules_from_spec()
|
||||
# Perform some basic sanity checks for each section.
|
||||
db_rules.perform_section_checks()
|
||||
rules_dict = db_rules.get_rules_dict()
|
||||
conditions_dict = db_rules.get_conditions_dict()
|
||||
suggestions_dict = db_rules.get_suggestions_dict()
|
||||
print()
|
||||
print('RULES')
|
||||
for rule in rules_dict.values():
|
||||
print(repr(rule))
|
||||
print()
|
||||
print('CONDITIONS')
|
||||
for cond in conditions_dict.values():
|
||||
print(repr(cond))
|
||||
print()
|
||||
print('SUGGESTIONS')
|
||||
for sugg in suggestions_dict.values():
|
||||
print(repr(sugg))
|
||||
|
||||
# Initialise the data sources.
|
||||
data_sources = []
|
||||
data_sources.append(DatabaseOptions(args.rocksdb_options))
|
||||
data_sources.append(DatabaseLogs(args.rocksdb_log_prefix))
|
||||
|
||||
# Initialise the ConditionChecker with the provided data sources.
|
||||
trigger_conditions(data_sources, conditions_dict)
|
||||
|
||||
# Check for the conditions read in from the Rules spec, if triggered.
|
||||
print()
|
||||
triggered_rules = get_triggered_rules(rules_dict, conditions_dict)
|
||||
for rule in triggered_rules:
|
||||
print('Rule: ' + rule.name + ' has been triggered and:')
|
||||
rule_suggestions = rule.get_suggestions()
|
||||
for sugg_name in rule_suggestions:
|
||||
print(suggestions_dict[sugg_name])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='This script is used for\
|
||||
gauging rocksdb performance using as input: Rocksdb LOG, OPTIONS,\
|
||||
performance context, command-line statistics and statistics published\
|
||||
on ODS and providing as output: suggestions to improve Rocksdb\
|
||||
performance')
|
||||
parser.add_argument('--rules_spec', required=True, type=str)
|
||||
parser.add_argument('--rocksdb_options', required=True, type=str)
|
||||
parser.add_argument('--rocksdb_log_prefix', required=True, type=str)
|
||||
args = parser.parse_args()
|
||||
main(args)
|
||||
def print_rules(self, rules):
|
||||
for rule in rules:
|
||||
print('\nRule: ' + rule.name)
|
||||
for cond_name in rule.conditions:
|
||||
print(repr(self.conditions_dict[cond_name]))
|
||||
for sugg_name in rule.suggestions:
|
||||
print(repr(self.suggestions_dict[sugg_name]))
|
||||
if rule.trigger_entities:
|
||||
print('scope: entities:')
|
||||
print(rule.trigger_entities)
|
||||
if rule.trigger_column_families:
|
||||
print('scope: col_fam:')
|
||||
print(rule.trigger_column_families)
|
||||
|
@ -3,24 +3,28 @@
|
||||
# COPYING file in the root directory) and Apache 2.0 License
|
||||
# (found in the LICENSE.Apache file in the root directory).
|
||||
#
|
||||
# This ini file is very similar to the Rocksdb ini file in terms of syntax.
|
||||
# FORMAT: very similar to the Rocksdb ini file in terms of syntax
|
||||
# (refer rocksdb/examples/rocksdb_option_file_example.ini)
|
||||
# It is made up of multiple sections and each section is made up of multiple
|
||||
# key-value pairs. Each section must have a name. The recognized sections are
|
||||
# Rule, Suggestion, Condition followed by their name in "" that acts as an
|
||||
# identifier. There should be at least one Rule section in the file.
|
||||
#
|
||||
# Each rule must be associated with at least one condition and one suggestion.
|
||||
# If a Rule is associated with multiple Conditions, then all the conditions
|
||||
# must be triggered in order for the Rule to be triggered.
|
||||
# The suggestions don't have any ordering amongst them as of now.
|
||||
# The Rules INI file is made up of multiple sections and each section is made
|
||||
# up of multiple key-value pairs. The recognized section types are:
|
||||
# Rule, Suggestion, Condition. Each section must have a name specified in ""
|
||||
# in the section header. This name acts as an identifier in that section
|
||||
# type's namespace. A section header looks like:
|
||||
# [<section_type> "<section_name_identifier>"]
|
||||
#
|
||||
# A Condition must be associated to a data source specified by the parameter
|
||||
# There should be at least one Rule section in the file with its corresponding
|
||||
# Condition and Suggestion sections. A Rule is triggered only when all of its
|
||||
# conditions are triggered. The order in which a Rule's conditions and
|
||||
# suggestions are specified has no significance.
|
||||
#
|
||||
# A Condition must be associated with a data source specified by the parameter
|
||||
# 'source' and this must be the first parameter specified for the Condition.
|
||||
# A condition can be associated with one or more Rules.
|
||||
#
|
||||
# A suggestion is an advised change to a database or column_family option to
|
||||
# improve the performance of the database in some way. Every suggestion is
|
||||
# is associated with one or more Rules.
|
||||
# A Suggestion is an advised change to a Rocksdb option to improve the
|
||||
# performance of the database in some way. Every suggestion can be a part of
|
||||
# one or more Rules.
|
||||
|
||||
[Rule "stall-too-many-memtables"]
|
||||
suggestions=inc-bg-flush:inc-write-buffer
|
||||
@ -29,7 +33,6 @@ conditions=stall-too-many-memtables
|
||||
[Condition "stall-too-many-memtables"]
|
||||
source=LOG
|
||||
regex=Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+
|
||||
scope=column_family
|
||||
|
||||
[Rule "stall-too-many-L0"]
|
||||
suggestions=inc-max-subcompactions:inc-max-bg-compactions:inc-write-buffer-size:dec-max-bytes-for-level-base:inc-l0-slowdown-writes-trigger
|
||||
@ -38,7 +41,6 @@ conditions=stall-too-many-L0
|
||||
[Condition "stall-too-many-L0"]
|
||||
source=LOG
|
||||
regex=Stalling writes because we have \d+ level-0 files
|
||||
scope=column_family
|
||||
|
||||
[Rule "stop-too-many-L0"]
|
||||
suggestions=inc-max-bg-compactions:inc-write-buffer-size:inc-l0-stop-writes-trigger
|
||||
@ -47,7 +49,6 @@ conditions=stop-too-many-L0
|
||||
[Condition "stop-too-many-L0"]
|
||||
source=LOG
|
||||
regex=Stopping writes because we have \d+ level-0 files
|
||||
scope=column_family
|
||||
|
||||
[Rule "stall-too-many-compaction-bytes"]
|
||||
suggestions=inc-max-bg-compactions:inc-write-buffer-size:inc-hard-pending-compaction-bytes-limit:inc-soft-pending-compaction-bytes-limit
|
||||
@ -56,11 +57,11 @@ conditions=stall-too-many-compaction-bytes
|
||||
[Condition "stall-too-many-compaction-bytes"]
|
||||
source=LOG
|
||||
regex=Stalling writes because of estimated pending compaction bytes \d+
|
||||
scope=column_family
|
||||
|
||||
[Suggestion "inc-bg-flush"]
|
||||
option=DBOptions.max_background_flushes
|
||||
action=increase
|
||||
suggested_values=2
|
||||
|
||||
[Suggestion "inc-write-buffer"]
|
||||
option=CFOptions.max_write_buffer_number
|
||||
@ -73,6 +74,7 @@ action=increase
|
||||
[Suggestion "inc-max-bg-compactions"]
|
||||
option=DBOptions.max_background_compactions
|
||||
action=increase
|
||||
suggested_values=2
|
||||
|
||||
[Suggestion "inc-write-buffer-size"]
|
||||
option=CFOptions.write_buffer_size
|
||||
@ -100,12 +102,113 @@ action=increase
|
||||
|
||||
[Rule "level0-level1-ratio"]
|
||||
conditions=level0-level1-ratio
|
||||
suggestions=l0-l1-ratio-health-check
|
||||
suggestions=inc-base-max-bytes
|
||||
|
||||
[Condition "level0-level1-ratio"]
|
||||
source=OPTIONS
|
||||
options=CFOptions.level0_file_num_compaction_trigger:CFOptions.write_buffer_size:CFOptions.max_bytes_for_level_base
|
||||
evaluate=int(options[0])*int(options[1])-int(options[2])<(-251659456) # should evaluate to a boolean
|
||||
evaluate=int(options[0])*int(options[1])-int(options[2])>=1 # should evaluate to a boolean, condition triggered if evaluates to true
|
||||
|
||||
[Suggestion "l0-l1-ratio-health-check"]
|
||||
description='modify options such that (level0_file_num_compaction_trigger * write_buffer_size - max_bytes_for_level_base < 5) is satisfied'
|
||||
[Suggestion "inc-base-max-bytes"]
|
||||
option=CFOptions.max_bytes_for_level_base
|
||||
action=increase
|
||||
|
||||
[Rules "tuning-iostat-burst"]
|
||||
conditions=large-db-get-p99
|
||||
suggestions=bytes-per-sync-non0:wal-bytes-per-sync-non0:set-rate-limiter
|
||||
#overlap_time_period=10m
|
||||
|
||||
[Condition "write-burst"]
|
||||
source=TIME_SERIES
|
||||
keys=dyno.flash_write_bytes_per_sec
|
||||
behavior=bursty
|
||||
window_sec=300 # the smaller this window, the more sensitive the check is to changes in the time series, so the rate_threshold should be bigger; with window_sec=60 this is the same as diff(%)
|
||||
rate_threshold=20
|
||||
|
||||
[Condition "large-p99-read-latency"]
|
||||
source=TIME_SERIES
|
||||
keys=[]rocksdb.read.block.get.micros.p99
|
||||
behavior=bursty
|
||||
window_sec=300
|
||||
rate_threshold=10
|
||||
|
||||
[Condition "large-db-get-p99"]
|
||||
source=TIME_SERIES
|
||||
keys=[]rocksdb.db.get.micros.p50:[]rocksdb.db.get.micros.p99
|
||||
behavior=evaluate_expression
|
||||
evaluate=(keys[1]/keys[0])>5
|
||||
|
||||
[Suggestion "bytes-per-sync-non0"]
|
||||
option=DBOptions.bytes_per_sync
|
||||
action=set
|
||||
suggested_values=1048576
|
||||
|
||||
[Suggestion "wal-bytes-per-sync-non0"]
|
||||
option=DBOptions.wal_bytes_per_sync
|
||||
action=set
|
||||
suggested_values=1048576
|
||||
|
||||
[Suggestion "set-rate-limiter"]
|
||||
option=rate_limiter_bytes_per_sec
|
||||
action=set
|
||||
suggested_values=1024000
|
||||
|
||||
[Rule "bloom-filter-percent-useful"]
|
||||
conditions=bloom-filter-percent-useful
|
||||
suggestions=inc-bloom-bits-per-key
|
||||
|
||||
[Condition "bloom-filter-percent-useful"]
|
||||
source=TIME_SERIES
|
||||
keys=[]rocksdb.bloom.filter.useful.count:[]rocksdb.bloom.filter.full.positive.count:[]rocksdb.bloom.filter.full.true.positive.count
|
||||
behavior=evaluate_expression
|
||||
evaluate=((keys[0]+keys[2])/(keys[0]+keys[1]))<0.9 # should evaluate to a boolean
|
||||
aggregation_op=latest
|
||||
|
||||
[Rule "bloom-not-enabled"]
|
||||
conditions=bloom-not-enabled
|
||||
suggestions=inc-bloom-bits-per-key
|
||||
|
||||
[Condition "bloom-not-enabled"]
|
||||
source=TIME_SERIES
|
||||
keys=[]rocksdb.bloom.filter.useful.count:[]rocksdb.bloom.filter.full.positive.count:[]rocksdb.bloom.filter.full.true.positive.count
|
||||
behavior=evaluate_expression
|
||||
evaluate=keys[0]+keys[1]+keys[2]==0
|
||||
aggregation_op=avg
|
||||
|
||||
[Suggestion "inc-bloom-bits-per-key"]
|
||||
option=bloom_bits
|
||||
action=increase
|
||||
suggested_values=2
|
||||
|
||||
[Rule "small-l0-files"]
|
||||
conditions=small-l0-files
|
||||
suggestions=dec-max-bytes-for-level-base:inc-write-buffer-size
|
||||
|
||||
[Condition "small-l0-files"]
|
||||
source=OPTIONS
|
||||
options=CFOptions.max_bytes_for_level_base:CFOptions.level0_file_num_compaction_trigger:CFOptions.write_buffer_size
|
||||
evaluate=int(options[0])>(10*int(options[1])*int(options[2]))
|
||||
|
||||
[Rule "decompress-time-long"]
|
||||
conditions=decompress-time-long
|
||||
suggestions=dec-block-size:inc-block-cache-size:faster-compression-type
|
||||
|
||||
[Condition "decompress-time-long"]
|
||||
source=TIME_SERIES
|
||||
keys=block_decompress_time:block_read_time:block_checksum_time
|
||||
behavior=evaluate_expression
|
||||
evaluate=(keys[0]/(keys[0]+keys[1]+keys[2]))>0.3
|
||||
|
||||
[Suggestion "dec-block-size"]
|
||||
option=TableOptions.BlockBasedTable.block_size
|
||||
action=decrease
|
||||
|
||||
[Suggestion "inc-block-cache-size"]
|
||||
option=cache_size
|
||||
action=increase
|
||||
suggested_values=16000000
|
||||
|
||||
[Suggestion "faster-compression-type"]
|
||||
option=CFOptions.compression
|
||||
action=set
|
||||
suggested_values=kLZ4Compression
|
||||
|
@ -23,3 +23,8 @@
|
||||
2018/05/25-14:34:21.048592 7f82bd676200 [DEBUG] [db/db_impl_files.cc:261] [JOB 45] Delete /tmp/rocksdbtest-155919/dbbench/000084.sst type=2 #84 -- OK
|
||||
2018/05/25-14:34:21.048603 7f82bd676200 EVENT_LOG_v1 {"time_micros": 1527284061048600, "job": 45, "event": "table_file_deletion", "file_number": 84}
|
||||
2018/05/25-14:34:21.048981 7f82bd676200 [db/db_impl.cc:398] Shutdown complete
|
||||
2018/05/25-14:34:21.049000 7f82bd676200 [db/db_impl.cc:563] [col-fam-A] random log message for testing
|
||||
2018/05/25-14:34:21.049010 7f82bd676200 [db/db_impl.cc:234] [col-fam-B] log continuing on next line
|
||||
remaining part of the log
|
||||
2018/05/25-14:34:21.049020 7f82bd676200 [db/db_impl.cc:653] [col-fam-A] another random log message
|
||||
2018/05/25-14:34:21.049025 7f82bd676200 [db/db_impl.cc:331] [unknown] random log message no column family
|
||||
|
@ -5,7 +5,6 @@ conditions=missing-source
|
||||
[Condition "normal-rule"]
|
||||
source=LOG
|
||||
regex=Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+
|
||||
scope=column_family
|
||||
|
||||
[Suggestion "inc-bg-flush"]
|
||||
option=DBOptions.max_background_flushes
|
||||
@ -43,7 +42,6 @@ conditions=missing-regex
|
||||
[Condition "missing-regex"]
|
||||
source=LOG
|
||||
regex=
|
||||
scope=column_family
|
||||
|
||||
[Suggestion "missing-option"]
|
||||
option=
|
||||
|
@ -5,7 +5,6 @@ conditions=missing-source
|
||||
[Condition "missing-source"]
|
||||
source=
|
||||
regex=Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+
|
||||
scope=column_family
|
||||
|
||||
[Suggestion "inc-bg-flush"]
|
||||
option=DBOptions.max_background_flushes
|
||||
|
@ -5,7 +5,6 @@ conditions=missing-source
|
||||
[Condition "normal-condition"]
|
||||
source=LOG
|
||||
regex=Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+
|
||||
scope=column_family
|
||||
|
||||
[Suggestion "missing-action"]
|
||||
option=DBOptions.max_background_flushes
|
||||
|
@ -5,7 +5,6 @@ conditions=missing-source
|
||||
[Condition "normal-condition"]
|
||||
source=LOG
|
||||
regex=Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+
|
||||
scope=column_family
|
||||
|
||||
[Suggestion "inc-bg-flush"]
|
||||
option=DBOptions.max_background_flushes
|
||||
|
@ -17,22 +17,18 @@ conditions=log-4-false:options-1-false
|
||||
[Condition "log-1-true"]
|
||||
source=LOG
|
||||
regex=Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+
|
||||
scope=column_family
|
||||
|
||||
[Condition "log-2-true"]
|
||||
source=LOG
|
||||
regex=Stalling writes because we have \d+ level-0 files
|
||||
scope=column_family
|
||||
|
||||
[Condition "log-3-true"]
|
||||
source=LOG
|
||||
regex=Stopping writes because we have \d+ level-0 files
|
||||
scope=column_family
|
||||
|
||||
[Condition "log-4-false"]
|
||||
source=LOG
|
||||
regex=Stalling writes because of estimated pending compaction bytes \d+
|
||||
scope=column_family
|
||||
|
||||
[Condition "options-1-false"]
|
||||
source=OPTIONS
|
||||
|
83
tools/advisor/test/input_files/triggered_rules.ini
Normal file
@ -0,0 +1,83 @@
[Rule "stall-too-many-memtables"]
suggestions=inc-bg-flush:inc-write-buffer
conditions=stall-too-many-memtables

[Condition "stall-too-many-memtables"]
source=LOG
regex=Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+

[Rule "stall-too-many-L0"]
suggestions=inc-max-subcompactions:inc-max-bg-compactions:inc-write-buffer-size:dec-max-bytes-for-level-base:inc-l0-slowdown-writes-trigger
conditions=stall-too-many-L0

[Condition "stall-too-many-L0"]
source=LOG
regex=Stalling writes because we have \d+ level-0 files

[Rule "stop-too-many-L0"]
suggestions=inc-max-bg-compactions:inc-write-buffer-size:inc-l0-stop-writes-trigger
conditions=stop-too-many-L0

[Condition "stop-too-many-L0"]
source=LOG
regex=Stopping writes because we have \d+ level-0 files

[Rule "stall-too-many-compaction-bytes"]
suggestions=inc-max-bg-compactions:inc-write-buffer-size:inc-hard-pending-compaction-bytes-limit:inc-soft-pending-compaction-bytes-limit
conditions=stall-too-many-compaction-bytes

[Condition "stall-too-many-compaction-bytes"]
source=LOG
regex=Stalling writes because of estimated pending compaction bytes \d+

[Suggestion "inc-bg-flush"]
option=DBOptions.max_background_flushes
action=increase

[Suggestion "inc-write-buffer"]
option=CFOptions.max_write_buffer_number
action=increase

[Suggestion "inc-max-subcompactions"]
option=DBOptions.max_subcompactions
action=increase

[Suggestion "inc-max-bg-compactions"]
option=DBOptions.max_background_compactions
action=increase

[Suggestion "inc-write-buffer-size"]
option=CFOptions.write_buffer_size
action=increase

[Suggestion "dec-max-bytes-for-level-base"]
option=CFOptions.max_bytes_for_level_base
action=decrease

[Suggestion "inc-l0-slowdown-writes-trigger"]
option=CFOptions.level0_slowdown_writes_trigger
action=increase

[Suggestion "inc-l0-stop-writes-trigger"]
option=CFOptions.level0_stop_writes_trigger
action=increase

[Suggestion "inc-hard-pending-compaction-bytes-limit"]
option=CFOptions.hard_pending_compaction_bytes_limit
action=increase

[Suggestion "inc-soft-pending-compaction-bytes-limit"]
option=CFOptions.soft_pending_compaction_bytes_limit
action=increase

[Rule "level0-level1-ratio"]
conditions=level0-level1-ratio
suggestions=l0-l1-ratio-health-check

[Condition "level0-level1-ratio"]
source=OPTIONS
options=CFOptions.level0_file_num_compaction_trigger:CFOptions.write_buffer_size:CFOptions.max_bytes_for_level_base
evaluate=int(options[0])*int(options[1])-int(options[2])>=-268173312  # should evaluate to a boolean, condition triggered if evaluates to true

[Suggestion "l0-l1-ratio-health-check"]
description='modify options such that (level0_file_num_compaction_trigger * write_buffer_size - max_bytes_for_level_base < -268173312) is satisfied'
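The OPTIONS-backed condition above ("level0-level1-ratio") stores its trigger logic as a Python expression over the listed options. A minimal sketch of how such an evaluate string could be checked, with hypothetical option values; eval is shown only for illustration, not as the advisor's implementation:

# Hypothetical values for level0_file_num_compaction_trigger,
# write_buffer_size and max_bytes_for_level_base, in that order.
options = ['4', '67108864', '268435456']
expression = 'int(options[0])*int(options[1])-int(options[2])>=-268173312'
# The expression evaluates to a boolean; True means the condition triggers.
triggered = eval(expression)
print(triggered)  # True: 4 * 67108864 - 268435456 = 0, and 0 >= -268173312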
98
tools/advisor/test/test_db_log_parser.py
Normal file
@ -0,0 +1,98 @@
from advisor.db_log_parser import DatabaseLogs, Log, NO_COL_FAMILY
from advisor.rule_parser import Condition, LogCondition
import os
import unittest


class TestLog(unittest.TestCase):
    def setUp(self):
        self.column_families = ['default', 'col_fam_A']

    def test_get_column_family(self):
        test_log = (
            "2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " +
            "[col_fam_A] [JOB 44] Level-0 flush table #84: 1890780 bytes OK"
        )
        db_log = Log(test_log, self.column_families)
        self.assertEqual('col_fam_A', db_log.get_column_family())

        test_log = (
            "2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " +
            "[JOB 44] Level-0 flush table #84: 1890780 bytes OK"
        )
        db_log = Log(test_log, self.column_families)
        db_log.append_message('[default] some remaining part of log')
        self.assertEqual(NO_COL_FAMILY, db_log.get_column_family())

    def test_get_methods(self):
        hr_time = "2018/05/25-14:30:25.491635"
        context = "7f82ba72e700"
        message = (
            "[db/flush_job.cc:331] [default] [JOB 10] Level-0 flush table " +
            "#23: started"
        )
        test_log = hr_time + " " + context + " " + message
        db_log = Log(test_log, self.column_families)
        self.assertEqual(db_log.get_message(), message)
        remaining_message = "[col_fam_A] some more logs"
        db_log.append_message(remaining_message)
        self.assertEqual(
            db_log.get_human_readable_time(), "2018/05/25-14:30:25.491635"
        )
        self.assertEqual(db_log.get_context(), "7f82ba72e700")
        self.assertEqual(db_log.get_timestamp(), 1527258625)
        self.assertEqual(
            db_log.get_message(), str(message + '\n' + remaining_message)
        )

    def test_is_new_log(self):
        new_log = "2018/05/25-14:34:21.047233 context random new log"
        remaining_log = "2018/05/25 not really a new log"
        self.assertTrue(Log.is_new_log(new_log))
        self.assertFalse(Log.is_new_log(remaining_log))


class TestDatabaseLogs(unittest.TestCase):
    def test_check_and_trigger_conditions(self):
        this_path = os.path.abspath(os.path.dirname(__file__))
        logs_path_prefix = os.path.join(this_path, 'input_files/LOG-0')
        column_families = ['default', 'col-fam-A', 'col-fam-B']
        db_logs = DatabaseLogs(logs_path_prefix, column_families)
        # matches, has 2 col_fams
        condition1 = LogCondition.create(Condition('cond-A'))
        condition1.set_parameter('regex', 'random log message')
        # matches, multiple lines message
        condition2 = LogCondition.create(Condition('cond-B'))
        condition2.set_parameter('regex', 'continuing on next line')
        # does not match
        condition3 = LogCondition.create(Condition('cond-C'))
        condition3.set_parameter('regex', 'this should match no log')
        db_logs.check_and_trigger_conditions(
            [condition1, condition2, condition3]
        )
        cond1_trigger = condition1.get_trigger()
        self.assertEqual(2, len(cond1_trigger.keys()))
        self.assertSetEqual(
            {'col-fam-A', NO_COL_FAMILY}, set(cond1_trigger.keys())
        )
        self.assertEqual(2, len(cond1_trigger['col-fam-A']))
        messages = [
            "[db/db_impl.cc:563] [col-fam-A] random log message for testing",
            "[db/db_impl.cc:653] [col-fam-A] another random log message"
        ]
        self.assertIn(cond1_trigger['col-fam-A'][0].get_message(), messages)
        self.assertIn(cond1_trigger['col-fam-A'][1].get_message(), messages)
        self.assertEqual(1, len(cond1_trigger[NO_COL_FAMILY]))
        self.assertEqual(
            cond1_trigger[NO_COL_FAMILY][0].get_message(),
            "[db/db_impl.cc:331] [unknown] random log message no column family"
        )
        cond2_trigger = condition2.get_trigger()
        self.assertEqual(['col-fam-B'], list(cond2_trigger.keys()))
        self.assertEqual(1, len(cond2_trigger['col-fam-B']))
        self.assertEqual(
            cond2_trigger['col-fam-B'][0].get_message(),
            "[db/db_impl.cc:234] [col-fam-B] log continuing on next line\n" +
            "remaining part of the log"
        )
        self.assertIsNone(condition3.get_trigger())
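The get_timestamp() assertion above (1527258625 for "2018/05/25-14:30:25.491635") implies the human-readable time is interpreted as UTC. A sketch of that conversion, assuming UTC, shown here only to make the asserted value reproducible:

import calendar
import time

hr_time = '2018/05/25-14:30:25.491635'
# Drop the microseconds, parse the rest, and treat it as UTC.
parsed = time.strptime(hr_time.split('.')[0], '%Y/%m/%d-%H:%M:%S')
print(calendar.timegm(parsed))  # 1527258625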
@ -5,8 +5,9 @@

import os
import unittest
from advisor.rule_parser import RulesSpec, DatabaseLogs, DatabaseOptions
from advisor.rule_parser import get_triggered_rules, trigger_conditions
from advisor.rule_parser import RulesSpec
from advisor.db_log_parser import DatabaseLogs, DataSource
from advisor.db_options_parser import DatabaseOptions

RuleToSuggestions = {
    "stall-too-many-memtables": [
@ -41,16 +42,17 @@ class TestAllRulesTriggered(unittest.TestCase):
    def setUp(self):
        # load the Rules
        this_path = os.path.abspath(os.path.dirname(__file__))
        ini_path = os.path.join(this_path, '../advisor/rules.ini')
        ini_path = os.path.join(this_path, 'input_files/triggered_rules.ini')
        self.db_rules = RulesSpec(ini_path)
        self.db_rules.load_rules_from_spec()
        self.db_rules.perform_section_checks()
        # load the data sources: LOG and OPTIONS
        log_path = os.path.join(this_path, 'input_files/LOG-0')
        options_path = os.path.join(this_path, 'input_files/OPTIONS-000005')
        self.data_sources = []
        self.data_sources.append(DatabaseOptions(options_path))
        self.data_sources.append(DatabaseLogs(log_path))
        db_options_parser = DatabaseOptions(options_path)
        self.column_families = db_options_parser.get_column_families()
        db_logs_parser = DatabaseLogs(log_path, self.column_families)
        self.data_sources = [db_options_parser, db_logs_parser]

    def test_triggered_conditions(self):
        conditions_dict = self.db_rules.get_conditions_dict()
@ -59,18 +61,25 @@ class TestAllRulesTriggered(unittest.TestCase):
        for cond in conditions_dict.values():
            self.assertFalse(cond.is_triggered(), repr(cond))
        for rule in rules_dict.values():
            self.assertFalse(rule.is_triggered(conditions_dict), repr(rule))
            self.assertFalse(
                rule.is_triggered(conditions_dict, self.column_families),
                repr(rule)
            )

        # Trigger the conditions as per the data sources.
        trigger_conditions(self.data_sources, conditions_dict)
        # # Trigger the conditions as per the data sources.
        # trigger_conditions(, conditions_dict)

        # Get the set of rules that have been triggered
        triggered_rules = self.db_rules.get_triggered_rules(
            self.data_sources, self.column_families
        )

        # Make sure each condition and rule is triggered
        for cond in conditions_dict.values():
            if cond.get_data_source() is DataSource.Type.TIME_SERIES:
                continue
            self.assertTrue(cond.is_triggered(), repr(cond))

        # Get the set of rules that have been triggered
        triggered_rules = get_triggered_rules(rules_dict, conditions_dict)

        for rule in rules_dict.values():
            self.assertIn(rule, triggered_rules)
        # Check the suggestions made by the triggered rules
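Pieced together from the updated setUp() and test body above, the new flow builds the data sources first and then asks RulesSpec for the triggered rules. The sketch below only re-arranges calls that appear in the diff; the file paths are the test's input files, used here as placeholders:

from advisor.rule_parser import RulesSpec
from advisor.db_log_parser import DatabaseLogs
from advisor.db_options_parser import DatabaseOptions

rules = RulesSpec('input_files/triggered_rules.ini')
rules.load_rules_from_spec()
rules.perform_section_checks()

db_options = DatabaseOptions('input_files/OPTIONS-000005')
column_families = db_options.get_column_families()
db_logs = DatabaseLogs('input_files/LOG-0', column_families)

# Conditions are triggered from the data sources and rules are matched per
# column family, as exercised by test_triggered_conditions().
triggered_rules = rules.get_triggered_rules(
    [db_options, db_logs], column_families
)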
@ -94,9 +103,10 @@ class TestConditionsConjunctions(unittest.TestCase):
        # load the data sources: LOG and OPTIONS
        log_path = os.path.join(this_path, 'input_files/LOG-1')
        options_path = os.path.join(this_path, 'input_files/OPTIONS-000005')
        self.data_sources = []
        self.data_sources.append(DatabaseOptions(options_path))
        self.data_sources.append(DatabaseLogs(log_path))
        db_options_parser = DatabaseOptions(options_path)
        self.column_families = db_options_parser.get_column_families()
        db_logs_parser = DatabaseLogs(log_path, self.column_families)
        self.data_sources = [db_options_parser, db_logs_parser]

    def test_condition_conjunctions(self):
        conditions_dict = self.db_rules.get_conditions_dict()
@ -105,10 +115,13 @@ class TestConditionsConjunctions(unittest.TestCase):
        for cond in conditions_dict.values():
            self.assertFalse(cond.is_triggered(), repr(cond))
        for rule in rules_dict.values():
            self.assertFalse(rule.is_triggered(conditions_dict), repr(rule))
            self.assertFalse(
                rule.is_triggered(conditions_dict, self.column_families),
                repr(rule)
            )

        # Trigger the conditions as per the data sources.
        trigger_conditions(self.data_sources, conditions_dict)
        self.db_rules.trigger_conditions(self.data_sources)

        # Check for the conditions
        conds_triggered = ['log-1-true', 'log-2-true', 'log-3-true']
@ -125,14 +138,16 @@ class TestConditionsConjunctions(unittest.TestCase):
            'multiple-conds-one-false',
            'multiple-conds-all-false'
        ]
        for rule in rules_triggered:
        for rule_name in rules_triggered:
            rule = rules_dict[rule_name]
            self.assertTrue(
                rules_dict[rule].is_triggered(conditions_dict),
                rule.is_triggered(conditions_dict, self.column_families),
                repr(rule)
            )
        for rule in rules_not_triggered:
        for rule_name in rules_not_triggered:
            rule = rules_dict[rule_name]
            self.assertFalse(
                rules_dict[rule].is_triggered(conditions_dict),
                rule.is_triggered(conditions_dict, self.column_families),
                repr(rule)
            )
@ -191,7 +206,7 @@ class TestParsingErrors(unittest.TestCase):
        ini_path = os.path.join(self.this_path, 'input_files/rules_err2.ini')
        db_rules = RulesSpec(ini_path)
        regex = '.*provide source for condition.*'
        with self.assertRaisesRegex(ValueError, regex):
        with self.assertRaisesRegex(NotImplementedError, regex):
            db_rules.load_rules_from_spec()

    def test_suggestion_missing_action(self):
@ -204,7 +219,7 @@ class TestParsingErrors(unittest.TestCase):
    def test_section_no_name(self):
        ini_path = os.path.join(self.this_path, 'input_files/rules_err4.ini')
        db_rules = RulesSpec(ini_path)
        regex = 'Parsing error: section header be like:.*'
        regex = 'Parsing error: needed section header:.*'
        with self.assertRaisesRegex(ValueError, regex):
            db_rules.load_rules_from_spec()
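The expected error messages above ("provide source for condition" raising NotImplementedError, and "Parsing error: needed section header" raising ValueError) are the behavior these tests pin down. A toy stand-in, not the advisor's code, that would satisfy the same two checks:

import re


class ToyCondition:
    # Illustrative only: a condition must name a data source.
    def __init__(self, name):
        self.name = name
        self.source = None

    def perform_checks(self):
        if not self.source:
            raise NotImplementedError(
                'please provide source for condition: ' + self.name
            )


def parse_section_header(line):
    # Illustrative only: headers must look like [Condition "some-name"].
    match = re.match(r'\[(\w+)\s+"(.+)"\]\s*$', line)
    if not match:
        raise ValueError('Parsing error: needed section header: ' + line)
    return match.group(1), match.group(2)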