134a52e144
Summary: In https://github.com/facebook/rocksdb/pull/3934 we introduced advisor scripts that make suggestions in the config options based on the log file and stats from a run of rocksdb. The optimizer runs the advisor on a benchmark application in a loop and automatically applies the suggested changes until the config options are optimized. This is a work in progress and the patch is the initial skeleton for the optimizer. The sample application that is run in the loop is currently dbbench. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4169 Reviewed By: maysamyabandeh Differential Revision: D9023671 Pulled By: poojam23 fbshipit-source-id: a6192d475c462cf6eb2b316716f97cb400fcb64d
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

from abc import ABC, abstractmethod
from advisor.db_log_parser import DataSource, NO_COL_FAMILY
from advisor.db_timeseries_parser import TimeSeriesData
from enum import Enum
from advisor.ini_parser import IniParser
import re

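# A rules spec is an INI-style file made up of [Rule], [Suggestion] and
# [Condition] sections. The classes below are the in-memory form of those
# sections; RulesSpec, at the bottom of this file, parses the spec and ties
# each Rule to the Conditions and Suggestions it names.
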
class Section(ABC):
    def __init__(self, name):
        self.name = name

    @abstractmethod
    def set_parameter(self, key, value):
        pass

    @abstractmethod
    def perform_checks(self):
        pass


class Rule(Section):
    def __init__(self, name):
        super().__init__(name)
        self.conditions = None
        self.suggestions = None
        self.overlap_time_seconds = None
        self.trigger_entities = None
        self.trigger_column_families = None

    def set_parameter(self, key, value):
        # If the Rule is associated with a single suggestion/condition, then
        # value will be a string and not a list. Hence, convert it to a single
        # element list before storing it in self.suggestions or
        # self.conditions.
        if key == 'conditions':
            if isinstance(value, str):
                self.conditions = [value]
            else:
                self.conditions = value
        elif key == 'suggestions':
            if isinstance(value, str):
                self.suggestions = [value]
            else:
                self.suggestions = value
        elif key == 'overlap_time_period':
            self.overlap_time_seconds = value

    def get_suggestions(self):
        return self.suggestions

    def perform_checks(self):
        if not self.conditions or len(self.conditions) < 1:
            raise ValueError(
                self.name + ': rule must have at least one condition'
            )
        if not self.suggestions or len(self.suggestions) < 1:
            raise ValueError(
                self.name + ': rule must have at least one suggestion'
            )
        if self.overlap_time_seconds:
            if len(self.conditions) != 2:
                raise ValueError(
                    self.name + ': rule must be associated with 2 conditions'
                    ' in order to check for a time dependency between them'
                )
            time_format = r'^\d+[smhd]$'
            if not re.match(
                time_format, self.overlap_time_seconds, re.IGNORECASE
            ):
                raise ValueError(
                    self.name + r': overlap_time_seconds format: \d+[smhd]'
                )
            else:  # convert to seconds
                in_seconds = int(self.overlap_time_seconds[:-1])
                if self.overlap_time_seconds[-1] == 'm':
                    in_seconds *= 60
                elif self.overlap_time_seconds[-1] == 'h':
                    in_seconds *= (60 * 60)
                elif self.overlap_time_seconds[-1] == 'd':
                    in_seconds *= (24 * 60 * 60)
                self.overlap_time_seconds = in_seconds
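
    # Example of the overlap check done by get_overlap_timestamps() below:
    # with overlap_time_seconds = 10 and a condition-1 trigger at epoch 100,
    # the lower bound is 90 and the acceptance window is [90, 110]; the first
    # condition-2 trigger epoch that falls inside such a window is paired with
    # the corresponding condition-1 epoch and returned as
    # (condition-2 epoch, condition-1 epoch).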
    def get_overlap_timestamps(self, key1_trigger_epochs, key2_trigger_epochs):
        # This method takes in two timeseries, i.e. the timestamps at which
        # the rule's two TIME_SERIES conditions were triggered, and finds
        # (if present) the first pair of timestamps at which the two
        # conditions were triggered within 'overlap_time_seconds' of each
        # other.
        key1_lower_bounds = [
            epoch - self.overlap_time_seconds
            for epoch in key1_trigger_epochs
        ]
        key1_lower_bounds.sort()
        key2_trigger_epochs.sort()
        trigger_ix = 0
        overlap_pair = None
        for key1_lb in key1_lower_bounds:
            # check the index bound before subscripting so that we do not run
            # past the end of key2_trigger_epochs
            while (
                trigger_ix < len(key2_trigger_epochs) and
                key2_trigger_epochs[trigger_ix] < key1_lb
            ):
                trigger_ix += 1
            if trigger_ix >= len(key2_trigger_epochs):
                break
            if (
                key2_trigger_epochs[trigger_ix] <=
                key1_lb + (2 * self.overlap_time_seconds)
            ):
                overlap_pair = (
                    key2_trigger_epochs[trigger_ix],
                    key1_lb + self.overlap_time_seconds
                )
                break
        return overlap_pair

    def get_trigger_entities(self):
        return self.trigger_entities

    def get_trigger_column_families(self):
        return self.trigger_column_families
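
    # is_triggered() operates in one of two modes:
    # 1. if 'overlap_time_period' was given, the rule must have exactly two
    #    TIME_SERIES conditions and it fires only for entities where those two
    #    conditions triggered within overlap_time_seconds of each other;
    # 2. otherwise the rule fires only if every one of its conditions
    #    triggered, and its scope is narrowed to the column families and
    #    entities common to all of those conditions' triggers.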
    def is_triggered(self, conditions_dict, column_families):
        if self.overlap_time_seconds:
            condition1 = conditions_dict[self.conditions[0]]
            condition2 = conditions_dict[self.conditions[1]]
            if not (
                condition1.get_data_source() is DataSource.Type.TIME_SERIES and
                condition2.get_data_source() is DataSource.Type.TIME_SERIES
            ):
                raise ValueError(self.name + ': need 2 timeseries conditions')

            map1 = condition1.get_trigger()
            map2 = condition2.get_trigger()
            if not (map1 and map2):
                return False

            self.trigger_entities = {}
            is_triggered = False
            entity_intersection = (
                set(map1.keys()).intersection(set(map2.keys()))
            )
            for entity in entity_intersection:
                overlap_timestamps_pair = (
                    self.get_overlap_timestamps(
                        list(map1[entity].keys()), list(map2[entity].keys())
                    )
                )
                if overlap_timestamps_pair:
                    self.trigger_entities[entity] = overlap_timestamps_pair
                    is_triggered = True
            if is_triggered:
                self.trigger_column_families = set(column_families)
            return is_triggered
        else:
            all_conditions_triggered = True
            self.trigger_column_families = set(column_families)
            for cond_name in self.conditions:
                cond = conditions_dict[cond_name]
                if not cond.get_trigger():
                    all_conditions_triggered = False
                    break
                if (
                    cond.get_data_source() is DataSource.Type.LOG or
                    cond.get_data_source() is DataSource.Type.DB_OPTIONS
                ):
                    cond_col_fam = set(cond.get_trigger().keys())
                    if NO_COL_FAMILY in cond_col_fam:
                        cond_col_fam = set(column_families)
                    self.trigger_column_families = (
                        self.trigger_column_families.intersection(cond_col_fam)
                    )
                elif cond.get_data_source() is DataSource.Type.TIME_SERIES:
                    cond_entities = set(cond.get_trigger().keys())
                    if self.trigger_entities is None:
                        self.trigger_entities = cond_entities
                    else:
                        self.trigger_entities = (
                            self.trigger_entities.intersection(cond_entities)
                        )
                if not (self.trigger_entities or self.trigger_column_families):
                    all_conditions_triggered = False
                    break
            if not all_conditions_triggered:  # clean up if rule not triggered
                self.trigger_column_families = None
                self.trigger_entities = None
            return all_conditions_triggered

    def __repr__(self):
        # Append conditions
        rule_string = "Rule: " + self.name + " has conditions:: "
        is_first = True
        for cond in self.conditions:
            if is_first:
                rule_string += cond
                is_first = False
            else:
                rule_string += (" AND " + cond)
        # Append suggestions
        rule_string += "\nsuggestions:: "
        is_first = True
        for sugg in self.suggestions:
            if is_first:
                rule_string += sugg
                is_first = False
            else:
                rule_string += (", " + sugg)
        if self.trigger_entities:
            rule_string += (', entities:: ' + str(self.trigger_entities))
        if self.trigger_column_families:
            rule_string += (', col_fam:: ' + str(self.trigger_column_families))
        # Return constructed string
        return rule_string
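

# A Suggestion is the advice attached to a Rule. Illustratively (the option
# name below is only an example), the keys understood by set_parameter() are:
#
#   option = DBOptions.stats_dump_period_sec  # option to tune, prefixed by
#                                             # its OPTIONS-file section type
#   action = set                              # one of Action: set, increase,
#                                             # decrease
#   suggested_values = 600                    # required when action is 'set'
#   description = free-form advice text       # alternative to option/action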
class Suggestion(Section):
    class Action(Enum):
        set = 1
        increase = 2
        decrease = 3

    def __init__(self, name):
        super().__init__(name)
        self.option = None
        self.action = None
        self.suggested_values = None
        self.description = None

    def set_parameter(self, key, value):
        if key == 'option':
            # Note:
            # case 1: 'option' is supported by the Rocksdb OPTIONS file; in
            # this case the option belongs to one of the sections in the
            # config file and its name is prefixed by "<section_type>."
            # case 2: 'option' is not supported by the Rocksdb OPTIONS file;
            # the option is not expected to have the character '.' in its name
            self.option = value
        elif key == 'action':
            if self.option and not value:
                raise ValueError(self.name + ': provide action for option')
            self.action = self.Action[value]
        elif key == 'suggested_values':
            if isinstance(value, str):
                self.suggested_values = [value]
            else:
                self.suggested_values = value
        elif key == 'description':
            self.description = value

    def perform_checks(self):
        if not self.description:
            if not self.option:
                raise ValueError(self.name + ': provide option or description')
            if not self.action:
                raise ValueError(self.name + ': provide action for option')
            if self.action is self.Action.set and not self.suggested_values:
                raise ValueError(
                    self.name + ': provide suggested value for option'
                )

    def __repr__(self):
        sugg_string = "Suggestion: " + self.name
        if self.description:
            sugg_string += (' description : ' + self.description)
        else:
            sugg_string += (
                ' option : ' + self.option + ' action : ' + self.action.name
            )
            if self.suggested_values:
                sugg_string += (
                    ' suggested_values : ' + str(self.suggested_values)
                )
        return sugg_string


class Condition(Section):
    def __init__(self, name):
        super().__init__(name)
        self.data_source = None
        self.trigger = None

    def perform_checks(self):
        if not self.data_source:
            raise ValueError(self.name + ': condition not tied to data source')

    def set_data_source(self, data_source):
        self.data_source = data_source

    def get_data_source(self):
        return self.data_source

    def reset_trigger(self):
        self.trigger = None

    def set_trigger(self, condition_trigger):
        self.trigger = condition_trigger

    def get_trigger(self):
        return self.trigger

    def is_triggered(self):
        if self.trigger:
            return True
        return False

    def set_parameter(self, key, value):
        # must be defined by the subclass
        raise NotImplementedError(self.name + ': provide source for condition')
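

# The Condition subclasses below are not constructed directly. While parsing,
# RulesSpec first builds a plain Condition; once the section's 'source' key is
# read, the matching subclass's create() records the DataSource.Type and then
# re-tags the existing instance by reassigning __class__, so any parameters
# already parsed onto that object are preserved.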
class LogCondition(Condition):
    @classmethod
    def create(cls, base_condition):
        base_condition.set_data_source(DataSource.Type['LOG'])
        base_condition.__class__ = cls
        return base_condition

    def set_parameter(self, key, value):
        if key == 'regex':
            self.regex = value

    def perform_checks(self):
        super().perform_checks()
        if not self.regex:
            raise ValueError(self.name + ': provide regex for log condition')

    def __repr__(self):
        log_cond_str = "LogCondition: " + self.name
        log_cond_str += (" regex: " + self.regex)
        # if self.trigger:
        #     log_cond_str += (" trigger: " + str(self.trigger))
        return log_cond_str


class OptionCondition(Condition):
    @classmethod
    def create(cls, base_condition):
        base_condition.set_data_source(DataSource.Type['DB_OPTIONS'])
        base_condition.__class__ = cls
        return base_condition

    def set_parameter(self, key, value):
        if key == 'options':
            if isinstance(value, str):
                self.options = [value]
            else:
                self.options = value
        elif key == 'evaluate':
            self.eval_expr = value

    def perform_checks(self):
        super().perform_checks()
        if not self.options:
            raise ValueError(self.name + ': options missing in condition')
        if not self.eval_expr:
            raise ValueError(self.name + ': expression missing in condition')

    def __repr__(self):
        opt_cond_str = "OptionCondition: " + self.name
        opt_cond_str += (" options: " + str(self.options))
        opt_cond_str += (" expression: " + self.eval_expr)
        if self.trigger:
            opt_cond_str += (" trigger: " + str(self.trigger))
        return opt_cond_str
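

# A TimeSeriesCondition is evaluated against statistics gathered over time.
# Two behaviors are supported (see TimeSeriesData.Behavior): 'bursty' needs a
# single key plus a rate_threshold, with window_sec defaulting to 300 when
# unset, while 'evaluate_expression' needs an 'evaluate' expression and may
# also carry an aggregation_op.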
class TimeSeriesCondition(Condition):
    @classmethod
    def create(cls, base_condition):
        base_condition.set_data_source(DataSource.Type['TIME_SERIES'])
        base_condition.__class__ = cls
        return base_condition

    def set_parameter(self, key, value):
        if key == 'keys':
            if isinstance(value, str):
                self.keys = [value]
            else:
                self.keys = value
        elif key == 'behavior':
            self.behavior = TimeSeriesData.Behavior[value]
        elif key == 'rate_threshold':
            self.rate_threshold = float(value)
        elif key == 'window_sec':
            self.window_sec = int(value)
        elif key == 'evaluate':
            self.expression = value
        elif key == 'aggregation_op':
            self.aggregation_op = TimeSeriesData.AggregationOperator[value]

    def perform_checks(self):
        if not self.keys:
            raise ValueError(self.name + ': specify timeseries key')
        if not self.behavior:
            raise ValueError(self.name + ': specify triggering behavior')
        if self.behavior is TimeSeriesData.Behavior.bursty:
            if not self.rate_threshold:
                raise ValueError(self.name + ': specify rate burst threshold')
            if not self.window_sec:
                self.window_sec = 300  # default window length is 5 minutes
            if len(self.keys) > 1:
                raise ValueError(self.name + ': specify only one key')
        elif self.behavior is TimeSeriesData.Behavior.evaluate_expression:
            if not self.expression:
                raise ValueError(self.name + ': specify evaluation expression')
        else:
            raise ValueError(self.name + ': trigger behavior not supported')

    def __repr__(self):
        ts_cond_str = "TimeSeriesCondition: " + self.name
        ts_cond_str += (" statistics: " + str(self.keys))
        ts_cond_str += (" behavior: " + self.behavior.name)
        if self.behavior is TimeSeriesData.Behavior.bursty:
            ts_cond_str += (" rate_threshold: " + str(self.rate_threshold))
            ts_cond_str += (" window_sec: " + str(self.window_sec))
        if self.behavior is TimeSeriesData.Behavior.evaluate_expression:
            ts_cond_str += (" expression: " + self.expression)
            if hasattr(self, 'aggregation_op'):
                ts_cond_str += (" aggregation_op: " + self.aggregation_op.name)
        if self.trigger:
            ts_cond_str += (" trigger: " + str(self.trigger))
        return ts_cond_str
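

# RulesSpec parses the whole spec file. A Rule section names its Condition and
# Suggestion sections through its 'conditions' and 'suggestions' keys, and a
# Condition section picks its concrete subclass through its 'source' key
# (LOG, OPTIONS or TIME_SERIES). A schematic fragment (illustrative only, not
# a verbatim excerpt of any shipped rules file):
#
#   [Rule "stall-rule"]
#   conditions = stall-cond
#   suggestions = raise-flushes
#
#   [Condition "stall-cond"]
#   source = LOG
#   regex = Stalling writes
#
#   [Suggestion "raise-flushes"]
#   option = DBOptions.max_background_flushes
#   action = increase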
class RulesSpec:
    def __init__(self, rules_path):
        self.file_path = rules_path

    def initialise_fields(self):
        self.rules_dict = {}
        self.conditions_dict = {}
        self.suggestions_dict = {}

    def perform_section_checks(self):
        for rule in self.rules_dict.values():
            rule.perform_checks()
        for cond in self.conditions_dict.values():
            cond.perform_checks()
        for sugg in self.suggestions_dict.values():
            sugg.perform_checks()

    def load_rules_from_spec(self):
        self.initialise_fields()
        with open(self.file_path, 'r') as db_rules:
            curr_section = None
            for line in db_rules:
                line = IniParser.remove_trailing_comment(line)
                if not line:
                    continue
                element = IniParser.get_element(line)
                if element is IniParser.Element.comment:
                    continue
                elif element is not IniParser.Element.key_val:
                    curr_section = element  # it's a new IniParser header
                    section_name = IniParser.get_section_name(line)
                    if element is IniParser.Element.rule:
                        new_rule = Rule(section_name)
                        self.rules_dict[section_name] = new_rule
                    elif element is IniParser.Element.cond:
                        new_cond = Condition(section_name)
                        self.conditions_dict[section_name] = new_cond
                    elif element is IniParser.Element.sugg:
                        new_suggestion = Suggestion(section_name)
                        self.suggestions_dict[section_name] = new_suggestion
                elif element is IniParser.Element.key_val:
                    key, value = IniParser.get_key_value_pair(line)
                    if curr_section is IniParser.Element.rule:
                        new_rule.set_parameter(key, value)
                    elif curr_section is IniParser.Element.cond:
                        if key == 'source':
                            if value == 'LOG':
                                new_cond = LogCondition.create(new_cond)
                            elif value == 'OPTIONS':
                                new_cond = OptionCondition.create(new_cond)
                            elif value == 'TIME_SERIES':
                                new_cond = TimeSeriesCondition.create(new_cond)
                        else:
                            new_cond.set_parameter(key, value)
                    elif curr_section is IniParser.Element.sugg:
                        new_suggestion.set_parameter(key, value)

    def get_rules_dict(self):
        return self.rules_dict

    def get_conditions_dict(self):
        return self.conditions_dict

    def get_suggestions_dict(self):
        return self.suggestions_dict

    def get_triggered_rules(self, data_sources, column_families):
        self.trigger_conditions(data_sources)
        triggered_rules = []
        for rule in self.rules_dict.values():
            if rule.is_triggered(self.conditions_dict, column_families):
                triggered_rules.append(rule)
        return triggered_rules

    def trigger_conditions(self, data_sources):
        for source_type in data_sources:
            cond_subset = [
                cond
                for cond in self.conditions_dict.values()
                if cond.get_data_source() is source_type
            ]
            if not cond_subset:
                continue
            for source in data_sources[source_type]:
                source.check_and_trigger_conditions(cond_subset)

    def print_rules(self, rules):
        for rule in rules:
            print('\nRule: ' + rule.name)
            for cond_name in rule.conditions:
                print(repr(self.conditions_dict[cond_name]))
            for sugg_name in rule.suggestions:
                print(repr(self.suggestions_dict[sugg_name]))
            if rule.trigger_entities:
                print('scope: entities:')
                print(rule.trigger_entities)
            if rule.trigger_column_families:
                print('scope: col_fam:')
                print(rule.trigger_column_families)
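

# Typical driver flow (a sketch only; the path and the data_sources /
# column_families values are placeholders, and in practice they come from the
# advisor's log, options and stats parsers, which live outside this module):
#
#   spec = RulesSpec('/path/to/rules.ini')
#   spec.load_rules_from_spec()
#   spec.perform_section_checks()
#   triggered = spec.get_triggered_rules(data_sources, column_families)
#   spec.print_rules(triggered)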