134a52e144
Summary: In https://github.com/facebook/rocksdb/pull/3934 we introduced advisor scripts that make suggestions in the config options based on the log file and stats from a run of rocksdb. The optimizer runs the advisor on a benchmark application in a loop and automatically applies the suggested changes until the config options are optimized. This is a work in progress and the patch is the initial skeleton for the optimizer. The sample application that is run in the loop is currently dbbench. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4169 Reviewed By: maysamyabandeh Differential Revision: D9023671 Pulled By: poojam23 fbshipit-source-id: a6192d475c462cf6eb2b316716f97cb400fcb64d
132 lines
4.7 KiB
Python
132 lines
4.7 KiB
Python
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
# This source code is licensed under both the GPLv2 (found in the
|
|
# COPYING file in the root directory) and Apache 2.0 License
|
|
# (found in the LICENSE.Apache file in the root directory).
|
|
|
|
from abc import ABC, abstractmethod
|
|
from calendar import timegm
|
|
from enum import Enum
|
|
import glob
|
|
import re
|
|
import time
|
|
|
|
|
|
NO_COL_FAMILY = 'DB_WIDE'
|
|
|
|
|
|
class DataSource(ABC):
|
|
class Type(Enum):
|
|
LOG = 1
|
|
DB_OPTIONS = 2
|
|
TIME_SERIES = 3
|
|
|
|
def __init__(self, type):
|
|
self.type = type
|
|
|
|
@abstractmethod
|
|
def check_and_trigger_conditions(self, conditions):
|
|
pass
|
|
|
|
|
|
class Log:
|
|
@staticmethod
|
|
def is_new_log(log_line):
|
|
# The assumption is that a new log will start with a date printed in
|
|
# the below regex format.
|
|
date_regex = '\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}'
|
|
return re.match(date_regex, log_line)
|
|
|
|
def __init__(self, log_line, column_families):
|
|
token_list = log_line.strip().split()
|
|
self.time = token_list[0]
|
|
self.context = token_list[1]
|
|
self.message = " ".join(token_list[2:])
|
|
self.column_family = None
|
|
# example log for 'default' column family:
|
|
# "2018/07/25-17:29:05.176080 7f969de68700 [db/compaction_job.cc:1634]
|
|
# [default] [JOB 3] Compacting 24@0 + 16@1 files to L1, score 6.00\n"
|
|
for col_fam in column_families:
|
|
search_for_str = '\[' + col_fam + '\]'
|
|
if re.search(search_for_str, self.message):
|
|
self.column_family = col_fam
|
|
break
|
|
if not self.column_family:
|
|
self.column_family = NO_COL_FAMILY
|
|
|
|
def get_human_readable_time(self):
|
|
# example from a log line: '2018/07/25-11:25:45.782710'
|
|
return self.time
|
|
|
|
def get_column_family(self):
|
|
return self.column_family
|
|
|
|
def get_context(self):
|
|
return self.context
|
|
|
|
def get_message(self):
|
|
return self.message
|
|
|
|
def append_message(self, remaining_log):
|
|
self.message = self.message + '\n' + remaining_log.strip()
|
|
|
|
def get_timestamp(self):
|
|
# example: '2018/07/25-11:25:45.782710' will be converted to the GMT
|
|
# Unix timestamp 1532517945 (note: this method assumes that self.time
|
|
# is in GMT)
|
|
hr_time = self.time + 'GMT'
|
|
timestamp = timegm(time.strptime(hr_time, "%Y/%m/%d-%H:%M:%S.%f%Z"))
|
|
return timestamp
|
|
|
|
def __repr__(self):
|
|
return (
|
|
'time: ' + self.time + '; context: ' + self.context +
|
|
'; col_fam: ' + self.column_family +
|
|
'; message: ' + self.message
|
|
)
|
|
|
|
|
|
class DatabaseLogs(DataSource):
|
|
def __init__(self, logs_path_prefix, column_families):
|
|
super().__init__(DataSource.Type.LOG)
|
|
self.logs_path_prefix = logs_path_prefix
|
|
self.column_families = column_families
|
|
|
|
def trigger_conditions_for_log(self, conditions, log):
|
|
# For a LogCondition object, trigger is:
|
|
# Dict[column_family_name, List[Log]]. This explains why the condition
|
|
# was triggered and for which column families.
|
|
for cond in conditions:
|
|
if re.search(cond.regex, log.get_message(), re.IGNORECASE):
|
|
trigger = cond.get_trigger()
|
|
if not trigger:
|
|
trigger = {}
|
|
if log.get_column_family() not in trigger:
|
|
trigger[log.get_column_family()] = []
|
|
trigger[log.get_column_family()].append(log)
|
|
cond.set_trigger(trigger)
|
|
|
|
def check_and_trigger_conditions(self, conditions):
|
|
for file_name in glob.glob(self.logs_path_prefix + '*'):
|
|
# TODO(poojam23): find a way to distinguish between log files
|
|
# - generated in the current experiment but are labeled 'old'
|
|
# because they LOGs exceeded the file size limit AND
|
|
# - generated in some previous experiment that are also labeled
|
|
# 'old' and were not deleted for some reason
|
|
if re.search('old', file_name, re.IGNORECASE):
|
|
continue
|
|
with open(file_name, 'r') as db_logs:
|
|
new_log = None
|
|
for line in db_logs:
|
|
if Log.is_new_log(line):
|
|
if new_log:
|
|
self.trigger_conditions_for_log(
|
|
conditions, new_log
|
|
)
|
|
new_log = Log(line, self.column_families)
|
|
else:
|
|
# To account for logs split into multiple lines
|
|
new_log.append_message(line)
|
|
# Check for the last log in the file.
|
|
if new_log:
|
|
self.trigger_conditions_for_log(conditions, new_log)
|