Task #3071144 Enhance ldb (db dump tool for leveldb) to report row counters for each row type

Summary: Added an option --count_delim=<char> which takes the given character as delimiter ('.' by default) and reports count of each row type found in the db

Test Plan:
1. Created test in file (for DBDumperCommand) rocksdb/tools/ldb_test.py which puts various key value pair in db and checks the output using dump --count_delim ,--count_delim="." and --count_delim=",".
2. Created test in file (for InternalDumperCommand) rocksdb/tools/ldb_test.py which puts various key value pair in db and checks the output using dump --count_delim ,--count_delim="." and --count_delim=",".
3. Manually created a database with several keys of several type and verified by running the command
   ./ldb db=<path> dump --count_delim="<char>"
   ./ldb db=<path> idump --count_delim="<char>"

Reviewers: vamsi, dhruba, emayanke, kailiu

Reviewed By: vamsi

CC: leveldb

Differential Revision: https://reviews.facebook.net/D13815
This commit is contained in:
Piyush Garg 2013-11-01 13:59:14 -07:00
parent beeb74be6f
commit 1e4375d2ef
3 changed files with 122 additions and 9 deletions

View File

@ -145,6 +145,24 @@ class LDBTestCase(unittest.TestCase):
self.assertRunFAIL("batchput k1")
self.assertRunFAIL("batchput k1 v1 k2")
def testCountDelimDump(self):
print "Running testCountDelimDump..."
self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK")
self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK")
self.assertRunOK("dump --count_delim", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8")
self.assertRunOK("dump --count_delim=\".\"", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8")
self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK")
self.assertRunOK("dump --count_delim=\",\"", "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8")
def testCountDelimIDump(self):
print "Running testCountDelimIDump..."
self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK")
self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK")
self.assertRunOK("dump --count_delim", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8")
self.assertRunOK("dump --count_delim=\".\"", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8")
self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK")
self.assertRunOK("dump --count_delim=\",\"", "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8")
def testInvalidCmdLines(self):
print "Running testInvalidCmdLines..."
# db not specified

View File

@ -580,6 +580,7 @@ void PrintBucketCounts(const vector<uint64_t>& bucket_counts, int ttl_start,
}
const string InternalDumpCommand::ARG_COUNT_ONLY = "count_only";
const string InternalDumpCommand::ARG_COUNT_DELIM = "count_delim";
const string InternalDumpCommand::ARG_STATS = "stats";
const string InternalDumpCommand::ARG_INPUT_KEY_HEX = "input_key_hex";
@ -589,12 +590,14 @@ InternalDumpCommand::InternalDumpCommand(const vector<string>& params,
LDBCommand(options, flags, true,
BuildCmdLineOptions({ ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX,
ARG_FROM, ARG_TO, ARG_MAX_KEYS,
ARG_COUNT_ONLY, ARG_STATS,
ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_STATS,
ARG_INPUT_KEY_HEX})),
has_from_(false),
has_to_(false),
max_keys_(-1),
delim_("."),
count_only_(false),
count_delim_(false),
print_stats_(false),
is_input_key_hex_(false) {
@ -602,6 +605,15 @@ InternalDumpCommand::InternalDumpCommand(const vector<string>& params,
has_to_ = ParseStringOption(options, ARG_TO, &to_);
ParseIntOption(options, ARG_MAX_KEYS, max_keys_, exec_state_);
map<string, string>::const_iterator itr = options.find(ARG_COUNT_DELIM);
if (itr != options.end()) {
delim_ = itr->second;
count_delim_ = true;
// fprintf(stdout,"delim = %c\n",delim_[0]);
} else {
count_delim_ = IsFlagPresent(flags, ARG_COUNT_DELIM);
delim_=".";
}
print_stats_ = IsFlagPresent(flags, ARG_STATS);
count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY);
@ -624,6 +636,7 @@ void InternalDumpCommand::Help(string& ret) {
ret.append(" [--" + ARG_INPUT_KEY_HEX + "]");
ret.append(" [--" + ARG_MAX_KEYS + "=<N>]");
ret.append(" [--" + ARG_COUNT_ONLY + "]");
ret.append(" [--" + ARG_COUNT_DELIM + "=<char>]");
ret.append(" [--" + ARG_STATS + "]");
ret.append("\n");
}
@ -646,7 +659,10 @@ void InternalDumpCommand::DoCommand() {
exec_state_ = LDBCommandExecuteResult::FAILED("DB is not DBImpl");
return;
}
string rtype1,rtype2,row,val;
rtype2 = "";
uint64_t c=0;
uint64_t s1=0,s2=0;
// Setup internal key iterator
auto iter = unique_ptr<Iterator>(idb->TEST_NewInternalIterator());
Status st = iter->status();
@ -678,8 +694,32 @@ void InternalDumpCommand::DoCommand() {
}
++count;
int k;
if (count_delim_) {
rtype1 = "";
s1=0;
row = iter->key().ToString();
val = iter->value().ToString();
for(k=0;row[k]!='\x01' && row[k]!='\0';k++)
s1++;
for(k=0;val[k]!='\x01' && val[k]!='\0';k++)
s1++;
for(int j=0;row[j]!=delim_[0] && row[j]!='\0' && row[j]!='\x01';j++)
rtype1+=row[j];
if(rtype2.compare("") && rtype2.compare(rtype1)!=0) {
fprintf(stdout,"%s => count:%lld\tsize:%lld\n",rtype2.c_str(),
(long long)c,(long long)s2);
c=1;
s2=s1;
rtype2 = rtype1;
} else {
c++;
s2+=s1;
rtype2=rtype1;
}
}
if (!count_only_) {
if (!count_only_ && !count_delim_) {
string key = ikey.DebugString(is_key_hex_);
string value = iter->value().ToString(is_value_hex_);
std::cout << key << " => " << value << "\n";
@ -688,12 +728,16 @@ void InternalDumpCommand::DoCommand() {
// Terminate if maximum number of keys have been dumped
if (max_keys_ > 0 && count >= max_keys_) break;
}
if(count_delim_) {
fprintf(stdout,"%s => count:%lld\tsize:%lld\n", rtype2.c_str(),
(long long)c,(long long)s2);
} else
fprintf(stdout, "Internal keys in range: %lld\n", (long long) count);
}
const string DBDumperCommand::ARG_COUNT_ONLY = "count_only";
const string DBDumperCommand::ARG_COUNT_DELIM = "count_delim";
const string DBDumperCommand::ARG_STATS = "stats";
const string DBDumperCommand::ARG_TTL_BUCKET = "bucket";
@ -702,13 +746,15 @@ DBDumperCommand::DBDumperCommand(const vector<string>& params,
LDBCommand(options, flags, true,
BuildCmdLineOptions({ARG_TTL, ARG_HEX, ARG_KEY_HEX,
ARG_VALUE_HEX, ARG_FROM, ARG_TO,
ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_STATS,
ARG_TTL_START, ARG_TTL_END,
ARG_TTL_BUCKET, ARG_TIMESTAMP})),
ARG_MAX_KEYS, ARG_COUNT_ONLY,
ARG_COUNT_DELIM, ARG_STATS, ARG_TTL_START,
ARG_TTL_END, ARG_TTL_BUCKET,
ARG_TIMESTAMP})),
null_from_(true),
null_to_(true),
max_keys_(-1),
count_only_(false),
count_delim_(false),
print_stats_(false) {
map<string, string>::const_iterator itr = options.find(ARG_FROM);
@ -735,6 +781,14 @@ DBDumperCommand::DBDumperCommand(const vector<string>& params,
" has a value out-of-range");
}
}
itr = options.find(ARG_COUNT_DELIM);
if (itr != options.end()) {
delim_ = itr->second;
count_delim_ = true;
} else {
count_delim_ = IsFlagPresent(flags, ARG_COUNT_DELIM);
delim_=".";
}
print_stats_ = IsFlagPresent(flags, ARG_STATS);
count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY);
@ -757,6 +811,7 @@ void DBDumperCommand::Help(string& ret) {
ret.append(" [--" + ARG_MAX_KEYS + "=<N>]");
ret.append(" [--" + ARG_TIMESTAMP + "]");
ret.append(" [--" + ARG_COUNT_ONLY + "]");
ret.append(" [--" + ARG_COUNT_DELIM + "=<char>]");
ret.append(" [--" + ARG_STATS + "]");
ret.append(" [--" + ARG_TTL_BUCKET + "=<N>]");
ret.append(" [--" + ARG_TTL_START + "=<N>:- is inclusive]");
@ -811,11 +866,17 @@ void DBDumperCommand::DoCommand() {
bucket_size <= 0) {
bucket_size = time_range; // Will have just 1 bucket by default
}
//cretaing variables for row count of each type
string rtype1,rtype2,row,val;
rtype2 = "";
uint64_t c=0;
uint64_t s1=0,s2=0;
// At this point, bucket_size=0 => time_range=0
uint64_t num_buckets = (bucket_size >= time_range) ? 1 :
((time_range + bucket_size - 1) / bucket_size);
vector<uint64_t> bucket_counts(num_buckets, 0);
if (is_db_ttl_ && !count_only_ && timestamp_) {
if (is_db_ttl_ && !count_only_ && timestamp_ && !count_delim_) {
fprintf(stdout, "Dumping key-values from %s to %s\n",
ReadableTime(ttl_start).c_str(), ReadableTime(ttl_end).c_str());
}
@ -844,7 +905,30 @@ void DBDumperCommand::DoCommand() {
rawtime, num_buckets);
}
++count;
if (!count_only_) {
if (count_delim_) {
rtype1 = "";
row = iter->key().ToString();
val = iter->value().ToString();
s1 = row.size()+val.size();
for(int j=0;row[j]!=delim_[0] && row[j]!='\0';j++)
rtype1+=row[j];
if(rtype2.compare("") && rtype2.compare(rtype1)!=0) {
fprintf(stdout,"%s => count:%lld\tsize:%lld\n",rtype2.c_str(),
(long long )c,(long long)s2);
c=1;
s2=s1;
rtype2 = rtype1;
} else {
c++;
s2+=s1;
rtype2=rtype1;
}
}
if (!count_only_ && !count_delim_) {
if (is_db_ttl_ && timestamp_) {
fprintf(stdout, "%s ", ReadableTime(rawtime).c_str());
}
@ -854,9 +938,13 @@ void DBDumperCommand::DoCommand() {
fprintf(stdout, "%s\n", str.c_str());
}
}
if (num_buckets > 1 && is_db_ttl_) {
PrintBucketCounts(bucket_counts, ttl_start, ttl_end, bucket_size,
num_buckets);
} else if(count_delim_) {
fprintf(stdout,"%s => count:%lld\tsize:%lld\n",rtype2.c_str(),
(long long )c,(long long)s2);
} else {
fprintf(stdout, "Keys in range: %lld\n", (long long) count);
}

View File

@ -395,10 +395,13 @@ private:
bool null_to_;
string to_;
int max_keys_;
string delim_;
bool count_only_;
bool count_delim_;
bool print_stats_;
static const string ARG_COUNT_ONLY;
static const string ARG_COUNT_DELIM;
static const string ARG_STATS;
static const string ARG_TTL_BUCKET;
};
@ -421,11 +424,15 @@ private:
bool has_to_;
string to_;
int max_keys_;
string delim_;
bool count_only_;
bool count_delim_;
bool print_stats_;
bool is_input_key_hex_;
static const string ARG_DELIM;
static const string ARG_COUNT_ONLY;
static const string ARG_COUNT_DELIM;
static const string ARG_STATS;
static const string ARG_INPUT_KEY_HEX;
};