2013-10-16 23:59:46 +02:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
2011-03-18 23:37:00 +01:00
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "db/table_cache.h"
|
|
|
|
|
|
|
|
#include "db/filename.h"
|
2014-01-07 05:29:17 +01:00
|
|
|
#include "db/version_edit.h"
|
2013-02-25 22:58:34 +01:00
|
|
|
|
2013-08-23 17:38:13 +02:00
|
|
|
#include "rocksdb/statistics.h"
|
2014-01-28 06:58:46 +01:00
|
|
|
#include "table/table_reader.h"
|
2011-03-18 23:37:00 +01:00
|
|
|
#include "util/coding.h"
|
2013-06-07 19:02:28 +02:00
|
|
|
#include "util/stop_watch.h"
|
2011-03-18 23:37:00 +01:00
|
|
|
|
2013-10-04 06:49:15 +02:00
|
|
|
namespace rocksdb {
|
2011-03-18 23:37:00 +01:00
|
|
|
|
|
|
|
static void DeleteEntry(const Slice& key, void* value) {
|
2013-10-30 18:52:33 +01:00
|
|
|
TableReader* table_reader = reinterpret_cast<TableReader*>(value);
|
|
|
|
delete table_reader;
|
2011-03-18 23:37:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void UnrefEntry(void* arg1, void* arg2) {
|
|
|
|
Cache* cache = reinterpret_cast<Cache*>(arg1);
|
|
|
|
Cache::Handle* h = reinterpret_cast<Cache::Handle*>(arg2);
|
|
|
|
cache->Release(h);
|
|
|
|
}
|
|
|
|
|
2014-01-02 19:29:48 +01:00
|
|
|
// Builds the cache key for a table file: a Slice over the raw bytes of
// *file_number (sizeof(uint64_t) bytes, in the host's byte order).
// NOTE(review): the Slice is constructed from the caller's pointer, so
// presumably *file_number must stay alive while the Slice is in use -- confirm
// against Slice's contract.
static Slice GetSliceForFileNumber(uint64_t* file_number) {
  const char* raw_bytes = reinterpret_cast<const char*>(file_number);
  return Slice(raw_bytes, sizeof(*file_number));
}
|
|
|
|
|
2011-03-18 23:37:00 +01:00
|
|
|
// Sets up a table cache for the database `dbname`.
//   options:         source of the Env plus the table-cache tuning knobs read
//                    below; the pointer itself is retained in options_.
//   storage_options: copied into storage_options_ and used when this class
//                    opens table files on behalf of Get()/PrefixMayMatch().
//   entries:         forwarded as the first argument of NewLRUCache
//                    (presumably the cache capacity -- confirm).
TableCache::TableCache(const std::string& dbname,
                       const Options* options,
                       const EnvOptions& storage_options,
                       int entries)
    : env_(options->env),
      dbname_(dbname),
      options_(options),
      storage_options_(storage_options),
      cache_(NewLRUCache(entries,
                         options->table_cache_numshardbits,
                         options->table_cache_remove_scan_count_limit)) {}
|
2011-03-18 23:37:00 +01:00
|
|
|
|
|
|
|
// Nothing to do explicitly; the members clean up after themselves.
TableCache::~TableCache() = default;
|
|
|
|
|
2014-01-07 05:29:17 +01:00
|
|
|
// Returns the TableReader stored in the cache entry that `handle` refers to.
// The handle is not released here.
TableReader* TableCache::GetTableReaderFromHandle(Cache::Handle* handle) {
  void* entry_value = cache_->Value(handle);
  return static_cast<TableReader*>(entry_value);
}
|
|
|
|
|
|
|
|
// Hands `handle` back to the underlying cache. Counterpart to a successful
// FindTable() for callers that do not attach the handle to an iterator.
void TableCache::ReleaseHandle(Cache::Handle* handle) {
  cache_->Release(handle);
}
|
|
|
|
|
2013-03-15 01:00:04 +01:00
|
|
|
// Locates (or creates) the cache entry holding the TableReader for table file
// `file_number`.
//
//   toptions:            options used to open the file on a cache miss.
//   internal_comparator: forwarded to the table factory on a cache miss.
//   file_number/size:    identify the table file; the number is the cache key.
//   handle:              out; receives the cache handle. Left null when the
//                        entry is not cached and could not be loaded.
//   table_io:            optional out; set to true when the file had to be
//                        opened because the entry was not cached.
//   no_io:               when true, a cache miss returns Status::Incomplete
//                        instead of touching storage.
//
// Callers in this file give the handle back through ReleaseHandle() or the
// UnrefEntry cleanup callback.
Status TableCache::FindTable(const EnvOptions& toptions,
                             const InternalKeyComparator& internal_comparator,
                             uint64_t file_number, uint64_t file_size,
                             Cache::Handle** handle, bool* table_io,
                             const bool no_io) {
  const Slice key = GetSliceForFileNumber(&file_number);
  *handle = cache_->Lookup(key);
  if (*handle != nullptr) {
    // Cache hit: nothing else to do.
    return Status();
  }

  if (no_io) {
    // Don't do IO; report the miss with a not-found style status.
    return Status::Incomplete("Table not found in table_cache, no_io is set");
  }
  if (table_io != nullptr) {
    *table_io = true;  // we had to do IO from storage
  }

  const std::string fname = TableFileName(dbname_, file_number);
  unique_ptr<RandomAccessFile> file;
  unique_ptr<TableReader> table_reader;

  Status s = env_->NewRandomAccessFile(fname, &file, toptions);
  // Counted for every open attempt, whether or not it succeeded.
  RecordTick(options_->statistics.get(), NO_FILE_OPENS);

  if (s.ok()) {
    if (options_->advise_random_on_open) {
      file->Hint(RandomAccessFile::RANDOM);
    }
    // The stopwatch lives only for this scope, i.e. around table-reader
    // creation.
    StopWatch sw(env_, options_->statistics.get(), TABLE_OPEN_IO_MICROS);
    s = options_->table_factory->NewTableReader(
        *options_, toptions, internal_comparator, std::move(file), file_size,
        &table_reader);
  }

  if (s.ok()) {
    // Ownership of the file moved into the table reader above.
    assert(file.get() == nullptr);
    *handle = cache_->Insert(key, table_reader.release(), 1, &DeleteEntry);
  } else {
    assert(table_reader == nullptr);
    RecordTick(options_->statistics.get(), NO_FILE_ERRORS);
    // We do not cache error results so that if the error is transient,
    // or somebody repairs the file, we recover automatically.
  }
  return s;
}
|
2011-03-18 23:37:00 +01:00
|
|
|
|
2012-04-17 17:36:46 +02:00
|
|
|
// Creates an iterator over the table file described by `file_meta`.
//
//   options:          read options; options.read_tier == kBlockCacheTier
//                     forbids opening the file when it is not already cached.
//   toptions:         options used if the file has to be opened.
//   icomparator:      forwarded to FindTable().
//   file_meta:        file number/size, plus an optional already-obtained
//                     cache handle (table_reader_handle).
//   table_reader_ptr: optional out; receives the TableReader backing the
//                     returned iterator, or nullptr if the lookup failed.
//   for_compaction:   when true, SetupForCompaction() is invoked on the table
//                     reader.
//
// On lookup failure an error iterator carrying the status is returned.
Iterator* TableCache::NewIterator(const ReadOptions& options,
                                  const EnvOptions& toptions,
                                  const InternalKeyComparator& icomparator,
                                  const FileMetaData& file_meta,
                                  TableReader** table_reader_ptr,
                                  bool for_compaction) {
  if (table_reader_ptr != nullptr) {
    *table_reader_ptr = nullptr;
  }

  Cache::Handle* handle = file_meta.table_reader_handle;
  // True when file_meta carries no handle, so one is looked up here and must
  // be released together with the iterator.
  const bool looked_up_here = (handle == nullptr);
  if (looked_up_here) {
    const bool no_io = (options.read_tier == kBlockCacheTier);
    const Status lookup =
        FindTable(toptions, icomparator, file_meta.number, file_meta.file_size,
                  &handle, nullptr, no_io);
    if (!lookup.ok()) {
      return NewErrorIterator(lookup);
    }
  }

  TableReader* const table_reader = GetTableReaderFromHandle(handle);
  Iterator* const result = table_reader->NewIterator(options);
  if (looked_up_here) {
    // Release the cache handle when the iterator runs its cleanups.
    result->RegisterCleanup(&UnrefEntry, cache_.get(), handle);
  }
  if (table_reader_ptr != nullptr) {
    *table_reader_ptr = table_reader;
  }
  if (for_compaction) {
    table_reader->SetupForCompaction();
  }
  return result;
}
|
|
|
|
|
2012-04-17 17:36:46 +02:00
|
|
|
// Looks up key `k` in the table file described by `file_meta`.
//
//   options:             read options; options.read_tier == kBlockCacheTier
//                        forbids opening the file when it is not cached.
//   internal_comparator: forwarded to FindTable().
//   file_meta:           file number/size, plus an optional already-obtained
//                        cache handle (table_reader_handle).
//   k, arg, saver:       forwarded unchanged to TableReader::Get().
//   table_io:            optional out; see FindTable().
//   mark_key_may_exist:  callback invoked with `arg` when the table could not
//                        be consulted because IO was not allowed.
//
// Returns the status of TableReader::Get() when the table reader is
// available. If the table is not cached and IO is forbidden, the key is
// reported as "may exist" through mark_key_may_exist and OK is returned.
// Otherwise the FindTable() status is returned.
Status TableCache::Get(const ReadOptions& options,
                       const InternalKeyComparator& internal_comparator,
                       const FileMetaData& file_meta, const Slice& k, void* arg,
                       bool (*saver)(void*, const ParsedInternalKey&,
                                     const Slice&, bool),
                       bool* table_io, void (*mark_key_may_exist)(void*)) {
  // Evaluated once so that the lookup and the fallback below agree on what
  // "no IO" means (the original tested the enum's truthiness in one place).
  const bool no_io = (options.read_tier == kBlockCacheTier);

  Cache::Handle* handle = file_meta.table_reader_handle;
  Status s;
  if (!handle) {
    s = FindTable(storage_options_, internal_comparator, file_meta.number,
                  file_meta.file_size, &handle, table_io, no_io);
  }

  if (s.ok()) {
    TableReader* t = GetTableReaderFromHandle(handle);
    s = t->Get(options, k, arg, saver, mark_key_may_exist);
    if (!file_meta.table_reader_handle) {
      // Only release handles obtained by the lookup above; a handle supplied
      // through file_meta is not ours to release.
      ReleaseHandle(handle);
    }
  } else if (no_io && s.IsIncomplete() && mark_key_may_exist != nullptr) {
    // Couldn't find the table in the cache, but treat as kFound if no_io is
    // set.
    // NOTE(review): without a callback there is no way to record "may exist",
    // so the Incomplete status is returned instead of calling through a null
    // function pointer -- confirm this is the desired fallback.
    (*mark_key_may_exist)(arg);
    return Status::OK();
  }
  return s;
}
|
|
|
|
|
2013-08-23 23:49:57 +02:00
|
|
|
bool TableCache::PrefixMayMatch(const ReadOptions& options,
|
2014-01-27 22:53:22 +01:00
|
|
|
const InternalKeyComparator& icomparator,
|
|
|
|
uint64_t file_number, uint64_t file_size,
|
|
|
|
const Slice& internal_prefix, bool* table_io) {
|
2013-08-23 23:49:57 +02:00
|
|
|
Cache::Handle* handle = nullptr;
|
2014-01-27 22:53:22 +01:00
|
|
|
Status s = FindTable(storage_options_, icomparator, file_number, file_size,
|
|
|
|
&handle, table_io);
|
2013-08-23 23:49:57 +02:00
|
|
|
bool may_match = true;
|
|
|
|
if (s.ok()) {
|
2014-01-07 05:29:17 +01:00
|
|
|
TableReader* t = GetTableReaderFromHandle(handle);
|
2013-08-23 23:49:57 +02:00
|
|
|
may_match = t->PrefixMayMatch(internal_prefix);
|
2014-01-07 05:29:17 +01:00
|
|
|
ReleaseHandle(handle);
|
2013-08-23 23:49:57 +02:00
|
|
|
}
|
|
|
|
return may_match;
|
|
|
|
}
|
|
|
|
|
2011-03-18 23:37:00 +01:00
|
|
|
void TableCache::Evict(uint64_t file_number) {
|
2014-01-02 19:29:48 +01:00
|
|
|
cache_->Erase(GetSliceForFileNumber(&file_number));
|
2011-03-18 23:37:00 +01:00
|
|
|
}
|
|
|
|
|
2013-10-04 06:49:15 +02:00
|
|
|
} // namespace rocksdb
|