Check with PosixEnv before opening LOCK file (#3993)

Summary: Rebased and resubmitting #1831 on behalf of stevelittle. The problem is when a single process attempts to open the same DB twice, the second attempt fails due to LOCK file held. If the second attempt had opened the LOCK file, it'll now need to close it, and closing causes the file to be unlocked. Then, any subsequent attempt to open the DB will succeed, which is the wrong behavior. The solution was to track which files a process has locked in PosixEnv, and check those before opening a LOCK file. Fixes #1780. Closes https://github.com/facebook/rocksdb/pull/3993 Differential Revision: D8398984 Pulled By: ajkr fbshipit-source-id: 2755fe66950a0c9de63075f932f9e15768041918
2018-06-13 17:28:31 -07:00 · 2018-06-13 17:28:31 -07:00 · 1f32dc7d2b
commit 1f32dc7d2b
parent 7497f992e0
2 changed files with 114 additions and 29 deletions
--- a/env/env_posix.cc
+++ b/env/env_posix.cc
@ -83,28 +83,7 @@ inline mode_t GetDBFileMode(bool allow_non_owner_access) {
 static std::set<std::string> lockedFiles;
 static port::Mutex mutex_lockedFiles;

-static int LockOrUnlock(const std::string& fname, int fd, bool lock) {
-  mutex_lockedFiles.Lock();
-  if (lock) {
-    // If it already exists in the lockedFiles set, then it is already locked,
-    // and fail this lock attempt. Otherwise, insert it into lockedFiles.
-    // This check is needed because fcntl() does not detect lock conflict
-    // if the fcntl is issued by the same thread that earlier acquired
-    // this lock.
-    if (lockedFiles.insert(fname).second == false) {
-      mutex_lockedFiles.Unlock();
-      errno = ENOLCK;
-      return -1;
-    }
-  } else {
-    // If we are unlocking, then verify that we had locked it earlier,
-    // it should already exist in lockedFiles. Remove it from lockedFiles.
-    if (lockedFiles.erase(fname) != 1) {
-      mutex_lockedFiles.Unlock();
-      errno = ENOLCK;
-      return -1;
-    }
-  }
+static int LockOrUnlock(int fd, bool lock) {
  errno = 0;
  struct flock f;
  memset(&f, 0, sizeof(f));
@ -113,11 +92,7 @@ static int LockOrUnlock(const std::string& fname, int fd, bool lock) {
  f.l_start = 0;
  f.l_len = 0;        // Lock/unlock entire file
  int value = fcntl(fd, F_SETLK, &f);
-  if (value == -1 && lock) {
-    // if there is an error in locking, then remove the pathname from lockedfiles
-    lockedFiles.erase(fname);
-  }
-  mutex_lockedFiles.Unlock();
+
  return value;
 }

@ -661,6 +636,23 @@ class PosixEnv : public Env {
  virtual Status LockFile(const std::string& fname, FileLock** lock) override {
    *lock = nullptr;
    Status result;
+
+    mutex_lockedFiles.Lock();
+    // If it already exists in the lockedFiles set, then it is already locked,
+    // and fail this lock attempt. Otherwise, insert it into lockedFiles.
+    // This check is needed because fcntl() does not detect lock conflict
+    // if the fcntl is issued by the same thread that earlier acquired
+    // this lock.
+    // We must do this check *before* opening the file:
+    // Otherwise, we will open a new file descriptor. Locks are associated with
+    // a process, not a file descriptor and when *any* file descriptor is closed,
+    // all locks the process holds for that *file* are released
+    if (lockedFiles.insert(fname).second == false) {
+      mutex_lockedFiles.Unlock();
+      errno = ENOLCK;
+      return IOError("lock ", fname, errno);
+    }
+
    int fd;
    {
      IOSTATS_TIMER_GUARD(open_nanos);
@ -668,7 +660,9 @@ class PosixEnv : public Env {
    }
    if (fd < 0) {
      result = IOError("while open a file for lock", fname, errno);
-    } else if (LockOrUnlock(fname, fd, true) == -1) {
+    } else if (LockOrUnlock(fd, true) == -1) {
+      // if there is an error in locking, then remove the pathname from lockedfiles
+      lockedFiles.erase(fname);
      result = IOError("While lock file", fname, errno);
      close(fd);
    } else {
@ -678,17 +672,26 @@ class PosixEnv : public Env {
      my_lock->filename = fname;
      *lock = my_lock;
    }
+
+    mutex_lockedFiles.Unlock();
    return result;
  }

  virtual Status UnlockFile(FileLock* lock) override {
    PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock);
    Status result;
-    if (LockOrUnlock(my_lock->filename, my_lock->fd_, false) == -1) {
+    mutex_lockedFiles.Lock();
+    // If we are unlocking, then verify that we had locked it earlier,
+    // it should already exist in lockedFiles. Remove it from lockedFiles.
+    if (lockedFiles.erase(my_lock->filename) != 1) {
+      errno = ENOLCK;
+      result = IOError("unlock", my_lock->filename, errno);
+    } else if (LockOrUnlock(my_lock->fd_, false) == -1) {
      result = IOError("unlock", my_lock->filename, errno);
    }
    close(my_lock->fd_);
    delete my_lock;
+    mutex_lockedFiles.Unlock();
    return result;
  }

--- a/util/filelock_test.cc
+++ b/util/filelock_test.cc
@ -7,6 +7,7 @@
 #include "rocksdb/env.h"

 #include <vector>
+#include <fcntl.h>
 #include "util/coding.h"
 #include "util/testharness.h"

@ -33,6 +34,78 @@ class LockTest : public testing::Test {
  Status UnlockFile(FileLock* db_lock) {
    return env_->UnlockFile(db_lock);
  }
+
+  bool AssertFileIsLocked(){
+    return CheckFileLock( /* lock_expected = */ true);
+  }
+
+  bool AssertFileIsNotLocked(){
+    return CheckFileLock( /* lock_expected = */ false);
+  }
+
+  bool CheckFileLock(bool lock_expected){
+    // We need to fork to check the fcntl lock as we need
+    // to open and close the file from a different process
+    // to avoid either releasing the lock on close, or not
+    // contending for it when requesting a lock.
+
+#ifdef OS_WIN
+
+    // WaitForSingleObject and GetExitCodeProcess can do what waitpid does.
+    // TODO - implement on Windows
+    return true;
+
+#else
+
+    pid_t pid = fork();
+    if ( 0 == pid ) {
+      // child process
+      int exit_val = EXIT_FAILURE;
+      int fd = open(file_.c_str(), O_RDWR | O_CREAT, 0644);
+      if (fd < 0) {
+        // could not open file, could not check if it was locked
+        fprintf( stderr, "Open on on file %s failed.\n",file_.c_str());
+        exit(exit_val);
+      }
+
+      struct flock f;
+      memset(&f, 0, sizeof(f));
+      f.l_type = (F_WRLCK);
+      f.l_whence = SEEK_SET;
+      f.l_start = 0;
+      f.l_len = 0; // Lock/unlock entire file
+      int value = fcntl(fd, F_SETLK, &f);
+      if( value == -1 ){
+        if( lock_expected ){
+          exit_val = EXIT_SUCCESS;
+        }
+      } else {
+        if( ! lock_expected ){
+          exit_val = EXIT_SUCCESS;
+        }
+      }
+      close(fd); // lock is released for child process
+      exit(exit_val);
+    } else if (pid > 0) {
+      // parent process
+      int status;
+      while (-1 == waitpid(pid, &status, 0));
+      if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+        // child process exited with non success status
+        return false;
+      } else {
+        return true;
+      }
+    } else {
+      fprintf( stderr, "Fork failed\n" );
+      return false;
+    }
+    return false;
+
+#endif
+
+  }
+
 };
 LockTest* LockTest::current_;

@ -43,12 +116,21 @@ TEST_F(LockTest, LockBySameThread) {
  // acquire a lock on a file
  ASSERT_OK(LockFile(&lock1));

+  // check the file is locked
+  ASSERT_TRUE( AssertFileIsLocked() );
+
  // re-acquire the lock on the same file. This should fail.
  ASSERT_TRUE(LockFile(&lock2).IsIOError());

+  // check the file is locked
+  ASSERT_TRUE( AssertFileIsLocked() );
+
  // release the lock
  ASSERT_OK(UnlockFile(lock1));

+  // check the file is not locked
+  ASSERT_TRUE( AssertFileIsNotLocked() );
+
 }

 }  // namespace rocksdb